From 58942fcd44306852b0bb2dffe9dae2a62147dc0f Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Sun, 22 Jul 2018 22:42:51 +0100 Subject: [PATCH 01/86] Working on the assign docstring --- pandas/core/frame.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 74f760f382c76..852092e1c6e74 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3250,38 +3250,24 @@ def assign(self, **kwargs): Examples -------- - >>> df = pd.DataFrame({'A': range(1, 11), 'B': np.random.randn(10)}) + >>> df = pd.DataFrame([('or', 17.),('ca', 25)], + index=['portland', 'berkeley'], + columns=['state', 'temp_c']) Where the value is a callable, evaluated on `df`: - >>> df.assign(ln_A = lambda x: np.log(x.A)) - A B ln_A - 0 1 0.426905 0.000000 - 1 2 -0.780949 0.693147 - 2 3 -0.418711 1.098612 - 3 4 -0.269708 1.386294 - 4 5 -0.274002 1.609438 - 5 6 -0.500792 1.791759 - 6 7 1.649697 1.945910 - 7 8 -1.495604 2.079442 - 8 9 0.549296 2.197225 - 9 10 -0.758542 2.302585 + >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) + state temp_c temp_f + portland or 17.0 62.6 + berkeley ca 25.0 77.0 Where the value already exists and is inserted: - >>> newcol = np.log(df['A']) - >>> df.assign(ln_A=newcol) - A B ln_A - 0 1 0.426905 0.000000 - 1 2 -0.780949 0.693147 - 2 3 -0.418711 1.098612 - 3 4 -0.269708 1.386294 - 4 5 -0.274002 1.609438 - 5 6 -0.500792 1.791759 - 6 7 1.649697 1.945910 - 7 8 -1.495604 2.079442 - 8 9 0.549296 2.197225 - 9 10 -0.758542 2.302585 + >>> newcol = df['temp_c'] * 9 / 5 + 32 + >>> df.assign(temp_f=newcol) + state temp_c temp_f + portland or 17.0 62.6 + berkeley ca 25.0 77.0 Where the keyword arguments depend on each other From de61b3895f19da203b4a074ed2f48fa4cea9b338 Mon Sep 17 00:00:00 2001 From: Abeer Eltanawy Date: Sun, 22 Jul 2018 15:23:51 -0700 Subject: [PATCH 02/86] DOC: cont'd simplified examples in DataFrame.assign docstring --- pandas/core/frame.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 852092e1c6e74..40ae3523e2a22 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3250,24 +3250,23 @@ def assign(self, **kwargs): Examples -------- - >>> df = pd.DataFrame([('or', 17.),('ca', 25)], - index=['portland', 'berkeley'], - columns=['state', 'temp_c']) + >>> df = pd.DataFrame({'temp_c': (17.0, 25.0)}, + index=['portland', 'berkeley']) Where the value is a callable, evaluated on `df`: >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) - state temp_c temp_f - portland or 17.0 62.6 - berkeley ca 25.0 77.0 + temp_c temp_f + portland 17.0 62.6 + berkeley 25.0 77.0 Where the value already exists and is inserted: >>> newcol = df['temp_c'] * 9 / 5 + 32 >>> df.assign(temp_f=newcol) - state temp_c temp_f - portland or 17.0 62.6 - berkeley ca 25.0 77.0 + temp_c temp_f + portland 17.0 62.6 + berkeley 25.0 77.0 Where the keyword arguments depend on each other From ef49f886f18d4adcab13b7d7eaa4a1a29152a96c Mon Sep 17 00:00:00 2001 From: Abeer Eltanawy Date: Mon, 3 Sep 2018 20:52:37 -0700 Subject: [PATCH 03/86] DOC: adjusted docstring examples in DataFrame.assign to illustrate python3.6+ feature. 
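Note: patches 01-03 iterate on the same ``DataFrame.assign`` example and converge on the temperature-conversion version in the hunks below. As a standalone sketch of the behavior the final docstring documents (every name here is taken from the docstring itself; the dependent-kwargs form assumes Python 3.6+, where ``**kwargs`` preserves insertion order):

import pandas as pd

df = pd.DataFrame({'temp_c': [17.0, 25.0]}, index=['Portland', 'Berkeley'])

# temp_k refers to temp_f, which is defined earlier in the same call;
# this ordering guarantee is what requires Python 3.6+.
result = df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
                   temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9)
print(result)
#           temp_c  temp_f  temp_k
# Portland    17.0    62.6  290.15
# Berkeley    25.0    77.0  298.15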
--- pandas/core/frame.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 40ae3523e2a22..b958fa6a26436 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3251,32 +3251,33 @@ def assign(self, **kwargs): Examples -------- >>> df = pd.DataFrame({'temp_c': (17.0, 25.0)}, - index=['portland', 'berkeley']) + index=['Portland', 'Berkeley']) + temp_c + Portland 17.0 + Berkeley 25.0 Where the value is a callable, evaluated on `df`: - >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32) temp_c temp_f - portland 17.0 62.6 - berkeley 25.0 77.0 - - Where the value already exists and is inserted: + Portland 17.0 62.6 + Berkeley 25.0 77.0 + Alternatively, the same behavior can be achieved by directly + referencing an existing Series or list-like: >>> newcol = df['temp_c'] * 9 / 5 + 32 >>> df.assign(temp_f=newcol) temp_c temp_f - portland 17.0 62.6 - berkeley 25.0 77.0 - - Where the keyword arguments depend on each other - - >>> df = pd.DataFrame({'A': [1, 2, 3]}) - - >>> df.assign(B=df.A, C=lambda x:x['A']+ x['B']) - A B C - 0 1 1 2 - 1 2 2 4 - 2 3 3 6 + Portland 17.0 62.6 + Berkeley 25.0 77.0 + + In Python 3.6+, you can create multiple columns within the same assign + where one of the columns depends on another one defined within the same + assign: + >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, + temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + temp_c temp_f temp_k + Portland 17.0 62.6 290.15 + Berkeley 25.0 77.0 298.15 """ data = self.copy() From 1fa9bc5983808d6d62a4625a49af7c8ee1f0079d Mon Sep 17 00:00:00 2001 From: Abeer Eltanawy Date: Sat, 8 Sep 2018 22:06:58 -0700 Subject: [PATCH 04/86] DOC: Adjusted DataFrame.assign docstring --- pandas/core/frame.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b958fa6a26436..09e3432c79b46 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3220,7 +3220,7 @@ def assign(self, **kwargs): Parameters ---------- - kwargs : keyword, value pairs + **kwargs : dict of {str: callable or series} The column names are keywords. If the values are callable, they are computed on the DataFrame and assigned to the new columns. The callable must not @@ -3230,7 +3230,7 @@ def assign(self, **kwargs): Returns ------- - df : DataFrame + DataFrame A new DataFrame with the new columns in addition to all the existing columns. @@ -3250,8 +3250,9 @@ def assign(self, **kwargs): Examples -------- - >>> df = pd.DataFrame({'temp_c': (17.0, 25.0)}, - index=['Portland', 'Berkeley']) + >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, + ... index=['Portland', 'Berkeley']) + >>> df temp_c Portland 17.0 Berkeley 25.0 @@ -3264,8 +3265,7 @@ def assign(self, **kwargs): Alternatively, the same behavior can be achieved by directly referencing an existing Series or list-like: - >>> newcol = df['temp_c'] * 9 / 5 + 32 - >>> df.assign(temp_f=newcol) + >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 Berkeley 25.0 77.0 @@ -3274,7 +3274,7 @@ def assign(self, **kwargs): where one of the columns depends on another one defined within the same assign: >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, - temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + ... 
temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) temp_c temp_f temp_k Portland 17.0 62.6 290.15 Berkeley 25.0 77.0 298.15 From 4cb55a4fd22d4a9187c8413bb4f713583e9e3368 Mon Sep 17 00:00:00 2001 From: Abeer Eltanawy Date: Tue, 11 Sep 2018 16:49:18 -0700 Subject: [PATCH 05/86] DOC: adjusted the grammer in DataFrame.assign docstring. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 09e3432c79b46..1be7d86a493c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3220,7 +3220,7 @@ def assign(self, **kwargs): Parameters ---------- - **kwargs : dict of {str: callable or series} + **kwargs : dict of {str: callable or Series} The column names are keywords. If the values are callable, they are computed on the DataFrame and assigned to the new columns. The callable must not @@ -3264,7 +3264,7 @@ def assign(self, **kwargs): Berkeley 25.0 77.0 Alternatively, the same behavior can be achieved by directly - referencing an existing Series or list-like: + referencing an existing Series or sequence: >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 From 7c7bb7a616d896ac96efe058b0ef1f3c1c6be009 Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Tue, 4 Sep 2018 14:09:17 +0300 Subject: [PATCH 06/86] Fixed loffset with numpy timedelta (#22482) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/resample.py | 3 ++- pandas/tests/test_resample.py | 19 ++++++------------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7ed92935a0991..2bfc57d7f5dcd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -707,6 +707,7 @@ Groupby/Resample/Rolling - Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) - Bug in :meth:`Resampler.apply` when passing postiional arguments to applied func (:issue:`14615`). +- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to `loffset` kwarg (:issue:`7687`). 
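Note on the resample entry just added: the ``_apply_loffset`` change in the hunks below accepts ``np.timedelta64`` alongside ``DateOffset`` and ``timedelta``, and the old copy-pasted assertions become one parametrized test. A minimal sketch of the fixed behavior, mirroring that test (the data is arbitrary; the shifted labels assume the default bin origin):

import numpy as np
import pandas as pd

rng = pd.date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
s = pd.Series(np.random.randn(14), index=rng)

# After the fix, timedelta(minutes=1), '1min', Minute(1) and
# np.timedelta64(1, 'm') all behave identically as loffset.
result = s.resample('5min', closed='right', label='right',
                    loffset=np.timedelta64(1, 'm')).mean()

# Bin labels are shifted by one minute: 00:01, 00:06, 00:11, 00:16.
print(result.index)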
Sparse ^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index ae59014ac34f4..2ada4d758d463 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -366,7 +366,8 @@ def _apply_loffset(self, result): """ needs_offset = ( - isinstance(self.loffset, (DateOffset, timedelta)) and + isinstance(self.loffset, (DateOffset, timedelta, + np.timedelta64)) and isinstance(result.index, DatetimeIndex) and len(result.index) > 0 ) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 38801832829b0..b60fd10d745c1 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1173,27 +1173,20 @@ def test_resample_frame_basic(self): df.resample('M', kind='period').mean() df.resample('W-WED', kind='period').mean() - def test_resample_loffset(self): + @pytest.mark.parametrize('loffset', [timedelta(minutes=1), + '1min', Minute(1), + np.timedelta64(1, 'm')]) + def test_resample_loffset(self, loffset): + # GH 7687 rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min') s = Series(np.random.randn(14), index=rng) result = s.resample('5min', closed='right', label='right', - loffset=timedelta(minutes=1)).mean() + loffset=loffset).mean() idx = date_range('1/1/2000', periods=4, freq='5min') expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()], index=idx + timedelta(minutes=1)) assert_series_equal(result, expected) - - expected = s.resample( - '5min', closed='right', label='right', - loffset='1min').mean() - assert_series_equal(result, expected) - - expected = s.resample( - '5min', closed='right', label='right', - loffset=Minute(1)).mean() - assert_series_equal(result, expected) - assert result.index.freq == Minute(5) # from daily From d96a334c4370a15700aa73c7348646c50afa3539 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 4 Sep 2018 04:13:34 -0700 Subject: [PATCH 07/86] CLN: Rename 'n' to 'repeats' in .repeat methods (#22574) For Index and MultiIndex. xref gh-14645. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/base.py | 3 +-- pandas/core/indexes/multi.py | 3 +-- pandas/tests/indexes/multi/test_reshape.py | 4 ---- pandas/tests/indexes/test_base.py | 9 --------- 5 files changed, 3 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2bfc57d7f5dcd..3a360b09ae789 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -527,6 +527,7 @@ Removal of prior version deprecations/changes - Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) +- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats``(:issue:`14645`) - .. 
_whatsnew_0240.performance: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7b7fb968b3050..710c9d0e296c9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -53,7 +53,7 @@ import pandas.core.common as com from pandas.core import ops from pandas.util._decorators import ( - Appender, Substitution, cache_readonly, deprecate_kwarg) + Appender, Substitution, cache_readonly) from pandas.core.indexes.frozen import FrozenList import pandas.core.dtypes.concat as _concat import pandas.core.missing as missing @@ -773,7 +773,6 @@ def memory_usage(self, deep=False): return result # ops compat - @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ Repeat elements of an Index. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5b2e3a76adf05..955f1461075f9 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -27,7 +27,7 @@ from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError -from pandas.util._decorators import Appender, cache_readonly, deprecate_kwarg +from pandas.util._decorators import Appender, cache_readonly import pandas.core.common as com import pandas.core.missing as missing import pandas.core.algorithms as algos @@ -1646,7 +1646,6 @@ def append(self, other): def argsort(self, *args, **kwargs): return self.values.argsort(*args, **kwargs) - @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): nv.validate_repeat(args, kwargs) return MultiIndex(levels=self.levels, diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index efa9fca752157..7750379bff445 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -100,10 +100,6 @@ def test_repeat(): numbers, names.repeat(reps)], names=names) tm.assert_index_equal(m.repeat(reps), expected) - with tm.assert_produces_warning(FutureWarning): - result = m.repeat(n=reps) - tm.assert_index_equal(result, expected) - def test_insert_base(idx): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c858b4d86cf5e..755b3cc7f1dca 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2402,15 +2402,6 @@ def test_repeat(self): result = index.repeat(repeats) tm.assert_index_equal(result, expected) - def test_repeat_warns_n_keyword(self): - index = pd.Index([1, 2, 3]) - expected = pd.Index([1, 1, 2, 2, 3, 3]) - - with tm.assert_produces_warning(FutureWarning): - result = index.repeat(n=2) - - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("index", [ pd.Index([np.nan]), pd.Index([np.nan, 1]), pd.Index([1, 2, np.nan]), pd.Index(['a', 'b', np.nan]), From 607d646b581177547284490d4b72cf2582a6bb77 Mon Sep 17 00:00:00 2001 From: LeakedMemory Date: Tue, 4 Sep 2018 10:04:54 -0500 Subject: [PATCH 08/86] DOC: Updating DataFrame.merge docstring (#22141) --- pandas/core/frame.py | 74 ++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1be7d86a493c9..08e0c05bdcd9a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -138,12 +138,11 @@ """ _merge_doc = """ -Merge DataFrame or named Series objects by performing a database-style join -operation by columns or indexes. 
+Merge DataFrame or named Series objects with a database-style join. -If joining columns on columns, the DataFrame indexes *will be -ignored*. Otherwise if joining indexes on indexes or indexes on a column or -columns, the index will be passed on. +The join is done on columns or indexes. If joining columns on +columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes +on indexes or indexes on a column or columns, the index will be passed on. Parameters ----------%s @@ -153,13 +152,13 @@ Type of merge to be performed. * left: use only keys from left frame, similar to a SQL left outer join; - preserve key order + preserve key order. * right: use only keys from right frame, similar to a SQL right outer join; - preserve key order + preserve key order. * outer: use union of keys from both frames, similar to a SQL full outer - join; sort keys lexicographically + join; sort keys lexicographically. * inner: use intersection of keys from both frames, similar to a SQL inner - join; preserve the order of the left keys + join; preserve the order of the left keys. on : label or list Column or index level names to join on. These must be found in both DataFrames. If `on` is None and not merging on indexes then this defaults @@ -172,22 +171,23 @@ Column or index level names to join on in the right DataFrame. Can also be an array or list of arrays of the length of the right DataFrame. These arrays are treated as if they are columns. -left_index : boolean, default False +left_index : bool, default False Use the index from the left DataFrame as the join key(s). If it is a MultiIndex, the number of keys in the other DataFrame (either the index or a number of columns) must match the number of levels. -right_index : boolean, default False +right_index : bool, default False Use the index from the right DataFrame as the join key. Same caveats as left_index. -sort : boolean, default False +sort : bool, default False Sort the join keys lexicographically in the result DataFrame. If False, the order of the join keys depends on the join type (how keyword). -suffixes : 2-length sequence (tuple, list, ...) +suffixes : tuple of (str, str), default ('_x', '_y') Suffix to apply to overlapping column names in the left and right - side, respectively. -copy : boolean, default True + side, respectively. To raise an exception on overlapping columns use + (False, False). +copy : bool, default True If False, avoid copy if possible. -indicator : boolean or string, default False +indicator : bool or str, default False If True, adds a column to output DataFrame called "_merge" with information on the source of each row. If string, column with information on source of each row will be added to @@ -197,7 +197,7 @@ "right_only" for observations whose merge key only appears in 'right' DataFrame, and "both" if the observation's merge key is found in both. -validate : string, default None +validate : str, optional If specified, checks if merge is of specified type. * "one_to_one" or "1:1": check if merge keys are unique in both @@ -213,6 +213,7 @@ Returns ------- DataFrame + A DataFrame of the two merged objects. Notes ----- @@ -229,24 +230,27 @@ Examples -------- ->>> A = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], -... 'value': [1, 2, 3, 5]}) ->>> B = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], -... 'value': [5, 6, 7, 8]}) ->>> A +>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'], +... 'value': [1, 2, 3, 5]}) +>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'], +... 
'value': [5, 6, 7, 8]}) +>>> df1 lkey value 0 foo 1 1 bar 2 2 baz 3 3 foo 5 ->>> B +>>> df2 rkey value 0 foo 5 1 bar 6 2 baz 7 3 foo 8 ->>> A.merge(B, left_on='lkey', right_on='rkey', how='outer') +Merge df1 and df2 on the lkey and rkey columns. The value columns have +the default suffixes, _x and _y, appended. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey') lkey value_x rkey value_y 0 foo 1 foo 5 1 foo 1 foo 8 @@ -254,6 +258,28 @@ 3 foo 5 foo 8 4 bar 2 bar 6 5 baz 3 baz 7 + +Merge DataFrames df1 and df2 with specified left and right suffixes +appended to any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', +... suffixes=('_left', '_right')) + lkey value_left rkey value_right +0 foo 1 foo 5 +1 foo 1 foo 8 +2 foo 5 foo 5 +3 foo 5 foo 8 +4 bar 2 bar 6 +5 baz 3 baz 7 + +Merge DataFrames df1 and df2, but raise an exception if the DataFrames have +any overlapping columns. + +>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False)) +Traceback (most recent call last): +... +ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') """ # ----------------------------------------------------------------------- From 1b11063b0bc1653372a8febf8624465c5ca62fd1 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Tue, 4 Sep 2018 14:47:45 -0700 Subject: [PATCH 09/86] TST: Add capture_stderr decorator to test_validate_docstrings (#22543) --- pandas/util/testing.py | 2 +- scripts/tests/test_validate_docstrings.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 089e35e8e93b2..aee7dba450a30 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -673,7 +673,7 @@ def capture_stderr(f): AssertionError: assert 'foo\n' == 'bar\n' """ - @wraps(f) + @compat.wraps(f) def wrapper(*args, **kwargs): try: sys.stderr = StringIO() diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 933d02cc8c627..0c0757c6963d7 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -6,6 +6,8 @@ import validate_docstrings validate_one = validate_docstrings.validate_one +from pandas.util.testing import capture_stderr + class GoodDocStrings(object): """ @@ -518,10 +520,12 @@ def _import_path(self, klass=None, func=None): return base_path + @capture_stderr def test_good_class(self): assert validate_one(self._import_path( klass='GoodDocStrings')) == 0 + @capture_stderr @pytest.mark.parametrize("func", [ 'plot', 'sample', 'random_letters', 'sample_values', 'head', 'head1', 'contains', 'mode']) @@ -529,10 +533,12 @@ def test_good_functions(self, func): assert validate_one(self._import_path( klass='GoodDocStrings', func=func)) == 0 + @capture_stderr def test_bad_class(self): assert validate_one(self._import_path( klass='BadGenericDocStrings')) > 0 + @capture_stderr @pytest.mark.parametrize("func", [ 'func', 'astype', 'astype1', 'astype2', 'astype3', 'plot', 'method']) def test_bad_generic_functions(self, func): From 3141dfe5d12894a010565fa34e7e4309acf1c945 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 5 Sep 2018 03:52:27 -0700 Subject: [PATCH 10/86] BLD: Fix openpyxl to 2.5.5 (#22601) 2.5.5 --> 2.5.6 broke compatibility with pandas Timestamp objects. Closes gh-22595. 
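Note: as a hedged sketch of the round-trip this pin protects (the output path ``timestamps.xlsx`` is hypothetical, and the exact 2.5.6 failure mode is only as reported in gh-22595, not reproduced here; assumes openpyxl is installed):

import pandas as pd

df = pd.DataFrame({'when': pd.date_range('2018-01-01', periods=3)})

# Writing Timestamp data through the openpyxl engine is the operation
# that reportedly broke under 2.5.6, hence the exact pin applied below.
df.to_excel('timestamps.xlsx', engine='openpyxl')
roundtrip = pd.read_excel('timestamps.xlsx', index_col=0)
print(roundtrip.dtypes)  # when    datetime64[ns]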
--- ci/appveyor-27.yaml | 2 +- ci/appveyor-36.yaml | 2 +- ci/circle-27-compat.yaml | 2 +- ci/circle-36-locale.yaml | 2 +- ci/circle-36-locale_slow.yaml | 2 +- ci/requirements-optional-conda.txt | 2 +- ci/requirements-optional-pip.txt | 4 ++-- ci/travis-35-osx.yaml | 2 +- ci/travis-36-doc.yaml | 2 +- ci/travis-36-slow.yaml | 2 +- ci/travis-36.yaml | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ci/appveyor-27.yaml b/ci/appveyor-27.yaml index 6843c82236a35..bcd9ddee1715e 100644 --- a/ci/appveyor-27.yaml +++ b/ci/appveyor-27.yaml @@ -13,7 +13,7 @@ dependencies: - matplotlib - numexpr - numpy=1.12* - - openpyxl + - openpyxl=2.5.5 - pytables - python=2.7.* - pytz diff --git a/ci/appveyor-36.yaml b/ci/appveyor-36.yaml index 47b14221bb34b..6230e9b6a1885 100644 --- a/ci/appveyor-36.yaml +++ b/ci/appveyor-36.yaml @@ -10,7 +10,7 @@ dependencies: - matplotlib - numexpr - numpy=1.14* - - openpyxl + - openpyxl=2.5.5 - pyarrow - pytables - python-dateutil diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml index 5dee6b0c8ed07..84ec7e20fc8f1 100644 --- a/ci/circle-27-compat.yaml +++ b/ci/circle-27-compat.yaml @@ -8,7 +8,7 @@ dependencies: - jinja2=2.8 - numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr - numpy=1.9.3 - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pytables=3.2.2 - python-dateutil=2.5.0 diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml index 59c8818eaef1e..ef97b85406709 100644 --- a/ci/circle-36-locale.yaml +++ b/ci/circle-36-locale.yaml @@ -13,7 +13,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pymysql - pytables diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index 7e40bd1a9979e..14b23dd6f3e4c 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -14,7 +14,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pymysql - pytables diff --git a/ci/requirements-optional-conda.txt b/ci/requirements-optional-conda.txt index 18aac30f04aea..376fdb1e14e3a 100644 --- a/ci/requirements-optional-conda.txt +++ b/ci/requirements-optional-conda.txt @@ -12,7 +12,7 @@ lxml matplotlib nbsphinx numexpr -openpyxl +openpyxl=2.5.5 pyarrow pymysql pytables diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt index 28dafc43b09c0..2e1bf0ca22bcf 100644 --- a/ci/requirements-optional-pip.txt +++ b/ci/requirements-optional-pip.txt @@ -14,7 +14,7 @@ lxml matplotlib nbsphinx numexpr -openpyxl +openpyxl=2.5.5 pyarrow pymysql tables @@ -28,4 +28,4 @@ statsmodels xarray xlrd xlsxwriter -xlwt \ No newline at end of file +xlwt diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml index 797682bec7208..a36f748ded812 100644 --- a/ci/travis-35-osx.yaml +++ b/ci/travis-35-osx.yaml @@ -12,7 +12,7 @@ dependencies: - nomkl - numexpr - numpy=1.10.4 - - openpyxl + - openpyxl=2.5.5 - pytables - python=3.5* - pytz diff --git a/ci/travis-36-doc.yaml b/ci/travis-36-doc.yaml index 9cbc46d0a70d7..50626088d5bc4 100644 --- a/ci/travis-36-doc.yaml +++ b/ci/travis-36-doc.yaml @@ -22,7 +22,7 @@ dependencies: - notebook - numexpr - numpy=1.13* - - openpyxl + - openpyxl=2.5.5 - pandoc - pyqt - pytables diff --git a/ci/travis-36-slow.yaml b/ci/travis-36-slow.yaml index 3157ecac3a902..1a7bc53e1b74b 100644 --- a/ci/travis-36-slow.yaml +++ b/ci/travis-36-slow.yaml @@ -10,7 +10,7 @@ dependencies: - matplotlib - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - patsy - psycopg2 - pymysql diff --git 
a/ci/travis-36.yaml b/ci/travis-36.yaml index 990ad0fe87dd6..3c9daa5f8b73c 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -18,7 +18,7 @@ dependencies: - nomkl - numexpr - numpy - - openpyxl + - openpyxl=2.5.5 - psycopg2 - pyarrow - pymysql From 66d376df98e883a29c6b16a45f8007c4b3db4269 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 5 Sep 2018 04:29:53 -0700 Subject: [PATCH 11/86] Use dispatch_to_series where possible (#22572) --- pandas/core/frame.py | 15 ++++++--------- pandas/core/ops.py | 1 + 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 08e0c05bdcd9a..96ad525355dce 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4818,13 +4818,14 @@ def _arith_op(left, right): return ops.dispatch_to_series(this, other, _arith_op) else: result = _arith_op(this.values, other.values) - - return self._constructor(result, index=new_index, columns=new_columns, - copy=False) + return self._constructor(result, + index=new_index, columns=new_columns, + copy=False) def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join='outer', axis=0, level=level, copy=False) + assert left.index.equals(right.index) new_data = func(left.values.T, right.values).T return self._constructor(new_data, index=left.index, columns=self.columns, @@ -4833,6 +4834,7 @@ def _combine_match_index(self, other, func, level=None): def _combine_match_columns(self, other, func, level=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) + assert left.columns.equals(right.index) new_data = left._data.eval(func=func, other=right, axes=[left.columns, self.index], @@ -4841,12 +4843,7 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): def _combine_const(self, other, func, errors='raise', try_cast=True): if lib.is_scalar(other) or np.ndim(other) == 0: - new_data = {i: func(self.iloc[:, i], other) - for i, col in enumerate(self.columns)} - - result = self._constructor(new_data, index=self.index, copy=False) - result.columns = self.columns - return result + return ops.dispatch_to_series(self, other, func) new_data = self._data.eval(func=func, other=other, errors=errors, diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b25809bf074f7..a86e57fd8876d 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1638,6 +1638,7 @@ def dispatch_to_series(left, right, func): """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. + right = lib.item_from_zerodim(right) if lib.is_scalar(right): new_data = {i: func(left.iloc[:, i], right) for i in range(len(left.columns))} From 2168e4a2a35dbfdacb90381d9936a2d6ad0e8402 Mon Sep 17 00:00:00 2001 From: Alex Rychyk Date: Wed, 5 Sep 2018 14:31:04 +0300 Subject: [PATCH 12/86] BUG: resample with TimedeltaIndex, fenceposts are off (#22488) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/resample.py | 21 +++++++++------------ pandas/tests/test_resample.py | 19 ++++++++++++------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3a360b09ae789..1979bde796452 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -709,6 +709,7 @@ Groupby/Resample/Rolling datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) - Bug in :meth:`Resampler.apply` when passing postiional arguments to applied func (:issue:`14615`). 
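Note on the ``Resampler.asfreq`` entry added just below: upsampling on a ``TimedeltaIndex`` no longer trims the final fencepost (GH 13022). A sketch matching the new ``test_resample_as_freq_with_subperiod`` further down:

import pandas as pd

index = pd.timedelta_range('00:00:00', '00:10:00', freq='5T')
df = pd.DataFrame({'value': [1, 5, 10]}, index=index)

# asfreq() at the 2-minute frequency keeps both endpoints (00:00 and
# 00:10) and fills non-matching new labels with NaN; the 00:05
# observation has no 2-minute label, so it is dropped.
result = df.resample('2T').asfreq()
print(result)
# six rows at 0, 2, 4, 6, 8 and 10 minutes; only 0 and 10 are non-NaN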
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to `loffset` kwarg (:issue:`7687`). +- Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). Sparse ^^^^^^ diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2ada4d758d463..1ef8a0854887b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -963,7 +963,10 @@ def _downsample(self, how, **kwargs): return self._wrap_result(result) def _adjust_binner_for_upsample(self, binner): - """ adjust our binner when upsampling """ + """ + Adjust our binner when upsampling. + The range of a new index should not be outside specified range + """ if self.closed == 'right': binner = binner[1:] else: @@ -1156,17 +1159,11 @@ def _get_binner_for_time(self): return self.groupby._get_time_delta_bins(self.ax) def _adjust_binner_for_upsample(self, binner): - """ adjust our binner when upsampling """ - ax = self.ax - - if is_subperiod(ax.freq, self.freq): - # We are actually downsampling - # but are in the asfreq path - # GH 12926 - if self.closed == 'right': - binner = binner[1:] - else: - binner = binner[:-1] + """ + Adjust our binner when upsampling. + The range of a new index is allowed to be greater than original range + so we don't need to change the length of a binner, GH 13022 + """ return binner diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index b60fd10d745c1..530a683c02f9d 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -26,7 +26,6 @@ from pandas.compat import range, lrange, zip, OrderedDict from pandas.errors import UnsupportedFunctionCall import pandas.tseries.offsets as offsets -from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Minute, BDay from pandas.core.groupby.groupby import DataError @@ -626,12 +625,7 @@ def test_asfreq(self, series_and_frame, freq): obj = series_and_frame result = obj.resample(freq).asfreq() - if freq == '2D': - new_index = obj.index.take(np.arange(0, len(obj.index), 2)) - new_index.freq = to_offset('2D') - else: - new_index = self.create_index(obj.index[0], obj.index[-1], - freq=freq) + new_index = self.create_index(obj.index[0], obj.index[-1], freq=freq) expected = obj.reindex(new_index) assert_almost_equal(result, expected) @@ -2932,6 +2926,17 @@ def test_resample_with_nat(self): freq='1S')) assert_frame_equal(result, expected) + def test_resample_as_freq_with_subperiod(self): + # GH 13022 + index = timedelta_range('00:00:00', '00:10:00', freq='5T') + df = DataFrame(data={'value': [1, 5, 10]}, index=index) + result = df.resample('2T').asfreq() + expected_data = {'value': [1, np.nan, np.nan, np.nan, np.nan, 10]} + expected = DataFrame(data=expected_data, + index=timedelta_range('00:00:00', + '00:10:00', freq='2T')) + tm.assert_frame_equal(result, expected) + class TestResamplerGrouper(object): From 6693d9aa9f9b7a2a96ddb1cbe759a0f863b09aec Mon Sep 17 00:00:00 2001 From: "C.A.M. Gerlach" Date: Wed, 5 Sep 2018 07:09:13 -0500 Subject: [PATCH 13/86] DOC: Update link and description of the Spyder IDE in Ecosystem docs (#22136) --- doc/source/ecosystem.rst | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 82ca3821fc2ed..1014982fea21a 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -14,7 +14,7 @@ development to remain focused around it's original requirements. 
This is an inexhaustive list of projects that build on pandas in order to provide tools in the PyData space. For a list of projects that depend on pandas, -see the +see the `libraries.io usage page for pandas `_ or `search pypi for pandas `_. @@ -44,7 +44,7 @@ ML pipeline. `Featuretools `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community. +Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community. .. _ecosystem.visualization: @@ -149,13 +149,30 @@ for pandas ``display.`` settings. qgrid is "an interactive grid for sorting and filtering DataFrames in IPython Notebook" built with SlickGrid. -`Spyder `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +`Spyder `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Spyder is a cross-platform PyQt-based IDE combining the editing, analysis, +debugging and profiling functionality of a software development tool with the +data exploration, interactive execution, deep inspection and rich visualization +capabilities of a scientific environment like MATLAB or Rstudio. + +Its `Variable Explorer `__ +allows users to view, manipulate and edit pandas ``Index``, ``Series``, +and ``DataFrame`` objects like a "spreadsheet", including copying and modifying +values, sorting, displaying a "heatmap", converting data types and more. +Pandas objects can also be renamed, duplicated, new columns added, +copyed/pasted to/from the clipboard (as TSV), and saved/loaded to/from a file. +Spyder can also import data from a variety of plain text and binary files +or the clipboard into a new pandas DataFrame via a sophisticated import wizard. + +Most pandas classes, methods and data attributes can be autocompleted in +Spyder's `Editor `__ and +`IPython Console `__, +and Spyder's `Help pane`__ can retrieve +and render Numpydoc documentation on pandas objects in rich text with Sphinx +both automatically and on-demand. -Spyder is a cross-platform Qt-based open-source Python IDE with -editing, testing, debugging, and introspection features. -Spyder can now introspect and display Pandas DataFrames and show -both "column wise min/max and global min/max coloring." .. _ecosystem.api: @@ -205,12 +222,12 @@ This package requires valid credentials for this API (non free). pandaSDMX is a library to retrieve and acquire statistical data and metadata disseminated in `SDMX `_ 2.1, an ISO-standard -widely used by institutions such as statistics offices, central banks, -and international organisations. pandaSDMX can expose datasets and related +widely used by institutions such as statistics offices, central banks, +and international organisations. pandaSDMX can expose datasets and related structural metadata including data flows, code-lists, and data structure definitions as pandas Series or MultiIndexed DataFrames. 
- + `fredapi `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) `__ From 4ed37601cd728422f8e3ef2867002c29050420f8 Mon Sep 17 00:00:00 2001 From: Sean Chan <10970385+seantchan@users.noreply.github.com> Date: Wed, 5 Sep 2018 19:34:51 -0400 Subject: [PATCH 14/86] DOC: Improve the docstring of DataFrame.equals() (#22539) --- pandas/core/generic.py | 81 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 85bd6065314f4..dd5552151f61b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1303,8 +1303,85 @@ def __invert__(self): def equals(self, other): """ - Determines if two NDFrame objects contain the same elements. NaNs in - the same location are considered equal. + Test whether two objects contain the same elements. + + This function allows two Series or DataFrames to be compared against + each other to see if they have the same shape and elements. NaNs in + the same location are considered equal. The column headers do not + need to have the same type, but the elements within the columns must + be the same dtype. + + Parameters + ---------- + other : Series or DataFrame + The other Series or DataFrame to be compared with the first. + + Returns + ------- + bool + True if all elements are the same in both objects, False + otherwise. + + See Also + -------- + Series.eq : Compare two Series objects of the same length + and return a Series where each element is True if the element + in each Series is equal, False otherwise. + DataFrame.eq : Compare two DataFrame objects of the same shape and + return a DataFrame where each element is True if the respective + element in each DataFrame is equal, False otherwise. + assert_series_equal : Return True if left and right Series are equal, + False otherwise. + assert_frame_equal : Return True if left and right DataFrames are + equal, False otherwise. + numpy.array_equal : Return True if two arrays have the same shape + and elements, False otherwise. + + Notes + ----- + This function requires that the elements have the same dtype as their + respective elements in the other Series or DataFrame. However, the + column labels do not need to have the same type, as long as they are + still considered equal. + + Examples + -------- + >>> df = pd.DataFrame({1: [10], 2: [20]}) + >>> df + 1 2 + 0 10 20 + + DataFrames df and exactly_equal have the same types and values for + their elements and column labels, which will return True. + + >>> exactly_equal = pd.DataFrame({1: [10], 2: [20]}) + >>> exactly_equal + 1 2 + 0 10 20 + >>> df.equals(exactly_equal) + True + + DataFrames df and different_column_type have the same element + types and values, but have different types for the column labels, + which will still return True. + + >>> different_column_type = pd.DataFrame({1.0: [10], 2.0: [20]}) + >>> different_column_type + 1.0 2.0 + 0 10 20 + >>> df.equals(different_column_type) + True + + DataFrames df and different_data_type have different types for the + same values for their elements, and will return False even though + their column labels are the same values and types. 
+ + >>> different_data_type = pd.DataFrame({1: [10.0], 2: [20.0]}) + >>> different_data_type + 1 2 + 0 10.0 20.0 + >>> df.equals(different_data_type) + False """ if not isinstance(other, self._constructor): return False From 25030e2f8eeaffc0f5f137dcd68a5fbda1b47275 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Thu, 6 Sep 2018 01:43:35 +0200 Subject: [PATCH 15/86] TST: fixturize series/test_alter_axes.py (#22526) --- pandas/tests/series/conftest.py | 43 ++++++++++++ pandas/tests/series/test_alter_axes.py | 96 ++++++++++++-------------- 2 files changed, 88 insertions(+), 51 deletions(-) create mode 100644 pandas/tests/series/conftest.py diff --git a/pandas/tests/series/conftest.py b/pandas/tests/series/conftest.py new file mode 100644 index 0000000000000..80a4e81c443ed --- /dev/null +++ b/pandas/tests/series/conftest.py @@ -0,0 +1,43 @@ +import pytest + +import pandas.util.testing as tm + +from pandas import Series + + +@pytest.fixture +def datetime_series(): + """ + Fixture for Series of floats with DatetimeIndex + """ + s = tm.makeTimeSeries() + s.name = 'ts' + return s + + +@pytest.fixture +def string_series(): + """ + Fixture for Series of floats with Index of unique strings + """ + s = tm.makeStringSeries() + s.name = 'series' + return s + + +@pytest.fixture +def object_series(): + """ + Fixture for Series of dtype datetime64[ns] with Index of unique strings + """ + s = tm.makeObjectSeries() + s.name = 'objects' + return s + + +@pytest.fixture +def empty_series(): + """ + Fixture for empty Series + """ + return Series([], index=[]) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index ed3191cf849c0..c3e4cb8bc3abc 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -6,44 +6,39 @@ from datetime import datetime import numpy as np -import pandas as pd -from pandas import Index, Series -from pandas.core.index import MultiIndex, RangeIndex +from pandas import Series, DataFrame, Index, MultiIndex, RangeIndex from pandas.compat import lrange, range, zip -from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from .common import TestData +class TestSeriesAlterAxes(object): -class TestSeriesAlterAxes(TestData): - - def test_setindex(self): + def test_setindex(self, string_series): # wrong type - series = self.series.copy() - pytest.raises(TypeError, setattr, series, 'index', None) + pytest.raises(TypeError, setattr, string_series, 'index', None) # wrong length - series = self.series.copy() - pytest.raises(Exception, setattr, series, 'index', - np.arange(len(series) - 1)) + pytest.raises(Exception, setattr, string_series, 'index', + np.arange(len(string_series) - 1)) # works - series = self.series.copy() - series.index = np.arange(len(series)) - assert isinstance(series.index, Index) + string_series.index = np.arange(len(string_series)) + assert isinstance(string_series.index, Index) + + # Renaming - def test_rename(self): + def test_rename(self, datetime_series): + ts = datetime_series renamer = lambda x: x.strftime('%Y%m%d') - renamed = self.ts.rename(renamer) - assert renamed.index[0] == renamer(self.ts.index[0]) + renamed = ts.rename(renamer) + assert renamed.index[0] == renamer(ts.index[0]) # dict - rename_dict = dict(zip(self.ts.index, renamed.index)) - renamed2 = self.ts.rename(rename_dict) - assert_series_equal(renamed, renamed2) + rename_dict = dict(zip(ts.index, renamed.index)) + renamed2 = 
ts.rename(rename_dict) + tm.assert_series_equal(renamed, renamed2) # partial dict s = Series(np.arange(4), index=['a', 'b', 'c', 'd'], dtype='int64') @@ -105,12 +100,12 @@ def test_set_name(self): assert s.name is None assert s is not s2 - def test_rename_inplace(self): + def test_rename_inplace(self, datetime_series): renamer = lambda x: x.strftime('%Y%m%d') - expected = renamer(self.ts.index[0]) + expected = renamer(datetime_series.index[0]) - self.ts.rename(renamer, inplace=True) - assert self.ts.index[0] == expected + datetime_series.rename(renamer, inplace=True) + assert datetime_series.index[0] == expected def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) @@ -135,7 +130,7 @@ def test_reset_index(self): s = ser.reset_index(drop=True) s2 = ser s2.reset_index(drop=True, inplace=True) - assert_series_equal(s, s2) + tm.assert_series_equal(s, s2) # level index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], @@ -150,8 +145,8 @@ def test_reset_index(self): assert isinstance(rs, Series) def test_reset_index_level(self): - df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], - columns=['A', 'B', 'C']) + df = DataFrame([[1, 2, 3], [4, 5, 6]], + columns=['A', 'B', 'C']) for levels in ['A', 'B'], [0, 1]: # With MultiIndex @@ -189,19 +184,19 @@ def test_reset_index_level(self): s.reset_index(level=[0, 1, 2]) # Check that .reset_index([],drop=True) doesn't fail - result = pd.Series(range(4)).reset_index([], drop=True) - expected = pd.Series(range(4)) - assert_series_equal(result, expected) + result = Series(range(4)).reset_index([], drop=True) + expected = Series(range(4)) + tm.assert_series_equal(result, expected) def test_reset_index_range(self): # GH 12071 - s = pd.Series(range(2), name='A', dtype='int64') + s = Series(range(2), name='A', dtype='int64') series_result = s.reset_index() assert isinstance(series_result.index, RangeIndex) - series_expected = pd.DataFrame([[0, 0], [1, 1]], - columns=['index', 'A'], - index=RangeIndex(stop=2)) - assert_frame_equal(series_result, series_expected) + series_expected = DataFrame([[0, 0], [1, 1]], + columns=['index', 'A'], + index=RangeIndex(stop=2)) + tm.assert_frame_equal(series_result, series_expected) def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], @@ -212,11 +207,11 @@ def test_reorder_levels(self): # no change, position result = s.reorder_levels([0, 1, 2]) - assert_series_equal(s, result) + tm.assert_series_equal(s, result) # no change, labels result = s.reorder_levels(['L0', 'L1', 'L2']) - assert_series_equal(s, result) + tm.assert_series_equal(s, result) # rotate, position result = s.reorder_levels([1, 2, 0]) @@ -225,17 +220,16 @@ def test_reorder_levels(self): [0, 0, 0, 0, 0, 0]], names=['L1', 'L2', 'L0']) expected = Series(np.arange(6), index=e_idx) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) - def test_rename_axis_inplace(self): + def test_rename_axis_inplace(self, datetime_series): # GH 15704 - series = self.ts.copy() - expected = series.rename_axis('foo') - result = series.copy() + expected = datetime_series.rename_axis('foo') + result = datetime_series no_return = result.rename_axis('foo', inplace=True) assert no_return is None - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_set_axis_inplace_axes(self, axis_series): # GH14636 @@ -291,25 +285,25 @@ def test_reset_index_drop_errors(self): # GH 20925 # KeyError raised for series index when passed level name is missing - s = 
pd.Series(range(4)) + s = Series(range(4)) with tm.assert_raises_regex(KeyError, 'must be same as name'): s.reset_index('wrong', drop=True) with tm.assert_raises_regex(KeyError, 'must be same as name'): s.reset_index('wrong') # KeyError raised for series when level to be dropped is missing - s = pd.Series(range(4), index=pd.MultiIndex.from_product([[1, 2]] * 2)) + s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2)) with tm.assert_raises_regex(KeyError, 'not found'): s.reset_index('wrong', drop=True) def test_droplevel(self): # GH20342 - ser = pd.Series([1, 2, 3, 4]) - ser.index = pd.MultiIndex.from_arrays([(1, 2, 3, 4), (5, 6, 7, 8)], - names=['a', 'b']) + ser = Series([1, 2, 3, 4]) + ser.index = MultiIndex.from_arrays([(1, 2, 3, 4), (5, 6, 7, 8)], + names=['a', 'b']) expected = ser.reset_index('b', drop=True) result = ser.droplevel('b', axis='index') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # test that droplevel raises ValueError on axis != 0 with pytest.raises(ValueError): ser.droplevel(1, axis='columns') From bdca5e9eb7b684480215a0d1633eb583044037c4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 6 Sep 2018 12:11:29 +0200 Subject: [PATCH 16/86] TST: restructure internal extension arrays tests (split between /arrays and /extension) (#22026) --- .../integer => arrays}/test_integer.py | 308 ++++-------------- pandas/tests/arrays/test_interval.py | 72 ++++ pandas/tests/extension/base/methods.py | 9 +- pandas/tests/extension/base/ops.py | 9 +- pandas/tests/extension/category/__init__.py | 0 pandas/tests/extension/integer/__init__.py | 0 pandas/tests/extension/interval/__init__.py | 0 .../{category => }/test_categorical.py | 23 +- pandas/tests/extension/test_integer.py | 229 +++++++++++++ .../extension/{interval => }/test_interval.py | 76 +---- 10 files changed, 413 insertions(+), 313 deletions(-) rename pandas/tests/{extension/integer => arrays}/test_integer.py (70%) create mode 100644 pandas/tests/arrays/test_interval.py delete mode 100644 pandas/tests/extension/category/__init__.py delete mode 100644 pandas/tests/extension/integer/__init__.py delete mode 100644 pandas/tests/extension/interval/__init__.py rename pandas/tests/extension/{category => }/test_categorical.py (85%) create mode 100644 pandas/tests/extension/test_integer.py rename pandas/tests/extension/{interval => }/test_interval.py (54%) diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/arrays/test_integer.py similarity index 70% rename from pandas/tests/extension/integer/test_integer.py rename to pandas/tests/arrays/test_integer.py index 3af127091d2d8..349a6aee5701e 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1,11 +1,10 @@ +# -*- coding: utf-8 -*- import numpy as np import pandas as pd import pandas.util.testing as tm import pytest -from pandas.tests.extension import base -from pandas.api.types import ( - is_integer, is_scalar, is_float, is_float_dtype) +from pandas.api.types import is_integer, is_float, is_float_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.arrays import ( @@ -14,6 +13,8 @@ Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) +from pandas.tests.extension.base import BaseOpsUtil + def make_data(): return (list(range(8)) + @@ -39,42 +40,13 @@ def data_missing(dtype): return integer_array([np.nan, 1], dtype=dtype) -@pytest.fixture -def data_repeated(data): - def gen(count): 
- for _ in range(count): - yield data - yield gen - - -@pytest.fixture -def data_for_sorting(dtype): - return integer_array([1, 2, 0], dtype=dtype) - - -@pytest.fixture -def data_missing_for_sorting(dtype): - return integer_array([1, np.nan, 0], dtype=dtype) - - -@pytest.fixture -def na_cmp(): - # we are np.nan - return lambda x, y: np.isnan(x) and np.isnan(y) - - -@pytest.fixture -def na_value(): - return np.nan - - -@pytest.fixture -def data_for_grouping(dtype): - b = 1 - a = 0 - c = 2 - na = np.nan - return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) +@pytest.fixture(params=['data', 'data_missing']) +def all_data(request, data, data_missing): + """Parametrized fixture giving 'data' and 'data_missing'""" + if request.param == 'data': + return data + elif request.param == 'data_missing': + return data_missing def test_dtypes(dtype): @@ -87,61 +59,50 @@ def test_dtypes(dtype): assert dtype.name is not None -class BaseInteger(object): - - def assert_index_equal(self, left, right, *args, **kwargs): - - left_na = left.isna() - right_na = right.isna() +class TestInterface(object): - tm.assert_numpy_array_equal(left_na, right_na) - return tm.assert_index_equal(left[~left_na], - right[~right_na], - *args, **kwargs) - - def assert_series_equal(self, left, right, *args, **kwargs): + def test_repr_array(self, data): + result = repr(data) - left_na = left.isna() - right_na = right.isna() + # not long + assert '...' not in result - tm.assert_series_equal(left_na, right_na) - return tm.assert_series_equal(left[~left_na], - right[~right_na], - *args, **kwargs) + assert 'dtype=' in result + assert 'IntegerArray' in result - def assert_frame_equal(self, left, right, *args, **kwargs): - # TODO(EA): select_dtypes - tm.assert_index_equal( - left.columns, right.columns, - exact=kwargs.get('check_column_type', 'equiv'), - check_names=kwargs.get('check_names', True), - check_exact=kwargs.get('check_exact', False), - check_categorical=kwargs.get('check_categorical', True), - obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) + def test_repr_array_long(self, data): + # some arrays may be able to assert a ... in the repr + with pd.option_context('display.max_seq_items', 1): + result = repr(data) - integers = (left.dtypes == 'integer').index + assert '...' 
in result + assert 'length' in result - for col in integers: - self.assert_series_equal(left[col], right[col], - *args, **kwargs) - left = left.drop(columns=integers) - right = right.drop(columns=integers) - tm.assert_frame_equal(left, right, *args, **kwargs) +class TestConstructors(object): + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype -class TestDtype(BaseInteger, base.BaseDtypeTests): + # from float + expected = pd.Series(data) + result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) + tm.assert_series_equal(result, expected) - @pytest.mark.skip(reason="using multiple dtypes") - def test_is_dtype_unboxes_dtype(self): - # we have multiple dtypes, so skip - pass + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + tm.assert_series_equal(result, expected) - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is IntegerArray + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + tm.assert_series_equal(result, expected) -class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests): +class TestArithmeticOps(BaseOpsUtil): def _check_divmod_op(self, s, op, other, exc=None): super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) @@ -178,7 +139,7 @@ def _check_op_float(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in float dtypes expected[mask] = np.nan - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def _check_op_integer(self, result, expected, mask, s, op_name, other): # check comparisions that are resulting in integer dtypes @@ -231,10 +192,10 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other): original = original.astype('float') original[mask] = np.nan - self.assert_series_equal(original, expected.astype('float')) + tm.assert_series_equal(original, expected.astype('float')) # assert our expected result - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_arith_integer_array(self, data, all_arithmetic_operators): # we operate with a rhs of an integer array @@ -319,7 +280,7 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) -class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests): +class TestComparisonOps(BaseOpsUtil): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) @@ -345,144 +306,21 @@ def _compare_other(self, s, data, op_name, other): tm.assert_series_equal(result, expected) + def test_compare_scalar(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + self._compare_other(s, data, op_name, 0) -class TestInterface(BaseInteger, base.BaseInterfaceTests): - - def test_repr_array(self, data): - result = repr(data) - - # not long - assert '...' not in result - - assert 'dtype=' in result - assert 'IntegerArray' in result - - def test_repr_array_long(self, data): - # some arrays may be able to assert a ... in the repr - with pd.option_context('display.max_seq_items', 1): - result = repr(data) - - assert '...' 
in result - assert 'length' in result - - -class TestConstructors(BaseInteger, base.BaseConstructorsTests): - - def test_from_dtype_from_float(self, data): - # construct from our dtype & string dtype - dtype = data.dtype - - # from float - expected = pd.Series(data) - result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / list - expected = pd.Series(data) - result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) - self.assert_series_equal(result, expected) - - # from int / array - expected = pd.Series(data).dropna().reset_index(drop=True) - dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) - result = pd.Series(dropped, dtype=str(dtype)) - self.assert_series_equal(result, expected) - - -class TestReshaping(BaseInteger, base.BaseReshapingTests): - - def test_concat_mixed_dtypes(self, data): - # https://github.com/pandas-dev/pandas/issues/20762 - df1 = pd.DataFrame({'A': data[:3]}) - df2 = pd.DataFrame({"A": [1, 2, 3]}) - df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category') - df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])}) - dfs = [df1, df2, df3, df4] - - # dataframes - result = pd.concat(dfs) - expected = pd.concat([x.astype(object) for x in dfs]) - self.assert_frame_equal(result, expected) - - # series - result = pd.concat([x['A'] for x in dfs]) - expected = pd.concat([x['A'].astype(object) for x in dfs]) - self.assert_series_equal(result, expected) - - result = pd.concat([df1, df2]) - expected = pd.concat([df1.astype('object'), df2.astype('object')]) - self.assert_frame_equal(result, expected) - - # concat of an Integer and Int coerces to object dtype - # TODO(jreback) once integrated this would - # be a result of Integer - result = pd.concat([df1['A'], df2['A']]) - expected = pd.concat([df1['A'].astype('object'), - df2['A'].astype('object')]) - self.assert_series_equal(result, expected) - - -class TestGetitem(BaseInteger, base.BaseGetitemTests): - pass + def test_compare_array(self, data, all_compare_operators): + op_name = all_compare_operators + s = pd.Series(data) + other = pd.Series([0] * len(data)) + self._compare_other(s, data, op_name, other) -class TestMissing(BaseInteger, base.BaseMissingTests): +class TestCasting(object): pass - -class TestMethods(BaseInteger, base.BaseMethodsTests): - - @pytest.mark.parametrize('dropna', [True, False]) - def test_value_counts(self, all_data, dropna): - all_data = all_data[:10] - if dropna: - other = np.array(all_data[~all_data.isna()]) - else: - other = all_data - - result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() - expected = pd.Series(other).value_counts( - dropna=dropna).sort_index() - expected.index = expected.index.astype(all_data.dtype) - - self.assert_series_equal(result, expected) - - def test_combine_add(self, data_repeated): - # GH 20825 - orig_data1, orig_data2 = data_repeated(2) - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) - - # fundamentally this is not a great operation - # as overflow / underflow can easily happen here - # e.g. 
int8 + int8 - def scalar_add(a, b): - - # TODO; should really be a type specific NA - if pd.isna(a) or pd.isna(b): - return np.nan - if is_integer(a): - a = int(a) - elif is_integer(b): - b = int(b) - return a + b - - result = s1.combine(s2, scalar_add) - expected = pd.Series( - orig_data1._from_sequence([scalar_add(a, b) for (a, b) in - zip(orig_data1, - orig_data2)])) - self.assert_series_equal(result, expected) - - val = s1.iloc[0] - result = s1.combine(val, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + val for a in list(orig_data1)])) - self.assert_series_equal(result, expected) - - -class TestCasting(BaseInteger, base.BaseCastingTests): - @pytest.mark.parametrize('dropna', [True, False]) def test_construct_index(self, all_data, dropna): # ensure that we do not coerce to Float64Index, rather @@ -497,7 +335,7 @@ def test_construct_index(self, all_data, dropna): result = pd.Index(integer_array(other, dtype=all_data.dtype)) expected = pd.Index(other, dtype=object) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('dropna', [True, False]) def test_astype_index(self, all_data, dropna): @@ -515,7 +353,7 @@ def test_astype_index(self, all_data, dropna): result = idx.astype(dtype) expected = idx.astype(object).astype(dtype) - self.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) def test_astype(self, all_data): all_data = all_data[:10] @@ -528,13 +366,13 @@ def test_astype(self, all_data): s = pd.Series(ints) result = s.astype(all_data.dtype) expected = pd.Series(ints) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - ints s = pd.Series(ints) result = s.astype(dtype) expected = pd.Series(ints, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - ints s = pd.Series(ints) @@ -547,13 +385,13 @@ def test_astype(self, all_data): s = pd.Series(mixed) result = s.astype(all_data.dtype) expected = pd.Series(mixed) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same other - mixed s = pd.Series(mixed) result = s.astype(dtype) expected = pd.Series(mixed, dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # coerce to same numpy_dtype - mixed s = pd.Series(mixed) @@ -572,12 +410,12 @@ def test_astype_specific_casting(self, dtype): s = pd.Series([1, 2, 3], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) s = pd.Series([1, 2, 3, None], dtype='Int64') result = s.astype(dtype) expected = pd.Series([1, 2, 3, None], dtype=dtype) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_construct_cast_invalid(self, dtype): @@ -597,24 +435,6 @@ def test_construct_cast_invalid(self, dtype): pd.Series(arr).astype(dtype) -class TestGroupby(BaseInteger, base.BaseGroupbyTests): - - @pytest.mark.xfail(reason="groupby not working", strict=True) - def test_groupby_extension_no_sort(self, data_for_grouping): - super(TestGroupby, self).test_groupby_extension_no_sort( - data_for_grouping) - - @pytest.mark.parametrize('as_index', [ - pytest.param(True, - marks=pytest.mark.xfail(reason="groupby not working", - strict=True)), - False - ]) - def test_groupby_extension_agg(self, as_index, 
data_for_grouping): - super(TestGroupby, self).test_groupby_extension_agg( - as_index, data_for_grouping) - - def test_frame_repr(data_missing): df = pd.DataFrame({'A': data_missing}) diff --git a/pandas/tests/arrays/test_interval.py b/pandas/tests/arrays/test_interval.py new file mode 100644 index 0000000000000..bcf4cea795978 --- /dev/null +++ b/pandas/tests/arrays/test_interval.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +import pytest +import numpy as np + +from pandas import Index, IntervalIndex, date_range, timedelta_range +from pandas.core.arrays import IntervalArray +import pandas.util.testing as tm + + +@pytest.fixture(params=[ + (Index([0, 2, 4]), Index([1, 3, 5])), + (Index([0., 1., 2.]), Index([1., 2., 3.])), + (timedelta_range('0 days', periods=3), + timedelta_range('1 day', periods=3)), + (date_range('20170101', periods=3), date_range('20170102', periods=3)), + (date_range('20170101', periods=3, tz='US/Eastern'), + date_range('20170102', periods=3, tz='US/Eastern'))], + ids=lambda x: str(x[0].dtype)) +def left_right_dtypes(request): + """ + Fixture for building an IntervalArray from various dtypes + """ + return request.param + + +class TestMethods(object): + + @pytest.mark.parametrize('repeats', [0, 1, 5]) + def test_repeat(self, left_right_dtypes, repeats): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right).repeat(repeats) + expected = IntervalArray.from_arrays( + left.repeat(repeats), right.repeat(repeats)) + tm.assert_extension_array_equal(result, expected) + + @pytest.mark.parametrize('bad_repeats, msg', [ + (-1, 'negative dimensions are not allowed'), + ('foo', r'invalid literal for (int|long)\(\) with base 10')]) + def test_repeat_errors(self, bad_repeats, msg): + array = IntervalArray.from_breaks(range(4)) + with tm.assert_raises_regex(ValueError, msg): + array.repeat(bad_repeats) + + @pytest.mark.parametrize('new_closed', [ + 'left', 'right', 'both', 'neither']) + def test_set_closed(self, closed, new_closed): + # GH 21670 + array = IntervalArray.from_breaks(range(10), closed=closed) + result = array.set_closed(new_closed) + expected = IntervalArray.from_breaks(range(10), closed=new_closed) + tm.assert_extension_array_equal(result, expected) + + +class TestSetitem(object): + + def test_set_na(self, left_right_dtypes): + left, right = left_right_dtypes + result = IntervalArray.from_arrays(left, right) + result[0] = np.nan + + expected_left = Index([left._na_value] + list(left[1:])) + expected_right = Index([right._na_value] + list(right[1:])) + expected = IntervalArray.from_arrays(expected_left, expected_right) + + tm.assert_extension_array_equal(result, expected) + + +def test_repr_matches(): + idx = IntervalIndex.from_breaks([1, 2, 3]) + a = repr(idx) + b = repr(idx.values) + assert a.replace("Index", "Array") == b diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c8656808739c4..4e7886dd2e943 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -127,10 +127,11 @@ def test_combine_add(self, data_repeated): s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 + x2) - expected = pd.Series( - orig_data1._from_sequence([a + b for (a, b) in - zip(list(orig_data1), - list(orig_data2))])) + with np.errstate(over='ignore'): + expected = pd.Series( + orig_data1._from_sequence([a + b for (a, b) in + zip(list(orig_data1), + list(orig_data2))])) self.assert_series_equal(result, expected) val = s1.iloc[0] diff 
--git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index f7bfdb8ec218a..05351c56862b8 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -23,9 +23,9 @@ def get_op_from_name(self, op_name): def check_opname(self, s, op_name, other, exc=NotImplementedError): op = self.get_op_from_name(op_name) - self._check_op(s, op, other, exc) + self._check_op(s, op, other, op_name, exc) - def _check_op(self, s, op, other, exc=NotImplementedError): + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): if exc is None: result = op(s, other) expected = s.combine(other, op) @@ -69,7 +69,8 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators s = pd.Series(data) - self.check_opname(s, op_name, [s.iloc[0]] * len(s), exc=TypeError) + self.check_opname(s, op_name, pd.Series([s.iloc[0]] * len(s)), + exc=TypeError) def test_divmod(self, data): s = pd.Series(data) @@ -113,5 +114,5 @@ def test_compare_scalar(self, data, all_compare_operators): def test_compare_array(self, data, all_compare_operators): op_name = all_compare_operators s = pd.Series(data) - other = [0] * len(data) + other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) diff --git a/pandas/tests/extension/category/__init__.py b/pandas/tests/extension/category/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/interval/__init__.py b/pandas/tests/extension/interval/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/test_categorical.py similarity index 85% rename from pandas/tests/extension/category/test_categorical.py rename to pandas/tests/extension/test_categorical.py index 76f6b03907ef8..b8c73a9efdae8 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -1,3 +1,18 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. 
+ +""" import string import pytest @@ -204,10 +219,14 @@ class TestComparisonOps(base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): op = self.get_op_from_name(op_name) if op_name == '__eq__': - assert not op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x == y) + assert (result == expected).all() elif op_name == '__ne__': - assert op(data, other).all() + result = op(s, other) + expected = s.combine(other, lambda x, y: x != y) + assert (result == expected).all() else: with pytest.raises(TypeError): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py new file mode 100644 index 0000000000000..50c0e6dd8b347 --- /dev/null +++ b/pandas/tests/extension/test_integer.py @@ -0,0 +1,229 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" +import numpy as np +import pandas as pd +import pytest + +from pandas.tests.extension import base +from pandas.core.dtypes.common import is_extension_array_dtype + +from pandas.core.arrays import IntegerArray, integer_array +from pandas.core.arrays.integer import ( + Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) + + +def make_data(): + return (list(range(1, 9)) + [np.nan] + list(range(10, 98)) + + [np.nan] + [99, 100]) + + +@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return integer_array(make_data(), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return integer_array([np.nan, 1], dtype=dtype) + + +@pytest.fixture +def data_repeated(data): + def gen(count): + for _ in range(count): + yield data + yield gen + + +@pytest.fixture +def data_for_sorting(dtype): + return integer_array([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return integer_array([1, np.nan, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are np.nan + return lambda x, y: np.isnan(x) and np.isnan(y) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = np.nan + return integer_array([b, b, na, na, a, a, b, c], dtype=dtype) + + +class TestDtype(base.BaseDtypeTests): + + @pytest.mark.skip(reason="using multiple dtypes") + def test_is_dtype_unboxes_dtype(self): + # we have multiple dtypes, so skip + pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is IntegerArray + + +class TestArithmeticOps(base.BaseArithmeticOpsTests): + + def check_opname(self, s, op_name, other, exc=None): + # overwriting to indicate ops don't raise an error + super(TestArithmeticOps, self).check_opname(s, op_name, + other, exc=None) + + def _check_op(self, s, op, other, op_name, exc=NotImplementedError): + if exc is None: + if 
s.dtype.is_unsigned_integer and (op_name == '__rsub__'):
+                # TODO see https://github.com/pandas-dev/pandas/issues/22023
+                pytest.skip("unsigned subtraction gives negative values")
+
+            if (hasattr(other, 'dtype')
+                    and not is_extension_array_dtype(other.dtype)
+                    and pd.api.types.is_integer_dtype(other.dtype)):
+                # other is np.int64 and would therefore always result in
+                # upcasting, so keep other as the same numpy_dtype
+                other = other.astype(s.dtype.numpy_dtype)
+
+            result = op(s, other)
+            expected = s.combine(other, op)
+
+            if op_name == '__rdiv__':
+                # combine is not giving the correct result for this case
+                pytest.skip("skipping reverse div in python 2")
+            elif op_name in ('__rtruediv__', '__truediv__', '__div__'):
+                expected = expected.astype(float)
+                if op_name == '__rtruediv__':
+                    # TODO reverse operators result in object dtype
+                    result = result.astype(float)
+            elif op_name.startswith('__r'):
+                # TODO reverse operators result in object dtype
+                # see https://github.com/pandas-dev/pandas/issues/22024
+                expected = expected.astype(s.dtype)
+                result = result.astype(s.dtype)
+            else:
+                # the combine method results in the 'biggest' (int64) dtype
+                expected = expected.astype(s.dtype)
+                pass
+            if (op_name == '__rpow__') and isinstance(other, pd.Series):
+                # TODO pow on Int arrays gives different result with NA
+                # see https://github.com/pandas-dev/pandas/issues/22022
+                result = result.fillna(1)
+
+            self.assert_series_equal(result, expected)
+        else:
+            with pytest.raises(exc):
+                op(s, other)
+
+    def _check_divmod_op(self, s, op, other, exc=None):
+        super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)
+
+    @pytest.mark.skip(reason="intNA does not error on ops")
+    def test_error(self, data, all_arithmetic_operators):
+        # other specific errors tested in the integer array specific tests
+        pass
+
+
+class TestComparisonOps(base.BaseComparisonOpsTests):
+
+    def check_opname(self, s, op_name, other, exc=None):
+        super(TestComparisonOps, self).check_opname(s, op_name,
+                                                    other, exc=None)
+
+    def _compare_other(self, s, data, op_name, other):
+        self.check_opname(s, op_name, other)
+
+
+class TestInterface(base.BaseInterfaceTests):
+    pass
+
+
+class TestConstructors(base.BaseConstructorsTests):
+    pass
+
+
+class TestReshaping(base.BaseReshapingTests):
+    pass
+
+    # for test_concat_mixed_dtypes test
+    # concat of an Integer and Int coerces to object dtype
+    # TODO(jreback) once integrated this would be a result of Integer
+
+
+class TestGetitem(base.BaseGetitemTests):
+    pass
+
+
+class TestMissing(base.BaseMissingTests):
+    pass
+
+
+class TestMethods(base.BaseMethodsTests):
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_value_counts(self, all_data, dropna):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(
+            dropna=dropna).sort_index()
+        expected.index = expected.index.astype(all_data.dtype)
+
+        self.assert_series_equal(result, expected)
+
+
+class TestCasting(base.BaseCastingTests):
+    pass
+
+
+class TestGroupby(base.BaseGroupbyTests):
+
+    @pytest.mark.xfail(reason="groupby not working", strict=True)
+    def test_groupby_extension_no_sort(self, data_for_grouping):
+        super(TestGroupby, self).test_groupby_extension_no_sort(
+            data_for_grouping)
+
+    @pytest.mark.parametrize('as_index', [
+        pytest.param(True,
+                     marks=pytest.mark.xfail(reason="groupby not working",
+                                             strict=True)),
+        False
+    ])
+    def 
test_groupby_extension_agg(self, as_index, data_for_grouping): + super(TestGroupby, self).test_groupby_extension_agg( + as_index, data_for_grouping) diff --git a/pandas/tests/extension/interval/test_interval.py b/pandas/tests/extension/test_interval.py similarity index 54% rename from pandas/tests/extension/interval/test_interval.py rename to pandas/tests/extension/test_interval.py index a10a56ddfdfac..625619a90ed4c 100644 --- a/pandas/tests/extension/interval/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -1,7 +1,22 @@ +""" +This file contains a minimal set of tests for compliance with the extension +array interface test suite, and should contain no other tests. +The test suite for the full functionality of the array is located in +`pandas/tests/arrays/`. + +The tests in this file are inherited from the BaseExtensionTests, and only +minimal tweaks should be applied to get the tests passing (by overwriting a +parent method). + +Additional tests should either be added to one of the BaseExtensionTests +classes (if they are relevant for the extension interface for all dtypes), or +be added to the array-specific tests in `pandas/tests/arrays/`. + +""" import pytest import numpy as np -from pandas import Index, Interval, IntervalIndex, date_range, timedelta_range +from pandas import Interval from pandas.core.arrays import IntervalArray from pandas.core.dtypes.dtypes import IntervalDtype from pandas.tests.extension import base @@ -15,22 +30,6 @@ def make_data(): return [Interval(l, r) for l, r in zip(left, right)] -@pytest.fixture(params=[ - (Index([0, 2, 4]), Index([1, 3, 5])), - (Index([0., 1., 2.]), Index([1., 2., 3.])), - (timedelta_range('0 days', periods=3), - timedelta_range('1 day', periods=3)), - (date_range('20170101', periods=3), date_range('20170102', periods=3)), - (date_range('20170101', periods=3, tz='US/Eastern'), - date_range('20170102', periods=3, tz='US/Eastern'))], - ids=lambda x: str(x[0].dtype)) -def left_right_dtypes(request): - """ - Fixture for building an IntervalArray from various dtypes - """ - return request.param - - @pytest.fixture def dtype(): return IntervalDtype() @@ -111,30 +110,6 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests): class TestMethods(BaseInterval, base.BaseMethodsTests): - @pytest.mark.parametrize('repeats', [0, 1, 5]) - def test_repeat(self, left_right_dtypes, repeats): - left, right = left_right_dtypes - result = IntervalArray.from_arrays(left, right).repeat(repeats) - expected = IntervalArray.from_arrays( - left.repeat(repeats), right.repeat(repeats)) - tm.assert_extension_array_equal(result, expected) - - @pytest.mark.parametrize('bad_repeats, msg', [ - (-1, 'negative dimensions are not allowed'), - ('foo', r'invalid literal for (int|long)\(\) with base 10')]) - def test_repeat_errors(self, bad_repeats, msg): - array = IntervalArray.from_breaks(range(4)) - with tm.assert_raises_regex(ValueError, msg): - array.repeat(bad_repeats) - - @pytest.mark.parametrize('new_closed', [ - 'left', 'right', 'both', 'neither']) - def test_set_closed(self, closed, new_closed): - # GH 21670 - array = IntervalArray.from_breaks(range(10), closed=closed) - result = array.set_closed(new_closed) - expected = IntervalArray.from_breaks(range(10), closed=new_closed) - tm.assert_extension_array_equal(result, expected) @pytest.mark.skip(reason='addition is not defined for intervals') def test_combine_add(self, data_repeated): @@ -173,21 +148,4 @@ class TestReshaping(BaseInterval, base.BaseReshapingTests): class TestSetitem(BaseInterval, 
base.BaseSetitemTests):
-
-    def test_set_na(self, left_right_dtypes):
-        left, right = left_right_dtypes
-        result = IntervalArray.from_arrays(left, right)
-        result[0] = np.nan
-
-        expected_left = Index([left._na_value] + list(left[1:]))
-        expected_right = Index([right._na_value] + list(right[1:]))
-        expected = IntervalArray.from_arrays(expected_left, expected_right)
-
-        self.assert_extension_array_equal(result, expected)
-
-
-def test_repr_matches():
-    idx = IntervalIndex.from_breaks([1, 2, 3])
-    a = repr(idx)
-    b = repr(idx.values)
-    assert a.replace("Index", "Array") == b
+    pass

From 6c7c97597279769864903c33728e1d0a75c3560e Mon Sep 17 00:00:00 2001
From: Rhys Parry
Date: Thu, 6 Sep 2018 12:42:42 -0700
Subject: [PATCH 17/86] TST: Fix skipping test due to lack of connectivity
 (#22598)

`.format()` was expecting keyword arguments. Updated to match other skips
nearby.
---
 pandas/util/testing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index aee7dba450a30..01fafd7219382 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -2394,7 +2394,7 @@ def wrapper(*args, **kwargs):
                 raise
             else:
                 skip("Skipping test due to lack of connectivity"
-                     " and error {error}".format(e))
+                     " and error {error}".format(error=e))
     return wrapper

From 2d21d9ba68a1ab6040fc7ca713c9da80808b655a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke
Date: Fri, 7 Sep 2018 05:08:13 -0700
Subject: [PATCH 18/86] API: Add CalendarDay ('CD') offset (#22288)

---
 doc/source/timeseries.rst                     |  26 +++-
 doc/source/whatsnew/v0.24.0.txt               |  40 +++++++
 pandas/core/arrays/datetimes.py               | 112 +++++++++---------
 pandas/core/indexes/datetimes.py              |  19 +--
 pandas/core/indexes/interval.py               |   2 +-
 pandas/core/indexes/period.py                 |   2 +-
 pandas/core/indexes/timedeltas.py             |   2 +-
 pandas/tests/indexes/datetimes/test_astype.py |  86 ++------------
 .../indexes/datetimes/test_date_range.py      |  55 +++++----
 .../tests/indexes/datetimes/test_timezones.py |  36 +++---
 .../timedeltas/test_timedelta_range.py        |   4 +
 pandas/tests/series/test_timezones.py         |   2 +-
 pandas/tests/test_resample.py                 |   8 +-
 pandas/tests/tseries/offsets/test_offsets.py  |  80 ++++++++++++-
 pandas/tests/tseries/offsets/test_ticks.py    |  10 +-
 pandas/tseries/offsets.py                     |  54 ++++++++-
 16 files changed, 338 insertions(+), 200 deletions(-)

diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index f5d1007dfbbbb..5dfac98d069e7 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -369,7 +369,7 @@ In practice this becomes very cumbersome because we often need a very long
 index with a large number of timestamps. If we need timestamps on a regular
 frequency, we can use the :func:`date_range` and :func:`bdate_range` functions
 to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a
-**calendar day** while the default for ``bdate_range`` is a **business day**:
+**day** while the default for ``bdate_range`` is a **business day**:
 
 .. ipython:: python
 
@@ -886,6 +886,27 @@ normalized after the function is applied.
 
     hour.apply(pd.Timestamp('2014-01-01 23:00'))
 
+.. _timeseries.dayvscalendarday:
+
+Day vs. CalendarDay
+~~~~~~~~~~~~~~~~~~~
+
+:class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time
+arithmetic and is an alias for 24 :class:`Hour`. This offset is the default
+argument to many pandas time-related functions like :func:`date_range` and :func:`timedelta_range`.
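+
+For example, a fixed-frequency range built with ``'D'`` steps in exact
+24-hour increments, even when it crosses a daylight saving time transition
+(contrast this with ``'CD'``, introduced next):
+
+.. ipython:: python
+
+    pd.date_range('2016-10-29', periods=3, freq='D', tz='Europe/Helsinki')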
+
+:class:`CalendarDay` (``'CD'``) is a relativedelta-like offset that respects
+calendar time arithmetic. :class:`CalendarDay` is useful for preserving calendar day
+semantics with datetimes that have daylight saving time transitions, i.e.
+:class:`CalendarDay` will preserve the hour before the daylight saving time transition.
+
+.. ipython:: python
+
+    ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
+    ts + pd.offsets.Day(1)
+    ts + pd.offsets.CalendarDay(1)
+
+
 Parametric Offsets
 ~~~~~~~~~~~~~~~~~~
 
@@ -1176,7 +1197,8 @@ frequencies. We will refer to these aliases as *offset aliases*.
 
     "B", "business day frequency"
     "C", "custom business day frequency"
-    "D", "calendar day frequency"
+    "D", "day frequency"
+    "CD", "calendar day frequency"
     "W", "weekly frequency"
     "M", "month end frequency"
     "SM", "semi-month end frequency (15th and end of month)"
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 1979bde796452..4df951ca2c3aa 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -285,6 +285,46 @@ that the dates have been converted to UTC
 
 .. ipython:: python
 
    pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)
 
+.. _whatsnew_0240.api_breaking.calendarday:
+
+CalendarDay Offset
+^^^^^^^^^^^^^^^^^^
+
+:class:`Day` and associated frequency alias ``'D'`` were documented to represent
+a calendar day; however, arithmetic and operations with :class:`Day` sometimes
+respected absolute time instead (i.e. ``Day(n)`` acted identically to ``Timedelta(days=n)``).
+
+*Previous Behavior*:
+
+.. code-block:: ipython
+
+
+   In [2]: ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
+
+   # Respects calendar arithmetic
+   In [3]: pd.date_range(start=ts, freq='D', periods=3)
+   Out[3]:
+   DatetimeIndex(['2016-10-30 00:00:00+03:00', '2016-10-31 00:00:00+02:00',
+                  '2016-11-01 00:00:00+02:00'],
+                 dtype='datetime64[ns, Europe/Helsinki]', freq='D')
+
+   # Respects absolute arithmetic
+   In [4]: ts + pd.tseries.frequencies.to_offset('D')
+   Out[4]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')
+
+:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available
+and respect calendar day arithmetic, while :class:`Day` and frequency alias ``'D'``
+now respect absolute time (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`).
+See the :ref:`documentation here <timeseries.dayvscalendarday>` for more information.
+
+Addition with :class:`CalendarDay` across a daylight saving time transition:
+
+.. ipython:: python
+
+   ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
+   ts + pd.offsets.Day(1)
+   ts + pd.offsets.CalendarDay(1)
+
 .. 
_whatsnew_0240.api_breaking.period_end_time:
 
 Time values in ``dt.end_time`` and ``to_timestamp(how='end')``
 
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 484eb430c82b1..466cfb296094c 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -32,7 +32,7 @@
 from pandas.core import ops
 
 from pandas.tseries.frequencies import to_offset
-from pandas.tseries.offsets import Tick, Day, generate_range
+from pandas.tseries.offsets import Tick, generate_range
 
 from pandas.core.arrays import datetimelike as dtl
 
@@ -239,56 +239,33 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
         start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                              normalize)
 
-        tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)
-
-        if hasattr(freq, 'delta') and freq != Day():
-            # sub-Day Tick
-            if inferred_tz is None and tz is not None:
-                # naive dates
-                if start is not None and start.tz is None:
-                    start = start.tz_localize(tz, ambiguous=False)
-
-                if end is not None and end.tz is None:
-                    end = end.tz_localize(tz, ambiguous=False)
-
-            if start and end:
-                if start.tz is None and end.tz is not None:
-                    start = start.tz_localize(end.tz, ambiguous=False)
-
-                if end.tz is None and start.tz is not None:
-                    end = end.tz_localize(start.tz, ambiguous=False)
-
+        tz, _ = _infer_tz_from_endpoints(start, end, tz)
+
+        if tz is not None:
+            # Localize the start and end arguments
+            start = _maybe_localize_point(
+                start, getattr(start, 'tz', None), start, freq, tz
+            )
+            end = _maybe_localize_point(
+                end, getattr(end, 'tz', None), end, freq, tz
+            )
+        if start and end:
+            # Make sure start and end have the same tz
+            start = _maybe_localize_point(
+                start, start.tz, end.tz, freq, tz
+            )
+            end = _maybe_localize_point(
+                end, end.tz, start.tz, freq, tz
+            )
+        if freq is not None:
             if cls._use_cached_range(freq, _normalized, start, end):
+                # Currently always False; never hit
+                # Should be reimplemented as part of GH 17914
                 index = cls._cached_range(start, end, periods=periods,
                                           freq=freq)
             else:
                 index = _generate_regular_range(cls, start, end,
                                                 periods, freq)
 
-        else:
-
-            if tz is not None:
-                # naive dates
-                if start is not None and start.tz is not None:
-                    start = start.replace(tzinfo=None)
-
-                if end is not None and end.tz is not None:
-                    end = end.replace(tzinfo=None)
-
-            if start and end:
-                if start.tz is None and end.tz is not None:
-                    end = end.replace(tzinfo=None)
-
-                if end.tz is None and start.tz is not None:
-                    start = start.replace(tzinfo=None)
-
-            if freq is not None:
-                if cls._use_cached_range(freq, _normalized, start, end):
-                    index = cls._cached_range(start, end, periods=periods,
-                                              freq=freq)
-                else:
-                    index = _generate_regular_range(cls, start, end,
-                                                    periods, freq)
-
             if tz is not None and getattr(index, 'tz', None) is None:
                 arr = conversion.tz_localize_to_utc(
                     ensure_int64(index.values),
@@ -302,12 +279,12 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
                     start = start.tz_localize(tz).asm8
                 if end is not None:
                     end = end.tz_localize(tz).asm8
-            else:
-                # Create a linearly spaced date_range in local time
-                start = start.tz_localize(tz)
-                end = end.tz_localize(tz)
-                arr = np.linspace(start.value, end.value, periods)
-                index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)
+        else:
+            # Create a linearly spaced date_range in local time
+            arr = np.linspace(start.value, end.value, periods)
+            index = cls._simple_new(
+                arr.astype('M8[ns]', copy=False), freq=None, tz=tz
+            )
 
         if not left_closed and len(index) and index[0] == start:
             index = 
index[1:] @@ -1256,10 +1233,10 @@ def _generate_regular_range(cls, start, end, periods, freq): data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: tz = None + # start and end should have the same timezone by this point if isinstance(start, Timestamp): tz = start.tz - - if isinstance(end, Timestamp): + elif isinstance(end, Timestamp): tz = end.tz xdr = generate_range(start=start, end=end, @@ -1330,3 +1307,32 @@ def _maybe_normalize_endpoints(start, end, normalize): _normalized = _normalized and end.time() == _midnight return start, end, _normalized + + +def _maybe_localize_point(ts, is_none, is_not_none, freq, tz): + """ + Localize a start or end Timestamp to the timezone of the corresponding + start or end Timestamp + + Parameters + ---------- + ts : start or end Timestamp to potentially localize + is_none : argument that should be None + is_not_none : argument that should not be None + freq : Tick, DateOffset, or None + tz : str, timezone object or None + + Returns + ------- + ts : Timestamp + """ + # Make sure start and end are timezone localized if: + # 1) freq = a Timedelta-like frequency (Tick) + # 2) freq = None i.e. generating a linspaced range + if isinstance(freq, Tick) or freq is None: + localize_args = {'tz': tz, 'ambiguous': False} + else: + localize_args = {'tz': None} + if is_none is None and is_not_none is not None: + ts = ts.tz_localize(**localize_args) + return ts diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..629660c899a3f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -385,7 +385,10 @@ def _generate_range(cls, start, end, periods, name=None, freq=None, @classmethod def _use_cached_range(cls, freq, _normalized, start, end): - return _use_cached_range(freq, _normalized, start, end) + # Note: This always returns False + return (freq._should_cache() and + not (freq._normalize_cache and not _normalized) and + _naive_in_cache_range(start, end)) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1580,7 +1583,7 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None, Right bound for generating dates. periods : integer, optional Number of periods to generate. - freq : str or DateOffset, default 'D' (calendar daily) + freq : str or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H'. See :ref:`here ` for a list of frequency aliases. @@ -1861,17 +1864,7 @@ def _naive_in_cache_range(start, end): else: if start.tzinfo is not None or end.tzinfo is not None: return False - return _in_range(start, end, _CACHE_START, _CACHE_END) - - -def _in_range(start, end, rng_start, rng_end): - return start > rng_start and end < rng_end - - -def _use_cached_range(freq, _normalized, start, end): - return (freq._should_cache() and - not (freq._normalize_cache and not _normalized) and - _naive_in_cache_range(start, end)) + return start > _CACHE_START and end < _CACHE_END def _time_to_micros(time): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 364eea8fb8a3a..4b125580bd7e0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1052,7 +1052,7 @@ def interval_range(start=None, end=None, periods=None, freq=None, freq : numeric, string, or DateOffset, default None The length of each interval. Must be consistent with the type of start and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 - for numeric and 'D' (calendar daily) for datetime-like. 
+ for numeric and 'D' for datetime-like. name : string, default None Name of the resulting IntervalIndex closed : {'left', 'right', 'both', 'neither'}, default 'right' diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 32aa89010b206..3a68c6c26a974 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -840,7 +840,7 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Right bound for generating periods periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency alias name : string, default None Name of the resulting PeriodIndex diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9f14d4cfd5863..063b578e512de 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -737,7 +737,7 @@ def timedelta_range(start=None, end=None, periods=None, freq=None, Right bound for generating timedeltas periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' (calendar daily) + freq : string or DateOffset, default 'D' Frequency strings can have multiples, e.g. '5H' name : string, default None Name of the resulting TimedeltaIndex diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 64b8f48f6a4e1..78b669de95598 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -259,88 +259,18 @@ def test_to_period_microsecond(self): assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') - def test_to_period_tz_pytz(self): - from pytz import utc as UTC - - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_explicit_pytz(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - + @pytest.mark.parametrize('tz', [ + 'US/Eastern', pytz.utc, tzlocal(), 'dateutil/US/Eastern', + dateutil.tz.tzutc()]) + def test_to_period_tz(self, tz): + ts = date_range('1/1/2000', '2/1/2000', tz=tz) result = ts.to_period()[0] expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - def test_to_period_tz_dateutil(self): - xp = date_range('1/1/2000', '4/1/2000').to_period() - ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - - result = 
ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) - - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - - result = ts.to_period()[0] - expected = ts[0].to_period() - - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + expected = date_range('1/1/2000', '2/1/2000').to_period() + result = ts.to_period() + tm.assert_index_equal(result, expected) def test_to_period_nofreq(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 22fb8b2942bea..e0caf671fc390 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -336,28 +336,28 @@ def test_range_tz_pytz(self): assert dr[0] == start assert dr[2] == end - def test_range_tz_dst_straddle_pytz(self): - tz = timezone('US/Eastern') - dates = [(tz.localize(datetime(2014, 3, 6)), - tz.localize(datetime(2014, 3, 12))), - (tz.localize(datetime(2013, 11, 1)), - tz.localize(datetime(2013, 11, 6)))] - for (start, end) in dates: - dr = date_range(start, end, freq='D') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start, end, freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) - - dr = date_range(start.replace(tzinfo=None), end.replace( - tzinfo=None), freq='D', tz='US/Eastern') - assert dr[0] == start - assert dr[-1] == end - assert np.all(dr.hour == 0) + @pytest.mark.parametrize('start, end', [ + [Timestamp(datetime(2014, 3, 6), tz='US/Eastern'), + Timestamp(datetime(2014, 3, 12), tz='US/Eastern')], + [Timestamp(datetime(2013, 11, 1), tz='US/Eastern'), + Timestamp(datetime(2013, 11, 6), tz='US/Eastern')] + ]) + def test_range_tz_dst_straddle_pytz(self, start, end): + dr = date_range(start, end, freq='CD') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start, end, freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) + + dr = date_range(start.replace(tzinfo=None), end.replace( + tzinfo=None), freq='CD', tz='US/Eastern') + assert dr[0] == start + assert dr[-1] == end + assert np.all(dr.hour == 0) def test_range_tz_dateutil(self): # see gh-2906 @@ -578,6 +578,14 @@ def test_mismatching_tz_raises_err(self, start, end): with pytest.raises(TypeError): pd.DatetimeIndex(start, end, freq=BDay()) + def test_CalendarDay_range_with_dst_crossing(self): + # GH 20596 + result = date_range('2018-10-23', '2018-11-06', freq='7CD', + tz='Europe/Paris') + expected = date_range('2018-10-23', '2018-11-06', + freq=pd.DateOffset(days=7), tz='Europe/Paris') + tm.assert_index_equal(result, expected) + class TestBusinessDateRange(object): @@ -772,7 +780,8 @@ def test_cdaterange_weekmask_and_holidays(self): holidays=['2013-05-01']) @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping - if freq.startswith('C')]) + if freq.startswith('C') + and freq != 'CD']) # CalendarDay def test_all_custom_freq(self, freq): # should not raise bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', diff --git a/pandas/tests/indexes/datetimes/test_timezones.py 
b/pandas/tests/indexes/datetimes/test_timezones.py index 95531b2d7a7ae..dc01f7ccbd496 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -429,24 +429,24 @@ def test_dti_tz_localize_utc_conversion(self, tz): with pytest.raises(pytz.NonExistentTimeError): rng.tz_localize(tz) - def test_dti_tz_localize_roundtrip(self, tz_aware_fixture): + @pytest.mark.parametrize('idx', [ + date_range(start='2014-01-01', end='2014-12-31', freq='M'), + date_range(start='2014-01-01', end='2014-12-31', freq='CD'), + date_range(start='2014-01-01', end='2014-03-01', freq='H'), + date_range(start='2014-08-01', end='2014-10-31', freq='T') + ]) + def test_dti_tz_localize_roundtrip(self, tz_aware_fixture, idx): tz = tz_aware_fixture + localized = idx.tz_localize(tz) + expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, + tz=tz) + tm.assert_index_equal(localized, expected) + with pytest.raises(TypeError): + localized.tz_localize(tz) - idx1 = date_range(start='2014-01-01', end='2014-12-31', freq='M') - idx2 = date_range(start='2014-01-01', end='2014-12-31', freq='D') - idx3 = date_range(start='2014-01-01', end='2014-03-01', freq='H') - idx4 = date_range(start='2014-08-01', end='2014-10-31', freq='T') - for idx in [idx1, idx2, idx3, idx4]: - localized = idx.tz_localize(tz) - expected = date_range(start=idx[0], end=idx[-1], freq=idx.freq, - tz=tz) - tm.assert_index_equal(localized, expected) - with pytest.raises(TypeError): - localized.tz_localize(tz) - - reset = localized.tz_localize(None) - tm.assert_index_equal(reset, idx) - assert reset.tzinfo is None + reset = localized.tz_localize(None) + tm.assert_index_equal(reset, idx) + assert reset.tzinfo is None def test_dti_tz_localize_naive(self): rng = date_range('1/1/2011', periods=100, freq='H') @@ -1033,7 +1033,9 @@ def test_date_range_span_dst_transition(self, tzstr): assert (dr.hour == 0).all() dr = date_range('2012-11-02', periods=10, tz=tzstr) - assert (dr.hour == 0).all() + result = dr.hour + expected = Index([0, 0, 0, 23, 23, 23, 23, 23, 23, 23]) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize('tzstr', ['US/Eastern', 'dateutil/US/Eastern']) def test_date_range_timezone_str_argument(self, tzstr): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 87dff74cd04d7..1d10e63363cc8 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -48,6 +48,10 @@ def test_timedelta_range(self): result = df.loc['0s':, :] tm.assert_frame_equal(expected, result) + with pytest.raises(ValueError): + # GH 22274: CalendarDay is a relative time measurement + timedelta_range('1day', freq='CD', periods=2) + @pytest.mark.parametrize('periods, freq', [ (3, '2D'), (5, 'D'), (6, '19H12T'), (7, '16H'), (9, '12H')]) def test_linspace_behavior(self, periods, freq): diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index d59e7fd445f17..472b2c5644fa5 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -302,7 +302,7 @@ def test_getitem_pydatetime_tz(self, tzstr): def test_series_truncate_datetimeindex_tz(self): # GH 9243 - idx = date_range('4/1/2005', '4/30/2005', freq='D', tz='US/Pacific') + idx = date_range('4/1/2005', '4/30/2005', freq='CD', tz='US/Pacific') s = Series(range(len(idx)), index=idx) result = s.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4)) 
expected = Series([1, 2, 3], index=idx[1:4]) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 530a683c02f9d..669fa9742a705 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2038,7 +2038,7 @@ def test_resample_dst_anchor(self): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') df = DataFrame([5], index=dti) - assert_frame_equal(df.resample(rule='D').sum(), + assert_frame_equal(df.resample(rule='CD').sum(), DataFrame([5], index=df.index.normalize())) df.resample(rule='MS').sum() assert_frame_equal( @@ -2092,14 +2092,14 @@ def test_resample_dst_anchor(self): df_daily = df['10/26/2013':'10/29/2013'] assert_frame_equal( - df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"}) + df_daily.resample("CD").agg({"a": "min", "b": "max", "c": "count"}) [["a", "b", "c"]], DataFrame({"a": [1248, 1296, 1346, 1394], "b": [1295, 1345, 1393, 1441], "c": [48, 50, 48, 48]}, index=date_range('10/26/2013', '10/29/2013', - freq='D', tz='Europe/Paris')), - 'D Frequency') + freq='CD', tz='Europe/Paris')), + 'CD Frequency') def test_downsample_across_dst(self): # GH 8531 diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index e95f1ba11ad5c..f9f5fc2484bda 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -2,6 +2,7 @@ from datetime import date, datetime, timedelta import pytest +import pytz from pandas.compat import range from pandas import compat @@ -16,6 +17,7 @@ from pandas.tseries.frequencies import _offset_map, get_offset from pandas.core.indexes.datetimes import ( _to_m8, DatetimeIndex, _daterange_cache) +from pandas.core.indexes.timedeltas import TimedeltaIndex import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import CacheableOffset from pandas.tseries.offsets import (BDay, CDay, BQuarterEnd, BMonthEnd, @@ -28,11 +30,12 @@ YearEnd, Day, QuarterEnd, BusinessMonthEnd, FY5253, Nano, Easter, FY5253Quarter, - LastWeekOfMonth, Tick) + LastWeekOfMonth, Tick, CalendarDay) import pandas.tseries.offsets as offsets from pandas.io.pickle import read_pickle from pandas._libs.tslibs import timezones from pandas._libs.tslib import NaT, Timestamp +from pandas._libs.tslibs.timedeltas import Timedelta import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.tseries.holiday import USFederalHolidayCalendar @@ -192,6 +195,7 @@ class TestCommon(Base): # are applied to 2011/01/01 09:00 (Saturday) # used for .apply and .rollforward expecteds = {'Day': Timestamp('2011-01-02 09:00:00'), + 'CalendarDay': Timestamp('2011-01-02 09:00:00'), 'DateOffset': Timestamp('2011-01-02 09:00:00'), 'BusinessDay': Timestamp('2011-01-03 09:00:00'), 'CustomBusinessDay': Timestamp('2011-01-03 09:00:00'), @@ -360,7 +364,7 @@ def test_rollforward(self, offset_types): # result will not be changed if the target is on the offset no_changes = ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', - 'Nano', 'DateOffset'] + 'Nano', 'DateOffset', 'CalendarDay'] for n in no_changes: expecteds[n] = Timestamp('2011/01/01 09:00') @@ -373,6 +377,7 @@ def test_rollforward(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2011-01-02 00:00:00'), + 'CalendarDay': Timestamp('2011-01-02 00:00:00'), 'DateOffset': Timestamp('2011-01-02 00:00:00'), 'MonthBegin': Timestamp('2011-02-01 00:00:00'), 
'SemiMonthBegin': Timestamp('2011-01-15 00:00:00'), @@ -425,7 +430,7 @@ def test_rollback(self, offset_types): # result will not be changed if the target is on the offset for n in ['Day', 'MonthBegin', 'SemiMonthBegin', 'YearBegin', 'Week', 'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano', - 'DateOffset']: + 'DateOffset', 'CalendarDay']: expecteds[n] = Timestamp('2011/01/01 09:00') # but be changed when normalize=True @@ -434,6 +439,7 @@ def test_rollback(self, offset_types): norm_expected[k] = Timestamp(norm_expected[k].date()) normalized = {'Day': Timestamp('2010-12-31 00:00:00'), + 'CalendarDay': Timestamp('2010-12-31 00:00:00'), 'DateOffset': Timestamp('2010-12-31 00:00:00'), 'MonthBegin': Timestamp('2010-12-01 00:00:00'), 'SemiMonthBegin': Timestamp('2010-12-15 00:00:00'), @@ -3174,3 +3180,71 @@ def test_last_week_of_month_on_offset(): slow = (ts + offset) - offset == ts fast = offset.onOffset(ts) assert fast == slow + + +class TestCalendarDay(object): + + def test_add_across_dst_scalar(self): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + result = ts + CalendarDay(1) + assert result == expected + + result = result - CalendarDay(1) + assert result == ts + + @pytest.mark.parametrize('box', [DatetimeIndex, Series]) + def test_add_across_dst_array(self, box): + # GH 22274 + ts = Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki') + expected = Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki') + arr = box([ts]) + expected = box([expected]) + result = arr + CalendarDay(1) + tm.assert_equal(result, expected) + + result = result - CalendarDay(1) + tm.assert_equal(arr, result) + + @pytest.mark.parametrize('arg', [ + Timestamp("2018-11-03 01:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2018-11-03 01:00:00", tz='US/Pacific')]) + ]) + def test_raises_AmbiguousTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.AmbiguousTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg', [ + Timestamp("2019-03-09 02:00:00", tz='US/Pacific'), + DatetimeIndex([Timestamp("2019-03-09 02:00:00", tz='US/Pacific')]) + ]) + def test_raises_NonExistentTimeError(self, arg): + # GH 22274 + with pytest.raises(pytz.NonExistentTimeError): + arg + CalendarDay(1) + + @pytest.mark.parametrize('arg, exp', [ + [1, 2], + [-1, 0], + [-5, -4] + ]) + def test_arithmetic(self, arg, exp): + # GH 22274 + result = CalendarDay(1) + CalendarDay(arg) + expected = CalendarDay(exp) + assert result == expected + + @pytest.mark.parametrize('arg', [ + timedelta(1), + Day(1), + Timedelta(1), + TimedeltaIndex([timedelta(1)]) + ]) + def test_invalid_arithmetic(self, arg): + # GH 22274 + # CalendarDay (relative time) cannot be added to Timedelta-like objects + # (absolute time) + with pytest.raises(TypeError): + CalendarDay(1) + arg diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 914d61a18ee11..369c0971f1e9a 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -10,7 +10,8 @@ from pandas import Timedelta, Timestamp from pandas.tseries import offsets -from pandas.tseries.offsets import Hour, Minute, Second, Milli, Micro, Nano +from pandas.tseries.offsets import (Day, Hour, Minute, Second, Milli, Micro, + Nano) from .common import assert_offset_equal @@ -211,6 +212,13 @@ def test_Nanosecond(): assert Micro(5) + Nano(1) == Nano(5001) +def test_Day_equals_24_Hours(): + ts = 
Timestamp('2016-10-30 00:00:00+0300', tz='Europe/Helsinki')
+    result = ts + Day(1)
+    expected = ts + Hour(24)
+    assert result == expected
+
+
 @pytest.mark.parametrize('kls, expected',
                          [(Hour, Timedelta(hours=5)),
                           (Minute, Timedelta(hours=2, minutes=3)),
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index c74b7454a67e3..d4a8211c17b87 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -41,7 +41,7 @@
            'LastWeekOfMonth', 'FY5253Quarter', 'FY5253',
            'Week', 'WeekOfMonth', 'Easter',
            'Hour', 'Minute', 'Second', 'Milli', 'Micro', 'Nano',
-           'DateOffset']
+           'DateOffset', 'CalendarDay']
 
 # convert to/from datetime/timestamp to allow invalid Timestamp ranges to
 # pass thru
@@ -2123,6 +2123,54 @@ def onOffset(self, dt):
             return False
         return date(dt.year, dt.month, dt.day) == easter(dt.year)
 
+
+class CalendarDay(SingleConstructorOffset):
+    """
+    Calendar day offset. Respects calendar arithmetic as opposed to Day which
+    respects absolute time.
+    """
+    _adjust_dst = True
+    _inc = Timedelta(days=1)
+    _prefix = 'CD'
+    _attributes = frozenset(['n', 'normalize'])
+
+    def __init__(self, n=1, normalize=False):
+        BaseOffset.__init__(self, n, normalize)
+
+    @apply_wraps
+    def apply(self, other):
+        """
+        Apply scalar arithmetic with the CalendarDay offset. Incoming datetime
+        objects can be tz-aware or naive.
+        """
+        if type(other) == type(self):
+            # Add other CalendarDays
+            return type(self)(self.n + other.n, normalize=self.normalize)
+        tzinfo = getattr(other, 'tzinfo', None)
+        if tzinfo is not None:
+            other = other.replace(tzinfo=None)
+
+        other = other + self.n * self._inc
+
+        if tzinfo is not None:
+            # This can raise an AmbiguousTimeError or NonExistentTimeError
+            other = conversion.localize_pydatetime(other, tzinfo)
+
+        try:
+            return as_timestamp(other)
+        except TypeError:
+            raise TypeError("Cannot perform arithmetic between {other} and "
+                            "CalendarDay".format(other=type(other)))
+
+    @apply_index_wraps
+    def apply_index(self, i):
+        """
+        Apply the CalendarDay offset to a DatetimeIndex. 
Incoming DatetimeIndex
+        objects are assumed to be tz-naive.
+        """
+        return i + self.n * self._inc
+
+
 # ---------------------------------------------------------------------
 # Ticks
 
@@ -2310,7 +2358,8 @@ def generate_range(start=None, end=None, periods=None,
     ----------
     start : datetime (default None)
     end : datetime (default None)
-    periods : int, optional
+    periods : int, (default None)
+    offset : DateOffset, (default BDay())
 
     time_rule : (legacy) name of DateOffset object to be used, optional
         Corresponds with names expected by tseries.frequencies.get_offset
@@ -2406,4 +2455,5 @@ def generate_range(start=None, end=None, periods=None,
     WeekOfMonth,                     # 'WOM'
     FY5253,
     FY5253Quarter,
+    CalendarDay                      # 'CD'
 ]}

From 9b92446f1acb8ed6e0062e75830433086df02772 Mon Sep 17 00:00:00 2001
From: HyunTruth <1234hjlee@naver.com>
Date: Fri, 7 Sep 2018 21:47:56 +0900
Subject: [PATCH 19/86] CLN/DEPR: removed deprecated as_indexer arg from
 str.match() (#22626)

---
 doc/source/whatsnew/v0.24.0.txt |  2 +-
 pandas/core/strings.py          | 20 +++-----------------
 pandas/tests/test_strings.py    | 15 ---------------
 3 files changed, 4 insertions(+), 33 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 4df951ca2c3aa..8445a28a51a5d 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -568,7 +568,7 @@ Removal of prior version deprecations/changes
 - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
 - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
 - :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats``(:issue:`14645`)
--
+- Complete removal of the previously deprecated ``as_indexer`` keyword from ``str.match()`` (:issue:`22356`, :issue:`6581`)
 
 .. _whatsnew_0240.performance:
 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index ed1111ed3558a..08709d15c48bf 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -712,7 +712,7 @@ def rep(x, r):
     return result
 
 
-def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None):
+def str_match(arr, pat, case=True, flags=0, na=np.nan):
     """
     Determine if each string matches a regular expression.
 
@@ -725,8 +725,6 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan, as_indexer=None):
     flags : int, default 0 (no flags)
         re module flags, e.g. re.IGNORECASE
     na : default NaN, fill value for missing values.
-    as_indexer
-        .. deprecated:: 0.21.0
 
     Returns
     -------
@@ -744,17 +742,6 @@ def str_match(arr, pat, case=True, flags=0, na=np.nan):
 
     regex = re.compile(pat, flags=flags)
 
-    if (as_indexer is False) and (regex.groups > 0):
-        raise ValueError("as_indexer=False with a pattern with groups is no "
-                         "longer supported. Use '.str.extract(pat)' instead")
-    elif as_indexer is not None:
-        # Previously, this keyword was used for changing the default but
-        # deprecated behaviour. This keyword is now no longer needed. 
- warnings.warn("'as_indexer' keyword was specified but is ignored " - "(match now returns a boolean indexer by default), " - "and will be removed in a future version.", - FutureWarning, stacklevel=3) - dtype = bool f = lambda x: bool(regex.match(x)) @@ -2490,9 +2477,8 @@ def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): return self._wrap_result(result) @copy(str_match) - def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=None): - result = str_match(self._parent, pat, case=case, flags=flags, na=na, - as_indexer=as_indexer) + def match(self, pat, case=True, flags=0, na=np.nan): + result = str_match(self._parent, pat, case=case, flags=flags, na=na) return self._wrap_result(result) @copy(str_replace) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index ab508174fa4a9..25e634c21c5ef 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -947,21 +947,6 @@ def test_match(self): exp = Series([True, NA, False]) tm.assert_series_equal(result, exp) - # test passing as_indexer still works but is ignored - values = Series(['fooBAD__barBAD', NA, 'foo']) - exp = Series([True, NA, False]) - with tm.assert_produces_warning(FutureWarning): - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=True) - tm.assert_series_equal(result, exp) - with tm.assert_produces_warning(FutureWarning): - result = values.str.match('.*BAD[_]+.*BAD', as_indexer=False) - tm.assert_series_equal(result, exp) - with tm.assert_produces_warning(FutureWarning): - result = values.str.match('.*(BAD[_]+).*(BAD)', as_indexer=True) - tm.assert_series_equal(result, exp) - pytest.raises(ValueError, values.str.match, '.*(BAD[_]+).*(BAD)', - as_indexer=False) - # mixed mixed = Series(['aBAD_BAD', NA, 'BAD_b_BAD', True, datetime.today(), 'foo', None, 1, 2.]) From ec1f7eb5b4434f401cc994b52a4d3a0307f2d489 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 7 Sep 2018 19:27:24 -0700 Subject: [PATCH 20/86] BUG: NaN should have pct rank of NaN (#22600) Closes gh-22519. --- doc/source/whatsnew/v0.23.5.txt | 3 +++ pandas/_libs/groupby_helper.pxi.in | 7 ++++++- pandas/tests/groupby/test_rank.py | 19 ++++++++++++++++++- 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt index 2a1172c8050ad..8f4b1a13c2e9d 100644 --- a/doc/source/whatsnew/v0.23.5.txt +++ b/doc/source/whatsnew/v0.23.5.txt @@ -23,6 +23,9 @@ Fixed Regressions - Constructing a DataFrame with an index argument that wasn't already an instance of :class:`~pandas.core.Index` was broken in `4efb39f `_ (:issue:`22227`). +- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups + and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9 + `_ (:issue:`22519`) - - diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 0062a6c8d31ab..765381d89705d 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -584,7 +584,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out, if pct: for i in range(N): - out[i, 0] = out[i, 0] / grp_sizes[i, 0] + # We don't include NaN values in percentage + # rankings, so we assign them percentages of NaN. 
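+            # (A NaN value never compares equal to itself, so the first
+            # condition below is the standard missing-value check.)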
+ if out[i, 0] != out[i, 0] or out[i, 0] == NAN: + out[i, 0] = NAN + else: + out[i, 0] = out[i, 0] / grp_sizes[i, 0] {{endif}} {{endfor}} diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index f0dcf768e3607..f337af4d39e54 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -1,7 +1,7 @@ import pytest import numpy as np import pandas as pd -from pandas import DataFrame, concat +from pandas import DataFrame, Series, concat from pandas.util import testing as tm @@ -273,3 +273,20 @@ def test_rank_naoption_raises(ties_method, ascending, na_option, pct, vals): df.groupby('key').rank(method=ties_method, ascending=ascending, na_option=na_option, pct=pct) + + +def test_rank_empty_group(): + # see gh-22519 + column = "A" + df = DataFrame({ + "A": [0, 1, 0], + "B": [1., np.nan, 2.] + }) + + result = df.groupby(column).B.rank(pct=True) + expected = Series([0.5, np.nan, 1.0], name="B") + tm.assert_series_equal(result, expected) + + result = df.groupby(column).rank(pct=True) + expected = DataFrame({"B": [0.5, np.nan, 1.0]}) + tm.assert_frame_equal(result, expected) From 1bfe0c43d414010e4cd628a2878e6db231b09757 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Sat, 8 Sep 2018 03:32:37 +0100 Subject: [PATCH 21/86] Set hypothesis healthcheck (#22597) --- pandas/conftest.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/conftest.py b/pandas/conftest.py index a49bab31f0bc8..fdac045e67ffa 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -9,6 +9,11 @@ from pandas.compat import PY3 import pandas.util._test_decorators as td +import hypothesis +hypothesis.settings.suppress_health_check = (hypothesis.HealthCheck.too_slow,) +# HealthCheck.all() to disable all health checks +# https://hypothesis.readthedocs.io/en/latest/healthchecks.html + def pytest_addoption(parser): parser.addoption("--skip-slow", action="store_true", From 0ac130dfd5ef29b6e07c863ef8df6faceb263831 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:33:41 -0700 Subject: [PATCH 22/86] Implement delegate_names to allow decorating delegated attributes (#22599) --- pandas/core/accessor.py | 32 ++++++++++++++++++++ pandas/core/arrays/categorical.py | 20 ++++++------- pandas/core/indexes/accessors.py | 50 ++++++++++++------------------- pandas/core/indexes/category.py | 27 +++++++---------- 4 files changed, 72 insertions(+), 57 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 7a853d575aa69..eab529584d1fb 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -105,6 +105,38 @@ def f(self, *args, **kwargs): setattr(cls, name, f) +def delegate_names(delegate, accessors, typ, overwrite=False): + """ + Add delegated names to a class using a class decorator. This provides + an alternative usage to directly calling `_add_delegate_accessors` + below a class definition. + + Parameters + ---------- + delegate : the class to get methods/properties & doc-strings + acccessors : string list of accessors to add + typ : 'property' or 'method' + overwrite : boolean, default False + overwrite the method/property in the target class if it exists + + Returns + ------- + decorator + + Examples + -------- + @delegate_names(Categorical, ["categories", "ordered"], "property") + class CategoricalAccessor(PandasDelegate): + [...] 
+ """ + def add_delegate_accessors(cls): + cls._add_delegate_accessors(delegate, accessors, typ, + overwrite=overwrite) + return cls + + return add_delegate_accessors + + # Ported with modifications from xarray # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9b7320bf143c2..5410412d5f45b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -34,7 +34,7 @@ is_dict_like) from pandas.core.algorithms import factorize, take_1d, unique1d, take -from pandas.core.accessor import PandasDelegate +from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.base import (PandasObject, NoNewAttributesMixin, _shared_docs) import pandas.core.common as com @@ -2365,6 +2365,15 @@ def isin(self, values): # The Series.cat accessor +@delegate_names(delegate=Categorical, + accessors=["categories", "ordered"], + typ="property") +@delegate_names(delegate=Categorical, + accessors=["rename_categories", "reorder_categories", + "add_categories", "remove_categories", + "remove_unused_categories", "set_categories", + "as_ordered", "as_unordered"], + typ="method") class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. @@ -2424,15 +2433,6 @@ def _delegate_method(self, name, *args, **kwargs): return Series(res, index=self.index, name=self.name) -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, - accessors=["categories", - "ordered"], - typ='property') -CategoricalAccessor._add_delegate_accessors(delegate=Categorical, accessors=[ - "rename_categories", "reorder_categories", "add_categories", - "remove_categories", "remove_unused_categories", "set_categories", - "as_ordered", "as_unordered"], typ='method') - # utility routines diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 6ab8c4659c31e..a1868980faed3 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -12,7 +12,7 @@ is_timedelta64_dtype, is_categorical_dtype, is_list_like) -from pandas.core.accessor import PandasDelegate +from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex @@ -110,6 +110,12 @@ def _delegate_method(self, name, *args, **kwargs): return result +@delegate_names(delegate=DatetimeIndex, + accessors=DatetimeIndex._datetimelike_ops, + typ="property") +@delegate_names(delegate=DatetimeIndex, + accessors=DatetimeIndex._datetimelike_methods, + typ="method") class DatetimeProperties(Properties): """ Accessor object for datetimelike properties of the Series values. 
@@ -175,16 +181,12 @@ def freq(self): return self._get_values().inferred_freq -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=DatetimeIndex._datetimelike_ops, - typ='property') -DatetimeProperties._add_delegate_accessors( - delegate=DatetimeIndex, - accessors=DatetimeIndex._datetimelike_methods, - typ='method') - - +@delegate_names(delegate=TimedeltaIndex, + accessors=TimedeltaIndex._datetimelike_ops, + typ="property") +@delegate_names(delegate=TimedeltaIndex, + accessors=TimedeltaIndex._datetimelike_methods, + typ="method") class TimedeltaProperties(Properties): """ Accessor object for datetimelike properties of the Series values. @@ -268,16 +270,12 @@ def freq(self): return self._get_values().inferred_freq -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=TimedeltaIndex._datetimelike_ops, - typ='property') -TimedeltaProperties._add_delegate_accessors( - delegate=TimedeltaIndex, - accessors=TimedeltaIndex._datetimelike_methods, - typ='method') - - +@delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_ops, + typ="property") +@delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_methods, + typ="method") class PeriodProperties(Properties): """ Accessor object for datetimelike properties of the Series values. @@ -293,16 +291,6 @@ class PeriodProperties(Properties): """ -PeriodProperties._add_delegate_accessors( - delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_ops, - typ='property') -PeriodProperties._add_delegate_accessors( - delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_methods, - typ='method') - - class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): def __new__(cls, data): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index e3a21efe269ce..45703c220a4be 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -30,6 +30,17 @@ _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) +@accessor.delegate_names( + delegate=Categorical, + accessors=["rename_categories", + "reorder_categories", + "add_categories", + "remove_categories", + "remove_unused_categories", + "set_categories", + "as_ordered", "as_unordered", + "min", "max"], + typ='method', overwrite=True) class CategoricalIndex(Index, accessor.PandasDelegate): """ @@ -835,24 +846,8 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - @classmethod - def _add_accessors(cls): - """ add in Categorical accessor methods """ - - CategoricalIndex._add_delegate_accessors( - delegate=Categorical, accessors=["rename_categories", - "reorder_categories", - "add_categories", - "remove_categories", - "remove_unused_categories", - "set_categories", - "as_ordered", "as_unordered", - "min", "max"], - typ='method', overwrite=True) - CategoricalIndex._add_numeric_methods_add_sub_disabled() CategoricalIndex._add_numeric_methods_disabled() CategoricalIndex._add_logical_methods_disabled() CategoricalIndex._add_comparison_methods() -CategoricalIndex._add_accessors() From 1faac78b72580164ff449b273d20f74fd338cce7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:42:46 -0700 Subject: [PATCH 23/86] [PERF] use numexpr in dispatch_to_series (#22284) --- pandas/core/frame.py | 31 ++++++++++++------------------- pandas/core/ops.py | 34 ++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 27 deletions(-) diff --git a/pandas/core/frame.py 
b/pandas/core/frame.py index 96ad525355dce..5261d0ea94c7e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4823,15 +4823,23 @@ def _arith_op(left, right): copy=False) def _combine_match_index(self, other, func, level=None): + assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=0, level=level, copy=False) assert left.index.equals(right.index) - new_data = func(left.values.T, right.values).T - return self._constructor(new_data, - index=left.index, columns=self.columns, - copy=False) + + if left._is_mixed_type or right._is_mixed_type: + # operate column-wise; avoid costly object-casting in `.values` + return ops.dispatch_to_series(left, right, func) + else: + # fastpath --> operate directly on values + new_data = func(left.values.T, right.values).T + return self._constructor(new_data, + index=left.index, columns=self.columns, + copy=False) def _combine_match_columns(self, other, func, level=None, try_cast=True): + assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) @@ -4850,21 +4858,6 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): try_cast=try_cast) return self._constructor(new_data) - def _compare_frame(self, other, func, str_rep): - # compare_frame assumes self._indexed_same(other) - - import pandas.core.computation.expressions as expressions - - def _compare(a, b): - return {i: func(a.iloc[:, i], b.iloc[:, i]) - for i in range(len(a.columns))} - - new_data = expressions.evaluate(_compare, str_rep, self, other) - result = self._constructor(data=new_data, index=self.index, - copy=False) - result.columns = self.columns - return result - def combine(self, other, func, fill_value=None, overwrite=True): """ Perform column-wise combine with another DataFrame based on a diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a86e57fd8876d..ca9c2528f0aef 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1621,7 +1621,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): # ----------------------------------------------------------------------------- # DataFrame -def dispatch_to_series(left, right, func): +def dispatch_to_series(left, right, func, str_rep=None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -1631,6 +1631,7 @@ def dispatch_to_series(left, right, func): left : DataFrame right : scalar or DataFrame func : arithmetic or comparison operator + str_rep : str or None, default None Returns ------- @@ -1638,17 +1639,34 @@ def dispatch_to_series(left, right, func): """ # Note: we use iloc to access columns for compat with cases # with non-unique columns. 
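+    # expressions.evaluate dispatches to numexpr (when installed) for
+    # arrays large enough to benefit, and falls back to evaluating with
+    # the plain operator otherwise; the import stays local, mirroring
+    # the removed _compare_frame.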
+ import pandas.core.computation.expressions as expressions + right = lib.item_from_zerodim(right) if lib.is_scalar(right): - new_data = {i: func(left.iloc[:, i], right) - for i in range(len(left.columns))} + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + elif isinstance(right, ABCDataFrame): assert right._indexed_same(left) - new_data = {i: func(left.iloc[:, i], right.iloc[:, i]) - for i in range(len(left.columns))} + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[:, i]) + for i in range(len(a.columns))} + + elif isinstance(right, ABCSeries): + assert right.index.equals(left.index) # Handle other cases later + + def column_op(a, b): + return {i: func(a.iloc[:, i], b) + for i in range(len(a.columns))} + else: # Remaining cases have less-obvious dispatch rules - raise NotImplementedError + raise NotImplementedError(right) + + new_data = expressions.evaluate(column_op, str_rep, left, right) result = left._constructor(new_data, index=left.index, copy=False) # Pin columns instead of passing to constructor for compat with @@ -1818,7 +1836,7 @@ def f(self, other, axis=default_axis, level=None): if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) - return self._compare_frame(other, na_op, str_rep) + return dispatch_to_series(self, other, na_op, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, @@ -1843,7 +1861,7 @@ def f(self, other): if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') - return self._compare_frame(other, func, str_rep) + return dispatch_to_series(self, other, func, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, From 24501d995e269f66a534b8e4eb4f3848c1ece4c3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:44:21 -0700 Subject: [PATCH 24/86] Fix incorrect DTI/TDI indexing; warn before dropping tzinfo (#22549) --- doc/source/whatsnew/v0.24.0.txt | 5 ++++- pandas/_libs/tslibs/timestamps.pyx | 6 +++++ pandas/core/indexes/datetimes.py | 14 ++++++++++-- pandas/core/indexes/timedeltas.py | 7 +++++- pandas/tests/indexes/datetimes/test_astype.py | 22 ++++++++++++++----- .../tests/indexes/datetimes/test_indexing.py | 14 ++++++++++++ .../tests/indexes/timedeltas/test_indexing.py | 11 +++++++++- .../tests/scalar/timestamp/test_timestamp.py | 8 +++++++ 8 files changed, 77 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 8445a28a51a5d..7d7dc7f0f17b5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -525,6 +525,7 @@ Datetimelike API Changes - :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) +- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) .. 
_whatsnew_0240.api.other: @@ -626,6 +627,8 @@ Datetimelike - Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`) - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`) - Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise `OverflowError` (:issue:`22492`, :issue:`22508`) +- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) +- Timedelta ^^^^^^^^^ @@ -634,7 +637,7 @@ Timedelta - Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) - Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) -- +- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - - diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ab1396c0fe38..52343593d1cc1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -737,6 +737,12 @@ class Timestamp(_Timestamp): """ from pandas import Period + if self.tz is not None: + # GH#21333 + warnings.warn("Converting to Period representation will " + "drop timezone information.", + UserWarning) + if freq is None: freq = self.freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 629660c899a3f..f780b68a536a1 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2,7 +2,7 @@ from __future__ import division import operator import warnings -from datetime import time, datetime +from datetime import time, datetime, timedelta import numpy as np from pytz import utc @@ -730,6 +730,10 @@ def to_period(self, freq=None): """ from pandas.core.indexes.period import PeriodIndex + if self.tz is not None: + warnings.warn("Converting to PeriodIndex representation will " + "drop timezone information.", UserWarning) + if freq is None: freq = self.freqstr or self.inferred_freq @@ -740,7 +744,7 @@ def to_period(self, freq=None): freq = get_period_alias(freq) - return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz) + return PeriodIndex(self.values, name=self.name, freq=freq) def snap(self, freq='S'): """ @@ -1204,6 +1208,12 @@ def get_loc(self, key, method=None, tolerance=None): key = Timestamp(key, tz=self.tz) return Index.get_loc(self, key, method, tolerance) + elif isinstance(key, timedelta): + # GH#20464 + raise TypeError("Cannot index {cls} with {other}" + .format(cls=type(self).__name__, + other=type(key).__name__)) + if isinstance(key, time): if method is not None: raise NotImplementedError('cannot yet lookup inexact labels ' diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 063b578e512de..e0c78d6a1c518 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,5 +1,6 @@ """ implement the TimedeltaIndex """ import operator +from datetime import datetime import numpy as np from pandas.core.dtypes.common import ( @@ -487,7 +488,11 @@ def get_loc(self, key, method=None, tolerance=None): ------- loc : int """ - if is_list_like(key): + if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): + # 
GH#20464 datetime check here is to ensure we don't allow + # datetime objects to be incorrectly treated as timedelta + # objects; NaT is a special case because it plays a double role + # as Not-A-Timedelta raise TypeError if isna(key): diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 78b669de95598..be22d80a862e1 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -246,7 +246,9 @@ def setup_method(self, method): def test_to_period_millisecond(self): index = self.index - period = index.to_period(freq='L') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq='L') assert 2 == len(period) assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L') assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L') @@ -254,7 +256,9 @@ def test_to_period_millisecond(self): def test_to_period_microsecond(self): index = self.index - period = index.to_period(freq='U') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq='U') assert 2 == len(period) assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') @@ -264,12 +268,20 @@ def test_to_period_microsecond(self): dateutil.tz.tzutc()]) def test_to_period_tz(self, tz): ts = date_range('1/1/2000', '2/1/2000', tz=tz) - result = ts.to_period()[0] - expected = ts[0].to_period() + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period()[0] + expected = ts[0].to_period() + assert result == expected expected = date_range('1/1/2000', '2/1/2000').to_period() - result = ts.to_period() + + with tm.assert_produces_warning(UserWarning): + # GH#21333 warning that timezone info will be lost + result = ts.to_period() + tm.assert_index_equal(result, expected) def test_to_period_nofreq(self): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 8cffa035721b0..601a7b13e370a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -586,3 +586,17 @@ def test_reasonable_keyerror(self): with pytest.raises(KeyError) as excinfo: index.get_loc('1/1/2000') assert '2000' in str(excinfo.value) + + @pytest.mark.parametrize('key', [pd.Timedelta(0), + pd.Timedelta(1), + timedelta(0)]) + def test_timedelta_invalid_key(self, key): + # GH#20464 + dti = pd.date_range('1970-01-01', periods=10) + with pytest.raises(TypeError): + dti.get_loc(key) + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(['1/3/2000', 'NaT']) + assert index.get_loc(pd.NaT) == 1 diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 08992188265bd..8ba2c81f429d8 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,4 +1,4 @@ -from datetime import timedelta +from datetime import datetime, timedelta import pytest import numpy as np @@ -41,6 +41,15 @@ def test_getitem(self): tm.assert_index_equal(result, expected) assert result.freq == expected.freq + @pytest.mark.parametrize('key', [pd.Timestamp('1970-01-01'), + pd.Timestamp('1970-01-02'), + datetime(1970, 1, 1)]) + def test_timestamp_invalid_key(self, key): + # GH#20464 + tdi = pd.timedelta_range(0, 
periods=10) + with pytest.raises(TypeError): + tdi.get_loc(key) + class TestWhere(object): # placeholder for symmetry with DatetimeIndex and PeriodIndex tests diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 58146cae587fe..872c510094a4f 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -929,3 +929,11 @@ def test_to_datetime_bijective(self): with tm.assert_produces_warning(exp_warning, check_stacklevel=False): assert (Timestamp(Timestamp.min.to_pydatetime()).value / 1000 == Timestamp.min.value / 1000) + + def test_to_period_tz_warning(self): + # GH#21333 make sure a warning is issued when timezone + # info is lost + ts = Timestamp('2009-04-15 16:17:18', tz='US/Eastern') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + ts.to_period('D') From 52b1bf5af0c12cf3c042d8cb63c347d53709dda3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:45:29 -0700 Subject: [PATCH 25/86] [CLN] More cython cleanups, with bonus type annotations (#22283) --- pandas/_libs/algos_common_helper.pxi.in | 8 +++--- pandas/_libs/groupby.pyx | 1 + pandas/_libs/hashing.pyx | 1 + pandas/_libs/index.pyx | 2 +- pandas/_libs/internals.pyx | 4 +-- pandas/_libs/interval.pyx | 5 ++-- pandas/_libs/lib.pyx | 38 ++++++++++++------------- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/writers.pyx | 11 ++++--- 9 files changed, 36 insertions(+), 36 deletions(-) diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index 1efef480f3a29..ed4c0e4c59609 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ b/pandas/_libs/algos_common_helper.pxi.in @@ -45,7 +45,7 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cpdef map_indices_{{name}}(ndarray[{{c_type}}] index): +def map_indices_{{name}}(ndarray[{{c_type}}] index): """ Produce a dict mapping the values of the input array to their respective locations. @@ -542,7 +542,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values, cdef int PLATFORM_INT = ( np.arange(0, dtype=np.intp)).descr.type_num -cpdef ensure_platform_int(object arr): +def ensure_platform_int(object arr): # GH3033, GH1392 # platform int is the size of the int pointer, e.g. np.intp if util.is_array(arr): @@ -554,7 +554,7 @@ cpdef ensure_platform_int(object arr): return np.array(arr, dtype=np.intp) -cpdef ensure_object(object arr): +def ensure_object(object arr): if util.is_array(arr): if ( arr).descr.type_num == NPY_OBJECT: return arr @@ -587,7 +587,7 @@ def get_dispatch(dtypes): {{for name, c_type, dtype in get_dispatch(dtypes)}} -cpdef ensure_{{name}}(object arr, copy=True): +def ensure_{{name}}(object arr, copy=True): if util.is_array(arr): if ( arr).descr.type_num == NPY_{{c_type}}: return arr diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index d8feda9ef27ef..d683c93c9b32e 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -67,6 +67,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result +# TODO: Is this redundant with algos.kth_smallest? 
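+# (kth_smallest_c partially reorders `a` in place and returns its k-th
+# smallest element; within this module it backs median_linear above.)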
cdef inline float64_t kth_smallest_c(float64_t* a, Py_ssize_t k, Py_ssize_t n) nogil: diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 557e3e34aee25..88b4d97de492c 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -132,6 +132,7 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1, v2[0] = _rotl(v2[0], 32) +# TODO: This appears unused; remove? cpdef uint64_t siphash(bytes data, bytes key) except? 0: if len(key) != 16: raise ValueError("key should be a 16-byte bytestring, " diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 9c906a00bd4fe..d5846f2b42378 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -49,7 +49,7 @@ cpdef get_value_at(ndarray arr, object loc, object tz=None): return util.get_value_at(arr, loc) -cpdef object get_value_box(ndarray arr, object loc): +def get_value_box(arr: ndarray, loc: object) -> object: return get_value_at(arr, loc, tz=None) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 97cc7f96cb24f..996570dae3302 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -184,7 +184,7 @@ cdef class BlockPlacement: return self._as_slice -cpdef slice_canonize(slice s): +cdef slice_canonize(slice s): """ Convert slice to canonical bounded form. """ @@ -255,7 +255,7 @@ cpdef Py_ssize_t slice_len( return length -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): +cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): """ Get (start, stop, step, length) tuple for a slice. diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 5ae20a27c2381..d8e2e8eb4b4ea 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -362,8 +362,8 @@ cdef class Interval(IntervalMixin): @cython.wraparound(False) @cython.boundscheck(False) -cpdef intervals_to_interval_bounds(ndarray intervals, - bint validate_closed=True): +def intervals_to_interval_bounds(ndarray intervals, + bint validate_closed=True): """ Parameters ---------- @@ -415,4 +415,5 @@ cpdef intervals_to_interval_bounds(ndarray intervals, return left, right, closed + include "intervaltree.pxi" diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a6078da28a3ba..6b425d7022ecd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -107,7 +107,7 @@ def memory_usage_of_objects(object[:] arr): # ---------------------------------------------------------------------- -cpdef bint is_scalar(object val): +def is_scalar(val: object) -> bint: """ Return True if given value is scalar. 
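Throughout this patch, ``cpdef`` functions that are only ever called from
Python become plain ``def`` functions; Cython still reads the Python
3-style annotations, so the typed signature survives without the dual
C/Python entry point. A minimal sketch of the two spellings, using the
``is_float`` conversion that appears just below:

    # before: cpdef generates both a C and a Python entry point
    cpdef bint is_float(object obj):
        return util.is_float_object(obj)

    # after: Python entry point only, same typing via annotations
    def is_float(obj: object) -> bint:
        return util.is_float_object(obj)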
@@ -137,7 +137,7 @@ cpdef bint is_scalar(object val): or util.is_period_object(val) or is_decimal(val) or is_interval(val) - or is_offset(val)) + or util.is_offset_object(val)) def item_from_zerodim(object val): @@ -457,7 +457,7 @@ def maybe_booleans_to_slice(ndarray[uint8_t] mask): @cython.wraparound(False) @cython.boundscheck(False) -cpdef bint array_equivalent_object(object[:] left, object[:] right): +def array_equivalent_object(left: object[:], right: object[:]) -> bint: """ perform an element by element comparion on 1-d object arrays taking into account nan positions """ cdef: @@ -497,7 +497,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): return result -cpdef ndarray[object] astype_unicode(ndarray arr): +def astype_unicode(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -508,7 +508,7 @@ cpdef ndarray[object] astype_unicode(ndarray arr): return result -cpdef ndarray[object] astype_str(ndarray arr): +def astype_str(arr: ndarray) -> ndarray[object]: cdef: Py_ssize_t i, n = arr.size ndarray[object] result = np.empty(n, dtype=object) @@ -791,19 +791,19 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, # core.common import for fast inference checks -cpdef bint is_float(object obj): +def is_float(obj: object) -> bint: return util.is_float_object(obj) -cpdef bint is_integer(object obj): +def is_integer(obj: object) -> bint: return util.is_integer_object(obj) -cpdef bint is_bool(object obj): +def is_bool(obj: object) -> bint: return util.is_bool_object(obj) -cpdef bint is_complex(object obj): +def is_complex(obj: object) -> bint: return util.is_complex_object(obj) @@ -815,15 +815,11 @@ cpdef bint is_interval(object obj): return getattr(obj, '_typ', '_typ') == 'interval' -cpdef bint is_period(object val): +def is_period(val: object) -> bint: """ Return a boolean if this is a Period object """ return util.is_period_object(val) -cdef inline bint is_offset(object val): - return getattr(val, '_typ', '_typ') == 'dateoffset' - - _TYPE_MAP = { 'categorical': 'categorical', 'category': 'categorical', @@ -1225,7 +1221,7 @@ def infer_dtype(object value, bint skipna=False): if is_bytes_array(values, skipna=skipna): return 'bytes' - elif is_period(val): + elif util.is_period_object(val): if is_period_array(values): return 'period' @@ -1243,7 +1239,7 @@ def infer_dtype(object value, bint skipna=False): return 'mixed' -cpdef object infer_datetimelike_array(object arr): +def infer_datetimelike_array(arr: object) -> object: """ infer if we have a datetime or timedelta array - date: we have *only* date and maybe strings, nulls @@ -1580,7 +1576,7 @@ cpdef bint is_datetime64_array(ndarray values): return validator.validate(values) -cpdef bint is_datetime_with_singletz_array(ndarray values): +def is_datetime_with_singletz_array(values: ndarray) -> bint: """ Check values have the same tzinfo attribute. Doesn't check values are datetime-like types. @@ -1616,7 +1612,8 @@ cdef class TimedeltaValidator(TemporalValidator): return is_null_timedelta64(value) -cpdef bint is_timedelta_array(ndarray values): +# TODO: Not used outside of tests; remove? +def is_timedelta_array(values: ndarray) -> bint: cdef: TimedeltaValidator validator = TimedeltaValidator(len(values), skipna=True) @@ -1628,7 +1625,8 @@ cdef class Timedelta64Validator(TimedeltaValidator): return util.is_timedelta64_object(value) -cpdef bint is_timedelta64_array(ndarray values): +# TODO: Not used outside of tests; remove? 
+def is_timedelta64_array(values: ndarray) -> bint: cdef: Timedelta64Validator validator = Timedelta64Validator(len(values), skipna=True) @@ -1672,7 +1670,7 @@ cpdef bint is_time_array(ndarray values, bint skipna=False): cdef class PeriodValidator(TemporalValidator): cdef inline bint is_value_typed(self, object value) except -1: - return is_period(value) + return util.is_period_object(value) cdef inline bint is_valid_null(self, object value) except -1: return is_null_period(value) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index bdd279b19208b..93fae695d51fd 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -300,7 +300,7 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): +def array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit if errors: diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 796f4b754857e..8e55ffad8d231 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -3,8 +3,7 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport (PyString_Check, PyBytes_Check, PyUnicode_Check, - PyBytes_GET_SIZE, PyUnicode_GET_SIZE) +from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE try: from cpython cimport PyString_GET_SIZE @@ -124,7 +123,7 @@ def convert_json_to_lines(object arr): # stata, pytables @cython.boundscheck(False) @cython.wraparound(False) -cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): +def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] @@ -132,11 +131,11 @@ cpdef Py_ssize_t max_len_string_array(pandas_string[:] arr): for i in range(length): v = arr[i] - if PyString_Check(v): + if isinstance(v, str): l = PyString_GET_SIZE(v) - elif PyBytes_Check(v): + elif isinstance(v, bytes): l = PyBytes_GET_SIZE(v) - elif PyUnicode_Check(v): + elif isinstance(v, unicode): l = PyUnicode_GET_SIZE(v) if l > m: From 2e21bd0c06784930eea4cabc6b0ecdb277ef5f4e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:46:53 -0700 Subject: [PATCH 26/86] move rename functionality out of internals (#21924) --- .coveragerc | 1 + pandas/core/generic.py | 18 ++++++++++-------- pandas/core/internals/managers.py | 15 ++------------- 3 files changed, 13 insertions(+), 21 deletions(-) diff --git a/.coveragerc b/.coveragerc index f5c8b701a79a8..13baa100b84b7 100644 --- a/.coveragerc +++ b/.coveragerc @@ -17,6 +17,7 @@ exclude_lines = # Don't complain if tests don't hit defensive assertion code: raise AssertionError raise NotImplementedError + AbstractMethodError # Don't complain if non-runnable code isn't run: if 0: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index dd5552151f61b..71dac0ea2e98a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -289,10 +289,7 @@ def set_axis(a, i): for i, a in cls._AXIS_NAMES.items(): set_axis(a, i) - # addtl parms - if isinstance(ns, dict): - for k, v in ns.items(): - setattr(cls, k, v) + assert not isinstance(ns, dict) def _construct_axes_dict(self, axes=None, **kwargs): """Return an axes dictionary for myself.""" @@ -3483,8 +3480,10 @@ def add_prefix(self, prefix): 2 3 5 3 4 6 """ - new_data = self._data.add_prefix(prefix) - return self._constructor(new_data).__finalize__(self) + f = functools.partial('{prefix}{}'.format, 
prefix=prefix) + + mapper = {self._info_axis_name: f} + return self.rename(**mapper) def add_suffix(self, suffix): """ @@ -3540,8 +3539,10 @@ def add_suffix(self, suffix): 2 3 5 3 4 6 """ - new_data = self._data.add_suffix(suffix) - return self._constructor(new_data).__finalize__(self) + f = functools.partial('{}{suffix}'.format, suffix=suffix) + + mapper = {self._info_axis_name: f} + return self.rename(**mapper) _shared_docs['sort_values'] = """ Sort by the values along either axis @@ -4057,6 +4058,7 @@ def _reindex_with_indexers(self, reindexers, fill_value=None, copy=False, return self._constructor(new_data).__finalize__(self) + # TODO: unused; remove? def _reindex_axis(self, new_index, fill_method, axis, copy): new_data = self._data.reindex_axis(new_index, axis=axis, method=fill_method, copy=copy) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e64ba44bb8a92..63738594799f5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -176,20 +176,11 @@ def rename_axis(self, mapper, axis, copy=True, level=None): axis : int copy : boolean, default True level : int, default None - """ obj = self.copy(deep=copy) obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level)) return obj - def add_prefix(self, prefix): - f = partial('{prefix}{}'.format, prefix=prefix) - return self.rename_axis(f, axis=0) - - def add_suffix(self, suffix): - f = partial('{}{suffix}'.format, suffix=suffix) - return self.rename_axis(f, axis=0) - @property def _is_single_block(self): if self.ndim == 1: @@ -222,12 +213,10 @@ def _rebuild_blknos_and_blklocs(self): self._blknos = new_blknos self._blklocs = new_blklocs - # make items read only for now - def _get_items(self): + @property + def items(self): return self.axes[0] - items = property(fget=_get_items) - def _get_counts(self, f): """ return a dict of the counts of the function in BlockManager """ self._consolidate_inplace() From 1a2b52407b81a999beaf7f7276129a8eb9c5030e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 7 Sep 2018 19:48:04 -0700 Subject: [PATCH 27/86] TST: Continue collecting arithmetic tests (#22559) --- pandas/tests/arithmetic/test_numeric.py | 69 +++++++++++++++++++ pandas/tests/arithmetic/test_object.py | 33 +++++++++ .../test_timedelta64.py} | 0 pandas/tests/frame/test_arithmetic.py | 16 ----- pandas/tests/indexes/test_numeric.py | 34 --------- .../indexes/timedeltas/test_arithmetic.py | 32 --------- pandas/tests/series/test_arithmetic.py | 8 --- 7 files changed, 102 insertions(+), 90 deletions(-) rename pandas/tests/{test_arithmetic.py => arithmetic/test_timedelta64.py} (100%) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 9ede1a62aaf2e..d3957330f11e4 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -42,6 +42,30 @@ def test_operator_series_comparison_zerorank(self): expected = 0.0 > pd.Series([1, 2, 3]) tm.assert_series_equal(result, expected) + def test_df_numeric_cmp_dt64_raises(self): + # GH#8932, GH#22163 + ts = pd.Timestamp.now() + df = pd.DataFrame({'x': range(5)}) + with pytest.raises(TypeError): + df > ts + with pytest.raises(TypeError): + df < ts + with pytest.raises(TypeError): + ts < df + with pytest.raises(TypeError): + ts > df + + assert not (df == ts).any().any() + assert (df != ts).all().all() + + def test_compare_invalid(self): + # GH#8058 + # ops testing + a = pd.Series(np.random.randn(5), name=0) + b = pd.Series(np.random.randn(5)) + 
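+        # give the operands mismatched name types (int vs. Timestamp);
+        # the identity a / b == 1 / (b / a) should hold regardless of
+        # the Series names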
b.name = pd.Timestamp('2000-01-01') + tm.assert_series_equal(a / b, 1 / (b / a)) + # ------------------------------------------------------------------ # Numeric dtypes Arithmetic with Timedelta Scalar @@ -754,6 +778,51 @@ def check(series, other): check(tser, 5) +class TestUFuncCompat(object): + @pytest.mark.parametrize('holder', [pd.Int64Index, pd.UInt64Index, + pd.Float64Index, pd.Series]) + def test_ufunc_coercions(self, holder): + idx = holder([1, 2, 3, 4, 5], name='x') + box = pd.Series if holder is pd.Series else pd.Index + + result = np.sqrt(idx) + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = np.divide(idx, 2.) + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + # _evaluate_numeric_binop + result = idx + 2. + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index([3., 4., 5., 6., 7.], name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx - 2. + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index([-1., 0., 1., 2., 3.], name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx * 1. + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index([1., 2., 3., 4., 5.], name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + result = idx / 2. + assert result.dtype == 'f8' and isinstance(result, box) + exp = pd.Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + exp = tm.box_expected(exp, box) + tm.assert_equal(result, exp) + + class TestObjectDtypeEquivalence(object): # Tests that arithmetic operations match operations executed elementwise diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 2c1cc83c09f88..64d7cbc47fddd 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -180,3 +180,36 @@ def test_series_with_dtype_radd_timedelta(self, dtype): result = ser + pd.Timedelta('3 days') tm.assert_series_equal(result, expected) + + # TODO: cleanup & parametrize over box + def test_mixed_timezone_series_ops_object(self): + # GH#13043 + ser = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), + pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], + name='xxx') + assert ser.dtype == object + + exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), + pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], + name='xxx') + tm.assert_series_equal(ser + pd.Timedelta('1 days'), exp) + tm.assert_series_equal(pd.Timedelta('1 days') + ser, exp) + + # object series & object series + ser2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), + pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], + name='xxx') + assert ser2.dtype == object + exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], + name='xxx') + tm.assert_series_equal(ser2 - ser, exp) + tm.assert_series_equal(ser - ser2, -exp) + + ser = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], + name='xxx', dtype=object) + assert ser.dtype == object + + exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], + name='xxx') + tm.assert_series_equal(ser + pd.Timedelta('00:30:00'), exp) + tm.assert_series_equal(pd.Timedelta('00:30:00') + ser, exp) diff --git 
a/pandas/tests/test_arithmetic.py b/pandas/tests/arithmetic/test_timedelta64.py similarity index 100% rename from pandas/tests/test_arithmetic.py rename to pandas/tests/arithmetic/test_timedelta64.py diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f142f770a0c54..a6f4e0e38ec5d 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -48,22 +48,6 @@ def test_mixed_comparison(self): result = df != other assert result.all().all() - def test_df_numeric_cmp_dt64_raises(self): - # GH#8932, GH#22163 - ts = pd.Timestamp.now() - df = pd.DataFrame({'x': range(5)}) - with pytest.raises(TypeError): - df > ts - with pytest.raises(TypeError): - df < ts - with pytest.raises(TypeError): - ts < df - with pytest.raises(TypeError): - ts > df - - assert not (df == ts).any().any() - assert (df != ts).all().all() - def test_df_boolean_comparison_error(self): # GH#4576 # boolean comparisons with a tuple/list give unexpected results diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index c8aa7f8fd50fd..1cb2cd46a65db 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -565,40 +565,6 @@ def test_slice_keep_name(self): idx = self._holder([1, 2], name='asdf') assert idx.name == idx[1:].name - def test_ufunc_coercions(self): - idx = self._holder([1, 2, 3, 4, 5], name='x') - - result = np.sqrt(idx) - assert isinstance(result, Float64Index) - exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') - tm.assert_index_equal(result, exp) - - result = np.divide(idx, 2.) - assert isinstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) - - # _evaluate_numeric_binop - result = idx + 2. - assert isinstance(result, Float64Index) - exp = Float64Index([3., 4., 5., 6., 7.], name='x') - tm.assert_index_equal(result, exp) - - result = idx - 2. - assert isinstance(result, Float64Index) - exp = Float64Index([-1., 0., 1., 2., 3.], name='x') - tm.assert_index_equal(result, exp) - - result = idx * 1. - assert isinstance(result, Float64Index) - exp = Float64Index([1., 2., 3., 4., 5.], name='x') - tm.assert_index_equal(result, exp) - - result = idx / 2. 
- assert isinstance(result, Float64Index) - exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') - tm.assert_index_equal(result, exp) - class TestInt64Index(NumericInt): _dtype = 'int64' diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index f3bc523ca525e..e425937fedf4b 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -430,38 +430,6 @@ def test_ops_ndarray(self): if LooseVersion(np.__version__) >= LooseVersion('1.8'): tm.assert_numpy_array_equal(other - td, expected) - def test_ops_series_object(self): - # GH 13043 - s = pd.Series([pd.Timestamp('2015-01-01', tz='US/Eastern'), - pd.Timestamp('2015-01-01', tz='Asia/Tokyo')], - name='xxx') - assert s.dtype == object - - exp = pd.Series([pd.Timestamp('2015-01-02', tz='US/Eastern'), - pd.Timestamp('2015-01-02', tz='Asia/Tokyo')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('1 days'), exp) - tm.assert_series_equal(pd.Timedelta('1 days') + s, exp) - - # object series & object series - s2 = pd.Series([pd.Timestamp('2015-01-03', tz='US/Eastern'), - pd.Timestamp('2015-01-05', tz='Asia/Tokyo')], - name='xxx') - assert s2.dtype == object - exp = pd.Series([pd.Timedelta('2 days'), pd.Timedelta('4 days')], - name='xxx') - tm.assert_series_equal(s2 - s, exp) - tm.assert_series_equal(s - s2, -exp) - - s = pd.Series([pd.Timedelta('01:00:00'), pd.Timedelta('02:00:00')], - name='xxx', dtype=object) - assert s.dtype == object - - exp = pd.Series([pd.Timedelta('01:30:00'), pd.Timedelta('02:30:00')], - name='xxx') - tm.assert_series_equal(s + pd.Timedelta('00:30:00'), exp) - tm.assert_series_equal(pd.Timedelta('00:30:00') + s, exp) - def test_timedelta_ops_with_missing_values(self): # setup s1 = pd.to_timedelta(Series(['00:00:01'])) diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 41064b84abc36..37ba1c91368b3 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import operator -import numpy as np import pytest from pandas import Series @@ -14,13 +13,6 @@ # Comparisons class TestSeriesComparison(object): - def test_compare_invalid(self): - # GH#8058 - # ops testing - a = pd.Series(np.random.randn(5), name=0) - b = pd.Series(np.random.randn(5)) - b.name = pd.Timestamp('2000-01-01') - tm.assert_series_equal(a / b, 1 / (b / a)) @pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le']) def test_ser_flex_cmp_return_dtypes(self, opname): From 09a3d6b9c2a5397cdb34f4d11ff51556e1e0c91f Mon Sep 17 00:00:00 2001 From: sideeye Date: Sat, 8 Sep 2018 05:52:58 +0300 Subject: [PATCH 28/86] BUG: fix failing DataFrame.loc when indexing with an IntervalIndex (#22576) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexing.py | 2 +- pandas/tests/frame/test_indexing.py | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7d7dc7f0f17b5..fb7af00f61534 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -712,6 +712,7 @@ Indexing - Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`) - Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`) - ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. 
(:issue:`19087`) +- Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a245ecfa007f3..b63f874abff85 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1491,7 +1491,7 @@ def __getitem__(self, key): try: if self._is_scalar_access(key): return self._getitem_scalar(key) - except (KeyError, IndexError): + except (KeyError, IndexError, AttributeError): pass return self._getitem_tuple(key) else: diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 6a4cf1ffc6071..f0c4d7be2f293 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3099,6 +3099,28 @@ def test_type_error_multiindex(self): result = dg['x', 0] assert_series_equal(result, expected) + def test_interval_index(self): + # GH 19977 + index = pd.interval_range(start=0, periods=3) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=index, + columns=['A', 'B', 'C']) + + expected = 1 + result = df.loc[0.5, 'A'] + assert_almost_equal(result, expected) + + index = pd.interval_range(start=0, periods=3, closed='both') + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=index, + columns=['A', 'B', 'C']) + + index_exp = pd.interval_range(start=0, periods=2, + freq=1, closed='both') + expected = pd.Series([1, 4], index=index_exp, name='A') + result = df.loc[1, 'A'] + assert_series_equal(result, expected) + class TestDataFrameIndexingDatetimeWithTZ(TestData): From 128cbd9db94d6687631518b8a9e0e684967ec700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20S=C3=A1nchez=20de=20Le=C3=B3n=20Peque?= Date: Sat, 8 Sep 2018 05:02:22 +0200 Subject: [PATCH 29/86] DOC: Update `month_name` and `day_name` docstrings (#22544) --- pandas/core/arrays/datetimes.py | 40 ++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 466cfb296094c..a0a9b57712249 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -771,17 +771,27 @@ def month_name(self, locale=None): """ Return the month names of the DateTimeIndex with specified locale. + .. versionadded:: 0.23.0 + Parameters ---------- - locale : string, default None (English locale) - locale determining the language in which to return the month name + locale : str, optional + Locale determining the language in which to return the month name. + Default is English locale. Returns ------- - month_names : Index - Index of month names + Index + Index of month names. - .. versionadded:: 0.23.0 + Examples + -------- + >>> idx = pd.DatetimeIndex(start='2018-01', freq='M', periods=3) + >>> idx + DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], + dtype='datetime64[ns]', freq='M') + >>> idx.month_name() + Index(['January', 'February', 'March'], dtype='object') """ if self.tz is not None and self.tz is not utc: values = self._local_timestamps() @@ -797,17 +807,27 @@ def day_name(self, locale=None): """ Return the day names of the DateTimeIndex with specified locale. + .. versionadded:: 0.23.0 + Parameters ---------- - locale : string, default None (English locale) - locale determining the language in which to return the day name + locale : str, optional + Locale determining the language in which to return the day name. + Default is English locale. Returns ------- - month_names : Index - Index of day names + Index + Index of day names. - .. 
versionadded:: 0.23.0 + Examples + -------- + >>> idx = pd.DatetimeIndex(start='2018-01-01', freq='D', periods=3) + >>> idx + DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], + dtype='datetime64[ns]', freq='D') + >>> idx.day_name() + Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object') """ if self.tz is not None and self.tz is not utc: values = self._local_timestamps() From f2af1c61e5a00459be48ae55b4104522dba592a1 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Sat, 8 Sep 2018 05:05:49 +0200 Subject: [PATCH 30/86] CLN: tests for str.cat (#22575) --- pandas/tests/test_strings.py | 287 ++++++++++++++++------------------- 1 file changed, 127 insertions(+), 160 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 25e634c21c5ef..bd450cdcf8054 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -144,71 +144,50 @@ def test_cat(self): with tm.assert_raises_regex(ValueError, rgx): strings.str_cat(one, 'three') - @pytest.mark.parametrize('container', [Series, Index]) + @pytest.mark.parametrize('box', [Series, Index]) @pytest.mark.parametrize('other', [None, Series, Index]) - def test_str_cat_name(self, container, other): - # https://github.com/pandas-dev/pandas/issues/21053 + def test_str_cat_name(self, box, other): + # GH 21053 values = ['a', 'b'] if other: other = other(values) else: other = values - result = container(values, name='name').str.cat(other, sep=',', - join='left') + result = box(values, name='name').str.cat(other, sep=',', join='left') assert result.name == 'name' - @pytest.mark.parametrize('series_or_index', ['series', 'index']) - def test_str_cat(self, series_or_index): - # test_cat above tests "str_cat" from ndarray to ndarray; - # here testing "str.cat" from Series/Index to Series/Index/ndarray/list - s = Index(['a', 'a', 'b', 'b', 'c', np.nan]) - if series_or_index == 'series': - s = Series(s) - t = Index(['a', np.nan, 'b', 'd', 'foo', np.nan]) + @pytest.mark.parametrize('box', [Series, Index]) + def test_str_cat(self, box): + # test_cat above tests "str_cat" from ndarray; + # here testing "str.cat" from Series/Indext to ndarray/list + s = box(['a', 'a', 'b', 'b', 'c', np.nan]) # single array result = s.str.cat() - exp = 'aabbc' - assert result == exp + expected = 'aabbc' + assert result == expected result = s.str.cat(na_rep='-') - exp = 'aabbc-' - assert result == exp + expected = 'aabbc-' + assert result == expected result = s.str.cat(sep='_', na_rep='NA') - exp = 'a_a_b_b_c_NA' - assert result == exp - - # Series/Index with Index - exp = Index(['aa', 'a-', 'bb', 'bd', 'cfoo', '--']) - if series_or_index == 'series': - exp = Series(exp) - # s.index / s is different from t (as Index) -> warning - with tm.assert_produces_warning(expected_warning=FutureWarning): - # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp) - - # Series/Index with Series - t = Series(t) - # s as Series has same index as t -> no warning - # s as Index is different from t.index -> warning (tested below) - if series_or_index == 'series': - assert_series_equal(s.str.cat(t, na_rep='-'), exp) + expected = 'a_a_b_b_c_NA' + assert result == expected - # Series/Index with Series: warning if different indexes - t.index = t.index + 1 - with tm.assert_produces_warning(expected_warning=FutureWarning): - # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp) + t = 
np.array(['a', np.nan, 'b', 'd', 'foo', np.nan], dtype=object) + expected = box(['aa', 'a-', 'bb', 'bd', 'cfoo', '--']) # Series/Index with array - assert_series_or_index_equal(s.str.cat(t.values, na_rep='-'), exp) + result = s.str.cat(t, na_rep='-') + assert_series_or_index_equal(result, expected) # Series/Index with list - assert_series_or_index_equal(s.str.cat(list(t), na_rep='-'), exp) + result = s.str.cat(list(t), na_rep='-') + assert_series_or_index_equal(result, expected) # errors for incorrect lengths - rgx = 'All arrays must be same length, except.*' + rgx = 'All arrays must be same length, except those having an index.*' z = Series(['1', '2', '3']) with tm.assert_raises_regex(ValueError, rgx): @@ -220,122 +199,111 @@ def test_str_cat(self, series_or_index): with tm.assert_raises_regex(ValueError, rgx): s.str.cat(list(z)) - @pytest.mark.parametrize('series_or_index', ['series', 'index']) - def test_str_cat_raises_intuitive_error(self, series_or_index): - # https://github.com/pandas-dev/pandas/issues/11334 - s = Index(['a', 'b', 'c', 'd']) - if series_or_index == 'series': - s = Series(s) + @pytest.mark.parametrize('box', [Series, Index]) + def test_str_cat_raises_intuitive_error(self, box): + # GH 11334 + s = box(['a', 'b', 'c', 'd']) message = "Did you mean to supply a `sep` keyword?" with tm.assert_raises_regex(ValueError, message): s.str.cat('|') with tm.assert_raises_regex(ValueError, message): s.str.cat(' ') - @pytest.mark.parametrize('series_or_index, dtype_caller, dtype_target', [ - ('series', 'object', 'object'), - ('series', 'object', 'category'), - ('series', 'category', 'object'), - ('series', 'category', 'category'), - ('index', 'object', 'object'), - ('index', 'object', 'category'), - ('index', 'category', 'object'), - ('index', 'category', 'category') - ]) - def test_str_cat_categorical(self, series_or_index, - dtype_caller, dtype_target): + @pytest.mark.parametrize('dtype_target', ['object', 'category']) + @pytest.mark.parametrize('dtype_caller', ['object', 'category']) + @pytest.mark.parametrize('box', [Series, Index]) + def test_str_cat_categorical(self, box, dtype_caller, dtype_target): s = Index(['a', 'a', 'b', 'a'], dtype=dtype_caller) - if series_or_index == 'series': - s = Series(s) + s = s if box == Index else Series(s, index=s) t = Index(['b', 'a', 'b', 'c'], dtype=dtype_target) - exp = Index(['ab', 'aa', 'bb', 'ac']) - if series_or_index == 'series': - exp = Series(exp) + expected = Index(['ab', 'aa', 'bb', 'ac']) + expected = expected if box == Index else Series(expected, index=s) - # Series/Index with Index - # s.index / s is different from t (as Index) -> warning + # Series/Index with unaligned Index with tm.assert_produces_warning(expected_warning=FutureWarning): # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat(t), exp) + result = s.str.cat(t) + assert_series_or_index_equal(result, expected) + + # Series/Index with Series having matching Index + t = Series(t, index=s) + result = s.str.cat(t) + assert_series_or_index_equal(result, expected) - # Series/Index with Series - t = Series(t) - # s as Series has same index as t -> no warning - # s as Index is different from t.index -> warning (tested below) - if series_or_index == 'series': - assert_series_equal(s.str.cat(t), exp) + # Series/Index with Series.values + result = s.str.cat(t.values) + assert_series_or_index_equal(result, expected) - # Series/Index with Series: warning if different indexes - t.index = t.index + 1 + # Series/Index with Series 
having different Index + t = Series(t.values, index=t) with tm.assert_produces_warning(expected_warning=FutureWarning): # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat(t, na_rep='-'), exp) + result = s.str.cat(t) + assert_series_or_index_equal(result, expected) - @pytest.mark.parametrize('series_or_index', ['series', 'index']) - def test_str_cat_mixed_inputs(self, series_or_index): + @pytest.mark.parametrize('box', [Series, Index]) + def test_str_cat_mixed_inputs(self, box): s = Index(['a', 'b', 'c', 'd']) - if series_or_index == 'series': - s = Series(s) - t = Series(['A', 'B', 'C', 'D']) - d = concat([t, Series(s)], axis=1) + s = s if box == Index else Series(s, index=s) - exp = Index(['aAa', 'bBb', 'cCc', 'dDd']) - if series_or_index == 'series': - exp = Series(exp) + t = Series(['A', 'B', 'C', 'D'], index=s.values) + d = concat([t, Series(s, index=s)], axis=1) - # Series/Index with DataFrame - # s as Series has same index as d -> no warning - # s as Index is different from d.index -> warning (tested below) - if series_or_index == 'series': - assert_series_equal(s.str.cat(d), exp) + expected = Index(['aAa', 'bBb', 'cCc', 'dDd']) + expected = expected if box == Index else Series(expected.values, + index=s.values) - # Series/Index with DataFrame: warning if different indexes - d.index = d.index + 1 - with tm.assert_produces_warning(expected_warning=FutureWarning): - # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat(d), exp) + # Series/Index with DataFrame + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) # Series/Index with two-dimensional ndarray - assert_series_or_index_equal(s.str.cat(d.values), exp) + result = s.str.cat(d.values) + assert_series_or_index_equal(result, expected) # Series/Index with list of Series - # s as Series has same index as t, s -> no warning - # s as Index is different from t.index -> warning (tested below) - if series_or_index == 'series': - assert_series_equal(s.str.cat([t, s]), exp) - - # Series/Index with list of Series: warning if different indexes - tt = t.copy() - tt.index = tt.index + 1 - with tm.assert_produces_warning(expected_warning=FutureWarning): - # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat([tt, s]), exp) + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) + + # Series/Index with mixed list of Series/array + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) # Series/Index with list of list-likes with tm.assert_produces_warning(expected_warning=FutureWarning): - # nested lists will be deprecated - assert_series_or_index_equal(s.str.cat([t.values, list(s)]), exp) + # nested list-likes will be deprecated + result = s.str.cat([t.values, list(s)]) + assert_series_or_index_equal(result, expected) + + # Series/Index with list of Series; different indexes + t.index = ['b', 'c', 'd', 'a'] + with tm.assert_produces_warning(expected_warning=FutureWarning): + # FutureWarning to switch to alignment by default + result = s.str.cat([t, s]) + assert_series_or_index_equal(result, expected) - # Series/Index with mixed list of Series/list-like - # s as Series has same index as t -> no warning - # s as Index is different from t.index -> warning (tested below) - if series_or_index == 'series': - assert_series_equal(s.str.cat([t, s.values]), exp) + # Series/Index with mixed list; different indexes + with 
tm.assert_produces_warning(expected_warning=FutureWarning): + # FutureWarning to switch to alignment by default + result = s.str.cat([t, s.values]) + assert_series_or_index_equal(result, expected) - # Series/Index with mixed list: warning if different indexes + # Series/Index with DataFrame; different indexes + d.index = ['b', 'c', 'd', 'a'] with tm.assert_produces_warning(expected_warning=FutureWarning): # FutureWarning to switch to alignment by default - assert_series_or_index_equal(s.str.cat([tt, s.values]), exp) + result = s.str.cat(d) + assert_series_or_index_equal(result, expected) # Series/Index with iterator of list-likes with tm.assert_produces_warning(expected_warning=FutureWarning): # nested list-likes will be deprecated - assert_series_or_index_equal(s.str.cat(iter([t.values, list(s)])), - exp) + result = s.str.cat(iter([t.values, list(s)])) + assert_series_or_index_equal(result, expected) # errors for incorrect lengths - rgx = 'All arrays must be same length, except.*' + rgx = 'All arrays must be same length, except those having an index.*' z = Series(['1', '2', '3']) e = concat([z, z], axis=1) @@ -357,7 +325,7 @@ def test_str_cat_mixed_inputs(self, series_or_index): # mixed list of Series/list-like with tm.assert_raises_regex(ValueError, rgx): - s.str.cat([z, s.values]) + s.str.cat([z.values, s]) # errors for incorrect arguments in list-like rgx = 'others must be Series, Index, DataFrame,.*' @@ -384,26 +352,23 @@ def test_str_cat_mixed_inputs(self, series_or_index): with tm.assert_raises_regex(TypeError, rgx): s.str.cat(1) - @pytest.mark.parametrize('series_or_index, join', [ - ('series', 'left'), ('series', 'outer'), - ('series', 'inner'), ('series', 'right'), - ('index', 'left'), ('index', 'outer'), - ('index', 'inner'), ('index', 'right') - ]) - def test_str_cat_align_indexed(self, series_or_index, join): + @pytest.mark.parametrize('join', ['left', 'outer', 'inner', 'right']) + @pytest.mark.parametrize('box', [Series, Index]) + def test_str_cat_align_indexed(self, box, join): # https://github.com/pandas-dev/pandas/issues/18657 s = Series(['a', 'b', 'c', 'd'], index=['a', 'b', 'c', 'd']) t = Series(['D', 'A', 'E', 'B'], index=['d', 'a', 'e', 'b']) sa, ta = s.align(t, join=join) # result after manual alignment of inputs - exp = sa.str.cat(ta, na_rep='-') + expected = sa.str.cat(ta, na_rep='-') - if series_or_index == 'index': + if box == Index: s = Index(s) sa = Index(sa) - exp = Index(exp) + expected = Index(expected) - assert_series_or_index_equal(s.str.cat(t, join=join, na_rep='-'), exp) + result = s.str.cat(t, join=join, na_rep='-') + assert_series_or_index_equal(result, expected) @pytest.mark.parametrize('join', ['left', 'outer', 'inner', 'right']) def test_str_cat_align_mixed_inputs(self, join): @@ -411,31 +376,34 @@ def test_str_cat_align_mixed_inputs(self, join): t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1]) d = concat([t, t], axis=1) - exp_outer = Series(['aaa', 'bbb', 'c--', 'ddd', '-ee']) - sa, ta = s.align(t, join=join) - exp = exp_outer.loc[ta.index] + expected_outer = Series(['aaa', 'bbb', 'c--', 'ddd', '-ee']) + expected = expected_outer.loc[s.index.join(t.index, how=join)] # list of Series - tm.assert_series_equal(s.str.cat([t, t], join=join, na_rep='-'), exp) + result = s.str.cat([t, t], join=join, na_rep='-') + tm.assert_series_equal(result, expected) # DataFrame - tm.assert_series_equal(s.str.cat(d, join=join, na_rep='-'), exp) + result = s.str.cat(d, join=join, na_rep='-') + tm.assert_series_equal(result, expected) # mixed list of 
indexed/unindexed - u = ['A', 'B', 'C', 'D'] - exp_outer = Series(['aaA', 'bbB', 'c-C', 'ddD', '-e-']) - # u will be forced have index of s -> use s here as placeholder - e = concat([t, s], axis=1, join=(join if join == 'inner' else 'outer')) - sa, ea = s.align(e, join=join) - exp = exp_outer.loc[ea.index] + u = np.array(['A', 'B', 'C', 'D']) + expected_outer = Series(['aaA', 'bbB', 'c-C', 'ddD', '-e-']) + # joint index of rhs [t, u]; u will be forced to have the index of s + rhs_idx = t.index & s.index if join == 'inner' else t.index | s.index + + expected = expected_outer.loc[s.index.join(rhs_idx, how=join)] + result = s.str.cat([t, u], join=join, na_rep='-') + tm.assert_series_equal(result, expected) with tm.assert_produces_warning(expected_warning=FutureWarning): - # nested lists will be deprecated - tm.assert_series_equal(s.str.cat([t, u], join=join, na_rep='-'), - exp) + # nested list-likes will be deprecated + result = s.str.cat([t, list(u)], join=join, na_rep='-') + tm.assert_series_equal(result, expected) # errors for incorrect lengths - rgx = 'If `others` contains arrays or lists.*' + rgx = r'If `others` contains arrays or lists \(or other list-likes.*' z = Series(['1', '2', '3']).values # unindexed object of wrong length @@ -451,14 +419,14 @@ def test_str_cat_special_cases(self): t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1]) # iterator of elements with different types - exp = Series(['aaa', 'bbb', 'c-c', 'ddd', '-e-']) - tm.assert_series_equal(s.str.cat(iter([t, s.values]), - join='outer', na_rep='-'), exp) + expected = Series(['aaa', 'bbb', 'c-c', 'ddd', '-e-']) + result = s.str.cat(iter([t, s.values]), join='outer', na_rep='-') + tm.assert_series_equal(result, expected) # right-align with different indexes in others - exp = Series(['aa-', 'd-d'], index=[0, 3]) - tm.assert_series_equal(s.str.cat([t.loc[[0]], t.loc[[3]]], - join='right', na_rep='-'), exp) + expected = Series(['aa-', 'd-d'], index=[0, 3]) + result = s.str.cat([t.loc[[0]], t.loc[[3]]], join='right', na_rep='-') + tm.assert_series_equal(result, expected) def test_cat_on_filtered_index(self): df = DataFrame(index=MultiIndex.from_product( [[2011, 2012], [1, 2, 3]], names=['year', 'month'])) df['text'] = df.index.map(lambda tup: '{0} {1}'.format(*tup)) str_year = df.year.astype('str') str_month = df.month.astype('str') - str_both = str_year.str.cat(str_month, sep=' ', join='left') + str_both = str_year.str.cat(str_month, sep=' ') assert str_both.loc[1] == '2011 2' - str_multiple = str_year.str.cat([str_month, str_month], - sep=' ', join='left') + str_multiple = str_year.str.cat([str_month, str_month], sep=' ') assert str_multiple.loc[1] == '2011 2 2' @@ -1601,7 +1568,7 @@ def test_empty_str_methods(self): # GH7241 # (extract) on empty series - tm.assert_series_equal(empty_str, empty.str.cat(empty, join='left')) + tm.assert_series_equal(empty_str, empty.str.cat(empty)) assert '' == empty.str.cat() tm.assert_series_equal(empty_str, empty.str.title()) tm.assert_series_equal(empty_int, empty.str.count('a')) @@ -3169,9 +3136,9 @@ def test_method_on_bytes(self): lhs = Series(np.array(list('abc'), 'S1').astype(object)) rhs = Series(np.array(list('def'), 'S1').astype(object)) if compat.PY3: - pytest.raises(TypeError, lhs.str.cat, rhs, join='left') + pytest.raises(TypeError, lhs.str.cat, rhs) else: - result = lhs.str.cat(rhs, join='left') + result = lhs.str.cat(rhs) expected = Series(np.array( ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) From 338683e9b7403cf44528668eaa2b49c70c385167 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 7
Sep 2018 23:10:32 -0400 Subject: [PATCH 31/86] DOC: Fix to_latex docstring. (#22516) --- pandas/core/generic.py | 122 +++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 42 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 71dac0ea2e98a..2e5da21f573b0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2595,69 +2595,107 @@ def to_xarray(self): coords=coords, ) - _shared_docs['to_latex'] = r""" + def to_latex(self, buf=None, columns=None, col_space=None, header=True, + index=True, na_rep='NaN', formatters=None, float_format=None, + sparsify=None, index_names=True, bold_rows=False, + column_format=None, longtable=None, escape=None, + encoding=None, decimal='.', multicolumn=None, + multicolumn_format=None, multirow=None): + r""" + Render an object to a LaTeX tabular environment table. + Render an object to a tabular environment table. You can splice - this into a LaTeX document. Requires \usepackage{booktabs}. .. versionchanged:: 0.20.2 Added to Series - `to_latex`-specific options: - - bold_rows : boolean, default False - Make the row labels bold in the output - column_format : str, default None + Parameters + ---------- + buf : file descriptor or None + Buffer to write to. If None, the output is returned as a string. + columns : list of label, optional + The subset of columns to write. Writes all columns by default. + col_space : int, optional + The minimum width of each column. + header : bool or list of str, default True + Write out the column names. If a list of strings is given, + it is assumed to be aliases for the column names. + index : bool, default True + Write row names (index). + na_rep : str, default 'NaN' + Missing data representation. + formatters : list of functions or dict of {str: function}, optional + Formatter functions to apply to columns' elements by position or + name. The result of each function must be a unicode string. + List must be of length equal to the number of columns. + float_format : str, optional + Format string for floating point numbers. + sparsify : bool, optional + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. By default, the value will be + read from the config module. + index_names : bool, default True + Prints the names of the indexes. + bold_rows : bool, default False + Make the row labels bold in the output. + column_format : str, optional The columns format as specified in `LaTeX table format - <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 - columns - longtable : boolean, default will be read from the pandas config module - Default: False. - Use a longtable environment instead of tabular. Requires adding - a \\usepackage{longtable} to your LaTeX preamble. - escape : boolean, default will be read from the pandas config module - Default: True. - When set to False prevents from escaping latex special + <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g. 'rcl' for 3 + columns. By default, 'l' will be used for all columns except + columns of numbers, which default to 'r'. + longtable : bool, optional + By default, the value will be read from the pandas config + module. Use a longtable environment instead of tabular. Requires + adding a \usepackage{longtable} to your LaTeX preamble. + escape : bool, optional + By default, the value will be read from the pandas config + module. When set to False, prevents escaping of latex special characters in column names.
- encoding : str, default None + encoding : str, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - decimal : string, default '.' + decimal : str, default '.' Character recognized as decimal separator, e.g. ',' in Europe. - .. versionadded:: 0.18.0 - - multicolumn : boolean, default True + multicolumn : bool, default True Use \multicolumn to enhance MultiIndex columns. The default will be read from the config module. - .. versionadded:: 0.20.0 - multicolumn_format : str, default 'l' The alignment for multicolumns, similar to `column_format` The default will be read from the config module. - + .. versionadded:: 0.20.0 + multirow : bool, default False + Use \multirow to enhance MultiIndex rows. Requires adding a + \usepackage{multirow} to your LaTeX preamble. Will print + centered labels (instead of top-aligned) across the contained + rows, separating groups via clines. The default will be read + from the pandas config module. .. versionadded:: 0.20.0 - multirow : boolean, default False - Use \multirow to enhance MultiIndex rows. - Requires adding a \\usepackage{multirow} to your LaTeX preamble. - Will print centered labels (instead of top-aligned) - across the contained rows, separating groups via clines. - The default will be read from the pandas config module. + Returns + ------- + str or None + If buf is None, returns the resulting LaTeX format as a + string. Otherwise returns None. - .. versionadded:: 0.20.0 - """ + See Also + -------- + DataFrame.to_string : Render a DataFrame to a console-friendly + tabular output. + DataFrame.to_html : Render a DataFrame as an HTML table. - @Substitution(header='Write out the column names. If a list of strings ' - 'is given, it is assumed to be aliases for the ' - 'column names.') - @Appender(_shared_docs['to_latex'] % _shared_doc_kwargs) - def to_latex(self, buf=None, columns=None, col_space=None, header=True, - index=True, na_rep='NaN', formatters=None, float_format=None, - sparsify=None, index_names=True, bold_rows=False, - column_format=None, longtable=None, escape=None, - encoding=None, decimal='.', multicolumn=None, - multicolumn_format=None, multirow=None): + Examples + -------- + >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], + ... 'mask': ['red', 'purple'], + ...
'weapon': ['sai', 'bo staff']}) + >>> df.to_latex(index=False) # doctest: +NORMALIZE_WHITESPACE + '\\begin{tabular}{lll}\n\\toprule\n name & mask & weapon + \\\\\n\\midrule\n Raphael & red & sai \\\\\n Donatello & + purple & bo staff \\\\\n\\bottomrule\n\\end{tabular}\n' + """ # Get defaults from the pandas config if self.ndim == 1: self = self.to_frame() From 2fda626ff9eecf1bfd80a751434c87867d926224 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 9 Sep 2018 18:11:29 +0100 Subject: [PATCH 32/86] TST: add test to io/formats/test_to_html.py to close GH6131 (#22588) --- pandas/tests/io/formats/test_to_html.py | 61 +++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index f69cac62513d4..845fb1ee3dc3a 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1844,6 +1844,67 @@ def test_to_html_no_index_max_rows(self): """) assert result == expected + def test_to_html_multiindex_max_cols(self): + # GH 6131 + index = MultiIndex(levels=[['ba', 'bb', 'bc'], ['ca', 'cb', 'cc']], + labels=[[0, 1, 2], [0, 1, 2]], + names=['b', 'c']) + columns = MultiIndex(levels=[['d'], ['aa', 'ab', 'ac']], + labels=[[0, 0, 0], [0, 1, 2]], + names=[None, 'a']) + data = np.array( + [[1., np.nan, np.nan], [np.nan, 2., np.nan], [np.nan, np.nan, 3.]]) + df = DataFrame(data, index, columns) + result = df.to_html(max_cols=2) + expected = dedent("""\
+        <table border="1" class="dataframe">
+          <thead>
+            <tr>
+              <th></th>
+              <th></th>
+              <th colspan="3" halign="left">d</th>
+            </tr>
+            <tr>
+              <th></th>
+              <th>a</th>
+              <th>aa</th>
+              <th>...</th>
+              <th>ac</th>
+            </tr>
+            <tr>
+              <th>b</th>
+              <th>c</th>
+              <th></th>
+              <th></th>
+              <th></th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <th>ba</th>
+              <th>ca</th>
+              <td>1.0</td>
+              <td>...</td>
+              <td>NaN</td>
+            </tr>
+            <tr>
+              <th>bb</th>
+              <th>cb</th>
+              <td>NaN</td>
+              <td>...</td>
+              <td>NaN</td>
+            </tr>
+            <tr>
+              <th>bc</th>
+              <th>cc</th>
+              <td>NaN</td>
+              <td>...</td>
+              <td>3.0</td>
+            </tr>
+          </tbody>
+        </table>
""") + assert result == expected + def test_to_html_notebook_has_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html(notebook=True) From 49b560e71b8bcd8aa190987d93d9c8b1e9bcbe96 Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Tue, 11 Sep 2018 12:24:13 -0600 Subject: [PATCH 33/86] DOC/CLN: small whatsnew fixes (#22659) --- doc/source/whatsnew/v0.24.0.txt | 64 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index fb7af00f61534..3660c1e843f6c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -164,7 +164,7 @@ This is the same behavior as ``Series.values`` for categorical data. See Other Enhancements ^^^^^^^^^^^^^^^^^^ - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`) -- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether NaN/NaT values should be considered (:issue:`17534`) +- :func:`Series.mode` and :func:`DataFrame.mode` now support the ``dropna`` parameter which can be used to specify whether ``NaN``/``NaT`` values should be considered (:issue:`17534`) - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`) - :meth:`Index.droplevel` is now implemented also for flat indexes, for compatibility with :class:`MultiIndex` (:issue:`21115`) - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`) @@ -183,7 +183,7 @@ Other Enhancements - :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`) - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). -- :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). .. _whatsnew_0240.api_breaking: @@ -369,7 +369,7 @@ Tick DateOffset Normalize Restrictions Creating a ``Tick`` object (:class:`Day`, :class:`Hour`, :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano`) with -`normalize=True` is no longer supported. This prevents unexpected behavior +``normalize=True`` is no longer supported. This prevents unexpected behavior where addition could fail to be monotone or associative. (:issue:`21427`) *Previous Behavior*: @@ -451,11 +451,11 @@ Previous Behavior: .. _whatsnew_0240.api.timedelta64_subtract_nan -Addition/Subtraction of ``NaN`` from :class:``DataFrame`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Addition/Subtraction of ``NaN`` from :class:`DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Adding or subtracting ``NaN`` from a :class:`DataFrame` column with -`timedelta64[ns]` dtype will now raise a ``TypeError`` instead of returning +``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and ``Series`` behavior (:issue:`22163`) @@ -534,7 +534,7 @@ Other API Changes - :class:`DatetimeIndex` now accepts :class:`Int64Index` arguments as epoch timestamps (:issue:`20997`) - Accessing a level of a ``MultiIndex`` with a duplicate name (e.g. 
in - :meth:~MultiIndex.get_level_values) now raises a ``ValueError`` instead of + :meth:`~MultiIndex.get_level_values`) now raises a ``ValueError`` instead of a ``KeyError`` (:issue:`21678`). - Invalid construction of ``IntervalDtype`` will now always raise a ``TypeError`` rather than a ``ValueError`` if the subdtype is invalid (:issue:`21185`) - Trying to reindex a ``DataFrame`` with a non unique ``MultiIndex`` now raises a ``ValueError`` instead of an ``Exception`` (:issue:`21770`) @@ -552,7 +552,7 @@ Deprecations - :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`) - :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`) - :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`) -- The signature of :meth:`Series.to_csv` has been uniformed to that of doc:meth:`DataFrame.to_csv`: the name of the first argument is now 'path_or_buf', the order of subsequent arguments has changed, the 'header' argument now defaults to True. (:issue:`19715`) +- The signature of :meth:`Series.to_csv` has been uniformed to that of :meth:`DataFrame.to_csv`: the name of the first argument is now ``path_or_buf``, the order of subsequent arguments has changed, the ``header`` argument now defaults to ``True``. (:issue:`19715`) - :meth:`Categorical.from_codes` has deprecated providing float values for the ``codes`` argument. (:issue:`21767`) - :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain @@ -568,8 +568,8 @@ Removal of prior version deprecations/changes - Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`) - Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`) - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) -- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats``(:issue:`14645`) -- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`,:issue:`6581`) +- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) +- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) .. _whatsnew_0240.performance: Performance Improvements Likewise, slicing a ``CategoricalIndex`` itself (i.e. ``ci[100:200]``) shows similar speed improvements (:issue:`21659`) - Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) -- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`,:issue:`21606`) +- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`) - Improved performance of membership checks in :class:`Categorical` and :class:`CategoricalIndex` (i.e. ``x in cat``-style checks are much faster).
:meth:`CategoricalIndex.contains` is likewise much faster (:issue:`21369`, :issue:`21508`) @@ -607,32 +607,32 @@ Bug Fixes Categorical ^^^^^^^^^^^ -- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in `codes` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of `.from_codes([1.1, 2.0])`. +- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). In the future this will raise a ``ValueError``. Also changes the behavior of ``.from_codes([1.1, 2.0])``. Datetimelike ^^^^^^^^^^^^ - Fixed bug where two :class:`DateOffset` objects with different ``normalize`` attributes could evaluate as equal (:issue:`21404`) -- Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`,:issue:`21365`) +- Fixed bug where :meth:`Timestamp.resolution` incorrectly returned 1-microsecond ``timedelta`` instead of 1-nanosecond :class:`Timedelta` (:issue:`21336`, :issue:`21365`) - Bug in :func:`to_datetime` that did not consistently return an :class:`Index` when ``box=True`` was specified (:issue:`21864`) - Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) - Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) - Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) -- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`,:issue:`22163`) -- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`,:issue:`22163`) -- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`,:issue:`22163`) -- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`,:issue:`22163`) -- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`,:issue:`22163`) -- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`) -- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`) -- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`) -- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise `OverflowError` (:issue:`22492`, :issue:`22508`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`, :issue:`22163`) +- Bug in :class:`DataFrame` 
with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`, :issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`, :issue:`22163`) +- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`, :issue:`22163`) +- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`, :issue:`22163`) +- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`) +- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`) - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) - Timedelta ^^^^^^^^^ -- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning ``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`, :issue:`22163`) - Bug in adding a :class:`Index` with object dtype to a :class:`Series` with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`22390`) - Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) - Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) @@ -680,7 +680,7 @@ Numeric when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), a ``TypeError`` was wrongly raised. For all three methods such calculations are now done correctly. (:issue:`16679`). - Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) - Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`) +- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`, :issue:`22163`) - Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g.
``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) - @@ -703,7 +703,7 @@ Indexing ^^^^^^^^ - The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`) -- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index, rather than falling back to positional indexing (:issue:`21593`) +- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`) - Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`) - Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`) - ``DataFrame.__getitem__`` now accepts dictionaries and dictionary keys as list-likes of labels, consistently with ``Series.__getitem__`` (:issue:`21294`) @@ -719,13 +719,13 @@ Missing - Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`) - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) -- :func:`Series.isin` now treats all nans as equal also for `np.object`-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) +- :func:`Series.isin` now treats all nans as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) MultiIndex ^^^^^^^^^^ - Removed compatibility for :class:`MultiIndex` pickles prior to version 0.8.0; compatibility with :class:`MultiIndex` pickles from version 0.13 forward is maintained (:issue:`21654`) -- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:``MultiIndex``ed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) +- :meth:`MultiIndex.get_loc_level` (and as a consequence, ``.loc`` on a :class:`MultiIndex`ed object) will now raise a ``KeyError``, rather than returning an empty ``slice``, if asked a label which is present in the ``levels`` but is unused (:issue:`22221`) - Fix ``TypeError`` in Python 3 when creating :class:`MultiIndex` in which some levels have mixed types, e.g. when some labels are tuples (:issue:`15457`) I/O @@ -749,10 +749,10 @@ Groupby/Resample/Rolling - Bug in :meth:`DatetimeIndex.resample` when downsampling across a DST boundary (:issue:`8531`) - Bug where ``ValueError`` is wrongly raised when calling :func:`~pandas.core.groupby.SeriesGroupBy.count` method of a ``SeriesGroupBy`` when the grouping variable only contains NaNs and numpy version < 1.13 (:issue:`21956`). -- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'` and a +- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a datetime-like index leading to incorrect results and also segfault. (:issue:`21704`) - Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`). - Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to `loffset` kwarg (:issue:`7687`).
+- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`). - Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). Sparse @@ -773,14 +773,14 @@ Reshaping - :func:`pandas.core.groupby.GroupBy.rank` now raises a ``ValueError`` when an invalid value is passed for argument ``na_option`` (:issue:`22124`) - Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`) - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) -- Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) +- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when dict is used as the ``to_replace`` value and one key in the dict is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) - Bug in :meth:`DataFrame.drop_duplicates` for empty ``DataFrame`` which incorrectly raises an error (:issue:`20516`) Build Changes ^^^^^^^^^^^^^ - Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`) -- Testing pandas now requires ``hypothesis>=3.58`` (:issue:22280). You can find `the Hypothesis docs here <https://hypothesis.readthedocs.io/en/latest/>`_, and a pandas-specific introduction :ref:`in the contributing guide <using-hypothesis>` . +- Testing pandas now requires ``hypothesis>=3.58``. You can find `the Hypothesis docs here <https://hypothesis.readthedocs.io/en/latest/>`_, and a pandas-specific introduction :ref:`in the contributing guide <using-hypothesis>`. (:issue:`22280`) - Other ^^^^^ - :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. +- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax``. ``NaN`` values are also handled properly.
(:issue:`21548`, :issue:`21526`) - - - From 688c8a495b09ce852cb828ab0b5d195391dab4d4 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 12 Sep 2018 07:01:12 +0100 Subject: [PATCH 34/86] DOC: Add cross references to advanced.rst (#22671) --- doc/source/advanced.rst | 53 +++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 2be1a53aa6c93..611afb3670ebc 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -15,7 +15,7 @@ MultiIndex / Advanced Indexing ****************************** -This section covers indexing with a ``MultiIndex`` and more advanced indexing features. +This section covers indexing with a ``MultiIndex`` and :ref:`more advanced indexing features <indexing.index_types>`. See the :ref:`Indexing and Selecting Data <indexing>` for general indexing documentation. @@ -51,13 +51,13 @@ See the :ref:`cookbook` for some advanced strategies. Creating a MultiIndex (hierarchical index) object ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``MultiIndex`` object is the hierarchical analogue of the standard -``Index`` object which typically stores the axis labels in pandas objects. You +The :class:`MultiIndex` object is the hierarchical analogue of the standard +:class:`Index` object which typically stores the axis labels in pandas objects. You can think of ``MultiIndex`` as an array of tuples where each tuple is unique. A ``MultiIndex`` can be created from a list of arrays (using -``MultiIndex.from_arrays``), an array of tuples (using -``MultiIndex.from_tuples``), or a crossed set of iterables (using -``MultiIndex.from_product``). The ``Index`` constructor will attempt to return +:meth:`MultiIndex.from_arrays`), an array of tuples (using +:meth:`MultiIndex.from_tuples`), or a crossed set of iterables (using +:meth:`MultiIndex.from_product`). The ``Index`` constructor will attempt to return a ``MultiIndex`` when it is passed a list of tuples. The following examples demonstrate different ways to initialize MultiIndexes. @@ -76,7 +76,7 @@ demonstrate different ways to initialize MultiIndexes. s When you want every pairing of the elements in two iterables, it can be easier -to use the ``MultiIndex.from_product`` function: +to use the :meth:`MultiIndex.from_product` method: .. ipython:: python iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']] pd.MultiIndex.from_product(iterables, names=['first', 'second']) As a convenience, you can pass a list of arrays directly into Series or -DataFrame to construct a MultiIndex automatically: +DataFrame to construct a ``MultiIndex`` automatically: .. ipython:: python @@ -140,7 +140,7 @@ may wish to generate your own ``MultiIndex`` when preparing the data set. Reconstructing the level labels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The method ``get_level_values`` will return a vector of the labels for each +The method :meth:`~MultiIndex.get_level_values` will return a vector of the labels for each location at a particular level: .. ipython:: python @@ -183,7 +183,7 @@ For example: This is done to avoid a recomputation of the levels in order to make slicing highly performant.
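+For instance (a minimal sketch of this point; it assumes the ``df`` with
+``MultiIndex`` columns constructed earlier in this section):
+
+.. ipython:: python
+
+   # every first-level label is retained after slicing,
+   # not just the two that are actually used
+   df[['foo', 'qux']].columns.levels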
If you want to see only the used levels, you can use the -:func:`MultiIndex.get_level_values` method. +:meth:`~MultiIndex.get_level_values` method. .. ipython:: python df[['foo','qux']].columns.get_level_values(0) To reconstruct the ``MultiIndex`` with only the used levels, the -``remove_unused_levels`` method may be used. +:meth:`~MultiIndex.remove_unused_levels` method may be used. .. versionadded:: 0.20.0 @@ -400,8 +400,8 @@ You can use a right-hand-side of an alignable object as well. Cross-section ~~~~~~~~~~~~~ -The ``xs`` method of ``DataFrame`` additionally takes a level argument to make -selecting data at a particular level of a MultiIndex easier. +The :meth:`~DataFrame.xs` method of ``DataFrame`` additionally takes a level argument to make +selecting data at a particular level of a ``MultiIndex`` easier. .. ipython:: python @@ -519,7 +519,7 @@ to be sorted. As with any index, you can use ``sort_index``. .. _advanced.sortlevel_byname: -You may also pass a level name to ``sort_index`` if the MultiIndex levels +You may also pass a level name to ``sort_index`` if the ``MultiIndex`` levels are named. .. ipython:: python @@ -566,7 +566,8 @@ Furthermore, if you try to index something that is not fully lexsorted, this can raise: In [5]: dfm.loc[(0,'y'):(1, 'z')] UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' -The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth: +The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the +index is sorted, and the ``lexsort_depth`` property returns the sort depth: .. ipython:: python @@ -591,8 +592,8 @@ Take Methods .. _advanced.take: -Similar to NumPy ndarrays, pandas Index, Series, and DataFrame also provides -the ``take`` method that retrieves elements along a given axis at the given +Similar to NumPy ndarrays, pandas ``Index``, ``Series``, and ``DataFrame`` also provide +the :meth:`~DataFrame.take` method that retrieves elements along a given axis at the given indices. The given indices must be either a list or an ndarray of integer index positions. ``take`` will also accept negative integers as relative positions to the end of the object. @@ -668,8 +669,8 @@ In the following sub-sections we will highlight some other index types. CategoricalIndex ~~~~~~~~~~~~~~~~ -``CategoricalIndex`` is a type of index that is useful for supporting -indexing with duplicates. This is a container around a ``Categorical`` +:class:`CategoricalIndex` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a :class:`Categorical` and allows efficient indexing and storage of an index with a large number of duplicated elements. .. ipython:: python @@ -758,11 +759,11 @@ Int64Index and RangeIndex Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`. -``Int64Index`` is a fundamental basic index in pandas. -This is an Immutable array implementing an ordered, sliceable set. +:class:`Int64Index` is a fundamental basic index in pandas. +This is an immutable array implementing an ordered, sliceable set. Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. -``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects. +:class:`RangeIndex` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
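+A minimal illustration of that default (the data here is arbitrary):
+
+.. ipython:: python
+
+   # no index is passed, so a RangeIndex is created automatically
+   pd.Series([1, 2, 3]).index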
``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__. .. _indexing.float64index: @@ -770,7 +771,7 @@ Float64Index ~~~~~~~~~~~~ -By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. +By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. @@ -875,9 +876,9 @@ IntervalIndex .. versionadded:: 0.20.0 -:class:`IntervalIndex` together with its own dtype, ``interval`` as well as the -:class:`Interval` scalar type, allow first-class support in pandas for interval -notation. +:class:`IntervalIndex` together with its own dtype, :class:`~pandas.api.types.IntervalDtype` +as well as the :class:`Interval` scalar type, allow first-class support in pandas +for interval notation. The ``IntervalIndex`` allows some unique indexing and is also used as a return type for the categories in :func:`cut` and :func:`qcut`. From f3b3694a671e98059ffcd1670e6cd4d5f1b731eb Mon Sep 17 00:00:00 2001 From: Matthew Gilbert Date: Wed, 12 Sep 2018 07:11:53 -0400 Subject: [PATCH 35/86] DOC: Add section on MultiIndex.to_frame() ordering (#22674) --- pandas/core/indexes/multi.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 955f1461075f9..ac83deee49b09 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1130,6 +1130,9 @@ def to_frame(self, index=True): """ Create a DataFrame with the levels of the MultiIndex as columns. + Column ordering is determined by the DataFrame constructor with data as + a dict. + .. versionadded:: 0.20.0 Parameters @@ -1140,6 +1143,10 @@ def to_frame(self, index=True): Returns ------- DataFrame : a DataFrame containing the original MultiIndex data. + + See also + -------- + DataFrame """ from pandas import DataFrame From 6b3e3c2c744a1f95ca7a461894d1a4743895603f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Sep 2018 04:26:12 -0700 Subject: [PATCH 36/86] TST: Avoid DeprecationWarnings (#22646) --- pandas/core/common.py | 18 ++++++++++++++++++ pandas/core/indexes/base.py | 1 + pandas/core/indexes/multi.py | 2 ++ pandas/io/formats/style.py | 8 ++++++-- pandas/util/testing.py | 2 +- 5 files changed, 28 insertions(+), 3 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index a3fba762509f1..92e4e23ce958e 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -122,6 +122,24 @@ def is_bool_indexer(key): return False +def cast_scalar_indexer(val): + """ + To avoid numpy DeprecationWarnings, cast float to integer where valid.
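+    Any value that is not an integral float is returned unchanged.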
+ + Parameters + ---------- + val : scalar + + Returns + ------- + outval : scalar + """ + # assumes lib.is_scalar(val) + if lib.is_float(val) and val == int(val): + return int(val) + return val + + def _not_none(*args): """Returns a generator consisting of the arguments that are not None""" return (arg for arg in args if arg is not None) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 710c9d0e296c9..b2b6e02e908c5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2047,6 +2047,7 @@ def __getitem__(self, key): promote = self._shallow_copy if is_scalar(key): + key = com.cast_scalar_indexer(key) return getitem(key) if isinstance(key, slice): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ac83deee49b09..4f38f61f7b0e4 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1558,6 +1558,8 @@ def __setstate__(self, state): def __getitem__(self, key): if is_scalar(key): + key = com.cast_scalar_indexer(key) + retval = [] for lev, lab in zip(self.levels, self.labels): if lab[key] == -1: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6501717f715cb..b175dd540a518 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1033,10 +1033,14 @@ def css_bar(start, end, color): def css(x): if pd.isna(x): return '' + + # avoid deprecated indexing `colors[x > zero]` + color = colors[1] if x > zero else colors[0] + if align == 'left': - return css_bar(0, x, colors[x > zero]) + return css_bar(0, x, color) else: - return css_bar(min(x, zero), max(x, zero), colors[x > zero]) + return css_bar(min(x, zero), max(x, zero), color) if s.ndim == 1: return [css(x) for x in normed] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 01fafd7219382..f785ec35f52db 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2431,7 +2431,7 @@ def assert_raises_regex(_exception, _regexp, _callable=None, You can also use this in a with statement. - >>> with assert_raises_regex(TypeError, 'unsupported operand type\(s\)'): + >>> with assert_raises_regex(TypeError, r'unsupported operand type\(s\)'): ... 1 + {} >>> with assert_raises_regex(TypeError, 'banana'): ... 
'apple'[0] = 'b' From 16725cf60adaaaf6542ec5028f167e7670a0d753 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Sep 2018 04:28:47 -0700 Subject: [PATCH 37/86] TST: Collect/Use arithmetic test fixtures (#22645) --- pandas/tests/arithmetic/conftest.py | 100 +++++++++- pandas/tests/arithmetic/test_datetime64.py | 156 ++++++---------- pandas/tests/arithmetic/test_numeric.py | 103 ++++++----- pandas/tests/arithmetic/test_period.py | 93 ++-------- pandas/tests/arithmetic/test_timedelta64.py | 192 ++++++-------------- 5 files changed, 279 insertions(+), 365 deletions(-) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index 844472b8bcf0d..b800b66e8edea 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -28,14 +28,32 @@ def zero(request): return request.param +# ------------------------------------------------------------------ +# Vector Fixtures + @pytest.fixture(params=[pd.Float64Index(np.arange(5, dtype='float64')), pd.Int64Index(np.arange(5, dtype='int64')), - pd.UInt64Index(np.arange(5, dtype='uint64'))], + pd.UInt64Index(np.arange(5, dtype='uint64')), + pd.RangeIndex(5)], ids=lambda x: type(x).__name__) -def idx(request): +def numeric_idx(request): + """ + Several types of numeric-dtypes Index objects + """ return request.param +@pytest.fixture +def tdser(): + """ + Return a Series with dtype='timedelta64[ns]', including a NaT. + """ + return pd.Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') + + +# ------------------------------------------------------------------ +# Scalar Fixtures + @pytest.fixture(params=[pd.Timedelta('5m4s').to_pytimedelta(), pd.Timedelta('5m4s'), pd.Timedelta('5m4s').to_timedelta64()], @@ -47,6 +65,72 @@ def scalar_td(request): return request.param +@pytest.fixture(params=[pd.offsets.Day(3), + pd.offsets.Hour(72), + pd.Timedelta(days=3).to_pytimedelta(), + pd.Timedelta('72:00:00'), + np.timedelta64(3, 'D'), + np.timedelta64(72, 'h')]) +def three_days(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 3-day timedelta + """ + return request.param + + +@pytest.fixture(params=[pd.offsets.Hour(2), + pd.offsets.Minute(120), + pd.Timedelta(hours=2).to_pytimedelta(), + pd.Timedelta(seconds=2 * 3600), + np.timedelta64(2, 'h'), + np.timedelta64(120, 'm')]) +def two_hours(request): + """ + Several timedelta-like and DateOffset objects that each represent + a 2-hour timedelta + """ + return request.param + + +_common_mismatch = [pd.offsets.YearBegin(2), + pd.offsets.MonthBegin(1), + pd.offsets.Minute()] + + +@pytest.fixture(params=[pd.Timedelta(minutes=30).to_pytimedelta(), + np.timedelta64(30, 's'), + pd.Timedelta(seconds=30)] + _common_mismatch) +def not_hourly(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Hourly frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(4, 'h'), + pd.Timedelta(hours=23).to_pytimedelta(), + pd.Timedelta('23:00:00')] + _common_mismatch) +def not_daily(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Daily frequencies. + """ + return request.param + + +@pytest.fixture(params=[np.timedelta64(365, 'D'), + pd.Timedelta(days=365).to_pytimedelta(), + pd.Timedelta(days=365)] + _common_mismatch) +def mismatched_freq(request): + """ + Several timedelta-like and DateOffset instances that are _not_ + compatible with Monthly or Annual frequencies. 
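+    (e.g. a fixed 365-day timedelta cannot be expressed as an integer
+    number of months or years).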
+ """ + return request.param + + # ------------------------------------------------------------------ @pytest.fixture(params=[pd.Index, pd.Series, pd.DataFrame], @@ -59,6 +143,18 @@ def box(request): return request.param +@pytest.fixture(params=[pd.Index, + pd.Series, + pytest.param(pd.DataFrame, + marks=pytest.mark.xfail(strict=True))], + ids=lambda x: x.__name__) +def box_df_fail(request): + """ + Fixture equivalent to `box` fixture but xfailing the DataFrame case. + """ + return request.param + + @pytest.fixture(params=[ pd.Index, pd.Series, diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index d597ea834f097..a3fa4e6b88256 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -27,29 +27,6 @@ DatetimeIndex, TimedeltaIndex) -# ------------------------------------------------------------------ -# Fixtures - -@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)], - ids=str) -def delta(request): - # Several ways of representing two hours - return request.param - - -@pytest.fixture( - params=[ - datetime(2011, 1, 1), - DatetimeIndex(['2011-01-01', '2011-01-02']), - DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'), - np.datetime64('2011-01-01'), - Timestamp('2011-01-01')], - ids=lambda x: type(x).__name__) -def addend(request): - return request.param - - # ------------------------------------------------------------------ # Comparisons @@ -697,23 +674,20 @@ def test_dt64ser_sub_datetime_dtype(self): # TODO: This next block of tests came from tests.series.test_operators, # needs to be de-duplicated and parametrized over `box` classes - @pytest.mark.parametrize( - 'box, assert_func', - [(Series, tm.assert_series_equal), - (pd.Index, tm.assert_index_equal)]) - def test_sub_datetime64_not_ns(self, box, assert_func): + @pytest.mark.parametrize('klass', [Series, pd.Index]) + def test_sub_datetime64_not_ns(self, klass): # GH#7996 dt64 = np.datetime64('2013-01-01') assert dt64.dtype == 'datetime64[D]' - obj = box(date_range('20130101', periods=3)) + obj = klass(date_range('20130101', periods=3)) res = obj - dt64 - expected = box([Timedelta(days=0), Timedelta(days=1), - Timedelta(days=2)]) - assert_func(res, expected) + expected = klass([Timedelta(days=0), Timedelta(days=1), + Timedelta(days=2)]) + tm.assert_equal(res, expected) res = dt64 - obj - assert_func(res, -expected) + tm.assert_equal(res, -expected) def test_sub_single_tz(self): # GH12290 @@ -1113,40 +1087,40 @@ def test_dti_add_intarray_no_freq(self, box): # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like - def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box): + def test_dti_add_timedeltalike(self, tz_naive_fixture, two_hours, box): # GH#22005, GH#22163 check DataFrame doesn't raise TypeError tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) rng = tm.box_expected(rng, box) - result = rng + delta + result = rng + two_hours expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) expected = tm.box_expected(expected, box) tm.assert_equal(result, expected) - def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta): + def test_dti_iadd_timedeltalike(self, tz_naive_fixture, two_hours): tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - rng += 
delta + rng += two_hours tm.assert_index_equal(rng, expected) - def test_dti_sub_timedeltalike(self, tz_naive_fixture, delta): + def test_dti_sub_timedeltalike(self, tz_naive_fixture, two_hours): tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - result = rng - delta + result = rng - two_hours tm.assert_index_equal(result, expected) - def test_dti_isub_timedeltalike(self, tz_naive_fixture, delta): + def test_dti_isub_timedeltalike(self, tz_naive_fixture, two_hours): tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) expected = pd.date_range('1999-12-31 22:00', '2000-01-31 22:00', tz=tz) - rng -= delta + rng -= two_hours tm.assert_index_equal(rng, expected) # ------------------------------------------------------------- @@ -1252,27 +1226,23 @@ def test_dti_isub_tdi(self, tz_naive_fixture): # TODO: A couple other tests belong in this section. Move them in # A PR where there isn't already a giant diff. - def test_add_datetimelike_and_dti(self, addend): + @pytest.mark.parametrize('addend', [ + datetime(2011, 1, 1), + DatetimeIndex(['2011-01-01', '2011-01-02']), + DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize('US/Eastern'), + np.datetime64('2011-01-01'), + Timestamp('2011-01-01') + ], ids=lambda x: type(x).__name__) + @pytest.mark.parametrize('tz', [None, 'US/Eastern']) + def test_add_datetimelike_and_dti(self, addend, tz): # GH#9631 - dti = DatetimeIndex(['2011-01-01', '2011-01-02']) - msg = 'cannot add DatetimeIndex and {0}'.format( - type(addend).__name__) + dti = DatetimeIndex(['2011-01-01', '2011-01-02']).tz_localize(tz) + msg = 'cannot add DatetimeIndex and {0}'.format(type(addend).__name__) with tm.assert_raises_regex(TypeError, msg): dti + addend with tm.assert_raises_regex(TypeError, msg): addend + dti - def test_add_datetimelike_and_dti_tz(self, addend): - # GH#9631 - dti_tz = DatetimeIndex(['2011-01-01', - '2011-01-02']).tz_localize('US/Eastern') - msg = 'cannot add DatetimeIndex and {0}'.format( - type(addend).__name__) - with tm.assert_raises_regex(TypeError, msg): - dti_tz + addend - with tm.assert_raises_regex(TypeError, msg): - addend + dti_tz - # ------------------------------------------------------------- # __add__/__sub__ with ndarray[datetime64] and ndarray[timedelta64] @@ -1391,21 +1361,14 @@ def test_sub_period(self, freq, box): with pytest.raises(TypeError): p - idx - @pytest.mark.parametrize('box', [ - pd.Index, - pd.Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Tries to broadcast " - "incorrectly", - strict=True, - raises=ValueError)) - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('op', [operator.add, ops.radd, operator.sub, ops.rsub]) @pytest.mark.parametrize('pi_freq', ['D', 'W', 'Q', 'H']) @pytest.mark.parametrize('dti_freq', [None, 'D']) - def test_dti_sub_pi(self, dti_freq, pi_freq, op, box): + def test_dti_sub_pi(self, dti_freq, pi_freq, op, box_df_broadcast_failure): # GH#20049 subtracting PeriodIndex should raise TypeError + box = box_df_broadcast_failure + dti = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=dti_freq) pi = dti.to_period(pi_freq) @@ -1748,31 +1711,30 @@ def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): tm.assert_equal(offset, expected) -@pytest.mark.parametrize('klass,assert_func', [ - (Series, tm.assert_series_equal), - (DatetimeIndex, tm.assert_index_equal)]) -def test_dt64_with_offset_array(klass, assert_func): +@pytest.mark.parametrize('klass', [Series, 
DatetimeIndex]) +def test_dt64_with_offset_array(klass): # GH#10699 # array of offsets box = Series if klass is Series else pd.Index + dti = DatetimeIndex([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) + + s = klass(dti) + with tm.assert_produces_warning(PerformanceWarning): - s = klass([Timestamp('2000-1-1'), Timestamp('2000-2-1')]) result = s + box([pd.offsets.DateOffset(years=1), pd.offsets.MonthEnd()]) exp = klass([Timestamp('2001-1-1'), Timestamp('2000-2-29')]) - assert_func(result, exp) + tm.assert_equal(result, exp) # same offset result = s + box([pd.offsets.DateOffset(years=1), pd.offsets.DateOffset(years=1)]) exp = klass([Timestamp('2001-1-1'), Timestamp('2001-2-1')]) - assert_func(result, exp) + tm.assert_equal(result, exp) -@pytest.mark.parametrize('klass,assert_func', [ - (Series, tm.assert_series_equal), - (DatetimeIndex, tm.assert_index_equal)]) -def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): +@pytest.mark.parametrize('klass', [Series, DatetimeIndex]) +def test_dt64_with_DateOffsets_relativedelta(klass): # GH#10699 vec = klass([Timestamp('2000-01-05 00:15:00'), Timestamp('2000-01-31 00:23:00'), @@ -1789,11 +1751,11 @@ def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): ('microseconds', 5)] for i, kwd in enumerate(relative_kwargs): op = pd.DateOffset(**dict([kwd])) - assert_func(klass([x + op for x in vec]), vec + op) - assert_func(klass([x - op for x in vec]), vec - op) + tm.assert_equal(klass([x + op for x in vec]), vec + op) + tm.assert_equal(klass([x - op for x in vec]), vec - op) op = pd.DateOffset(**dict(relative_kwargs[:i + 1])) - assert_func(klass([x + op for x in vec]), vec + op) - assert_func(klass([x - op for x in vec]), vec - op) + tm.assert_equal(klass([x + op for x in vec]), vec + op) + tm.assert_equal(klass([x - op for x in vec]), vec - op) @pytest.mark.parametrize('cls_and_kwargs', [ @@ -1816,10 +1778,8 @@ def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): 'Easter', ('DateOffset', {'day': 4}), ('DateOffset', {'month': 5})]) @pytest.mark.parametrize('normalize', [True, False]) -@pytest.mark.parametrize('klass,assert_func', [ - (Series, tm.assert_series_equal), - (DatetimeIndex, tm.assert_index_equal)]) -def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs): +@pytest.mark.parametrize('klass', [Series, DatetimeIndex]) +def test_dt64_with_DateOffsets(klass, normalize, cls_and_kwargs): # GH#10699 # assert these are equal on a piecewise basis vec = klass([Timestamp('2000-01-05 00:15:00'), @@ -1849,26 +1809,24 @@ def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs): continue offset = offset_cls(n, normalize=normalize, **kwargs) - assert_func(klass([x + offset for x in vec]), vec + offset) - assert_func(klass([x - offset for x in vec]), vec - offset) - assert_func(klass([offset + x for x in vec]), offset + vec) + tm.assert_equal(klass([x + offset for x in vec]), vec + offset) + tm.assert_equal(klass([x - offset for x in vec]), vec - offset) + tm.assert_equal(klass([offset + x for x in vec]), offset + vec) -@pytest.mark.parametrize('klass,assert_func', zip([Series, DatetimeIndex], - [tm.assert_series_equal, - tm.assert_index_equal])) -def test_datetime64_with_DateOffset(klass, assert_func): +@pytest.mark.parametrize('klass', [Series, DatetimeIndex]) +def test_datetime64_with_DateOffset(klass): # GH#10699 s = klass(date_range('2000-01-01', '2000-01-31'), name='a') result = s + pd.DateOffset(years=1) result2 = pd.DateOffset(years=1) + s exp = klass(date_range('2001-01-01', 
'2001-01-31'), name='a') - assert_func(result, exp) - assert_func(result2, exp) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) result = s - pd.DateOffset(years=1) exp = klass(date_range('1999-01-01', '1999-01-31'), name='a') - assert_func(result, exp) + tm.assert_equal(result, exp) s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), pd.Timestamp('2000-02-15', tz='US/Central')], name='a') @@ -1876,8 +1834,8 @@ def test_datetime64_with_DateOffset(klass, assert_func): result2 = pd.offsets.Day() + s exp = klass([Timestamp('2000-01-16 00:15:00', tz='US/Central'), Timestamp('2000-02-16', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) s = klass([Timestamp('2000-01-15 00:15:00', tz='US/Central'), pd.Timestamp('2000-02-15', tz='US/Central')], name='a') @@ -1885,8 +1843,8 @@ def test_datetime64_with_DateOffset(klass, assert_func): result2 = pd.offsets.MonthEnd() + s exp = klass([Timestamp('2000-01-31 00:15:00', tz='US/Central'), Timestamp('2000-02-29', tz='US/Central')], name='a') - assert_func(result, exp) - assert_func(result2, exp) + tm.assert_equal(result, exp) + tm.assert_equal(result2, exp) @pytest.mark.parametrize('years', [-1, 0, 1]) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d3957330f11e4..fcfc3994a88c8 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -17,15 +17,6 @@ from pandas import Timedelta, Series, Index, TimedeltaIndex -@pytest.fixture(params=[pd.Float64Index(np.arange(5, dtype='float64')), - pd.UInt64Index(np.arange(5, dtype='uint64')), - pd.Int64Index(np.arange(5, dtype='int64')), - pd.RangeIndex(5)], - ids=lambda x: type(x).__name__) -def idx(request): - return request.param - - # ------------------------------------------------------------------ # Comparisons @@ -135,20 +126,18 @@ def test_ops_series(self): tm.assert_series_equal(expected, td * other) tm.assert_series_equal(expected, other * td) - @pytest.mark.parametrize('index', [ - pd.Int64Index(range(1, 11)), - pd.UInt64Index(range(1, 11)), - pd.Float64Index(range(1, 11)), - pd.RangeIndex(1, 11)], - ids=lambda x: type(x).__name__) + # TODO: also test non-nanosecond timedelta64 and Tick objects; + # see test_numeric_arr_rdiv_tdscalar for note on these failing @pytest.mark.parametrize('scalar_td', [ Timedelta(days=1), Timedelta(days=1).to_timedelta64(), Timedelta(days=1).to_pytimedelta()], ids=lambda x: type(x).__name__) - def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): + def test_numeric_arr_mul_tdscalar(self, scalar_td, numeric_idx, box): # GH#19333 - expected = pd.timedelta_range('1 days', '10 days') + index = numeric_idx + + expected = pd.timedelta_range('0 days', '4 days') index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) @@ -159,28 +148,27 @@ def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): commute = scalar_td * index tm.assert_equal(commute, expected) - @pytest.mark.parametrize('index', [ - pd.Int64Index(range(1, 3)), - pd.UInt64Index(range(1, 3)), - pd.Float64Index(range(1, 3)), - pd.RangeIndex(1, 3)], - ids=lambda x: type(x).__name__) - @pytest.mark.parametrize('scalar_td', [ - Timedelta(days=1), - Timedelta(days=1).to_timedelta64(), - Timedelta(days=1).to_pytimedelta()], - ids=lambda x: type(x).__name__) - def test_numeric_arr_rdiv_tdscalar(self, scalar_td, index, box): - expected = TimedeltaIndex(['1 Day', '12 Hours']) + def 
test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box):
+        index = numeric_idx[1:3]
+
+        broken = (isinstance(three_days, np.timedelta64) and
+                  three_days.dtype != 'm8[ns]')
+        broken = broken or isinstance(three_days, pd.offsets.Tick)
+        if box is not pd.Index and broken:
+            # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
+            raise pytest.xfail("timedelta64 not converted to nanos; "
+                               "Tick division not implemented")
+
+        expected = TimedeltaIndex(['3 Days', '36 Hours'])

         index = tm.box_expected(index, box)
         expected = tm.box_expected(expected, box)

-        result = scalar_td / index
+        result = three_days / index
         tm.assert_equal(result, expected)

         with pytest.raises(TypeError):
-            index / scalar_td
+            index / three_days


 # ------------------------------------------------------------------
@@ -188,7 +176,9 @@

 class TestDivisionByZero(object):

-    def test_div_zero(self, zero, idx):
+    def test_div_zero(self, zero, numeric_idx):
+        idx = numeric_idx
+
         expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
                             dtype=np.float64)
         result = idx / zero
@@ -196,7 +186,9 @@
         ser_compat = Series(idx).astype('i8') / np.array(zero).astype('i8')
         tm.assert_series_equal(ser_compat, Series(result))

-    def test_floordiv_zero(self, zero, idx):
+    def test_floordiv_zero(self, zero, numeric_idx):
+        idx = numeric_idx
+
         expected = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
                             dtype=np.float64)

@@ -205,7 +197,9 @@
         ser_compat = Series(idx).astype('i8') // np.array(zero).astype('i8')
         tm.assert_series_equal(ser_compat, Series(result))

-    def test_mod_zero(self, zero, idx):
+    def test_mod_zero(self, zero, numeric_idx):
+        idx = numeric_idx
+
         expected = pd.Index([np.nan, np.nan, np.nan, np.nan, np.nan],
                             dtype=np.float64)
         result = idx % zero
@@ -213,7 +207,8 @@
         ser_compat = Series(idx).astype('i8') % np.array(zero).astype('i8')
         tm.assert_series_equal(ser_compat, Series(result))

-    def test_divmod_zero(self, zero, idx):
+    def test_divmod_zero(self, zero, numeric_idx):
+        idx = numeric_idx

         exleft = pd.Index([np.nan, np.inf, np.inf, np.inf, np.inf],
                           dtype=np.float64)
@@ -430,8 +425,9 @@ def test_div_equiv_binop(self):
         result = second / first
         tm.assert_series_equal(result, expected)

-    def test_div_int(self, idx):
+    def test_div_int(self, numeric_idx):
         # truediv under PY3
+        idx = numeric_idx
         result = idx / 1
         expected = idx
         if PY3:
@@ -445,13 +441,15 @@ def test_div_int(self, idx):
         tm.assert_index_equal(result, expected)

     @pytest.mark.parametrize('op', [operator.mul, ops.rmul, operator.floordiv])
-    def test_mul_int_identity(self, op, idx, box):
+    def test_mul_int_identity(self, op, numeric_idx, box):
+        idx = numeric_idx
         idx = tm.box_expected(idx, box)

         result = op(idx, 1)
         tm.assert_equal(result, idx)

-    def test_mul_int_array(self, idx):
+    def test_mul_int_array(self, numeric_idx):
+        idx = numeric_idx
         didx = idx * idx

         result = idx * np.array(5, dtype='int64')
@@ -461,39 +459,45 @@ def test_mul_int_array(self, idx):
         result = idx * np.arange(5, dtype=arr_dtype)
         tm.assert_index_equal(result, didx)

-    def test_mul_int_series(self, idx):
+    def test_mul_int_series(self, numeric_idx):
+        idx = numeric_idx
         didx = idx * idx

         arr_dtype = 'uint64' if isinstance(idx, pd.UInt64Index) else 'int64'
         result = idx * Series(np.arange(5, dtype=arr_dtype))
         tm.assert_series_equal(result, Series(didx))

-    def test_mul_float_series(self, idx):
+    def test_mul_float_series(self,
numeric_idx): + idx = numeric_idx rng5 = np.arange(5, dtype='float64') result = idx * Series(rng5 + 0.1) expected = Series(rng5 * (rng5 + 0.1)) tm.assert_series_equal(result, expected) - def test_mul_index(self, idx): + def test_mul_index(self, numeric_idx): # in general not true for RangeIndex + idx = numeric_idx if not isinstance(idx, pd.RangeIndex): result = idx * idx tm.assert_index_equal(result, idx ** 2) - def test_mul_datelike_raises(self, idx): + def test_mul_datelike_raises(self, numeric_idx): + idx = numeric_idx with pytest.raises(TypeError): idx * pd.date_range('20130101', periods=5) - def test_mul_size_mismatch_raises(self, idx): + def test_mul_size_mismatch_raises(self, numeric_idx): + idx = numeric_idx with pytest.raises(ValueError): idx * idx[0:3] with pytest.raises(ValueError): idx * np.array([1, 2]) @pytest.mark.parametrize('op', [operator.pow, ops.rpow]) - def test_pow_float(self, op, idx, box): + def test_pow_float(self, op, numeric_idx, box): # test power calculations both ways, GH#14973 + idx = numeric_idx expected = pd.Float64Index(op(idx.values, 2.0)) idx = tm.box_expected(idx, box) @@ -502,8 +506,9 @@ def test_pow_float(self, op, idx, box): result = op(idx, 2.0) tm.assert_equal(result, expected) - def test_modulo(self, idx, box): + def test_modulo(self, numeric_idx, box): # GH#9244 + idx = numeric_idx expected = Index(idx.values % 2) idx = tm.box_expected(idx, box) @@ -512,7 +517,8 @@ def test_modulo(self, idx, box): result = idx % 2 tm.assert_equal(result, expected) - def test_divmod(self, idx): + def test_divmod(self, numeric_idx): + idx = numeric_idx result = divmod(idx, 2) with np.errstate(all='ignore'): div, mod = divmod(idx.values, 2) @@ -530,7 +536,8 @@ def test_divmod(self, idx): @pytest.mark.xfail(reason='GH#19252 Series has no __rdivmod__', strict=True) - def test_divmod_series(self, idx): + def test_divmod_series(self, numeric_idx): + idx = numeric_idx other = np.ones(idx.values.shape, dtype=idx.values.dtype) * 2 result = divmod(idx, Series(other)) with np.errstate(all='ignore'): diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 92123bf48bb47..3210290b9c5c8 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -3,7 +3,6 @@ # behave identically. # Specifically for Period dtype import operator -from datetime import timedelta import numpy as np import pytest @@ -17,80 +16,10 @@ import pandas.core.indexes.period as period from pandas.core import ops from pandas import ( - Period, PeriodIndex, period_range, Timedelta, Series, + Period, PeriodIndex, period_range, Series, _np_version_under1p10) -# ------------------------------------------------------------------ -# Fixtures - -_common_mismatch = [pd.offsets.YearBegin(2), - pd.offsets.MonthBegin(1), - pd.offsets.Minute()] - - -@pytest.fixture(params=[timedelta(minutes=30), - np.timedelta64(30, 's'), - Timedelta(seconds=30)] + _common_mismatch) -def not_hourly(request): - """ - Several timedelta-like and DateOffset instances that are _not_ - compatible with Hourly frequencies. - """ - return request.param - - -@pytest.fixture(params=[np.timedelta64(4, 'h'), - timedelta(hours=23), - Timedelta('23:00:00')] + _common_mismatch) -def not_daily(request): - """ - Several timedelta-like and DateOffset instances that are _not_ - compatible with Daily frequencies. 
- """ - return request.param - - -@pytest.fixture(params=[np.timedelta64(365, 'D'), - timedelta(365), - Timedelta(days=365)] + _common_mismatch) -def mismatched(request): - """ - Several timedelta-like and DateOffset instances that are _not_ - compatible with Monthly or Annual frequencies. - """ - return request.param - - -@pytest.fixture(params=[pd.offsets.Day(3), - timedelta(days=3), - np.timedelta64(3, 'D'), - pd.offsets.Hour(72), - timedelta(minutes=60 * 24 * 3), - np.timedelta64(72, 'h'), - Timedelta('72:00:00')]) -def three_days(request): - """ - Several timedelta-like and DateOffset objects that each represent - a 3-day timedelta - """ - return request.param - - -@pytest.fixture(params=[pd.offsets.Hour(2), - timedelta(hours=2), - np.timedelta64(2, 'h'), - pd.offsets.Minute(120), - timedelta(minutes=120), - np.timedelta64(120, 'm')]) -def two_hours(request): - """ - Several timedelta-like and DateOffset objects that each represent - a 2-hour timedelta - """ - return request.param - - # ------------------------------------------------------------------ # Comparisons @@ -752,8 +681,9 @@ def test_add_iadd_timedeltalike_annual(self): rng += pd.offsets.YearEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched): - other = mismatched + def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, + mismatched_freq): + other = mismatched_freq rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? ' 'from PeriodIndex\\(freq=A-DEC\\)') @@ -762,8 +692,9 @@ def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, mismatched): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng += other - def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, mismatched): - other = mismatched + def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, + mismatched_freq): + other = mismatched_freq rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? ' 'from PeriodIndex\\(freq=A-DEC\\)') @@ -782,8 +713,9 @@ def test_pi_add_iadd_timedeltalike_M(self): rng += pd.offsets.MonthEnd(5) tm.assert_index_equal(rng, expected) - def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched): - other = mismatched + def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, + mismatched_freq): + other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -791,8 +723,9 @@ def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng += other - def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched): - other = mismatched + def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, + mismatched_freq): + other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') msg = 'Input has different freq(=.+)? 
from PeriodIndex\\(freq=M\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index def7a8be95fc8..5050922173564 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -18,60 +18,6 @@ DataFrame) -# ------------------------------------------------------------------ -# Fixtures - -@pytest.fixture -def tdser(): - """ - Return a Series with dtype='timedelta64[ns]', including a NaT. - """ - return Series(['59 Days', '59 Days', 'NaT'], dtype='timedelta64[ns]') - - -@pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2), - np.timedelta64(2, 'h'), Timedelta(hours=2)], - ids=lambda x: type(x).__name__) -def delta(request): - """ - Several ways of representing two hours - """ - return request.param - - -@pytest.fixture(params=[timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()], - ids=lambda x: type(x).__name__) -def scalar_td(request): - """ - Several variants of Timedelta scalars representing 5 minutes and 4 seconds - """ - return request.param - - -@pytest.fixture(params=[pd.Index, Series, pd.DataFrame], - ids=lambda x: x.__name__) -def box(request): - """ - Several array-like containers that should have effectively identical - behavior with respect to arithmetic operations. - """ - return request.param - - -@pytest.fixture(params=[pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(strict=True))], - ids=lambda x: x.__name__) -def box_df_fail(request): - """ - Fixture equivalent to `box` fixture but xfailing the DataFrame case. - """ - return request.param - - # ------------------------------------------------------------------ # Timedelta64[ns] dtype Comparisons @@ -522,8 +468,8 @@ def test_td64arr_add_sub_timestamp(self, box): with pytest.raises(TypeError): tdser - ts - def test_tdi_sub_dt64_array(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + def test_tdi_sub_dt64_array(self, box_df_broadcast_failure): + box = box_df_broadcast_failure dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) @@ -540,8 +486,8 @@ def test_tdi_sub_dt64_array(self, box_df_fail): result = dtarr - tdi tm.assert_equal(result, expected) - def test_tdi_add_dt64_array(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + def test_tdi_add_dt64_array(self, box_df_broadcast_failure): + box = box_df_broadcast_failure dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) @@ -559,43 +505,33 @@ def test_tdi_add_dt64_array(self, box_df_fail): # ------------------------------------------------------------------ # Operations with int-like others - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Attempts to broadcast " - "incorrectly", - strict=True, raises=ValueError)) - ], ids=lambda x: x.__name__) - def test_td64arr_add_int_series_invalid(self, box, tdser): + def test_td64arr_add_int_series_invalid(self, box_df_broadcast_failure, + tdser): + box = box_df_broadcast_failure tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser + Series([2, 3, 4]) - def test_td64arr_radd_int_series_invalid(self, box_df_fail, tdser): - box = box_df_fail # Tries to broadcast incorrectly + def test_td64arr_radd_int_series_invalid(self, box_df_broadcast_failure, + tdser): 
+ box = box_df_broadcast_failure tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): Series([2, 3, 4]) + tdser - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Attempts to broadcast " - "incorrectly", - strict=True, raises=ValueError)) - ], ids=lambda x: x.__name__) - def test_td64arr_sub_int_series_invalid(self, box, tdser): + def test_td64arr_sub_int_series_invalid(self, box_df_broadcast_failure, + tdser): + box = box_df_broadcast_failure tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser - Series([2, 3, 4]) - def test_td64arr_rsub_int_series_invalid(self, box_df_fail, tdser): - box = box_df_fail # Tries to broadcast incorrectly + def test_td64arr_rsub_int_series_invalid(self, box_df_broadcast_failure, + tdser): + box = box_df_broadcast_failure tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): @@ -669,9 +605,9 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): Series([1, 2, 3]) # TODO: Add DataFrame in here? ], ids=lambda x: type(x).__name__) - def test_td64arr_add_sub_numeric_arr_invalid(self, box_df_fail, vec, - dtype, tdser): - box = box_df_fail # tries to broadcast incorrectly + def test_td64arr_add_sub_numeric_arr_invalid( + self, box_df_broadcast_failure, vec, dtype, tdser): + box = box_df_broadcast_failure tdser = tm.box_expected(tdser, box) err = TypeError if box is pd.Index and not dtype.startswith('float'): @@ -744,8 +680,8 @@ def test_timedelta64_operations_with_timedeltas(self): # roundtrip tm.assert_series_equal(result + td2, td1) - def test_td64arr_add_td64_array(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + def test_td64arr_add_td64_array(self, box_df_broadcast_failure): + box = box_df_broadcast_failure dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) @@ -760,8 +696,8 @@ def test_td64arr_add_td64_array(self, box_df_fail): result = tdarr + tdi tm.assert_equal(result, expected) - def test_td64arr_sub_td64_array(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + def test_td64arr_sub_td64_array(self, box_df_broadcast_failure): + box = box_df_broadcast_failure dti = pd.date_range('2016-01-01', periods=3) tdi = dti - dti.shift(1) @@ -843,7 +779,7 @@ def test_td64arr_sub_NaT(self, box): res = ser - pd.NaT tm.assert_equal(res, expected) - def test_td64arr_add_timedeltalike(self, delta, box): + def test_td64arr_add_timedeltalike(self, two_hours, box): # only test adding/sub offsets as + is now numeric rng = timedelta_range('1 days', '10 days') expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', @@ -851,10 +787,10 @@ def test_td64arr_add_timedeltalike(self, delta, box): rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, box) - result = rng + delta + result = rng + two_hours tm.assert_equal(result, expected) - def test_td64arr_sub_timedeltalike(self, delta, box): + def test_td64arr_sub_timedeltalike(self, two_hours, box): # only test adding/sub offsets as - is now numeric rng = timedelta_range('1 days', '10 days') expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') @@ -862,7 +798,7 @@ def test_td64arr_sub_timedeltalike(self, delta, box): rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, box) - result = rng - 
delta + result = rng - two_hours tm.assert_equal(result, expected) # ------------------------------------------------------------------ @@ -934,9 +870,9 @@ def test_td64arr_add_offset_index(self, names, box): # TODO: combine with test_td64arr_add_offset_index by parametrizing # over second box? - def test_td64arr_add_offset_array(self, box_df_fail): + def test_td64arr_add_offset_array(self, box_df_broadcast_failure): # GH#18849 - box = box_df_fail # tries to broadcast incorrectly + box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) @@ -957,9 +893,9 @@ def test_td64arr_add_offset_array(self, box_df_fail): @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_sub_offset_index(self, names, box_df_fail): + def test_td64arr_sub_offset_index(self, names, box_df_broadcast_failure): # GH#18824, GH#19744 - box = box_df_fail # tries to broadcast incorrectly + box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], name=names[0]) other = pd.Index([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], @@ -975,9 +911,9 @@ def test_td64arr_sub_offset_index(self, names, box_df_fail): res = tdi - other tm.assert_equal(res, expected) - def test_td64arr_sub_offset_array(self, box_df_fail): + def test_td64arr_sub_offset_array(self, box_df_broadcast_failure): # GH#18824 - box = box_df_fail # tries to broadcast incorrectly + box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) @@ -994,9 +930,9 @@ def test_td64arr_sub_offset_array(self, box_df_fail): @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_with_offset_series(self, names, box_df_fail): + def test_td64arr_with_offset_series(self, names, box_df_broadcast_failure): # GH#18849 - box = box_df_fail # tries to broadcast incorrectly + box = box_df_broadcast_failure box2 = Series if box is pd.Index else box tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], @@ -1027,9 +963,10 @@ def test_td64arr_with_offset_series(self, names, box_df_fail): tm.assert_equal(res3, expected_sub) @pytest.mark.parametrize('obox', [np.array, pd.Index, pd.Series]) - def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_df_fail): + def test_td64arr_addsub_anchored_offset_arraylike( + self, obox, box_df_broadcast_failure): # GH#18824 - box = box_df_fail # DataFrame tries to broadcast incorrectly + box = box_df_broadcast_failure tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) tdi = tm.box_expected(tdi, box) @@ -1090,11 +1027,11 @@ def test_td64arr_mul_int(self, box): result = 1 * idx tm.assert_equal(result, idx) - def test_td64arr_mul_tdlike_scalar_raises(self, delta, box): + def test_td64arr_mul_tdlike_scalar_raises(self, two_hours, box): rng = timedelta_range('1 days', '10 days', name='foo') rng = tm.box_expected(rng, box) with pytest.raises(TypeError): - rng * delta + rng * two_hours def test_tdi_mul_int_array_zerodim(self, box): rng5 = np.arange(5, dtype='int64') @@ -1107,8 +1044,8 @@ def test_tdi_mul_int_array_zerodim(self, box): result = idx * np.array(5, dtype='int64') tm.assert_equal(result, expected) - def test_tdi_mul_int_array(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + def test_tdi_mul_int_array(self, box_df_broadcast_failure): + 
box = box_df_broadcast_failure rng5 = np.arange(5, dtype='int64') idx = TimedeltaIndex(rng5) expected = TimedeltaIndex(rng5 ** 2) @@ -1120,7 +1057,7 @@ def test_tdi_mul_int_array(self, box_df_fail): tm.assert_equal(result, expected) def test_tdi_mul_int_series(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + box = box_df_fail idx = TimedeltaIndex(np.arange(5, dtype='int64')) expected = TimedeltaIndex(np.arange(5, dtype='int64') ** 2) @@ -1133,7 +1070,7 @@ def test_tdi_mul_int_series(self, box_df_fail): tm.assert_equal(result, expected) def test_tdi_mul_float_series(self, box_df_fail): - box = box_df_fail # DataFrame tries to broadcast incorrectly + box = box_df_fail idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box) @@ -1186,7 +1123,7 @@ def test_td64arr_div_int(self, box): result = idx / 1 tm.assert_equal(result, idx) - def test_tdi_div_tdlike_scalar(self, delta, box): + def test_tdi_div_tdlike_scalar(self, two_hours, box): # GH#20088, GH#22163 ensure DataFrame returns correct dtype rng = timedelta_range('1 days', '10 days', name='foo') expected = pd.Float64Index((np.arange(10) + 1) * 12, name='foo') @@ -1194,17 +1131,17 @@ def test_tdi_div_tdlike_scalar(self, delta, box): rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, box) - result = rng / delta + result = rng / two_hours tm.assert_equal(result, expected) - def test_tdi_div_tdlike_scalar_with_nat(self, delta, box): + def test_tdi_div_tdlike_scalar_with_nat(self, two_hours, box): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = pd.Float64Index([12, np.nan, 24], name='foo') rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, box) - result = rng / delta + result = rng / two_hours tm.assert_equal(result, expected) # ------------------------------------------------------------------ @@ -1260,14 +1197,14 @@ def test_td64arr_floordiv_int(self, box): result = idx // 1 tm.assert_equal(result, idx) - def test_td64arr_floordiv_tdlike_scalar(self, delta, box): + def test_td64arr_floordiv_tdlike_scalar(self, two_hours, box): tdi = timedelta_range('1 days', '10 days', name='foo') expected = pd.Int64Index((np.arange(10) + 1) * 12, name='foo') tdi = tm.box_expected(tdi, box) expected = tm.box_expected(expected, box) - result = tdi // delta + result = tdi // two_hours tm.assert_equal(result, expected) # TODO: Is this redundant with test_td64arr_floordiv_tdlike_scalar? 
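For reference: the shared `two_hours` fixture these tests now request is defined
once, presumably in the shared conftest.py for the arithmetic tests, instead of
locally in each module. Its exact definition is not shown in this series; a
minimal sketch mirroring the local `delta` fixture removed earlier in this
patch would be:

    import pytest
    import numpy as np
    from datetime import timedelta

    import pandas as pd
    from pandas import Timedelta

    @pytest.fixture(params=[pd.offsets.Hour(2), timedelta(hours=2),
                            np.timedelta64(2, 'h'), Timedelta(hours=2)],
                    ids=str)
    def two_hours(request):
        """
        Several timedelta-like and DateOffset objects that each
        represent a 2-hour timedelta.
        """
        return request.param

Hoisting the fixture lets test_datetime64.py, test_timedelta64.py and
test_period.py share one parametrized definition instead of three slightly
different local copies.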
@@ -1364,14 +1301,6 @@ def test_td64arr_div_numeric_scalar(self, box, two, tdser): result = tdser / two tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="broadcasts along " - "wrong axis", - strict=True)) - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('dtype', ['int64', 'int32', 'int16', 'uint64', 'uint32', 'uint16', 'uint8', 'float64', 'float32', 'float16']) @@ -1380,9 +1309,11 @@ def test_td64arr_div_numeric_scalar(self, box, two, tdser): Series([20, 30, 40])], ids=lambda x: type(x).__name__) @pytest.mark.parametrize('op', [operator.mul, ops.rmul]) - def test_td64arr_rmul_numeric_array(self, op, box, vector, dtype, tdser): + def test_td64arr_rmul_numeric_array(self, op, box_df_fail, + vector, dtype, tdser): # GH#4521 # divide/multiply by integers + box = box_df_fail # broadcasts incorrectly but doesn't raise vector = vector.astype(dtype) expected = Series(['1180 Days', '1770 Days', 'NaT'], @@ -1428,22 +1359,15 @@ def test_td64arr_div_numeric_array(self, box, vector, dtype, tdser): with pytest.raises(TypeError): vector / tdser - # TODO: Should we be parametrizing over types for `ser` too? - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="broadcasts along " - "wrong axis", - strict=True)) - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('names', [(None, None, None), ('Egon', 'Venkman', None), ('NCC1701D', 'NCC1701D', 'NCC1701D')]) - def test_td64arr_mul_int_series(self, box, names): + def test_td64arr_mul_int_series(self, box_df_fail, names): # GH#19042 test for correct name attachment + box = box_df_fail # broadcasts along wrong axis, but doesn't raise tdi = TimedeltaIndex(['0days', '1day', '2days', '3days', '4days'], name=names[0]) + # TODO: Should we be parametrizing over types for `ser` too? 
ser = Series([0, 1, 2, 3, 4], dtype=np.int64, name=names[1]) expected = Series(['0days', '1day', '4days', '9days', '16days'], @@ -1491,10 +1415,6 @@ def test_float_series_rdiv_td64arr(self, box, names): class TestTimedeltaArraylikeInvalidArithmeticOps(object): - @pytest.mark.parametrize('scalar_td', [ - timedelta(minutes=5, seconds=4), - Timedelta('5m4s'), - Timedelta('5m4s').to_timedelta64()]) def test_td64arr_pow_invalid(self, scalar_td, box): td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan From 2ec957b0c2e87ae06eba6b4c9421088ce2ad4f19 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 12 Sep 2018 04:33:43 -0700 Subject: [PATCH 38/86] pythonize cython code (#22638) --- .coveragerc | 30 ------------------ pandas/_libs/algos.pyx | 6 ++-- pandas/_libs/hashing.pyx | 16 ++-------- pandas/_libs/index.pyx | 15 ++++----- pandas/_libs/internals.pyx | 47 +++++++++++++++++------------ pandas/_libs/interval.pyx | 2 +- pandas/_libs/lib.pyx | 42 ++++++++++++-------------- pandas/_libs/missing.pyx | 13 ++++---- pandas/_libs/ops.pyx | 11 +++---- pandas/_libs/parsers.pyx | 14 ++++----- pandas/_libs/properties.pyx | 2 +- pandas/_libs/reduction.pyx | 2 +- pandas/_libs/sparse.pyx | 2 +- pandas/_libs/testing.pyx | 7 +++++ pandas/_libs/tslib.pyx | 8 ++--- pandas/_libs/tslibs/ccalendar.pyx | 4 +-- pandas/_libs/tslibs/conversion.pyx | 4 +-- pandas/_libs/tslibs/fields.pyx | 4 +-- pandas/_libs/tslibs/frequencies.pyx | 4 +-- pandas/_libs/tslibs/nattype.pyx | 1 - pandas/_libs/tslibs/np_datetime.pyx | 4 +-- pandas/_libs/tslibs/offsets.pyx | 4 +-- pandas/_libs/tslibs/parsing.pyx | 2 +- pandas/_libs/tslibs/period.pyx | 3 +- pandas/_libs/tslibs/resolution.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 6 ++-- pandas/_libs/tslibs/timezones.pyx | 2 +- pandas/_libs/window.pyx | 4 +-- pandas/_libs/writers.pyx | 14 +++++---- setup.cfg | 30 ++++++++++++++++++ 31 files changed, 152 insertions(+), 155 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 13baa100b84b7..0000000000000 --- a/.coveragerc +++ /dev/null @@ -1,30 +0,0 @@ -# .coveragerc to control coverage.py -[run] -branch = False -omit = */tests/* -plugins = Cython.Coverage - -[report] -# Regexes for lines to exclude from consideration -exclude_lines = - # Have to re-enable the standard pragma - pragma: no cover - - # Don't complain about missing debug-only code: - def __repr__ - if self\.debug - - # Don't complain if tests don't hit defensive assertion code: - raise AssertionError - raise NotImplementedError - AbstractMethodError - - # Don't complain if non-runnable code isn't run: - if 0: - if __name__ == .__main__.: - -ignore_errors = False -show_missing = True - -[html] -directory = coverage_html_report diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 249033b8636bd..415e7026e09c8 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libc.stdlib cimport malloc, free from libc.string cimport memmove @@ -114,7 +114,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(ndarray[int64_t] arr): @cython.wraparound(False) @cython.boundscheck(False) -def is_lexsorted(list list_of_arrays): +def is_lexsorted(list_of_arrays: list) -> bint: cdef: Py_ssize_t i Py_ssize_t n, nlevels diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 88b4d97de492c..c2305c8f3ff00 100644 
--- a/pandas/_libs/hashing.pyx
+++ b/pandas/_libs/hashing.pyx
@@ -3,7 +3,6 @@
 # at https://github.com/veorq/SipHash

 import cython
-from cpython cimport PyBytes_Check, PyUnicode_Check
 from libc.stdlib cimport malloc, free

 import numpy as np
@@ -44,6 +43,7 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
         char **vecs
         char *cdata
         object val
+        list datas = []

     k = key.encode(encoding)
     kb = k
@@ -57,12 +57,11 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
     vecs = <char **>malloc(n * sizeof(char *))
     lens = <uint64_t*>malloc(n * sizeof(uint64_t))

-    cdef list datas = []
     for i in range(n):
         val = arr[i]
-        if PyBytes_Check(val):
+        if isinstance(val, bytes):
             data = val
-        elif PyUnicode_Check(val):
+        elif isinstance(val, unicode):
             data = val.encode(encoding)
         elif val is None or is_nan(val):
             # null, stringify and encode
@@ -132,15 +131,6 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,
     v2[0] = _rotl(v2[0], 32)


-# TODO: This appears unused; remove?
-cpdef uint64_t siphash(bytes data, bytes key) except? 0:
-    if len(key) != 16:
-        raise ValueError("key should be a 16-byte bytestring, "
-                         "got {key} (len {klen})"
-                         .format(key=key, klen=len(key)))
-    return low_level_siphash(data, len(data), key)
-
-
 @cython.cdivision(True)
 cdef uint64_t low_level_siphash(uint8_t* data, size_t datalen,
                                 uint8_t* key) nogil:
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index d5846f2b42378..562c1ba218141 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -1,10 +1,7 @@
 # -*- coding: utf-8 -*-
 from datetime import datetime, timedelta, date

-cimport cython
-
-from cpython cimport PyTuple_Check, PyList_Check
-from cpython.slice cimport PySlice_Check
+import cython

 import numpy as np
 cimport numpy as cnp
@@ -30,15 +27,15 @@ cdef int64_t iNaT = util.get_nat()


 cdef inline bint is_definitely_invalid_key(object val):
-    if PyTuple_Check(val):
+    if isinstance(val, tuple):
         try:
             hash(val)
         except TypeError:
             return True

     # we have a _data, means we are a NDFrame
-    return (PySlice_Check(val) or util.is_array(val)
-            or PyList_Check(val) or hasattr(val, '_data'))
+    return (isinstance(val, slice) or util.is_array(val)
+            or isinstance(val, list) or hasattr(val, '_data'))


 cpdef get_value_at(ndarray arr, object loc, object tz=None):
@@ -88,7 +85,7 @@ cdef class IndexEngine:
             void* data_ptr

         loc = self.get_loc(key)
-        if PySlice_Check(loc) or util.is_array(loc):
+        if isinstance(loc, slice) or util.is_array(loc):
             return arr[loc]
         else:
             return get_value_at(arr, loc, tz=tz)
@@ -640,7 +637,7 @@ cdef class BaseMultiIndexCodesEngine:
     def get_loc(self, object key):
         if is_definitely_invalid_key(key):
             raise TypeError("'{key}' is an invalid key".format(key=key))
-        if not PyTuple_Check(key):
+        if not isinstance(key, tuple):
             raise KeyError(key)
         try:
             indices = [0 if checknull(v) else lev.get_loc(v) + 1
diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 996570dae3302..681530ed494d7 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-

-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

 from cpython cimport PyObject
-from cpython.slice cimport PySlice_Check

 cdef extern from "Python.h":
     Py_ssize_t PY_SSIZE_T_MAX
@@ -30,14 +29,15 @@ cdef class BlockPlacement:
     cdef bint _has_slice, _has_array, _is_known_slice_like

     def __init__(self, val):
-        cdef slice slc
+        cdef:
+            slice slc

         self._as_slice = None
         self._as_array = None
         self._has_slice = False
         self._has_array = False
- if PySlice_Check(val): + if isinstance(val, slice): slc = slice_canonize(val) if slc.start != slc.stop: @@ -55,7 +55,8 @@ cdef class BlockPlacement: self._has_array = True def __str__(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: v = self._as_slice else: @@ -66,15 +67,17 @@ cdef class BlockPlacement: __repr__ = __str__ def __len__(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: return slice_len(s) else: return len(self._as_array) def __iter__(self): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t start, stop, step, _ + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ if s is not None: start, stop, step, _ = slice_get_indices_ex(s) return iter(range(start, stop, step)) @@ -83,7 +86,8 @@ cdef class BlockPlacement: @property def as_slice(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is None: raise TypeError('Not slice-like') else: @@ -91,7 +95,8 @@ cdef class BlockPlacement: @property def indexer(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: return s else: @@ -103,7 +108,8 @@ cdef class BlockPlacement: @property def as_array(self): - cdef Py_ssize_t start, stop, end, _ + cdef: + Py_ssize_t start, stop, end, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) self._as_array = np.arange(start, stop, step, @@ -113,17 +119,19 @@ cdef class BlockPlacement: @property def is_slice_like(self): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() return s is not None def __getitem__(self, loc): - cdef slice s = self._ensure_has_slice() + cdef: + slice s = self._ensure_has_slice() if s is not None: val = slice_getitem(s, loc) else: val = self._as_array[loc] - if not PySlice_Check(val) and val.ndim == 0: + if not isinstance(val, slice) and val.ndim == 0: return val return BlockPlacement(val) @@ -139,8 +147,9 @@ cdef class BlockPlacement: [o.as_array for o in others])) cdef iadd(self, other): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t other_int, start, stop, step, l + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step, l if isinstance(other, int) and s is not None: other_int = other @@ -184,7 +193,7 @@ cdef class BlockPlacement: return self._as_slice -cdef slice_canonize(slice s): +cdef slice slice_canonize(slice s): """ Convert slice to canonical bounded form. 
""" @@ -282,7 +291,7 @@ def slice_getitem(slice slc not None, ind): s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - if PySlice_Check(ind): + if isinstance(ind, slice): ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index d8e2e8eb4b4ea..82261094022fb 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -271,7 +271,7 @@ cdef class Interval(IntervalMixin): return ((self.left < key if self.open_left else self.left <= key) and (key < self.right if self.open_right else key <= self.right)) - def __richcmp__(self, other, int op): + def __richcmp__(self, other, op: int): if hasattr(other, 'ndim'): # let numpy (or IntervalIndex) handle vectorization return NotImplemented diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6b425d7022ecd..0b9793a6ef97a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2,14 +2,10 @@ from decimal import Decimal import sys -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport (Py_INCREF, PyTuple_SET_ITEM, - PyList_Check, - PyString_Check, - PyBytes_Check, - PyUnicode_Check, PyTuple_New, Py_EQ, PyObject_RichCompareBool) @@ -91,13 +87,14 @@ def values_from_object(object obj): @cython.wraparound(False) @cython.boundscheck(False) -def memory_usage_of_objects(object[:] arr): +def memory_usage_of_objects(arr: object[:]) -> int64_t: """ return the memory usage of an object array in bytes, does not include the actual bytes of the pointers """ - cdef: - Py_ssize_t i, n - int64_t size = 0 + i: Py_ssize_t + n: Py_ssize_t + size: int64_t + size = 0 n = len(arr) for i in range(n): size += arr[i].__sizeof__() @@ -127,7 +124,7 @@ def is_scalar(val: object) -> bint: return (cnp.PyArray_IsAnyScalar(val) # As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3. - or PyBytes_Check(val) + or isinstance(val, bytes) # We differ from numpy (as of 1.10), which claims that None is # not scalar in np.isscalar(). or val is None @@ -140,7 +137,7 @@ def is_scalar(val: object) -> bint: or util.is_offset_object(val)) -def item_from_zerodim(object val): +def item_from_zerodim(val: object) -> object: """ If the value is a zerodim array, return the item it contains. 
@@ -359,7 +356,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): return rev_indexer -def has_infs_f4(ndarray[float32_t] arr): +def has_infs_f4(ndarray[float32_t] arr) -> bint: cdef: Py_ssize_t i, n = len(arr) float32_t inf, neginf, val @@ -374,7 +371,7 @@ def has_infs_f4(ndarray[float32_t] arr): return False -def has_infs_f8(ndarray[float64_t] arr): +def has_infs_f8(ndarray[float64_t] arr) -> bint: cdef: Py_ssize_t i, n = len(arr) float64_t inf, neginf, val @@ -530,7 +527,8 @@ def clean_index_list(list obj): for i in range(n): v = obj[i] - if not (PyList_Check(v) or util.is_array(v) or hasattr(v, '_data')): + if not (isinstance(v, list) or + util.is_array(v) or hasattr(v, '_data')): all_arrays = 0 break @@ -1120,7 +1118,7 @@ def infer_dtype(object value, bint skipna=False): .format(typ=type(value))) else: - if not PyList_Check(value): + if not isinstance(value, list): value = list(value) from pandas.core.dtypes.cast import ( construct_1d_object_array_from_listlike) @@ -1209,15 +1207,15 @@ def infer_dtype(object value, bint skipna=False): if is_bool_array(values, skipna=skipna): return 'boolean' - elif PyString_Check(val): + elif isinstance(val, str): if is_string_array(values, skipna=skipna): return 'string' - elif PyUnicode_Check(val): + elif isinstance(val, unicode): if is_unicode_array(values, skipna=skipna): return 'unicode' - elif PyBytes_Check(val): + elif isinstance(val, bytes): if is_bytes_array(values, skipna=skipna): return 'bytes' @@ -1474,7 +1472,7 @@ cpdef bint is_float_array(ndarray values): cdef class StringValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyString_Check(value) + return isinstance(value, str) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.str_) @@ -1490,7 +1488,7 @@ cpdef bint is_string_array(ndarray values, bint skipna=False): cdef class UnicodeValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyUnicode_Check(value) + return isinstance(value, unicode) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.unicode_) @@ -1506,7 +1504,7 @@ cdef bint is_unicode_array(ndarray values, bint skipna=False): cdef class BytesValidator(Validator): cdef inline bint is_value_typed(self, object value) except -1: - return PyBytes_Check(value) + return isinstance(value, bytes) cdef inline bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.bytes_) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index c787cc61e8773..2590a30c57f33 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- -from cpython cimport PyFloat_Check, PyComplex_Check - -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp @@ -23,8 +21,9 @@ cdef int64_t NPY_NAT = util.get_nat() cdef inline bint _check_all_nulls(object val): """ utility to check if a value is any type of null """ - cdef bint res - if PyFloat_Check(val) or PyComplex_Check(val): + res: bint + + if isinstance(val, (float, complex)): res = val != val elif val is NaT: res = 1 @@ -117,7 +116,7 @@ cpdef bint checknull_old(object val): cdef inline bint _check_none_nan_inf_neginf(object val): try: - return val is None or (PyFloat_Check(val) and + return val is None or (isinstance(val, float) and (val != val or val == INF or val == NEGINF)) except ValueError: return False diff --git 
a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx index a194f1588e231..e21bce177b38b 100644 --- a/pandas/_libs/ops.pyx +++ b/pandas/_libs/ops.pyx @@ -1,12 +1,11 @@ # -*- coding: utf-8 -*- import operator -from cpython cimport (PyFloat_Check, PyBool_Check, - PyObject_RichCompareBool, +from cpython cimport (PyObject_RichCompareBool, Py_EQ, Py_NE, Py_LT, Py_LE, Py_GT, Py_GE) -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np from numpy cimport ndarray, uint8_t, import_array @@ -272,7 +271,7 @@ def maybe_convert_bool(ndarray[object] arr, for i in range(n): val = arr[i] - if PyBool_Check(val): + if isinstance(val, bool): if val is True: result[i] = 1 else: @@ -281,7 +280,7 @@ def maybe_convert_bool(ndarray[object] arr, result[i] = 1 elif val in false_vals: result[i] = 0 - elif PyFloat_Check(val): + elif isinstance(val, float): result[i] = UINT8_MAX na_count += 1 else: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 91faed678192f..e3df391c5c45d 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -10,12 +10,12 @@ from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE from libc.stdlib cimport free from libc.string cimport strncpy, strlen, strcasecmp -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport (PyObject, PyBytes_FromString, - PyBytes_AsString, PyBytes_Check, - PyUnicode_Check, PyUnicode_AsUTF8String, + PyBytes_AsString, + PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport Py_XDECREF @@ -1341,9 +1341,9 @@ cdef object _false_values = [b'False', b'FALSE', b'false'] def _ensure_encoded(list lst): cdef list result = [] for x in lst: - if PyUnicode_Check(x): + if isinstance(x, unicode): x = PyUnicode_AsUTF8String(x) - elif not PyBytes_Check(x): + elif not isinstance(x, bytes): x = asbytes(x) result.append(x) @@ -2046,7 +2046,7 @@ cdef kh_str_t* kset_from_list(list values) except NULL: val = values[i] # None creeps in sometimes, which isn't possible here - if not PyBytes_Check(val): + if not isinstance(val, bytes): raise ValueError('Must be all encoded bytes') k = kh_put_str(table, PyBytes_AsString(val), &ret) diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx index 0f2900619fdb6..6e4c0c62b0dd8 100644 --- a/pandas/_libs/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t from cpython cimport ( PyDict_Contains, PyDict_GetItem, PyDict_SetItem) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index d87a590730fd6..681ea2c6295f2 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from distutils.version import LooseVersion -from cython cimport Py_ssize_t +from cython import Py_ssize_t from cpython cimport Py_INCREF from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 7f5990ce5d65c..2993114a668bb 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -2,7 +2,7 @@ import operator import sys -cimport cython +import cython import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index ab7f3c3de2131..10f68187938c0 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -22,24 +22,30 @@ cdef NUMERIC_TYPES = ( np.float64, ) + cdef bint is_comparable_as_number(obj): return isinstance(obj, 
NUMERIC_TYPES) + cdef bint isiterable(obj): return hasattr(obj, '__iter__') + cdef bint has_length(obj): return hasattr(obj, '__len__') + cdef bint is_dictlike(obj): return hasattr(obj, 'keys') and hasattr(obj, '__getitem__') + cdef bint decimal_almost_equal(double desired, double actual, int decimal): # Code from # http://docs.scipy.org/doc/numpy/reference/generated # /numpy.testing.assert_almost_equal.html return abs(desired - actual) < (0.5 * 10.0 ** -decimal) + cpdef assert_dict_equal(a, b, bint compare_keys=True): assert is_dictlike(a) and is_dictlike(b), ( "Cannot compare dict objects, one or both is not dict-like" @@ -56,6 +62,7 @@ cpdef assert_dict_equal(a, b, bint compare_keys=True): return True + cpdef assert_almost_equal(a, b, check_less_precise=False, bint check_dtype=True, diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 93fae695d51fd..16fea0615f199 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t - -from cpython cimport PyFloat_Check, PyUnicode_Check +from cython import Py_ssize_t from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, PyDateTime_CheckExact, @@ -601,7 +599,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', if len(val) == 0 or val in nat_strings: iresult[i] = NPY_NAT continue - if PyUnicode_Check(val) and PY2: + if isinstance(val, unicode) and PY2: val = val.encode('utf-8') try: @@ -740,7 +738,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', # set as nan except if its a NaT if checknull_with_nat(val): - if PyFloat_Check(val): + if isinstance(val, float): oresult[i] = np.nan else: oresult[i] = NaT diff --git a/pandas/_libs/tslibs/ccalendar.pyx b/pandas/_libs/tslibs/ccalendar.pyx index ec54c023290b3..7d58b43e5d460 100644 --- a/pandas/_libs/tslibs/ccalendar.pyx +++ b/pandas/_libs/tslibs/ccalendar.pyx @@ -4,8 +4,8 @@ Cython implementations of functions resembling the stdlib calendar module """ -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from numpy cimport int64_t, int32_t diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index fe664cf03b0b9..d7eef546befbd 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 9cbad8acabff1..684344ceb9002 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -4,8 +4,8 @@ Functions for accessing attributes of Timestamp/datetime64/datetime-like objects and arrays """ -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import numpy as np cimport numpy as cnp diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 70a3f3f410636..c555fce9dd007 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -321,7 +321,7 @@ cpdef object get_freq(object freq): # ---------------------------------------------------------------------- # Frequency comparison -cpdef bint is_subperiod(source, target): +def is_subperiod(source, target) -> bint: """ Returns True if downsampling is possible between source and target frequencies @@ -374,7 +374,7 @@ cpdef bint is_subperiod(source, target): return 
source in {'N'} -cpdef bint is_superperiod(source, target): +def is_superperiod(source, target) -> bint: """ Returns True if upsampling is possible between source and target frequencies diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 08d9128ff660c..fd8486f690745 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- from cpython cimport ( - PyFloat_Check, PyComplex_Check, PyObject_RichCompare, Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index f0aa6389fba56..e0ecfc24804a9 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from cpython cimport (Py_EQ, Py_NE, Py_GE, Py_GT, Py_LT, Py_LE, - PyUnicode_Check, PyUnicode_AsASCIIString) + PyUnicode_AsASCIIString) from cpython.datetime cimport (datetime, date, PyDateTime_IMPORT, @@ -175,7 +175,7 @@ cdef inline int _string_to_dts(object val, npy_datetimestruct* dts, int result char *tmp - if PyUnicode_Check(val): + if isinstance(val, unicode): val = PyUnicode_AsASCIIString(val) tmp = val diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8c53fabffdbeb..4d611f89bca9c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t import time from cpython.datetime cimport (PyDateTime_IMPORT, diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 6ee6c4b9d9026..3887957aeefd4 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -537,7 +537,7 @@ except (ImportError, AttributeError): pass -def _format_is_iso(f): +def _format_is_iso(f) -> bint: """ Does format match the iso8601 set that can be handled by the C parser? Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f68b6d8fdef57..43dc415bfd464 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2397,7 +2397,6 @@ class Period(_Period): # ('T', 5) but may be passed in as a string like '5T' # ordinal is the period offset from the gregorian proleptic epoch - cdef _Period self if freq is not None: @@ -2495,7 +2494,7 @@ cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day, minute, second, 0, 0, base) -def quarter_to_myear(int year, int quarter, freq): +def quarter_to_myear(year: int, quarter: int, freq): """ A quarterly frequency defines a "year" which may not coincide with the calendar-year. 
Find the calendar-year and calendar-month associated diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 4e3350395400c..4acffdea78f55 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t import numpy as np from numpy cimport ndarray, int64_t, int32_t diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d472320cfdb12..46a1145009857 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -20,7 +20,7 @@ except: except: from _dummy_thread import allocate_lock as _thread_allocate_lock -from cython cimport Py_ssize_t +from cython import Py_ssize_t import pytz diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b84c1a753215a..9b13ef5982396 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -6,9 +6,9 @@ import warnings import sys cdef bint PY3 = (sys.version_info[0] >= 3) -from cython cimport Py_ssize_t +from cython import Py_ssize_t -from cpython cimport PyUnicode_Check, Py_NE, Py_EQ, PyObject_RichCompare +from cpython cimport Py_NE, Py_EQ, PyObject_RichCompare import numpy as np cimport numpy as cnp @@ -281,7 +281,7 @@ cpdef inline int64_t cast_from_unit(object ts, object unit) except? -1: cdef inline _decode_if_necessary(object ts): # decode ts if necessary - if not PyUnicode_Check(ts) and not PY3: + if not isinstance(ts, unicode) and not PY3: ts = str(ts).decode('utf-8') return ts diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 36ec499c7335c..b7e4de81da35c 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from cython cimport Py_ssize_t +from cython import Py_ssize_t # dateutil compat from dateutil.tz import ( diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index b25fb47065fdd..d4b61b8611b68 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- # cython: boundscheck=False, wraparound=False, cdivision=True -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from libcpp.deque cimport deque from libc.stdlib cimport malloc, free diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 8e55ffad8d231..9af12cbec1e9c 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -cimport cython -from cython cimport Py_ssize_t +import cython +from cython import Py_ssize_t from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE @@ -36,9 +36,10 @@ def write_csv_rows(list data, ndarray data_index, cols : ndarray writer : object """ - cdef int N, j, i, ncols - cdef list rows - cdef object val + cdef: + int N, j, i, ncols + list rows + object val # In crude testing, N>100 yields little marginal improvement N = 100 @@ -157,8 +158,9 @@ def string_array_replace_from_nan_rep( Replace the values in the array with 'replacement' if they are 'nan_rep'. Return the same array. 
""" + cdef: + int length = arr.shape[0], i = 0 - cdef int length = arr.shape[0], i = 0 if replace is None: replace = np.nan diff --git a/setup.cfg b/setup.cfg index c4e3243d824e5..5fc0236066b93 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,3 +40,33 @@ markers = high_memory: mark a test as a high-memory only doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL addopts = --strict-data-files + + +[coverage:run] +branch = False +omit = */tests/* +plugins = Cython.Coverage + +[coverage:report] +ignore_errors = False +show_missing = True +# Regexes for lines to exclude from consideration +exclude_lines = + # Have to re-enable the standard pragma + pragma: no cover + + # Don't complain about missing debug-only code: + def __repr__ + if self\.debug + + # Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + AbstractMethodError + + # Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +[coverage:html] +directory = coverage_html_report From 9837dbc0b56d1850ce967a9243692cbf261027ab Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 13 Sep 2018 06:27:42 -0500 Subject: [PATCH 39/86] API: register_extension_dtype class decorator (#22666) --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.24.0.txt | 1 + pandas/api/extensions/__init__.py | 4 ++- pandas/core/arrays/integer.py | 8 ++--- pandas/core/dtypes/base.py | 8 ++++- pandas/core/dtypes/dtypes.py | 36 +++++++++++++------ pandas/tests/extension/base/dtype.py | 5 --- .../tests/extension/decimal/test_decimal.py | 4 +-- pandas/tests/extension/json/test_json.py | 4 +-- pandas/tests/extension/test_categorical.py | 4 +-- pandas/tests/extension/test_integer.py | 5 +-- pandas/tests/extension/test_interval.py | 4 +-- 12 files changed, 47 insertions(+), 37 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 77d37ec2a7b2e..9c3770a497cf8 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2559,6 +2559,7 @@ objects. .. autosummary:: :toctree: generated/ + api.extensions.register_extension_dtype api.extensions.register_dataframe_accessor api.extensions.register_series_accessor api.extensions.register_index_accessor diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3660c1e843f6c..232f879285543 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -491,6 +491,7 @@ ExtensionType Changes - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`). +- Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) .. 
_whatsnew_0240.api.incompatibilities: diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 851a63725952a..8a515661920f3 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -5,4 +5,6 @@ from pandas.core.algorithms import take # noqa from pandas.core.arrays.base import (ExtensionArray, # noqa ExtensionScalarOpsMixin) -from pandas.core.dtypes.dtypes import ExtensionDtype # noqa +from pandas.core.dtypes.dtypes import ( # noqa + ExtensionDtype, register_extension_dtype +) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5f6a96833c4f8..aebc7a6a04ffc 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -19,7 +19,7 @@ is_list_like) from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.dtypes import registry +from pandas.core.dtypes.dtypes import register_extension_dtype from pandas.core.dtypes.missing import isna, notna from pandas.io.formats.printing import ( @@ -614,9 +614,9 @@ def integer_arithmetic_method(self, other): classname = "{}Dtype".format(name) attributes_dict = {'type': getattr(np, dtype), 'name': name} - dtype_type = type(classname, (_IntegerDtype, ), attributes_dict) + dtype_type = register_extension_dtype( + type(classname, (_IntegerDtype, ), attributes_dict) + ) setattr(module, classname, dtype_type) - # register - registry.register(dtype_type) _dtypes[dtype] = dtype_type() diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 1ecb6234ad2d9..7dcdf878231f1 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -127,7 +127,8 @@ class ExtensionDtype(_DtypeOpsMixin): * _is_numeric Optionally one can override construct_array_type for construction - with the name of this dtype via the Registry + with the name of this dtype via the Registry. See + :meth:`pandas.api.extensions.register_extension_dtype`. * construct_array_type @@ -138,6 +139,11 @@ class ExtensionDtype(_DtypeOpsMixin): Methods and properties required by the interface raise ``pandas.errors.AbstractMethodError`` and no ``register`` method is provided for registering virtual subclasses. + + See Also + -------- + pandas.api.extensions.register_extension_dtype + pandas.api.extensions.ExtensionArray """ def __str__(self): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index f53ccc86fc4ff..4fd77e41a1c67 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -8,6 +8,26 @@ from .base import ExtensionDtype, _DtypeOpsMixin +def register_extension_dtype(cls): + """Class decorator to register an ExtensionType with pandas. + + .. versionadded:: 0.24.0 + + This enables operations like ``.astype(name)`` for the name + of the ExtensionDtype. + + Examples + -------- + >>> from pandas.api.extensions import register_extension_dtype + >>> from pandas.api.extensions import ExtensionDtype + >>> @register_extension_dtype + ... class MyExtensionDtype(ExtensionDtype): + ... pass + """ + registry.register(cls) + return cls + + class Registry(object): """ Registry for dtype inference @@ -17,10 +37,6 @@ class Registry(object): Multiple extension types can be registered. These are tried in order. 
- - Examples - -------- - registry.register(MyExtensionDtype) """ def __init__(self): self.dtypes = [] @@ -65,9 +81,6 @@ def find(self, dtype): registry = Registry() -# TODO(Extension): remove the second registry once all internal extension -# dtypes are real extension dtypes. -_pandas_registry = Registry() class PandasExtensionDtype(_DtypeOpsMixin): @@ -145,6 +158,7 @@ class CategoricalDtypeType(type): pass +@register_extension_dtype class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): """ Type for categorical data with the categories and orderedness @@ -692,6 +706,7 @@ class IntervalDtypeType(type): pass +@register_extension_dtype class IntervalDtype(PandasExtensionDtype, ExtensionDtype): """ A Interval duck-typed class, suitable for holding an interval @@ -824,8 +839,9 @@ def is_dtype(cls, dtype): return super(IntervalDtype, cls).is_dtype(dtype) -# register the dtypes in search order -registry.register(IntervalDtype) -registry.register(CategoricalDtype) +# TODO(Extension): remove the second registry once all internal extension +# dtypes are real extension dtypes. +_pandas_registry = Registry() + _pandas_registry.register(DatetimeTZDtype) _pandas_registry.register(PeriodDtype) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 2125458e8a0ba..02b7c9527769f 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,4 +1,3 @@ -import pytest import numpy as np import pandas as pd @@ -51,10 +50,6 @@ def test_eq_with_numpy_object(self, dtype): def test_array_type(self, data, dtype): assert dtype.construct_array_type() is type(data) - def test_array_type_with_arg(self, data, dtype): - with pytest.raises(NotImplementedError): - dtype.construct_array_type('foo') - def test_check_dtype(self, data): dtype = data.dtype diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 04e855242b5e6..03fdd25826b79 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -105,9 +105,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class TestDtype(BaseDecimal, base.BaseDtypeTests): - - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is DecimalArray + pass class TestInterface(BaseDecimal, base.BaseInterfaceTests): diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b9cc3c431528f..0126d771caf7f 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -116,9 +116,7 @@ def assert_frame_equal(self, left, right, *args, **kwargs): class TestDtype(BaseJSON, base.BaseDtypeTests): - - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is JSONArray + pass class TestInterface(BaseJSON, base.BaseInterfaceTests): diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index b8c73a9efdae8..6c6cf80c16da6 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -77,9 +77,7 @@ def data_for_grouping(): class TestDtype(base.BaseDtypeTests): - - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is Categorical + pass class TestInterface(base.BaseInterfaceTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 50c0e6dd8b347..57e0922a0b7d9 100644 --- 
a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -20,7 +20,7 @@ from pandas.tests.extension import base from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.arrays import IntegerArray, integer_array +from pandas.core.arrays import integer_array from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) @@ -92,9 +92,6 @@ def test_is_dtype_unboxes_dtype(self): # we have multiple dtypes, so skip pass - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is IntegerArray - class TestArithmeticOps(base.BaseArithmeticOpsTests): diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 625619a90ed4c..34b98f590df0d 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -84,9 +84,7 @@ class BaseInterval(object): class TestDtype(BaseInterval, base.BaseDtypeTests): - - def test_array_type_with_arg(self, data, dtype): - assert dtype.construct_array_type() is IntervalArray + pass class TestCasting(BaseInterval, base.BaseCastingTests): From e371129e76fa086452cd7394002bcb876b44b858 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 13 Sep 2018 08:24:14 -0500 Subject: [PATCH 40/86] TST: Close ZipFile in compression test (#22679) * Updates HypothesisCheck setting * Skips tests for old xlrd --- pandas/conftest.py | 11 +++++++---- pandas/tests/io/test_excel.py | 6 ++++++ pandas/tests/io/test_pickle.py | 8 ++++---- pandas/util/testing.py | 4 ++-- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index fdac045e67ffa..28c24fc8c0640 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -8,11 +8,14 @@ import pandas as pd from pandas.compat import PY3 import pandas.util._test_decorators as td - import hypothesis -hypothesis.settings.suppress_health_check = (hypothesis.HealthCheck.too_slow,) -# HealthCheck.all() to disable all health checks -# https://hypothesis.readthedocs.io/en/latest/healthchecks.html + + +hypothesis.settings.register_profile( + "ci", + suppress_health_check=(hypothesis.HealthCheck.too_slow,) +) +hypothesis.settings.load_profile("ci") def pytest_addoption(parser): diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 5f27ff719fda1..6741645e466f3 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -105,6 +105,7 @@ def get_exceldf(self, basename, ext, *args, **kwds): class ReadingTestsBase(SharedItems): # This is based on ExcelWriterBase + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_int(self, ext): dfref = self.get_csv_refdf('test1') @@ -122,6 +123,7 @@ def test_usecols_int(self, ext): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_list(self, ext): dfref = self.get_csv_refdf('test1') @@ -140,6 +142,7 @@ def test_usecols_list(self, ext): tm.assert_frame_equal(df2, dfref, check_names=False) tm.assert_frame_equal(df3, dfref, check_names=False) + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_usecols_str(self, ext): dfref = self.get_csv_refdf('test1') @@ -219,6 +222,7 @@ def test_excel_passes_na(self, ext): columns=['Test']) tm.assert_frame_equal(parsed, expected) + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_deprecated_sheetname(self, ext): # gh-17964 excel = 
self.get_excelfile('test1', ext) @@ -229,6 +233,7 @@ def test_deprecated_sheetname(self, ext): with pytest.raises(TypeError): read_excel(excel, sheet='Sheet1') + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_excel_table_sheet_by_index(self, ext): excel = self.get_excelfile('test1', ext) @@ -507,6 +512,7 @@ def test_date_conversion_overflow(self, ext): result = self.get_exceldf('testdateoverflow', ext) tm.assert_frame_equal(result, expected) + @td.skip_if_no('xlrd', '1.0.1') # GH-22682 def test_sheet_name_and_sheetname(self, ext): # GH10559: Minor improvement: Change "sheet_name" to "sheetname" # GH10969: DOC: Consistent var names (sheetname vs sheet_name) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index c71e26ae56e8e..77b4a3c7cac5f 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -332,9 +332,9 @@ def compress_file(self, src_path, dest_path, compression): f = bz2.BZ2File(dest_path, "w") elif compression == 'zip': import zipfile - zip_file = zipfile.ZipFile(dest_path, "w", - compression=zipfile.ZIP_DEFLATED) - zip_file.write(src_path, os.path.basename(src_path)) + f = zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) + f.write(src_path, os.path.basename(src_path)) elif compression == 'xz': lzma = pandas.compat.import_lzma() f = lzma.LZMAFile(dest_path, "w") @@ -345,7 +345,7 @@ def compress_file(self, src_path, dest_path, compression): if compression != "zip": with open(src_path, "rb") as fh: f.write(fh.read()) - f.close() + f.close() def test_write_explicit(self, compression, get_random_path): base = get_random_path diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f785ec35f52db..1e8c123fa6f13 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2560,7 +2560,7 @@ class for all warnings. To check that no warning is returned, the ``__warningsregistry__`` to ensure that no warning messages are suppressed by this context manager. If ``None`` is specified, the ``__warningsregistry__`` keeps track of which warnings have been - shown, and does not show them again. + shown, and does not show them again. check_stacklevel : bool, default True If True, displays the line that called the function containing the warning to show were the function is called. Otherwise, the @@ -2589,7 +2589,7 @@ class for all warnings. To check that no warning is returned, with warnings.catch_warnings(record=True) as w: if clear is not None: - # make sure that we are clearning these warnings + # make sure that we are clearing these warnings # if they have happened before # to guarantee that we will catch them if not is_list_like(clear): From 788158d271c52e3c4f744455c1a83d963953e00c Mon Sep 17 00:00:00 2001 From: gfyoung Date: Thu, 13 Sep 2018 12:52:34 -0700 Subject: [PATCH 41/86] CLN: Standardize searchsorted signatures (#22670) The parameter is "value" across the board. xref gh-14645. 
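To make the end state concrete, a minimal doctest-style sketch (illustrative only, not part of the
commit itself; the exact ``TypeError`` wording depends on the Python version): with the expired
``deprecate_kwarg`` shims removed below, the old ``v`` (``Series``/``Categorical``) and ``key``
(datetime-like index) spellings stop working entirely, and ``value`` is accepted everywhere:

>>> import pandas as pd
>>> s = pd.Series([1, 2, 3])
>>> s.searchsorted(value=2)
array([1])
>>> s.searchsorted(v=2)
Traceback (most recent call last):
    ...
TypeError: searchsorted() got an unexpected keyword argument 'v'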
--- doc/source/whatsnew/v0.24.0.txt | 2 ++ pandas/core/arrays/categorical.py | 1 - pandas/core/base.py | 4 +--- pandas/core/indexes/datetimes.py | 4 +--- pandas/core/indexes/period.py | 4 +--- pandas/core/indexes/timedeltas.py | 3 +-- pandas/core/series.py | 4 +--- pandas/tests/arrays/categorical/test_analytics.py | 5 ----- pandas/tests/indexes/period/test_tools.py | 3 --- pandas/tests/series/test_analytics.py | 4 ---- 10 files changed, 7 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 232f879285543..9a4747f40b86d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -571,6 +571,8 @@ Removal of prior version deprecations/changes - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) - :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) - Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) +- :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`) +- :meth:`TimedeltaIndex.searchsorted`, :meth:`DatetimeIndex.searchsorted`, and :meth:`PeriodIndex.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`) .. _whatsnew_0240.performance: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5410412d5f45b..63a1dacb47abb 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1335,7 +1335,6 @@ def memory_usage(self, deep=False): @Substitution(klass='Categorical') @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='v', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if not self.ordered: raise ValueError("Categorical not ordered\nyou can use " diff --git a/pandas/core/base.py b/pandas/core/base.py index 084a976320d77..d831dc69338bd 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -24,8 +24,7 @@ import pandas._libs.lib as lib from pandas.compat.numpy import function as nv from pandas.compat import PYPY, OrderedDict -from pandas.util._decorators import (Appender, cache_readonly, - deprecate_kwarg, Substitution) +from pandas.util._decorators import Appender, cache_readonly, Substitution from pandas.core.accessor import DirNamesMixin @@ -1228,7 +1227,6 @@ def factorize(self, sort=False, na_sentinel=-1): @Substitution(klass='IndexOpsMixin') @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='key', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): # needs coercion on the key (DatetimeIndex does already) return self.values.searchsorted(value, side=side, sorter=sorter) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f780b68a536a1..46741ab15aa31 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -43,8 +43,7 @@ generate_range, CDay, prefix_mapping) from pandas.core.tools.timedeltas import to_timedelta -from pandas.util._decorators import ( - Appender, cache_readonly, deprecate_kwarg, Substitution) +from pandas.util._decorators import Appender, cache_readonly, Substitution import pandas.core.common as com import pandas.tseries.offsets as offsets import pandas.core.tools.datetimes as tools @@ -1375,7 +1374,6 @@ def normalize(self): 
@Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='key', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if isinstance(value, (np.ndarray, Index)): value = np.array(value, dtype=_NS_DTYPE, copy=False) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3a68c6c26a974..0f86e18103e3c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -36,8 +36,7 @@ from pandas.core.indexes.base import _index_shared_docs, ensure_index from pandas import compat -from pandas.util._decorators import (Appender, Substitution, cache_readonly, - deprecate_kwarg) +from pandas.util._decorators import Appender, Substitution, cache_readonly import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -426,7 +425,6 @@ def astype(self, dtype, copy=True, how='start'): @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='key', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if isinstance(value, Period): if value.freq != self.freq: diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index e0c78d6a1c518..933bc6233dca9 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -29,7 +29,7 @@ from pandas.core.indexes.base import _index_shared_docs import pandas.core.common as com import pandas.core.dtypes.concat as _concat -from pandas.util._decorators import Appender, Substitution, deprecate_kwarg +from pandas.util._decorators import Appender, Substitution from pandas.core.indexes.datetimelike import ( TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op) from pandas.core.tools.timedeltas import ( @@ -609,7 +609,6 @@ def _partial_td_slice(self, key, freq, use_lhs=True, use_rhs=True): @Substitution(klass='TimedeltaIndex') @Appender(_shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='key', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if isinstance(value, (np.ndarray, Index)): value = np.array(value, dtype=_TD_DTYPE, copy=False) diff --git a/pandas/core/series.py b/pandas/core/series.py index ab41954990412..a4d403e4bcd94 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -75,8 +75,7 @@ import pandas.core.indexes.base as ibase import pandas.io.formats.format as fmt -from pandas.util._decorators import ( - Appender, deprecate, deprecate_kwarg, Substitution) +from pandas.util._decorators import Appender, deprecate, Substitution from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslibs, lib, iNaT @@ -2089,7 +2088,6 @@ def __rmatmul__(self, other): @Substitution(klass='Series') @Appender(base._shared_docs['searchsorted']) - @deprecate_kwarg(old_arg_name='v', new_arg_name='value') def searchsorted(self, value, side='left', sorter=None): if sorter is not None: sorter = ensure_platform_int(sorter) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 53d0e596a1d99..b1b2e609f9b07 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -118,11 +118,6 @@ def test_searchsorted(self): pytest.raises(ValueError, lambda: c2.searchsorted('apple')) pytest.raises(ValueError, lambda: s2.searchsorted('apple')) - with tm.assert_produces_warning(FutureWarning): - res = 
c1.searchsorted(v=['bread']) - exp = np.array([3], dtype=np.intp) - tm.assert_numpy_array_equal(res, exp) - def test_unique(self): # categories are reordered based on value when ordered=False cat = Categorical(["a", "b"]) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index c4ed07d98413f..a5c58eb40cc0d 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -220,9 +220,6 @@ def test_searchsorted(self, freq): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): pidx.searchsorted(pd.Period('2014-01-01', freq='5D')) - with tm.assert_produces_warning(FutureWarning): - pidx.searchsorted(key=p2) - class TestPeriodIndexConversion(object): def test_tolist(self): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 3a8b84cd53087..d5d9e5f4f14de 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1368,10 +1368,6 @@ def test_searchsorted(self): idx = s.searchsorted(1, side='right') tm.assert_numpy_array_equal(idx, np.array([1], dtype=np.intp)) - with tm.assert_produces_warning(FutureWarning): - idx = s.searchsorted(v=1, side='left') - tm.assert_numpy_array_equal(idx, np.array([0], dtype=np.intp)) - def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted(30) From 243a19e65079e4e23b35b717b3e57c4b4f00dadc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 13 Sep 2018 15:21:18 -0500 Subject: [PATCH 42/86] DEPR: Removed styler shim (#22691) * DEPR: Removed styler shim * sphinx --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/formats/__init__.py | 0 pandas/formats/style.py | 7 ------- pandas/tests/io/formats/test_style.py | 8 -------- 4 files changed, 1 insertion(+), 15 deletions(-) delete mode 100644 pandas/formats/__init__.py delete mode 100644 pandas/formats/style.py diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9a4747f40b86d..66b6923ef13e4 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -571,6 +571,7 @@ Removal of prior version deprecations/changes - Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`) - :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`) - Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`, :issue:`6581`) +- Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`) - :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`) - :meth:`TimedeltaIndex.searchsorted`, :meth:`DatetimeIndex.searchsorted`, and :meth:`PeriodIndex.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`) diff --git a/pandas/formats/__init__.py b/pandas/formats/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/formats/style.py b/pandas/formats/style.py deleted file mode 100644 index ec4b4a2cde0c5..0000000000000 --- a/pandas/formats/style.py +++ /dev/null @@ -1,7 +0,0 @@ -import warnings - -warnings.warn("Styler has been moved from pandas.formats.style.Styler" - " to pandas.io.formats.style.Styler. 
This shim will be" - " removed in pandas 0.21", - FutureWarning) -from pandas.io.formats.style import Styler # noqa diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 5254ccc742ab8..e407573c9a462 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1269,11 +1269,3 @@ def test_from_custom_template(tmpdir): assert result.template is not Styler.template styler = result(pd.DataFrame({"A": [1, 2]})) assert styler.render() - - -def test_shim(): - # https://github.com/pandas-dev/pandas/pull/16059 - # Remove in 0.21 - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - from pandas.formats.style import Styler as _styler # noqa From 3445e19552019b4a9886efb5509fc15a5aa69d0c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 14 Sep 2018 00:05:03 +0200 Subject: [PATCH 43/86] TST Use pytest.raises instead of legacy constructs (#22681) --- pandas/tests/frame/test_indexing.py | 6 +----- pandas/tests/plotting/test_frame.py | 6 +----- pandas/tests/series/test_combine_concat.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- pandas/tests/test_config.py | 8 +++----- 5 files changed, 7 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index f0c4d7be2f293..96b2e98dd7e8d 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -71,12 +71,8 @@ def test_getitem(self): def test_getitem_dupe_cols(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) - try: + with pytest.raises(KeyError): df[['baf']] - except KeyError: - pass - else: - self.fail("Dataframe failed to raise KeyError") def test_get(self): b = self.frame.get('B') diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 47a93ba82d77b..772989231e9a7 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -588,17 +588,13 @@ def test_subplots_layout(self): @pytest.mark.slow def test_subplots_warnings(self): # GH 9464 - warnings.simplefilter('error') - try: + with tm.assert_produces_warning(None): df = DataFrame(np.random.randn(100, 4)) df.plot(subplots=True, layout=(3, 2)) df = DataFrame(np.random.randn(100, 4), index=date_range('1/1/2000', periods=100)) df.plot(subplots=True, layout=(3, 2)) - except Warning as w: - self.fail(w) - warnings.simplefilter('default') @pytest.mark.slow def test_subplots_multiple_axes(self): diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index f8420b302836e..35ba4fbf0ce25 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -28,7 +28,7 @@ def test_append(self): elif idx in self.objSeries.index: assert value == self.objSeries[idx] else: - self.fail("orphaned index!") + raise AssertionError("orphaned index!") pytest.raises(ValueError, self.ts.append, self.ts, verify_integrity=True) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d2fbd69a2a08f..9faf47ace242d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -465,7 +465,7 @@ def test_constructor_index_mismatch(self, input): # test that construction of a Series with an index of different length # raises an error msg = 'Length of passed values is 3, index implies 4' - with pytest.raises(ValueError, message=msg): + with pytest.raises(ValueError, 
match=msg): Series(input, index=np.arange(4)) def test_constructor_numpy_scalar(self): diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index 91ce65dcce9b2..fd8e98c483f78 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -247,12 +247,10 @@ def test_deprecate_option(self): assert self.cf._is_deprecated('foo') with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') - try: + with pytest.raises( + KeyError, + message="Nonexistent option didn't raise KeyError"): self.cf.get_option('foo') - except KeyError: - pass - else: - self.fail("Nonexistent option didn't raise KeyError") assert len(w) == 1 # should have raised one warning assert 'deprecated' in str(w[-1]) # we get the default message From 7d6f2751c5be1f07cd693af6ef20b1090ff8b2f6 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Fri, 14 Sep 2018 02:54:17 +0100 Subject: [PATCH 44/86] Fix test_sql pytest fixture warnings (#22515) * Avoid calling pytest fixtures directly --- pandas/tests/io/test_sql.py | 55 +++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 824e5a2b23df3..e4df7043919ae 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -253,9 +253,13 @@ def _get_exec(self): else: return self.conn.cursor() - def _load_iris_data(self, datapath): + @pytest.fixture(params=[('io', 'data', 'iris.csv')]) + def load_iris_data(self, datapath, request): import io - iris_csv_file = datapath('io', 'data', 'iris.csv') + iris_csv_file = datapath(*request.param) + + if not hasattr(self, 'conn'): + self.setup_connect() self.drop_table('iris') self._get_exec().execute(SQL_STRINGS['create_iris'][self.flavor]) @@ -503,10 +507,14 @@ class _TestSQLApi(PandasSQLTest): flavor = 'sqlite' mode = None - @pytest.fixture(autouse=True) - def setup_method(self, datapath): + def setup_connect(self): self.conn = self.connect() - self._load_iris_data(datapath) + + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data): + self.load_test_data_and_sql() + + def load_test_data_and_sql(self): self._load_iris_view() self._load_test1_data() self._load_test2_data() @@ -1027,8 +1035,8 @@ class _EngineToConnMixin(object): """ @pytest.fixture(autouse=True) - def setup_method(self, datapath): - super(_EngineToConnMixin, self).setup_method(datapath) + def setup_method(self, load_iris_data): + super(_EngineToConnMixin, self).load_test_data_and_sql() engine = self.conn conn = engine.connect() self.__tx = conn.begin() @@ -1153,14 +1161,14 @@ def setup_class(cls): msg = "{0} - can't connect to {1} server".format(cls, cls.flavor) pytest.skip(msg) - @pytest.fixture(autouse=True) - def setup_method(self, datapath): - self.setup_connect() - - self._load_iris_data(datapath) + def load_test_data_and_sql(self): self._load_raw_sql() self._load_test1_data() + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data): + self.load_test_data_and_sql() + @classmethod def setup_import(cls): # Skip this test if SQLAlchemy not available @@ -1925,15 +1933,17 @@ class TestSQLiteFallback(SQLiteMixIn, PandasSQLTest): def connect(cls): return sqlite3.connect(':memory:') - @pytest.fixture(autouse=True) - def setup_method(self, datapath): + def setup_connect(self): self.conn = self.connect() - self.pandasSQL = sql.SQLiteDatabase(self.conn) - - self._load_iris_data(datapath) + def load_test_data_and_sql(self): + self.pandasSQL = sql.SQLiteDatabase(self.conn) 
self._load_test1_data() + @pytest.fixture(autouse=True) + def setup_method(self, load_iris_data): + self.load_test_data_and_sql() + def test_read_sql(self): self._read_sql_iris() @@ -2151,6 +2161,12 @@ def setup_method(self, request, datapath): self.method = request.function self.conn = sqlite3.connect(':memory:') + # In some test cases we may close db connection + # Re-open conn here so we can perform cleanup in teardown + yield + self.method = request.function + self.conn = sqlite3.connect(':memory:') + def test_basic(self): frame = tm.makeTimeDataFrame() self._check_roundtrip(frame) @@ -2227,7 +2243,7 @@ def test_execute_fail(self): with pytest.raises(Exception): sql.execute('INSERT INTO test VALUES("foo", "bar", 7)', self.conn) - def test_execute_closed_connection(self, request, datapath): + def test_execute_closed_connection(self): create_sql = """ CREATE TABLE test ( @@ -2246,9 +2262,6 @@ def test_execute_closed_connection(self, request, datapath): with pytest.raises(Exception): tquery("select * from test", con=self.conn) - # Initialize connection again (needed for tearDown) - self.setup_method(request, datapath) - def test_na_roundtrip(self): pass From b151427d8134050f19865217881de1d59faea4a2 Mon Sep 17 00:00:00 2001 From: henriqueribeiro Date: Fri, 14 Sep 2018 05:45:16 +0100 Subject: [PATCH 45/86] API: Add 'name' as argument for index 'to_frame' method (#22580) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/base.py | 19 +++++++++-- pandas/core/indexes/multi.py | 21 ++++++++++-- pandas/tests/indexes/common.py | 19 +++++++---- pandas/tests/indexes/multi/test_conversion.py | 34 +++++++++++++++++-- 5 files changed, 79 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 66b6923ef13e4..f2ec08c61a6d8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -184,6 +184,7 @@ Other Enhancements - :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`) - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). +- :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b2b6e02e908c5..ca381160de0df 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1115,17 +1115,21 @@ def to_series(self, index=None, name=None): return Series(self._to_embed(), index=index, name=name) - def to_frame(self, index=True): + def to_frame(self, index=True, name=None): """ Create a DataFrame with a column containing the Index. - .. versionadded:: 0.21.0 + .. versionadded:: 0.24.0 Parameters ---------- index : boolean, default True Set the index of the returned DataFrame as the original Index. + name : object, default None + The passed name should substitute for the index name (if it has + one). 
+ Returns ------- DataFrame @@ -1153,10 +1157,19 @@ def to_frame(self, index=True): 0 Ant 1 Bear 2 Cow + + To override the name of the resulting column, specify `name`: + + >>> idx.to_frame(index=False, name='zoo') + zoo + 0 Ant + 1 Bear + 2 Cow """ from pandas import DataFrame - name = self.name or 0 + if name is None: + name = self.name or 0 result = DataFrame({name: self.values.copy()}) if index: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4f38f61f7b0e4..a7932f667f6de 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1126,20 +1126,23 @@ def _to_safe_for_reshape(self): """ convert to object if we are a categorical """ return self.set_levels([i._to_safe_for_reshape() for i in self.levels]) - def to_frame(self, index=True): + def to_frame(self, index=True, name=None): """ Create a DataFrame with the levels of the MultiIndex as columns. Column ordering is determined by the DataFrame constructor with data as a dict. - .. versionadded:: 0.20.0 + .. versionadded:: 0.24.0 Parameters ---------- index : boolean, default True Set the index of the returned DataFrame as the original MultiIndex. + name : list / sequence of strings, optional + The passed names should substitute index level names. + Returns ------- DataFrame : a DataFrame containing the original MultiIndex data. @@ -1150,10 +1153,22 @@ def to_frame(self, index=True): """ from pandas import DataFrame + if name is not None: + if not is_list_like(name): + raise TypeError("'name' must be a list / sequence " + "of column names.") + + if len(name) != len(self.levels): + raise ValueError("'name' should have same length as " + "number of levels on index.") + idx_names = name + else: + idx_names = self.names + result = DataFrame({(name or level): self._get_level_values(level) for name, level in - zip(self.names, range(len(self.levels)))}, + zip(idx_names, range(len(self.levels)))}, copy=False) if index: result.index = self diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 56f59851d6d04..49a247608ab0b 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -66,19 +66,24 @@ def test_to_series_with_arguments(self): assert s.index is not idx assert s.name != idx.name - def test_to_frame(self): - # see gh-15230 + @pytest.mark.parametrize("name", [None, "new_name"]) + def test_to_frame(self, name): + # see GH-15230, GH-22580 idx = self.create_index() - name = idx.name or 0 - df = idx.to_frame() + if name: + idx_name = name + else: + idx_name = idx.name or 0 + + df = idx.to_frame(name=idx_name) assert df.index is idx assert len(df.columns) == 1 - assert df.columns[0] == name - assert df[name].values is not idx.values + assert df.columns[0] == idx_name + assert df[idx_name].values is not idx.values - df = idx.to_frame(index=False) + df = idx.to_frame(index=False, name=idx_name) assert df.index is not idx def test_shift(self): diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index fcc22390e17a1..8c9566b7e651f 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -37,6 +37,27 @@ def test_to_frame(): expected.index = index tm.assert_frame_equal(result, expected) + # See GH-22580 + index = MultiIndex.from_tuples(tuples) + result = index.to_frame(index=False, name=['first', 'second']) + expected = DataFrame(tuples) + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + result = 
index.to_frame(name=['first', 'second']) + expected.index = index + expected.columns = ['first', 'second'] + tm.assert_frame_equal(result, expected) + + msg = "'name' must be a list / sequence of column names." + with tm.assert_raises_regex(TypeError, msg): + index.to_frame(name='first') + + msg = "'name' should have same length as number of levels on index." + with tm.assert_raises_regex(ValueError, msg): + index.to_frame(name=['first']) + + # Tests for datetime index index = MultiIndex.from_product([range(5), pd.date_range('20130101', periods=3)]) result = index.to_frame(index=False) @@ -45,12 +66,21 @@ def test_to_frame(): 1: np.tile(pd.date_range('20130101', periods=3), 5)}) tm.assert_frame_equal(result, expected) - index = MultiIndex.from_product([range(5), - pd.date_range('20130101', periods=3)]) result = index.to_frame() expected.index = index tm.assert_frame_equal(result, expected) + # See GH-22580 + result = index.to_frame(index=False, name=['first', 'second']) + expected = DataFrame( + {'first': np.repeat(np.arange(5, dtype='int64'), 3), + 'second': np.tile(pd.date_range('20130101', periods=3), 5)}) + tm.assert_frame_equal(result, expected) + + result = index.to_frame(name=['first', 'second']) + expected.index = index + tm.assert_frame_equal(result, expected) + def test_to_hierarchical(): index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( From dad9b7c804f172fae44619e2c54144cbf38fa05c Mon Sep 17 00:00:00 2001 From: Paul Reidy Date: Fri, 14 Sep 2018 12:13:50 +0100 Subject: [PATCH 46/86] BUG: Incorrect addition of Week(weekday=6) to DatetimeIndex (#22695) * BUG: Incorrect addition of Week(weekday=6) to DatetimeIndex --- pandas/tests/arithmetic/test_datetime64.py | 1 + pandas/tseries/offsets.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index a3fa4e6b88256..b19cc61a2999e 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1764,6 +1764,7 @@ def test_dt64_with_DateOffsets_relativedelta(klass): 'MonthBegin', 'MonthEnd', 'SemiMonthEnd', 'SemiMonthBegin', 'Week', ('Week', {'weekday': 3}), + 'Week', ('Week', {'weekday': 6}), 'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin', 'CustomBusinessDay', 'CDay', 'CBMonthEnd', 'CBMonthBegin', 'BMonthBegin', 'BMonthEnd', diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d4a8211c17b87..0a9931c46bbd5 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1313,7 +1313,7 @@ def _end_apply_index(self, dtindex): base_period = dtindex.to_period(base) if self.n > 0: # when adding, dates on end roll to next - normed = dtindex - off + normed = dtindex - off + Timedelta(1, 'D') - Timedelta(1, 'ns') roll = np.where(base_period.to_timestamp(how='end') == normed, self.n, self.n - 1) else: From fab723c858026001be99d0657244329907aa396d Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Fri, 14 Sep 2018 14:43:58 +0200 Subject: [PATCH 47/86] ASV: more for str.cat (#22652) --- asv_bench/benchmarks/strings.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index b203c8b0fa5c9..ccfac2f73f14d 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,7 +1,7 @@ import warnings import numpy as np -from pandas import Series +from pandas import Series, 
DataFrame import pandas.util.testing as tm @@ -12,9 +12,6 @@ class Methods(object): def setup(self): self.s = Series(tm.makeStringIndex(10**5)) - def time_cat(self): - self.s.str.cat(sep=',') - def time_center(self): self.s.str.center(100) @@ -87,6 +84,32 @@ def time_repeat(self, repeats): self.s.str.repeat(self.repeat) +class Cat(object): + + goal_time = 0.2 + params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15]) + param_names = ['other_cols', 'sep', 'na_rep', 'na_frac'] + + def setup(self, other_cols, sep, na_rep, na_frac): + N = 10 ** 5 + mask_gen = lambda: np.random.choice([True, False], N, + p=[1 - na_frac, na_frac]) + self.s = Series(tm.makeStringIndex(N)).where(mask_gen()) + if other_cols == 0: + # str.cat self-concatenates only for others=None + self.others = None + else: + self.others = DataFrame({i: tm.makeStringIndex(N).where(mask_gen()) + for i in range(other_cols)}) + + def time_cat(self, other_cols, sep, na_rep, na_frac): + # before the concatenation (one caller + other_cols columns), the total + # expected fraction of rows containing any NaN is: + # reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0) + # for other_cols=3 and na_frac=0.15, this works out to ~48% + self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep) + + class Contains(object): goal_time = 0.2 From 1761dbcca5b827f0d50d8b93a7bc37f0553ccbc1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 15 Sep 2018 04:55:18 -0700 Subject: [PATCH 48/86] TST: Test for bug fixed during #22534 discussion (#22694) --- pandas/tests/frame/test_arithmetic.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index a6f4e0e38ec5d..9c61f13b944ea 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -100,6 +100,18 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname): # Arithmetic class TestFrameFlexArithmetic(object): + def test_df_add_td64_columnwise(self): + # GH#22534 Check that column-wise addition broadcasts correctly + dti = pd.date_range('2016-01-01', periods=10) + tdi = pd.timedelta_range('1', periods=10) + tser = pd.Series(tdi) + df = pd.DataFrame({0: dti, 1: tdi}) + + result = df.add(tser, axis=0) + expected = pd.DataFrame({0: dti + tdi, + 1: tdi + tdi}) + tm.assert_frame_equal(result, expected) + def test_df_add_flex_filled_mixed_dtypes(self): # GH#19611 dti = pd.date_range('2016-01-01', periods=3) From 93628c5bdf50ddcd6db8b1366402813a6d45926d Mon Sep 17 00:00:00 2001 From: ratijas Date: Sat, 15 Sep 2018 15:01:09 +0300 Subject: [PATCH 49/86] Fix broken link in install.rst (#22716) --- doc/source/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 4640da8b8239a..7a846c817aee2 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -270,7 +270,7 @@ Optional Dependencies * For Excel I/O: * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt) - * `openpyxl `__: openpyxl version 2.4.0 + * `openpyxl `__: openpyxl version 2.4.0 for writing .xlsx files (xlrd >= 0.9.0) * `XlsxWriter `__: Alternative Excel writer From d9500968f438c82111c3741f10c49b60ae160c12 Mon Sep 17 00:00:00 2001 From: Troels Nielsen Date: Sat, 15 Sep 2018 14:12:55 +0200 Subject: [PATCH 50/86] BUG: Make sure that sas7bdat parsers memory is initialized to 0 (#21616) (#22651) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/io/sas/sas7bdat.py | 2 +- pandas/tests/io/sas/data/cars.sas7bdat | 
Bin 0 -> 13312 bytes
 pandas/tests/io/sas/test_sas7bdat.py |  16 ++++++++++++++++
 4 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 pandas/tests/io/sas/data/cars.sas7bdat

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index f2ec08c61a6d8..e2e7f9e1e2324 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -739,7 +739,7 @@ I/O
 - :func:`read_html()` no longer ignores all-whitespace ``<tr>`` within ``<table>`` when considering the ``skiprows`` and ``header`` arguments. Previously, users had to decrease their ``header`` and ``skiprows`` values on such tables to work around the issue. (:issue:`21641`)
 - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`)
 - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`)
--
+- :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`)

 Plotting
 ^^^^^^^^

diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py
index b2d930c1be5e7..efeb306b618d1 100644
--- a/pandas/io/sas/sas7bdat.py
+++ b/pandas/io/sas/sas7bdat.py
@@ -614,7 +614,7 @@ def read(self, nrows=None):
         ns = (self.column_types == b's').sum()
         self._string_chunk = np.empty((ns, nrows), dtype=np.object)
-        self._byte_chunk = np.empty((nd, 8 * nrows), dtype=np.uint8)
+        self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
         self._current_row_in_chunk_index = 0
         p = Parser(self)
diff --git a/pandas/tests/io/sas/data/cars.sas7bdat b/pandas/tests/io/sas/data/cars.sas7bdat
new file mode 100644
index 0000000000000000000000000000000000000000..ca5d3474c36ad4532d1b87d1f872491c9bee8f1f
GIT binary patch
literal 13312
[13312 bytes of base85-encoded binary data for cars.sas7bdat omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py
index 101ee3e619f5b..efde152a918bd 100644
--- a/pandas/tests/io/sas/test_sas7bdat.py
+++ b/pandas/tests/io/sas/test_sas7bdat.py
@@ -183,6 +183,22 @@ def test_date_time(datapath):
     tm.assert_frame_equal(df, df0)


+def test_compact_numerical_values(datapath):
+    # Regression test for #21616
+    fname = datapath("io", "sas", "data", "cars.sas7bdat")
+    df = pd.read_sas(fname, encoding='latin-1')
+    # The two columns CYL and WGT in cars.sas7bdat have column
+    # width < 8 and only contain integral values.
+    # Test that pandas doesn't corrupt the numbers by adding
+    # decimals.
+ result = df['WGT'] + expected = df['WGT'].round() + tm.assert_series_equal(result, expected, check_exact=True) + result = df['CYL'] + expected = df['CYL'].round() + tm.assert_series_equal(result, expected, check_exact=True) + + def test_zero_variables(datapath): # Check if the SAS file has zero variables (PR #18184) fname = datapath("io", "sas", "data", "zero_variables.sas7bdat") From 831a527771a3dd9b94cd2e8bf112e4215caafaa4 Mon Sep 17 00:00:00 2001 From: Aaron Critchley Date: Sat, 15 Sep 2018 13:25:20 +0100 Subject: [PATCH 51/86] API: Make .shift always copy (Fixes #22397) (#22517) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/datetimelike.py | 2 +- pandas/core/generic.py | 2 +- pandas/tests/generic/test_series.py | 19 +++++++++++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e2e7f9e1e2324..c7a28729df11f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -544,6 +544,7 @@ Other API Changes - :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`) - :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`) - :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`) +- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`) .. _whatsnew_0240.deprecations: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index eb8821382037d..12e1dd1052e0b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -548,7 +548,7 @@ def shift(self, n, freq=None): if n == 0: # immutable so OK - return self + return self.copy() if self.freq is None: raise NullFrequencyError("Cannot shift with no freq") diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2e5da21f573b0..cdc5b4310bce2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8282,7 +8282,7 @@ def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, @Appender(_shared_docs['shift'] % _shared_doc_kwargs) def shift(self, periods=1, freq=None, axis=0): if periods == 0: - return self + return self.copy() block_axis = self._get_block_manager_axis(axis) if freq is None: diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index 3393d7704e411..f0c6c969f765a 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -227,3 +227,22 @@ def test_valid_deprecated(self): # GH18800 with tm.assert_produces_warning(FutureWarning): pd.Series([]).valid() + + @pytest.mark.parametrize("s", [ + Series([np.arange(5)]), + pd.date_range('1/1/2011', periods=24, freq='H'), + pd.Series(range(5), index=pd.date_range("2017", periods=5)) + ]) + @pytest.mark.parametrize("shift_size", [0, 1, 2]) + def test_shift_always_copy(self, s, shift_size): + # GH22397 + assert s.shift(shift_size) is not s + + @pytest.mark.parametrize("move_by_freq", [ + pd.Timedelta('1D'), + pd.Timedelta('1M'), + ]) + def test_datetime_shift_always_copy(self, move_by_freq): + # GH22397 + s = pd.Series(range(5), index=pd.date_range("2017", periods=5)) + assert s.shift(freq=move_by_freq) is not s From 
2b818539b3dbe1db433cb0bac270f305a89539b8 Mon Sep 17 00:00:00 2001 From: Nicholas Musolino Date: Sat, 15 Sep 2018 08:37:25 -0400 Subject: [PATCH 52/86] TST: Add test of DataFrame.xs() with duplicates (#13719) (#22294) --- pandas/tests/test_multilevel.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index dcfeab55f94fc..ecd0af9c13d34 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,7 +10,8 @@ import numpy as np from pandas.core.index import Index, MultiIndex -from pandas import Panel, DataFrame, Series, notna, isna, Timestamp, read_csv +from pandas import (Panel, DataFrame, Series, notna, isna, Timestamp, concat, + read_csv) from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com @@ -486,6 +487,14 @@ def test_xs_partial(self): expected = df.loc['foo', 'one'] tm.assert_frame_equal(result, expected) + def test_xs_with_duplicates(self): + # Issue #13719 + df_dup = concat([self.frame] * 2) + assert not df_dup.index.is_unique + expected = concat([self.frame.xs('one', level='second')] * 2) + tm.assert_frame_equal(df_dup.xs('one', level='second'), expected) + tm.assert_frame_equal(df_dup.xs(['one'], level=['second']), expected) + def test_xs_level(self): result = self.frame.xs('two', level='second') expected = self.frame[self.frame.index.get_level_values(1) == 'two'] From e5d334f6592609eb02cbfee50cb0c231f84a07e3 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sat, 15 Sep 2018 10:53:41 -0700 Subject: [PATCH 53/86] DEPR: Standardize searchsorted signature (#22672) "value" is the law of the land. xref gh-14645. Follow-up to gh-15601. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/frozen.py | 24 ++++++++++++++---------- pandas/tests/indexes/test_frozen.py | 7 +++++++ 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index c7a28729df11f..649629714c3b1 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -560,6 +560,7 @@ Deprecations - :func:`pandas.read_table` is deprecated. Instead, use :func:`pandas.read_csv` passing ``sep='\t'`` if necessary (:issue:`21948`) - :meth:`Series.str.cat` has deprecated using arbitrary list-likes *within* list-likes. A list-like container may still contain many ``Series``, ``Index`` or 1-dimensional ``np.ndarray``, or alternatively, only scalar values. (:issue:`21950`) +- :meth:`FrozenNDArray.searchsorted` has deprecated the ``v`` parameter in favor of ``value`` (:issue:`14645`) .. _whatsnew_0240.prior_deprecations: diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 3c6b922178abf..5a37e03b700f9 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -10,6 +10,7 @@ import numpy as np from pandas.core.base import PandasObject +from pandas.util._decorators import deprecate_kwarg from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.io.formats.printing import pprint_thing @@ -117,10 +118,10 @@ def __unicode__(self): quote_strings=True) return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype) - def searchsorted(self, v, side='left', sorter=None): + @deprecate_kwarg(old_arg_name="v", new_arg_name="value") + def searchsorted(self, value, side="left", sorter=None): """ - Find indices where elements of v should be inserted - in a to maintain order. 
+ Find indices to insert `value` so as to maintain order. For full documentation, see `numpy.searchsorted` @@ -129,17 +130,20 @@ def searchsorted(self, v, side='left', sorter=None): numpy.searchsorted : equivalent function """ - # we are much more performant if the searched - # indexer is the same type as the array - # this doesn't matter for int64, but DOES - # matter for smaller int dtypes - # https://github.com/numpy/numpy/issues/5370 + # We are much more performant if the searched + # indexer is the same type as the array. + # + # This doesn't matter for int64, but DOES + # matter for smaller int dtypes. + # + # xref: https://github.com/numpy/numpy/issues/5370 try: - v = self.dtype.type(v) + value = self.dtype.type(value) except: pass + return super(FrozenNDArray, self).searchsorted( - v, side=side, sorter=sorter) + value, side=side, sorter=sorter) def _ensure_frozen(array_like, categories, copy=False): diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index ca9841112b1d5..36d318e7a11aa 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -69,3 +69,10 @@ def test_values(self): assert isinstance(self.container, FrozenNDArray) tm.assert_numpy_array_equal(self.container.values(), original) assert vals[0] == n + + def test_searchsorted(self): + expected = 2 + assert self.container.searchsorted(7) == expected + + with tm.assert_produces_warning(FutureWarning): + assert self.container.searchsorted(v=7) == expected From 2ac80c401261bd47eebd6ab719b74d4ecd023706 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Sun, 16 Sep 2018 00:50:54 +0200 Subject: [PATCH 54/86] TST/CLN: break up & parametrize tests for df.set_index (#22236) --- pandas/tests/frame/conftest.py | 191 ++++++ pandas/tests/frame/test_alter_axes.py | 850 +++++++++++++------------- 2 files changed, 632 insertions(+), 409 deletions(-) create mode 100644 pandas/tests/frame/conftest.py diff --git a/pandas/tests/frame/conftest.py b/pandas/tests/frame/conftest.py new file mode 100644 index 0000000000000..fdedb93835d75 --- /dev/null +++ b/pandas/tests/frame/conftest.py @@ -0,0 +1,191 @@ +import pytest + +import numpy as np + +from pandas import compat +import pandas.util.testing as tm +from pandas import DataFrame, date_range, NaT + + +@pytest.fixture +def float_frame(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + return DataFrame(tm.getSeriesData()) + + +@pytest.fixture +def float_frame2(): + """ + Fixture for DataFrame of floats with index of unique strings + + Columns are ['D', 'C', 'B', 'A'] + """ + return DataFrame(tm.getSeriesData(), columns=['D', 'C', 'B', 'A']) + + +@pytest.fixture +def int_frame(): + """ + Fixture for DataFrame of ints with index of unique strings + + Columns are ['A', 'B', 'C', 'D'] + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(tm.getSeriesData())}) + # force these all to int64 to avoid platform testing issues + return DataFrame({c: s for c, s in compat.iteritems(df)}, dtype=np.int64) + + +@pytest.fixture +def datetime_frame(): + """ + Fixture for DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] + """ + return DataFrame(tm.getTimeSeriesData()) + + +@pytest.fixture +def float_string_frame(): + """ + Fixture for DataFrame of floats and strings with index of unique strings + + Columns are ['A', 'B', 'C', 'D', 'foo']. 
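+    The 'foo' column holds the string 'bar' in every row.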
+ """ + df = DataFrame(tm.getSeriesData()) + df['foo'] = 'bar' + return df + + +@pytest.fixture +def mixed_float_frame(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame(tm.getSeriesData()) + df.A = df.A.astype('float16') + df.B = df.B.astype('float32') + df.C = df.C.astype('float64') + return df + + +@pytest.fixture +def mixed_float_frame2(): + """ + Fixture for DataFrame of different float types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame(tm.getSeriesData()) + df.D = df.D.astype('float16') + df.C = df.C.astype('float32') + df.B = df.B.astype('float64') + return df + + +@pytest.fixture +def mixed_int_frame(): + """ + Fixture for DataFrame of different int types with index of unique strings + + Columns are ['A', 'B', 'C', 'D']. + """ + df = DataFrame({k: v.astype(int) + for k, v in compat.iteritems(tm.getSeriesData())}) + df.A = df.A.astype('uint8') + df.B = df.B.astype('int32') + df.C = df.C.astype('int64') + df.D = np.ones(len(df.D), dtype='uint64') + return df + + +@pytest.fixture +def mixed_type_frame(): + """ + Fixture for DataFrame of float/int/string columns with RangeIndex + + Columns are ['a', 'b', 'c', 'float32', 'int32']. + """ + return DataFrame({'a': 1., 'b': 2, 'c': 'foo', + 'float32': np.array([1.] * 10, dtype='float32'), + 'int32': np.array([1] * 10, dtype='int32')}, + index=np.arange(10)) + + +@pytest.fixture +def timezone_frame(): + """ + Fixture for DataFrame of date_range Series with different time zones + + Columns are ['A', 'B', 'C']; some entries are missing + """ + df = DataFrame({'A': date_range('20130101', periods=3), + 'B': date_range('20130101', periods=3, + tz='US/Eastern'), + 'C': date_range('20130101', periods=3, + tz='CET')}) + df.iloc[1, 1] = NaT + df.iloc[1, 2] = NaT + return df + + +@pytest.fixture +def empty_frame(): + """ + Fixture for empty DataFrame + """ + return DataFrame({}) + + +@pytest.fixture +def datetime_series(): + """ + Fixture for Series of floats with DatetimeIndex + """ + return tm.makeTimeSeries(nper=30) + + +@pytest.fixture +def datetime_series_short(): + """ + Fixture for Series of floats with DatetimeIndex + """ + return tm.makeTimeSeries(nper=30)[5:] + + +@pytest.fixture +def simple_frame(): + """ + Fixture for simple 3x3 DataFrame + + Columns are ['one', 'two', 'three'], index is ['a', 'b', 'c']. + """ + arr = np.array([[1., 2., 3.], + [4., 5., 6.], + [7., 8., 9.]]) + + return DataFrame(arr, columns=['one', 'two', 'three'], + index=['a', 'b', 'c']) + + +@pytest.fixture +def frame_of_index_cols(): + """ + Fixture for DataFrame of columns that can be used for indexing + + Columns are ['A', 'B', 'C', 'D', 'E']; 'A' & 'B' contain duplicates (but + are jointly unique), the rest are unique. 
+ """ + df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], + 'B': ['one', 'two', 'three', 'one', 'two'], + 'C': ['a', 'b', 'c', 'd', 'e'], + 'D': np.random.randn(5), + 'E': np.random.randn(5)}) + return df diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 4f95eb3fe7b47..4e61c9c62266d 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -10,213 +10,251 @@ import numpy as np from pandas.compat import lrange, PY2 -from pandas import (DataFrame, Series, Index, MultiIndex, - RangeIndex, date_range, IntervalIndex, - to_datetime) +from pandas import (DataFrame, Series, Index, MultiIndex, RangeIndex, + IntervalIndex, DatetimeIndex, Categorical, cut, + Timestamp, date_range, to_datetime) from pandas.core.dtypes.common import ( is_object_dtype, is_categorical_dtype, is_interval_dtype) -import pandas as pd - -from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from pandas.tests.frame.common import TestData +class TestDataFrameAlterAxes(): + + def test_set_index_directly(self, float_string_frame): + df = float_string_frame + idx = Index(np.arange(len(df))[::-1]) -class TestDataFrameAlterAxes(TestData): + df.index = idx + tm.assert_index_equal(df.index, idx) + with tm.assert_raises_regex(ValueError, 'Length mismatch'): + df.index = idx[::2] - def test_set_index(self): - idx = Index(np.arange(len(self.mixed_frame))) + def test_set_index(self, float_string_frame): + df = float_string_frame + idx = Index(np.arange(len(df))[::-1]) - # cache it - _ = self.mixed_frame['foo'] # noqa - self.mixed_frame.index = idx - assert self.mixed_frame['foo'].index is idx + df = df.set_index(idx) + tm.assert_index_equal(df.index, idx) with tm.assert_raises_regex(ValueError, 'Length mismatch'): - self.mixed_frame.index = idx[::2] + df.set_index(idx[::2]) def test_set_index_cast(self): - # issue casting an index then set_index df = DataFrame({'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012]) - expected = df.loc[2010] - new_index = df.index.astype(np.int32) - df.index = new_index - result = df.loc[2010] - assert_series_equal(result, expected) - - def test_set_index2(self): - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) - - # new object, single-column - result = df.set_index('C') - result_nodrop = df.set_index('C', drop=False) - - index = Index(df['C'], name='C') - - expected = df.loc[:, ['A', 'B', 'D', 'E']] - expected.index = index - - expected_nodrop = df.copy() - expected_nodrop.index = index - - assert_frame_equal(result, expected) - assert_frame_equal(result_nodrop, expected_nodrop) - assert result.index.name == index.name - - # inplace, single - df2 = df.copy() - - df2.set_index('C', inplace=True) - - assert_frame_equal(df2, expected) - - df3 = df.copy() - df3.set_index('C', drop=False, inplace=True) - - assert_frame_equal(df3, expected_nodrop) - - # create new object, multi-column - result = df.set_index(['A', 'B']) - result_nodrop = df.set_index(['A', 'B'], drop=False) - - index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) + df2 = df.set_index(df.index.astype(np.int32)) + tm.assert_frame_equal(df, df2) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('inplace', [True, False]) + 
@pytest.mark.parametrize('drop', [True, False]) + def test_set_index_drop_inplace(self, frame_of_index_cols, + drop, inplace, keys): + df = frame_of_index_cols + + if isinstance(keys, list): + idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys) + else: + idx = Index(df[keys], name=keys) + expected = df.drop(keys, axis=1) if drop else df + expected.index = idx + + if inplace: + result = df.copy() + result.set_index(keys, drop=drop, inplace=True) + else: + result = df.set_index(keys, drop=drop) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_append(self, frame_of_index_cols, drop, keys): + df = frame_of_index_cols + + keys = keys if isinstance(keys, list) else [keys] + idx = MultiIndex.from_arrays([df.index] + [df[x] for x in keys], + names=[None] + keys) + expected = df.drop(keys, axis=1) if drop else df.copy() + expected.index = idx + + result = df.set_index(keys, drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + # A has duplicate values, C does not + @pytest.mark.parametrize('keys', ['A', 'C', ['A', 'B']]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_append_to_multiindex(self, frame_of_index_cols, + drop, keys): + # append to existing multiindex + df = frame_of_index_cols.set_index(['D'], drop=drop, append=True) - expected = df.loc[:, ['C', 'D', 'E']] - expected.index = index + keys = keys if isinstance(keys, list) else [keys] + expected = frame_of_index_cols.set_index(['D'] + keys, + drop=drop, append=True) - expected_nodrop = df.copy() - expected_nodrop.index = index + result = df.set_index(keys, drop=drop, append=True) - assert_frame_equal(result, expected) - assert_frame_equal(result_nodrop, expected_nodrop) - assert result.index.names == index.names + tm.assert_frame_equal(result, expected) - # inplace - df2 = df.copy() - df2.set_index(['A', 'B'], inplace=True) - assert_frame_equal(df2, expected) + def test_set_index_after_mutation(self): + # GH1590 + df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) + expected = DataFrame({'val': [1, 2]}, + Index(['b', 'c'], name='key')) - df3 = df.copy() - df3.set_index(['A', 'B'], drop=False, inplace=True) - assert_frame_equal(df3, expected_nodrop) + df2 = df.loc[df.index.map(lambda indx: indx >= 1)] + result = df2.set_index('key') + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # also test index name if append=True (name is duplicate here for B) + @pytest.mark.parametrize('box', [Series, Index, np.array, + lambda x: MultiIndex.from_arrays([x])]) + @pytest.mark.parametrize('append, index_name', [(True, None), + (True, 'B'), (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_single_array(self, frame_of_index_cols, + drop, append, index_name, box): + df = frame_of_index_cols + df.index.name = index_name + + key = box(df['B']) + # np.array and list "forget" the name of B + name = [None if box in [np.array, list] else 'B'] + + result = df.set_index(key, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, nothing is dropped + expected = df.set_index(['B'], drop=False, append=append) + expected.index.names = [index_name] + name if append else name + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work 
directly on Series -> lambda + # also test index name if append=True (name is duplicate here for A & B) + @pytest.mark.parametrize('box', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x])]) + @pytest.mark.parametrize('append, index_name', + [(True, None), (True, 'A'), (True, 'B'), + (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_arrays(self, frame_of_index_cols, + drop, append, index_name, box): + df = frame_of_index_cols + df.index.name = index_name + + keys = ['A', box(df['B'])] + # np.array and list "forget" the name of B + names = ['A', None if box in [np.array, list] else 'B'] + + result = df.set_index(keys, drop=drop, append=append) + + # only valid column keys are dropped + # since B is always passed as array above, only A is dropped, if at all + expected = df.set_index(['A', 'B'], drop=False, append=append) + expected = expected.drop('A', axis=1) if drop else expected + expected.index.names = [index_name] + names if append else names + + tm.assert_frame_equal(result, expected) + + # MultiIndex constructor does not work directly on Series -> lambda + # We also emulate a "constructor" for the label -> lambda + # also test index name if append=True (name is duplicate here for A) + @pytest.mark.parametrize('box2', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name]) + @pytest.mark.parametrize('box1', [Series, Index, np.array, list, + lambda x: MultiIndex.from_arrays([x]), + lambda x: x.name]) + @pytest.mark.parametrize('append, index_name', [(True, None), + (True, 'A'), (True, 'test'), (False, None)]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, + append, index_name, box1, box2): + df = frame_of_index_cols + df.index.name = index_name + + keys = [box1(df['A']), box2(df['A'])] + + # == gives ambiguous Boolean for Series + if drop and keys[0] is 'A' and keys[1] is 'A': + with tm.assert_raises_regex(KeyError, '.*'): + df.set_index(keys, drop=drop, append=append) + else: + result = df.set_index(keys, drop=drop, append=append) + + # to test against already-tested behavior, we add sequentially, + # hence second append always True; must wrap in list, otherwise + # list-box will be illegal + expected = df.set_index([keys[0]], drop=drop, append=append) + expected = expected.set_index([keys[1]], drop=drop, append=True) + + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_pass_multiindex(self, frame_of_index_cols, + drop, append): + df = frame_of_index_cols + keys = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B']) + + result = df.set_index(keys, drop=drop, append=append) + + # setting with a MultiIndex will never drop columns + expected = df.set_index(['A', 'B'], drop=False, append=append) + + tm.assert_frame_equal(result, expected) + + def test_set_index_verify_integrity(self, frame_of_index_cols): + df = frame_of_index_cols - # corner case with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): df.set_index('A', verify_integrity=True) - - # append - result = df.set_index(['A', 'B'], append=True) - xp = df.reset_index().set_index(['index', 'A', 'B']) - xp.index.names = [None, 'A', 'B'] - assert_frame_equal(result, xp) - - # append to existing multiindex - rdf = df.set_index(['A'], append=True) - rdf = rdf.set_index(['B', 'C'], append=True) - expected = 
df.set_index(['A', 'B', 'C'], append=True) - assert_frame_equal(rdf, expected) - - # Series - result = df.set_index(df.C) - assert result.index.name == 'C' - - @pytest.mark.parametrize( - 'level', ['a', pd.Series(range(0, 8, 2), name='a')]) - def test_set_index_duplicate_names(self, level): - # GH18872 - GH19029 - df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b']) - - # Pass an existing level name: - df.index.name = 'a' - expected = pd.MultiIndex.from_tuples([(0, 0), (1, 2), (2, 4), (3, 6)], - names=['a', 'a']) - result = df.set_index(level, append=True) - tm.assert_index_equal(result.index, expected) - result = df.set_index([level], append=True) - tm.assert_index_equal(result.index, expected) - - # Pass twice the same level name (only works with passing actual data) - if isinstance(level, pd.Series): - result = df.set_index([level, level]) - expected = pd.MultiIndex.from_tuples( - [(0, 0), (2, 2), (4, 4), (6, 6)], names=['a', 'a']) - tm.assert_index_equal(result.index, expected) - - def test_set_index_nonuniq(self): - df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'], - 'B': ['one', 'two', 'three', 'one', 'two'], - 'C': ['a', 'b', 'c', 'd', 'e'], - 'D': np.random.randn(5), - 'E': np.random.randn(5)}) + # with MultiIndex with tm.assert_raises_regex(ValueError, 'Index has duplicate keys'): - df.set_index('A', verify_integrity=True, inplace=True) - assert 'A' in df - - def test_set_index_bug(self): - # GH1590 - df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']}) - xp = DataFrame({'val': [1, 2]}, - Index(['b', 'c'], name='key')) - - df2 = df.loc[df.index.map(lambda indx: indx >= 1)] - rs = df2.set_index('key') - assert_frame_equal(rs, xp) + df.set_index([df['A'], df['A']], verify_integrity=True) - def test_set_index_pass_arrays(self): - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def test_set_index_raise(self, frame_of_index_cols, drop, append): + df = frame_of_index_cols - # multiple columns - result = df.set_index(['A', df['B'].values], drop=False) - expected = df.set_index(['A', 'B'], drop=False) + with tm.assert_raises_regex(KeyError, '.*'): # column names are A-E + df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append) - # TODO should set_index check_names ? 
- assert_frame_equal(result, expected, check_names=False) + # non-existent key in list with arrays + with tm.assert_raises_regex(KeyError, '.*'): + df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append) def test_construction_with_categorical_index(self): - ci = tm.makeCategoricalIndex(10) + ci.name = 'B' # with Categorical df = DataFrame({'A': np.random.randn(10), 'B': ci.values}) idf = df.set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' + tm.assert_index_equal(idf.index, ci) # from a CategoricalIndex df = DataFrame({'A': np.random.randn(10), 'B': ci}) idf = df.set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' - - idf = df.set_index('B').reset_index().set_index('B') - str(idf) - tm.assert_index_equal(idf.index, ci, check_names=False) - assert idf.index.name == 'B' + tm.assert_index_equal(idf.index, ci) - new_df = idf.reset_index() - new_df.index = df.B - tm.assert_index_equal(new_df.index, ci, check_names=False) - assert idf.index.name == 'B' + # round-trip + idf = idf.reset_index().set_index('B') + tm.assert_index_equal(idf.index, ci) def test_set_index_cast_datetimeindex(self): df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i) @@ -224,48 +262,46 @@ def test_set_index_cast_datetimeindex(self): 'B': np.random.randn(1000)}) idf = df.set_index('A') - assert isinstance(idf.index, pd.DatetimeIndex) + assert isinstance(idf.index, DatetimeIndex) + def test_convert_dti_to_series(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 - i = (pd.DatetimeIndex( - to_datetime(['2013-1-1 13:00', - '2013-1-2 14:00'], errors="raise")) - .tz_localize('US/Pacific')) + idx = DatetimeIndex(to_datetime(['2013-1-1 13:00', + '2013-1-2 14:00']), + name='B').tz_localize('US/Pacific') df = DataFrame(np.random.randn(2, 1), columns=['A']) - expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800', - tz='US/Pacific'), - pd.Timestamp('2013-01-02 14:00:00-0800', - tz='US/Pacific')], - dtype="object")) + expected = Series(np.array([Timestamp('2013-01-01 13:00:00-0800', + tz='US/Pacific'), + Timestamp('2013-01-02 14:00:00-0800', + tz='US/Pacific')], + dtype="object"), name='B') # convert index to series - result = Series(i) - assert_series_equal(result, expected) + result = Series(idx) + tm.assert_series_equal(result, expected) - # assignt to frame - df['B'] = i + # assign to frame + df['B'] = idx result = df['B'] - assert_series_equal(result, expected, check_names=False) - assert result.name == 'B' + tm.assert_series_equal(result, expected) - # keep the timezone - result = i.to_series(keep_tz=True) - assert_series_equal(result.reset_index(drop=True), expected) + # convert to series while keeping the timezone + result = idx.to_series(keep_tz=True, index=[0, 1]) + tm.assert_series_equal(result, expected) # convert to utc - df['C'] = i.to_series().reset_index(drop=True) - result = df['C'] - comp = pd.DatetimeIndex(expected.values) - comp = comp.tz_localize(None) - tm.assert_numpy_array_equal(result.values, comp.values) + df['B'] = idx.to_series(index=[0, 1]) + result = df['B'] + comp = Series(DatetimeIndex(expected.values).tz_localize(None), + name='B') + tm.assert_series_equal(result, comp) # list of datetimes with a tz - df['D'] = i.to_pydatetime() - result = df['D'] - assert_series_equal(result, expected, check_names=False) - assert result.name == 'D' + df['B'] = idx.to_pydatetime() + result = df['B'] + tm.assert_series_equal(result, expected) # 
GH 6785 # set the index manually @@ -275,96 +311,97 @@ def test_set_index_cast_datetimeindex(self): expected = df.set_index('ts') df.index = df['ts'] df.pop('ts') - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_reset_index_tz(self, tz_aware_fixture): # GH 3950 # reset_index with single level tz = tz_aware_fixture - idx = pd.date_range('1/1/2011', periods=5, - freq='D', tz=tz, name='idx') - df = pd.DataFrame( - {'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) - - expected = pd.DataFrame({'idx': [datetime(2011, 1, 1), - datetime(2011, 1, 2), - datetime(2011, 1, 3), - datetime(2011, 1, 4), - datetime(2011, 1, 5)], - 'a': range(5), - 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx', 'a', 'b']) - expected['idx'] = expected['idx'].apply( - lambda d: pd.Timestamp(d, tz=tz)) - assert_frame_equal(df.reset_index(), expected) + idx = date_range('1/1/2011', periods=5, + freq='D', tz=tz, name='idx') + df = DataFrame({'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, + index=idx) + + expected = DataFrame({'idx': [datetime(2011, 1, 1), + datetime(2011, 1, 2), + datetime(2011, 1, 3), + datetime(2011, 1, 4), + datetime(2011, 1, 5)], + 'a': range(5), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx', 'a', 'b']) + expected['idx'] = expected['idx'].apply(lambda d: Timestamp(d, tz=tz)) + tm.assert_frame_equal(df.reset_index(), expected) def test_set_index_timezone(self): # GH 12358 # tz-aware Series should retain the tz - i = pd.to_datetime(["2014-01-01 10:10:10"], - utc=True).tz_convert('Europe/Rome') - df = DataFrame({'i': i}) - assert df.set_index(i).index[0].hour == 11 - assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11 - assert df.set_index(df.i).index[0].hour == 11 + idx = to_datetime(["2014-01-01 10:10:10"], + utc=True).tz_convert('Europe/Rome') + df = DataFrame({'A': idx}) + assert df.set_index(idx).index[0].hour == 11 + assert DatetimeIndex(Series(df.A))[0].hour == 11 + assert df.set_index(df.A).index[0].hour == 11 def test_set_index_dst(self): - di = pd.date_range('2006-10-29 00:00:00', periods=3, - freq='H', tz='US/Pacific') + di = date_range('2006-10-29 00:00:00', periods=3, + freq='H', tz='US/Pacific') - df = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, - index=di).reset_index() + df = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, + index=di).reset_index() # single level res = df.set_index('index') - exp = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, - index=pd.Index(di, name='index')) + exp = DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]}, + index=Index(di, name='index')) tm.assert_frame_equal(res, exp) # GH 12920 res = df.set_index(['index', 'a']) - exp_index = pd.MultiIndex.from_arrays([di, [0, 1, 2]], - names=['index', 'a']) - exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index) + exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], + names=['index', 'a']) + exp = DataFrame({'b': [3, 4, 5]}, index=exp_index) tm.assert_frame_equal(res, exp) def test_reset_index_with_intervals(self): - idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x') - original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']] + idx = IntervalIndex.from_breaks(np.arange(11), name='x') + original = DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']] result = original.set_index('x') - expected = pd.DataFrame({'y': np.arange(10)}, index=idx) - assert_frame_equal(result, expected) + expected = DataFrame({'y': np.arange(10)}, index=idx) + tm.assert_frame_equal(result, expected) result2 = result.reset_index() - assert_frame_equal(result2, 
original) + tm.assert_frame_equal(result2, original) def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) df = DataFrame(np.random.randn(3, 3), columns=columns) - rs = df.set_index(df.columns[0]) - xp = df.iloc[:, 1:] - xp.index = df.iloc[:, 0].values - xp.index.names = [df.columns[0]] - assert_frame_equal(rs, xp) + result = df.set_index(df.columns[0]) + expected = df.iloc[:, 1:] + expected.index = df.iloc[:, 0].values + expected.index.names = [df.columns[0]] + tm.assert_frame_equal(result, expected) def test_set_index_empty_column(self): - # #1971 + # GH 1971 df = DataFrame([ - dict(a=1, p=0), - dict(a=2, m=10), - dict(a=3, m=11, p=20), - dict(a=4, m=12, p=21) + {'a': 1, 'p': 0}, + {'a': 2, 'm': 10}, + {'a': 3, 'm': 11, 'p': 20}, + {'a': 4, 'm': 12, 'p': 21} ], columns=('a', 'm', 'p', 'x')) - # it works! result = df.set_index(['a', 'x']) - repr(result) - - def test_set_columns(self): - cols = Index(np.arange(len(self.mixed_frame.columns))) - self.mixed_frame.columns = cols + expected = df[['m', 'p']] + expected.index = MultiIndex.from_arrays([df['a'], df['x']], + names=['a', 'x']) + tm.assert_frame_equal(result, expected) + + def test_set_columns(self, float_string_frame): + cols = Index(np.arange(len(float_string_frame.columns))) + float_string_frame.columns = cols with tm.assert_raises_regex(ValueError, 'Length mismatch'): - self.mixed_frame.columns = cols[::2] + float_string_frame.columns = cols[::2] def test_dti_set_index_reindex(self): # GH 6631 @@ -377,7 +414,7 @@ def test_dti_set_index_reindex(self): df = df.reindex(idx2) tm.assert_index_equal(df.index, idx2) - # 11314 + # GH 11314 # with tz index = date_range(datetime(2015, 10, 1), datetime(2015, 10, 1, 23), @@ -387,14 +424,12 @@ def test_dti_set_index_reindex(self): datetime(2015, 10, 2, 23), freq='H', tz='US/Eastern') - # TODO: unused? 
- result = df.set_index(new_index) # noqa - - assert new_index.freq == index.freq + result = df.set_index(new_index) + assert result.index.freq == index.freq # Renaming - def test_rename(self): + def test_rename(self, float_frame): mapping = { 'A': 'a', 'B': 'b', @@ -402,12 +437,12 @@ def test_rename(self): 'D': 'd' } - renamed = self.frame.rename(columns=mapping) - renamed2 = self.frame.rename(columns=str.lower) + renamed = float_frame.rename(columns=mapping) + renamed2 = float_frame.rename(columns=str.lower) - assert_frame_equal(renamed, renamed2) - assert_frame_equal(renamed2.rename(columns=str.upper), - self.frame, check_names=False) + tm.assert_frame_equal(renamed, renamed2) + tm.assert_frame_equal(renamed2.rename(columns=str.upper), + float_frame, check_names=False) # index data = { @@ -417,52 +452,49 @@ def test_rename(self): # gets sorted alphabetical df = DataFrame(data) renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'}) - tm.assert_index_equal(renamed.index, pd.Index(['foo', 'bar'])) + tm.assert_index_equal(renamed.index, Index(['foo', 'bar'])) renamed = df.rename(index=str.upper) - tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO'])) + tm.assert_index_equal(renamed.index, Index(['BAR', 'FOO'])) # have to pass something - pytest.raises(TypeError, self.frame.rename) + pytest.raises(TypeError, float_frame.rename) # partial columns - renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'}) - tm.assert_index_equal(renamed.columns, - pd.Index(['A', 'B', 'foo', 'bar'])) + renamed = float_frame.rename(columns={'C': 'foo', 'D': 'bar'}) + tm.assert_index_equal(renamed.columns, Index(['A', 'B', 'foo', 'bar'])) # other axis - renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'}) - tm.assert_index_equal(renamed.index, - pd.Index(['A', 'B', 'foo', 'bar'])) + renamed = float_frame.T.rename(index={'C': 'foo', 'D': 'bar'}) + tm.assert_index_equal(renamed.index, Index(['A', 'B', 'foo', 'bar'])) # index with name index = Index(['foo', 'bar'], name='name') renamer = DataFrame(data, index=index) renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'}) tm.assert_index_equal(renamed.index, - pd.Index(['bar', 'foo'], name='name')) + Index(['bar', 'foo'], name='name')) assert renamed.index.name == renamer.index.name - def test_rename_axis_inplace(self): + def test_rename_axis_inplace(self, float_frame): # GH 15704 - frame = self.frame.copy() - expected = frame.rename_axis('foo') - result = frame.copy() + expected = float_frame.rename_axis('foo') + result = float_frame.copy() no_return = result.rename_axis('foo', inplace=True) assert no_return is None - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - expected = frame.rename_axis('bar', axis=1) - result = frame.copy() + expected = float_frame.rename_axis('bar', axis=1) + result = float_frame.copy() no_return = result.rename_axis('bar', axis=1, inplace=True) assert no_return is None - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_axis_warns(self): # https://github.com/pandas-dev/pandas/issues/17833 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}) + df = DataFrame({"A": [1, 2], "B": [1, 2]}) with tm.assert_produces_warning(FutureWarning) as w: df.rename_axis(id, axis=0) assert 'rename' in str(w[0].message) @@ -554,23 +586,23 @@ def test_rename_multiindex(self): level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self): - renamed = self.frame.rename(columns={'C': 'foo'}, copy=False) + def 
test_rename_nocopy(self, float_frame): + renamed = float_frame.rename(columns={'C': 'foo'}, copy=False) renamed['foo'] = 1. - assert (self.frame['C'] == 1.).all() + assert (float_frame['C'] == 1.).all() - def test_rename_inplace(self): - self.frame.rename(columns={'C': 'foo'}) - assert 'C' in self.frame - assert 'foo' not in self.frame + def test_rename_inplace(self, float_frame): + float_frame.rename(columns={'C': 'foo'}) + assert 'C' in float_frame + assert 'foo' not in float_frame - c_id = id(self.frame['C']) - frame = self.frame.copy() - frame.rename(columns={'C': 'foo'}, inplace=True) + c_id = id(float_frame['C']) + float_frame = float_frame.copy() + float_frame.rename(columns={'C': 'foo'}, inplace=True) - assert 'C' not in frame - assert 'foo' in frame - assert id(frame['foo']) != c_id + assert 'C' not in float_frame + assert 'foo' in float_frame + assert id(float_frame['foo']) != c_id def test_rename_bug(self): # GH 5344 @@ -585,7 +617,7 @@ def test_rename_bug(self): [('foo', 'bah'), ('bar', 'bas')], names=['a', 'b']), columns=['2001-01-01']) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_rename_bug2(self): # GH 19497 @@ -596,7 +628,7 @@ def test_rename_bug2(self): df = df.rename({(1, 1): (5, 4)}, axis="index") expected = DataFrame(data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], @@ -608,11 +640,11 @@ def test_reorder_levels(self): # no change, position result = df.reorder_levels([0, 1, 2]) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # no change, labels result = df.reorder_levels(['L0', 'L1', 'L2']) - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) # rotate, position result = df.reorder_levels([1, 2, 0]) @@ -623,7 +655,7 @@ def test_reorder_levels(self): names=['L1', 'L2', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.reorder_levels([0, 0, 0]) e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], @@ -633,13 +665,13 @@ def test_reorder_levels(self): names=['L0', 'L0', 'L0']) expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=e_idx) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.reorder_levels(['L0', 'L0', 'L0']) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) - def test_reset_index(self): - stacked = self.frame.stack()[::2] + def test_reset_index(self, float_frame): + stacked = float_frame.stack()[::2] stacked = DataFrame({'foo': stacked, 'bar': stacked}) names = ['first', 'second'] @@ -659,63 +691,62 @@ def test_reset_index(self): check_names=False) # default name assigned - rdf = self.frame.reset_index() - exp = pd.Series(self.frame.index.values, name='index') + rdf = float_frame.reset_index() + exp = Series(float_frame.index.values, name='index') tm.assert_series_equal(rdf['index'], exp) # default name assigned, corner case - df = self.frame.copy() + df = float_frame.copy() df['index'] = 'foo' rdf = df.reset_index() - exp = pd.Series(self.frame.index.values, name='level_0') + exp = Series(float_frame.index.values, name='level_0') tm.assert_series_equal(rdf['level_0'], exp) # but this is ok - self.frame.index.name = 'index' - deleveled = self.frame.reset_index() - 
tm.assert_series_equal(deleveled['index'], - pd.Series(self.frame.index)) + float_frame.index.name = 'index' + deleveled = float_frame.reset_index() + tm.assert_series_equal(deleveled['index'], Series(float_frame.index)) tm.assert_index_equal(deleveled.index, - pd.Index(np.arange(len(deleveled)))) + Index(np.arange(len(deleveled)))) # preserve column names - self.frame.columns.name = 'columns' - resetted = self.frame.reset_index() + float_frame.columns.name = 'columns' + resetted = float_frame.reset_index() assert resetted.columns.name == 'columns' # only remove certain columns - frame = self.frame.reset_index().set_index(['index', 'A', 'B']) - rs = frame.reset_index(['A', 'B']) + df = float_frame.reset_index().set_index(['index', 'A', 'B']) + rs = df.reset_index(['A', 'B']) # TODO should reset_index check_names ? - assert_frame_equal(rs, self.frame, check_names=False) + tm.assert_frame_equal(rs, float_frame, check_names=False) - rs = frame.reset_index(['index', 'A', 'B']) - assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + rs = df.reset_index(['index', 'A', 'B']) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = frame.reset_index(['index', 'A', 'B']) - assert_frame_equal(rs, self.frame.reset_index(), check_names=False) + rs = df.reset_index(['index', 'A', 'B']) + tm.assert_frame_equal(rs, float_frame.reset_index(), check_names=False) - rs = frame.reset_index('A') - xp = self.frame.reset_index().set_index(['index', 'B']) - assert_frame_equal(rs, xp, check_names=False) + rs = df.reset_index('A') + xp = float_frame.reset_index().set_index(['index', 'B']) + tm.assert_frame_equal(rs, xp, check_names=False) # test resetting in place - df = self.frame.copy() - resetted = self.frame.reset_index() + df = float_frame.copy() + resetted = float_frame.reset_index() df.reset_index(inplace=True) - assert_frame_equal(df, resetted, check_names=False) + tm.assert_frame_equal(df, resetted, check_names=False) - frame = self.frame.reset_index().set_index(['index', 'A', 'B']) - rs = frame.reset_index('A', drop=True) - xp = self.frame.copy() + df = float_frame.reset_index().set_index(['index', 'A', 'B']) + rs = df.reset_index('A', drop=True) + xp = float_frame.copy() del xp['A'] xp = xp.set_index(['B'], append=True) - assert_frame_equal(rs, xp, check_names=False) + tm.assert_frame_equal(rs, xp, check_names=False) def test_reset_index_level(self): - df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], - columns=['A', 'B', 'C', 'D']) + df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], + columns=['A', 'B', 'C', 'D']) for levels in ['A', 'B'], [0, 1]: # With MultiIndex @@ -772,17 +803,17 @@ def test_reset_index_multiindex_col(self): rs = df.reset_index() xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'], ['', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index(col_fill=None) xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index(col_level=1, col_fill='blah') xp = DataFrame(full, columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) df = DataFrame(vals, MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']], @@ -792,73 +823,73 @@ def test_reset_index_multiindex_col(self): xp = DataFrame(full, Index([0, 1, 2], name='d'), columns=[['a', 'b', 'b', 'c'], ['', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, 
xp) rs = df.reset_index('a', col_fill=None) xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill='blah', col_level=1) xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_reset_index_multiindex_nan(self): # GH6322, testing reset_index on MultiIndexes # when we have a nan or all nan - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [0, 1, np.nan], - 'C': np.random.rand(3)}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, np.nan], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': [np.nan, 'b', 'c'], - 'B': [0, 1, 2], - 'C': np.random.rand(3)}) + df = DataFrame({'A': [np.nan, 'b', 'c'], + 'B': [0, 1, 2], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [0, 1, 2], - 'C': [np.nan, 1.1, 2.2]}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [0, 1, 2], + 'C': [np.nan, 1.1, 2.2]}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) - df = pd.DataFrame({'A': ['a', 'b', 'c'], - 'B': [np.nan, np.nan, np.nan], - 'C': np.random.rand(3)}) + df = DataFrame({'A': ['a', 'b', 'c'], + 'B': [np.nan, np.nan, np.nan], + 'C': np.random.rand(3)}) rs = df.set_index(['A', 'B']).reset_index() - assert_frame_equal(rs, df) + tm.assert_frame_equal(rs, df) def test_reset_index_with_datetimeindex_cols(self): # GH5818 # - df = pd.DataFrame([[1, 2], [3, 4]], - columns=pd.date_range('1/1/2013', '1/2/2013'), - index=['A', 'B']) + df = DataFrame([[1, 2], [3, 4]], + columns=date_range('1/1/2013', '1/2/2013'), + index=['A', 'B']) result = df.reset_index() - expected = pd.DataFrame([['A', 1, 2], ['B', 3, 4]], - columns=['index', datetime(2013, 1, 1), - datetime(2013, 1, 2)]) - assert_frame_equal(result, expected) + expected = DataFrame([['A', 1, 2], ['B', 3, 4]], + columns=['index', datetime(2013, 1, 1), + datetime(2013, 1, 2)]) + tm.assert_frame_equal(result, expected) def test_reset_index_range(self): # GH 12071 - df = pd.DataFrame([[0, 0], [1, 1]], columns=['A', 'B'], - index=RangeIndex(stop=2)) + df = DataFrame([[0, 0], [1, 1]], columns=['A', 'B'], + index=RangeIndex(stop=2)) result = df.reset_index() assert isinstance(result.index, RangeIndex) - expected = pd.DataFrame([[0, 0, 0], [1, 1, 1]], - columns=['index', 'A', 'B'], - index=RangeIndex(stop=2)) - assert_frame_equal(result, expected) + expected = DataFrame([[0, 0, 0], [1, 1, 1]], + columns=['index', 'A', 'B'], + index=RangeIndex(stop=2)) + tm.assert_frame_equal(result, expected) def test_set_index_names(self): - df = pd.util.testing.makeDataFrame() + df = tm.makeDataFrame() df.index.name = 'name' assert df.set_index(df.index).index.names == ['name'] @@ -886,63 +917,63 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_rename_objects(self): - renamed = self.mixed_frame.rename(columns=str.upper) + def test_rename_objects(self, float_string_frame): + renamed = float_string_frame.rename(columns=str.upper) assert 'FOO' in renamed assert 'foo' not in renamed def test_rename_axis_style(self): # 
https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) - expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y']) + expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) result = df.rename(str.lower, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(str.lower, axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({"A": 'a', 'B': 'b'}, axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({"A": 'a', 'B': 'b'}, axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Index - expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) result = df.rename(str.lower, axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(str.lower, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({'X': 'x', 'Y': 'y'}, axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename({'X': 'x', 'Y': 'y'}, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.rename(mapper=str.lower, axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_mapper_multi(self): - df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], - 'C': [1, 2]}).set_index(["A", "B"]) + df = DataFrame({"A": ['a', 'b'], "B": ['c', 'd'], + 'C': [1, 2]}).set_index(["A", "B"]) result = df.rename(str.upper) expected = df.rename(index=str.upper) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_rename_positional_named(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) + df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y']) result = df.rename(str.lower, columns=str.upper) - expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) - assert_frame_equal(result, expected) + expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y']) + tm.assert_frame_equal(result, expected) def test_rename_axis_style_raises(self): # https://github.com/pandas-dev/pandas/issues/12392 - df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) + df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1']) # Named target and axis with tm.assert_raises_regex(TypeError, None): @@ -1000,41 +1031,42 @@ def test_reindex_api_equivalence(self): tm.assert_frame_equal(res1, res) def test_rename_positional(self): - df = pd.DataFrame(columns=['A', 'B']) + df = DataFrame(columns=['A', 'B']) with tm.assert_produces_warning(FutureWarning) as rec: result = df.rename(None, str.lower) - expected = pd.DataFrame(columns=['a', 'b']) - assert_frame_equal(result, expected) + expected = DataFrame(columns=['a', 'b']) + tm.assert_frame_equal(result, expected) assert len(rec) == 1 message = str(rec[0].message) assert 'rename' in message assert 'Use named arguments' in message - def test_assign_columns(self): - self.frame['hi'] = 'there' + def test_assign_columns(self, float_frame): + float_frame['hi'] = 'there' - frame = 
self.frame.copy() - frame.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] - assert_series_equal(self.frame['C'], frame['baz'], check_names=False) - assert_series_equal(self.frame['hi'], frame['foo2'], check_names=False) + df = float_frame.copy() + df.columns = ['foo', 'bar', 'baz', 'quux', 'foo2'] + tm.assert_series_equal(float_frame['C'], df['baz'], check_names=False) + tm.assert_series_equal(float_frame['hi'], df['foo2'], + check_names=False) def test_set_index_preserve_categorical_dtype(self): # GH13743, GH13854 df = DataFrame({'A': [1, 2, 1, 1, 2], 'B': [10, 16, 22, 28, 34], - 'C1': pd.Categorical(list("abaab"), - categories=list("bac"), - ordered=False), - 'C2': pd.Categorical(list("abaab"), - categories=list("bac"), - ordered=True)}) + 'C1': Categorical(list("abaab"), + categories=list("bac"), + ordered=False), + 'C2': Categorical(list("abaab"), + categories=list("bac"), + ordered=True)}) for cols in ['C1', 'C2', ['A', 'C1'], ['A', 'C2'], ['C1', 'C2']]: result = df.set_index(cols).reset_index() result = result.reindex(columns=df.columns) tm.assert_frame_equal(result, df) def test_ambiguous_warns(self): - df = pd.DataFrame({"A": [1, 2]}) + df = DataFrame({"A": [1, 2]}) with tm.assert_produces_warning(FutureWarning): df.rename(id, id) @@ -1043,14 +1075,14 @@ def test_ambiguous_warns(self): @pytest.mark.skipif(PY2, reason="inspect.signature") def test_rename_signature(self): - sig = inspect.signature(pd.DataFrame.rename) + sig = inspect.signature(DataFrame.rename) parameters = set(sig.parameters) assert parameters == {"self", "mapper", "index", "columns", "axis", "inplace", "copy", "level"} @pytest.mark.skipif(PY2, reason="inspect.signature") def test_reindex_signature(self): - sig = inspect.signature(pd.DataFrame.reindex) + sig = inspect.signature(DataFrame.reindex) parameters = set(sig.parameters) assert parameters == {"self", "labels", "index", "columns", "axis", "limit", "copy", "level", "method", @@ -1058,25 +1090,25 @@ def test_reindex_signature(self): def test_droplevel(self): # GH20342 - df = pd.DataFrame([ + df = DataFrame([ [1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12] ]) df = df.set_index([0, 1]).rename_axis(['a', 'b']) - df.columns = pd.MultiIndex.from_tuples([('c', 'e'), ('d', 'f')], - names=['level_1', 'level_2']) + df.columns = MultiIndex.from_tuples([('c', 'e'), ('d', 'f')], + names=['level_1', 'level_2']) # test that dropping of a level in index works expected = df.reset_index('a', drop=True) result = df.droplevel('a', axis='index') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # test that dropping of a level in columns works expected = df.copy() - expected.columns = pd.Index(['c', 'd'], name='level_1') + expected.columns = Index(['c', 'd'], name='level_1') result = df.droplevel('level_2', axis='columns') - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) class TestIntervalIndex(object): @@ -1084,7 +1116,7 @@ class TestIntervalIndex(object): def test_setitem(self): df = DataFrame({'A': range(10)}) - s = pd.cut(df.A, 5) + s = cut(df.A, 5) assert isinstance(s.cat.categories, IntervalIndex) # B & D end up as Categoricals @@ -1122,7 +1154,7 @@ def test_setitem(self): def test_set_reset_index(self): df = DataFrame({'A': range(10)}) - s = pd.cut(df.A, 5) + s = cut(df.A, 5) df['B'] = s df = df.set_index('B') From a507946c8cd06c21cc3ddae60069217707bc8f8f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 16 Sep 2018 14:39:52 -0500 Subject: [PATCH 55/86] TST: Mock clipboard IO (#22715) * Attempt to fix 
clipboard tests * note * update * update * doc --- pandas/tests/io/test_clipboard.py | 62 +++++++++++++++++++++++++++---- setup.cfg | 1 + 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index a6b331685e72a..bb73c6bc6b38b 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -13,7 +13,6 @@ from pandas.util import testing as tm from pandas.util.testing import makeCustomDataframe as mkdf from pandas.io.clipboard.exceptions import PyperclipException -from pandas.io.clipboard import clipboard_set, clipboard_get try: @@ -76,10 +75,53 @@ def df(request): raise ValueError +@pytest.fixture +def mock_clipboard(mock, request): + """Fixture mocking clipboard IO. + + This mocks pandas.io.clipboard.clipboard_get and + pandas.io.clipboard.clipboard_set. + + This uses a local dict for storing data. The dictionary + key used is the test ID, available with ``request.node.name``. + + This returns the local dictionary, for direct manipulation by + tests. + """ + + # our local clipboard for tests + _mock_data = {} + + def _mock_set(data): + _mock_data[request.node.name] = data + + def _mock_get(): + return _mock_data[request.node.name] + + mock_set = mock.patch("pandas.io.clipboard.clipboard_set", + side_effect=_mock_set) + mock_get = mock.patch("pandas.io.clipboard.clipboard_get", + side_effect=_mock_get) + with mock_get, mock_set: + yield _mock_data + + +@pytest.mark.clipboard +def test_mock_clipboard(mock_clipboard): + import pandas.io.clipboard + pandas.io.clipboard.clipboard_set("abc") + assert "abc" in set(mock_clipboard.values()) + result = pandas.io.clipboard.clipboard_get() + assert result == "abc" + + @pytest.mark.single +@pytest.mark.clipboard @pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed") +@pytest.mark.usefixtures("mock_clipboard") class TestClipboard(object): + def check_round_trip_frame(self, data, excel=None, sep=None, encoding=None): data.to_clipboard(excel=excel, sep=sep, encoding=encoding) @@ -118,15 +160,18 @@ def test_copy_delim_warning(self, df): # delimited and excel="True" @pytest.mark.parametrize('sep', ['\t', None, 'default']) @pytest.mark.parametrize('excel', [True, None, 'default']) - def test_clipboard_copy_tabs_default(self, sep, excel, df): + def test_clipboard_copy_tabs_default(self, sep, excel, df, request, + mock_clipboard): kwargs = build_kwargs(sep, excel) df.to_clipboard(**kwargs) if PY2: # to_clipboard copies unicode, to_csv produces bytes. 
This is # expected behavior - assert clipboard_get().encode('utf-8') == df.to_csv(sep='\t') + result = mock_clipboard[request.node.name].encode('utf-8') + expected = df.to_csv(sep='\t') + assert result == expected else: - assert clipboard_get() == df.to_csv(sep='\t') + assert mock_clipboard[request.node.name] == df.to_csv(sep='\t') # Tests reading of white space separated tables @pytest.mark.parametrize('sep', [None, 'default']) @@ -138,7 +183,8 @@ def test_clipboard_copy_strings(self, sep, excel, df): assert result.to_string() == df.to_string() assert df.shape == result.shape - def test_read_clipboard_infer_excel(self): + def test_read_clipboard_infer_excel(self, request, + mock_clipboard): # gh-19010: avoid warnings clip_kwargs = dict(engine="python") @@ -147,7 +193,7 @@ def test_read_clipboard_infer_excel(self): 1 2 4 Harry Carney """.strip()) - clipboard_set(text) + mock_clipboard[request.node.name] = text df = pd.read_clipboard(**clip_kwargs) # excel data is parsed correctly @@ -159,7 +205,7 @@ def test_read_clipboard_infer_excel(self): 1 2 3 4 """.strip()) - clipboard_set(text) + mock_clipboard[request.node.name] = text res = pd.read_clipboard(**clip_kwargs) text = dedent(""" @@ -167,7 +213,7 @@ def test_read_clipboard_infer_excel(self): 1 2 3 4 """.strip()) - clipboard_set(text) + mock_clipboard[request.node.name] = text exp = pd.read_clipboard(**clip_kwargs) tm.assert_frame_equal(res, exp) diff --git a/setup.cfg b/setup.cfg index 5fc0236066b93..021159bad99de 100644 --- a/setup.cfg +++ b/setup.cfg @@ -38,6 +38,7 @@ markers = slow: mark a test as slow network: mark a test as network high_memory: mark a test as a high-memory only + clipboard: mark a pd.read_clipboard test doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL addopts = --strict-data-files From 9fe3faf63d2fde13fb209e578a58672ec014c82f Mon Sep 17 00:00:00 2001 From: Sandrine Pataut Date: Mon, 17 Sep 2018 18:10:25 +0100 Subject: [PATCH 56/86] removing superfluous reference to axis in Series.reorder_levels docstring (#22734) --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a4d403e4bcd94..ba34a3e95e5d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2886,7 +2886,6 @@ def reorder_levels(self, order): ---------- order : list of int representing new level order. (reference level by number or key) - axis : where to reorder levels Returns ------- From 7afa8a0db57e18e84249cebe71f883e9b2475762 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 17 Sep 2018 12:10:55 -0700 Subject: [PATCH 57/86] CLN/DOC: Refactor timeseries.rst intro and overview (#22728) * CLN/DOC: Refactor timeseries.rst intro and overview * Address review * Forgot missing is --- doc/source/timeseries.rst | 117 ++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 36 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 5dfac98d069e7..71bc064ffb0c2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -21,51 +21,59 @@ Time Series / Date functionality ******************************** -pandas has proven very successful as a tool for working with time series data, -especially in the financial data analysis space. 
Using the NumPy ``datetime64`` and ``timedelta64`` dtypes,
-we have consolidated a large number of features from other Python libraries like ``scikits.timeseries`` as well as created
+pandas contains extensive capabilities and features for working with time series data for all domains.
+Using the NumPy ``datetime64`` and ``timedelta64`` dtypes, pandas has consolidated a large number of
+features from other Python libraries like ``scikits.timeseries`` as well as created
 a tremendous amount of new functionality for manipulating time series data.
 
-In working with time series data, we will frequently seek to:
+For example, pandas supports:
 
-* generate sequences of fixed-frequency dates and time spans
-* conform or convert time series to a particular frequency
-* compute "relative" dates based on various non-standard time increments
-  (e.g. 5 business days before the last business day of the year), or "roll"
-  dates forward or backward
+Parsing time series information from various sources and formats
 
-pandas provides a relatively compact and self-contained set of tools for
-performing the above tasks.
+.. ipython:: python
+
+   dti = pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01'), datetime(2018, 1, 1)])
+   dti
 
-Create a range of dates:
+Generating sequences of fixed-frequency dates and time spans
 
 .. ipython:: python
 
-   # 72 hours starting with midnight Jan 1st, 2011
-   rng = pd.date_range('1/1/2011', periods=72, freq='H')
-   rng[:5]
+   dti = pd.date_range('2018-01-01', periods=3, freq='H')
+   dti
 
-Index pandas objects with dates:
+Manipulating and converting date times with timezone information
 
 .. ipython:: python
 
-   ts = pd.Series(np.random.randn(len(rng)), index=rng)
-   ts.head()
+   dti = dti.tz_localize('UTC')
+   dti
+   dti.tz_convert('US/Pacific')
 
-Change frequency and fill gaps:
+Resampling or converting a time series to a particular frequency
 
 .. ipython:: python
 
-   # to 45 minute frequency and forward fill
-   converted = ts.asfreq('45Min', method='pad')
-   converted.head()
+   idx = pd.date_range('2018-01-01', periods=5, freq='H')
+   ts = pd.Series(range(len(idx)), index=idx)
+   ts
+   ts.resample('2H').mean()
 
-Resample the series to a daily frequency:
+Performing date and time arithmetic with absolute or relative time increments
 
 .. ipython:: python
 
-   # Daily means
-   ts.resample('D').mean()
+   friday = pd.Timestamp('2018-01-05')
+   friday.day_name()
+   # Add 1 day
+   saturday = friday + pd.Timedelta('1 day')
+   saturday.day_name()
+   # Add 1 business day (Friday --> Monday)
+   monday = friday + pd.tseries.offsets.BDay()
+   monday.day_name()
+
+pandas provides a relatively compact and self-contained set of tools for
+performing the above tasks and more.
 
 .. _timeseries.overview:
 
@@ -73,17 +81,54 @@ Resample the series to a daily frequency:
 Overview
 --------
 
-The following table shows the type of time-related classes pandas can handle and
-how to create them.
+pandas captures four general time-related concepts:
+
+#. Date times: A specific date and time with timezone support. Similar to ``datetime.datetime`` from the standard library.
+#. Time deltas: An absolute time duration. Similar to ``datetime.timedelta`` from the standard library.
+#. Time spans: A span of time defined by a point in time and its associated frequency.
+#. Date offsets: A relative time duration that respects calendar arithmetic. Similar to ``dateutil.relativedelta.relativedelta`` from the ``dateutil`` package.
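+
+As a minimal sketch of these four concepts (illustration only, using the
+public scalar constructors; ``pd.DateOffset(months=1)`` stands in here for
+any calendar-aware offset):
+
+.. ipython:: python
+
+   pd.Timestamp('2018-01-05')       # a date time
+   pd.Timedelta('1 day')            # a time delta
+   pd.Period('2018-01', freq='M')   # a time span
+   pd.DateOffset(months=1)          # a date offset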
-================= =============================== ===================================================================
-Class             Remarks                         How to create
-================= =============================== ===================================================================
-``Timestamp``     Represents a single timestamp   ``to_datetime``, ``Timestamp``
-``DatetimeIndex`` Index of ``Timestamp``          ``to_datetime``, ``date_range``, ``bdate_range``, ``DatetimeIndex``
-``Period``        Represents a single time span   ``Period``
-``PeriodIndex``   Index of ``Period``             ``period_range``, ``PeriodIndex``
-================= =============================== ===================================================================
+===================== ================= =================== ============================================ ========================================
+Concept               Scalar Class      Array Class         pandas Data Type                             Primary Creation Method
+===================== ================= =================== ============================================ ========================================
+Date times            ``Timestamp``     ``DatetimeIndex``   ``datetime64[ns]`` or ``datetime64[ns, tz]`` ``to_datetime`` or ``date_range``
+Time deltas           ``Timedelta``     ``TimedeltaIndex``  ``timedelta64[ns]``                          ``to_timedelta`` or ``timedelta_range``
+Time spans            ``Period``        ``PeriodIndex``     ``period[freq]``                             ``Period`` or ``period_range``
+Date offsets          ``DateOffset``    ``None``            ``None``                                     ``DateOffset``
+===================== ================= =================== ============================================ ========================================
+
+For time series data, it's conventional to represent the time component in the index of a :class:`Series` or :class:`DataFrame`
+so manipulations can be performed with respect to the time element.
+
+.. ipython:: python
+
+   pd.Series(range(3), index=pd.date_range('2000', freq='D', periods=3))
+
+However, :class:`Series` and :class:`DataFrame` can also directly support the time component as data itself.
+
+.. ipython:: python
+
+   pd.Series(pd.date_range('2000', freq='D', periods=3))
+
+:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime`` and ``timedelta``
+data when the time data is used as data itself. The ``Period`` and ``DateOffset`` data will be stored as ``object`` data.
+
+.. ipython:: python
+
+   pd.Series(pd.period_range('1/1/2011', freq='M', periods=3))
+   pd.Series(pd.date_range('1/1/2011', freq='M', periods=3))
+
+Lastly, pandas represents null date times, time deltas, and time spans as ``NaT``, which
+is useful for representing missing or null date-like values and behaves similarly
+to ``np.nan`` for float data.
+
+.. ipython:: python
+
+   pd.Timestamp(pd.NaT)
+   pd.Timedelta(pd.NaT)
+   pd.Period(pd.NaT)
+   # Equality acts as np.nan would
+   pd.NaT == pd.NaT
 
 .. _timeseries.representation:
 
@@ -1443,7 +1488,7 @@ time. The method for this is :meth:`~Series.shift`, which is available on all
 of the pandas objects.
 
 ..
ipython:: python - + ts = pd.Series(range(len(rng)), index=rng) ts = ts[:5] ts.shift(1) From 006c0133f445c30f5bf8d04155a91a2087832487 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 18 Sep 2018 04:10:56 -0700 Subject: [PATCH 58/86] CLN: Remove unused imports in pyx files (#22739) --- pandas/_libs/sparse.pyx | 3 --- pandas/_libs/tslib.pyx | 2 -- 2 files changed, 5 deletions(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 2993114a668bb..d852711d3b707 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -1,7 +1,4 @@ # -*- coding: utf-8 -*- -import operator -import sys - import cython import numpy as np diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 16fea0615f199..9012ebefe0975 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -15,8 +15,6 @@ import numpy as np cnp.import_array() import pytz -from dateutil.tz import tzlocal, tzutc as dateutil_utc - from util cimport (is_integer_object, is_float_object, is_string_object, is_datetime64_object) From 845b21a82f704c25349bf09103fe5a1189abf616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vit=C3=B3ria=20Helena?= Date: Tue, 18 Sep 2018 08:15:24 -0300 Subject: [PATCH 59/86] CLN: Removes module pandas.json (#22737) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/__init__.py | 3 --- pandas/json.py | 7 ------- pandas/tests/api/test_api.py | 9 +-------- 4 files changed, 2 insertions(+), 18 deletions(-) delete mode 100644 pandas/json.py diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 649629714c3b1..34eb5d8d7ed0f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -577,6 +577,7 @@ Removal of prior version deprecations/changes - Removed the ``pandas.formats.style`` shim for :class:`pandas.io.formats.style.Styler` (:issue:`16059`) - :meth:`Categorical.searchsorted` and :meth:`Series.searchsorted` have renamed the ``v`` argument to ``value`` (:issue:`14645`) - :meth:`TimedeltaIndex.searchsorted`, :meth:`DatetimeIndex.searchsorted`, and :meth:`PeriodIndex.searchsorted` have renamed the ``key`` argument to ``value`` (:issue:`14645`) +- Removal of the previously deprecated module ``pandas.json`` (:issue:`19944`) .. _whatsnew_0240.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index 97ae73174c09c..f91d0aa84e0ff 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -61,9 +61,6 @@ # extension module deprecations from pandas.util._depr_module import _DeprecatedModule -json = _DeprecatedModule(deprmod='pandas.json', - moved={'dumps': 'pandas.io.json.dumps', - 'loads': 'pandas.io.json.loads'}) parser = _DeprecatedModule(deprmod='pandas.parser', removals=['na_values'], moved={'CParserError': 'pandas.errors.ParserError'}) diff --git a/pandas/json.py b/pandas/json.py deleted file mode 100644 index 16d6580c87951..0000000000000 --- a/pandas/json.py +++ /dev/null @@ -1,7 +0,0 @@ -# flake8: noqa - -import warnings -warnings.warn("The pandas.json module is deprecated and will be " - "removed in a future version. 
Please import from "
-              "pandas.io.json instead", FutureWarning, stacklevel=2)
-from pandas._libs.json import dumps, loads
diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py
index bf9e14b427015..199700b304a4e 100644
--- a/pandas/tests/api/test_api.py
+++ b/pandas/tests/api/test_api.py
@@ -35,7 +35,7 @@ class TestPDApi(Base):
                'util', 'options', 'io']
 
     # these are already deprecated; awaiting removal
-    deprecated_modules = ['parser', 'json', 'lib', 'tslib']
+    deprecated_modules = ['parser', 'lib', 'tslib']
 
     # misc
     misc = ['IndexSlice', 'NaT']
@@ -173,13 +173,6 @@ def test_get_store(self):
             s.close()
 
 
-class TestJson(object):
-
-    def test_deprecation_access_func(self):
-        with catch_warnings(record=True):
-            pd.json.dumps([])
-
-
 class TestParser(object):
 
     def test_deprecation_access_func(self):
From 3ec461fbc326dda6198b89642e80dc5e6b037318 Mon Sep 17 00:00:00 2001
From: Simon Hawkins
Date: Tue, 18 Sep 2018 12:17:20 +0100
Subject: [PATCH 60/86] TST/CLN: remove duplicate data file used in tests
 (unicode_series.csv) (#22723)

---
 .../tests/io/formats/data/unicode_series.csv  | 18 ------------------
 pandas/tests/io/formats/test_format.py        |  2 +-
 2 files changed, 1 insertion(+), 19 deletions(-)
 delete mode 100644 pandas/tests/io/formats/data/unicode_series.csv

diff --git a/pandas/tests/io/formats/data/unicode_series.csv b/pandas/tests/io/formats/data/unicode_series.csv
deleted file mode 100644
index 2485e149edb06..0000000000000
--- a/pandas/tests/io/formats/data/unicode_series.csv
+++ /dev/null
@@ -1,18 +0,0 @@
-1617,King of New York (1990)
-1618,All Things Fair (1996)
-1619,"Sixth Man, The (1997)"
-1620,Butterfly Kiss (1995)
-1621,"Paris, France (1993)"
-1622,"Cérémonie, La (1995)"
-1623,Hush (1998)
-1624,Nightwatch (1997)
-1625,Nobody Loves Me (Keiner liebt mich) (1994)
-1626,"Wife, The (1995)"
-1627,Lamerica (1994)
-1628,Nico Icon (1995)
-1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
-1630,"Slingshot, The (1993)"
-1631,Land and Freedom (Tierra y libertad) (1995)
-1632,Á köldum klaka (Cold Fever) (1994)
-1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
-1634,Two Friends (1986)
diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py
index c19f8e57f9ae7..ffbc978b92ba5 100644
--- a/pandas/tests/io/formats/test_format.py
+++ b/pandas/tests/io/formats/test_format.py
@@ -955,7 +955,7 @@ def test_unicode_problem_decoding_as_ascii(self):
         compat.text_type(dm.to_string())
 
     def test_string_repr_encoding(self, datapath):
-        filepath = datapath('io', 'formats', 'data', 'unicode_series.csv')
+        filepath = datapath('io', 'parser', 'data', 'unicode_series.csv')
         df = pd.read_csv(filepath, header=None, encoding='latin1')
         repr(df)
         repr(df[1])
From 9465a596c5916742f3411d63f37b1c402370f499 Mon Sep 17 00:00:00 2001
From: Troels Nielsen
Date: Tue, 18 Sep 2018 14:13:45 +0200
Subject: [PATCH 61/86] BUG: Some sas7bdat files with many columns are not
 parseable by read_sas (#22628)

---
 doc/source/whatsnew/v0.24.0.txt             |   2 +
 pandas/io/sas/sas.pyx                       |  10 +--
 pandas/io/sas/sas7bdat.py                   |  61 ++++++++++--------
 pandas/tests/io/sas/data/load_log.sas7bdat  | Bin 0 -> 589824 bytes
 pandas/tests/io/sas/data/many_columns.csv   |   4 ++
 .../tests/io/sas/data/many_columns.sas7bdat | Bin 0 -> 81920 bytes
 pandas/tests/io/sas/test_sas7bdat.py        |  16 +++++
 7 files changed, 62 insertions(+), 31 deletions(-)
 create mode 100644 pandas/tests/io/sas/data/load_log.sas7bdat
 create mode 100644 pandas/tests/io/sas/data/many_columns.csv
 create mode 100644
pandas/tests/io/sas/data/many_columns.sas7bdat diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 34eb5d8d7ed0f..cccbe47073fbd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -743,6 +743,8 @@ I/O - :func:`read_excel()` will correctly show the deprecation warning for previously deprecated ``sheetname`` (:issue:`17994`) - :func:`read_csv()` will correctly parse timezone-aware datetimes (:issue:`22256`) - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) +- :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) +- :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) Plotting ^^^^^^^^ diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 221c07a0631d2..a5bfd5866a261 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -244,8 +244,8 @@ cdef class Parser(object): self.parser = parser self.header_length = self.parser.header_length self.column_count = parser.column_count - self.lengths = parser._column_data_lengths - self.offsets = parser._column_data_offsets + self.lengths = parser.column_data_lengths() + self.offsets = parser.column_data_offsets() self.byte_chunk = parser._byte_chunk self.string_chunk = parser._string_chunk self.row_length = parser.row_length @@ -257,7 +257,7 @@ cdef class Parser(object): # page indicators self.update_next_page() - column_types = parser.column_types + column_types = parser.column_types() # map column types for j in range(self.column_count): @@ -375,7 +375,7 @@ cdef class Parser(object): if done: return True return False - elif self.current_page_type == page_data_type: + elif self.current_page_type & page_data_type == page_data_type: self.process_byte_array_with_data( bit_offset + subheader_pointers_offset + self.current_row_on_page_index * self.row_length, @@ -437,7 +437,7 @@ cdef class Parser(object): elif column_types[j] == column_type_string: # string string_chunk[js, current_row] = np.array(source[start:( - start + lngt)]).tostring().rstrip() + start + lngt)]).tostring().rstrip(b"\x00 ") js += 1 self.current_row_on_page_index += 1 diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index efeb306b618d1..3582f538c16bf 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -82,7 +82,6 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self.compression = "" self.column_names_strings = [] self.column_names = [] - self.column_types = [] self.column_formats = [] self.columns = [] @@ -90,6 +89,8 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._cached_page = None self._column_data_lengths = [] self._column_data_offsets = [] + self._column_types = [] + self._current_row_in_file_index = 0 self._current_row_on_page_index = 0 self._current_row_in_file_index = 0 @@ -102,6 +103,19 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._get_properties() self._parse_metadata() + def column_data_lengths(self): + """Return a numpy int64 array of the column data lengths""" + return np.asarray(self._column_data_lengths, dtype=np.int64) + + def column_data_offsets(self): + """Return a numpy int64 array of the column offsets""" + return np.asarray(self._column_data_offsets, dtype=np.int64) + + def column_types(self): + """Returns a numpy character array of the column types: + s (string) or d 
(double)""" + return np.asarray(self._column_types, dtype=np.dtype('S1')) + def close(self): try: self.handle.close() @@ -287,8 +301,10 @@ def _process_page_meta(self): pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types if self._current_page_type in pt: self._process_page_metadata() - return ((self._current_page_type in [256] + const.page_mix_types) or - (self._current_page_data_subheader_pointers is not None)) + is_data_page = self._current_page_type & const.page_data_type + is_mix_page = self._current_page_type in const.page_mix_types + return (is_data_page or is_mix_page + or self._current_page_data_subheader_pointers != []) def _read_page_header(self): bit_offset = self._page_bit_offset @@ -503,12 +519,6 @@ def _process_columnattributes_subheader(self, offset, length): int_len = self._int_length column_attributes_vectors_count = ( length - 2 * int_len - 12) // (int_len + 8) - self.column_types = np.empty( - column_attributes_vectors_count, dtype=np.dtype('S1')) - self._column_data_lengths = np.empty( - column_attributes_vectors_count, dtype=np.int64) - self._column_data_offsets = np.empty( - column_attributes_vectors_count, dtype=np.int64) for i in range(column_attributes_vectors_count): col_data_offset = (offset + int_len + const.column_data_offset_offset + @@ -520,16 +530,13 @@ def _process_columnattributes_subheader(self, offset, length): const.column_type_offset + i * (int_len + 8)) x = self._read_int(col_data_offset, int_len) - self._column_data_offsets[i] = x + self._column_data_offsets.append(x) x = self._read_int(col_data_len, const.column_data_length_length) - self._column_data_lengths[i] = x + self._column_data_lengths.append(x) x = self._read_int(col_types, const.column_type_length) - if x == 1: - self.column_types[i] = b'd' - else: - self.column_types[i] = b's' + self._column_types.append(b'd' if x == 1 else b's') def _process_columnlist_subheader(self, offset, length): # unknown purpose @@ -586,7 +593,7 @@ def _process_format_subheader(self, offset, length): col.name = self.column_names[current_column_number] col.label = column_label col.format = column_format - col.ctype = self.column_types[current_column_number] + col.ctype = self._column_types[current_column_number] col.length = self._column_data_lengths[current_column_number] self.column_formats.append(column_format) @@ -599,7 +606,7 @@ def read(self, nrows=None): elif nrows is None: nrows = self.row_count - if len(self.column_types) == 0: + if len(self._column_types) == 0: self.close() raise EmptyDataError("No columns to parse from file") @@ -610,8 +617,8 @@ def read(self, nrows=None): if nrows > m: nrows = m - nd = (self.column_types == b'd').sum() - ns = (self.column_types == b's').sum() + nd = self._column_types.count(b'd') + ns = self._column_types.count(b's') self._string_chunk = np.empty((ns, nrows), dtype=np.object) self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8) @@ -639,11 +646,13 @@ def _read_next_page(self): self._page_length)) self._read_page_header() - if self._current_page_type == const.page_meta_type: + page_type = self._current_page_type + if page_type == const.page_meta_type: self._process_page_metadata() - pt = [const.page_meta_type, const.page_data_type] - pt += [const.page_mix_types] - if self._current_page_type not in pt: + + is_data_page = page_type & const.page_data_type + pt = [const.page_meta_type] + const.page_mix_types + if not is_data_page and self._current_page_type not in pt: return self._read_next_page() return False @@ -660,7 +669,7 @@ def 
_chunk_to_dataframe(self):
 
             name = self.column_names[j]
 
-            if self.column_types[j] == b'd':
+            if self._column_types[j] == b'd':
                 rslt[name] = self._byte_chunk[jb, :].view(
                     dtype=self.byte_order + 'd')
                 rslt[name] = np.asarray(rslt[name], dtype=np.float64)
@@ -674,7 +683,7 @@ def _chunk_to_dataframe(self):
                     rslt[name] = pd.to_datetime(rslt[name], unit=unit,
                                                 origin="1960-01-01")
                 jb += 1
-            elif self.column_types[j] == b's':
+            elif self._column_types[j] == b's':
                 rslt[name] = self._string_chunk[js, :]
                 if self.convert_text and (self.encoding is not None):
                     rslt[name] = rslt[name].str.decode(
@@ -686,6 +695,6 @@ def _chunk_to_dataframe(self):
             else:
                 self.close()
                 raise ValueError("unknown column type %s" %
-                                 self.column_types[j])
+                                 self._column_types[j])
 
         return rslt
diff --git a/pandas/tests/io/sas/data/load_log.sas7bdat b/pandas/tests/io/sas/data/load_log.sas7bdat
new file mode 100644
index 0000000000000000000000000000000000000000..dc78925471baf4cc3dea8d568d27c59327c7d578
GIT binary patch
literal 589824
[589824-byte base85 binary literal omitted, along with the remaining many_columns.csv, many_columns.sas7bdat, and test_sas7bdat.py hunks of this patch, which are not recoverable from this copy]
zNZvudg}jHnmpn_JCohukCErhei2P0R!0YFPks%=vQQaKl)V)#G^pmei`T!`YEq;eMXPEwo9czJPO2BAYOhp z(0?Ev1>!0YFaL6&|3Ewn#8n_(UJ>X&5RU?J|1dam-Bo^$A=e=fY< zQXn1$;wlg?yFc2UIX(mNC=geHco~0-H)Hz)@hA{ifp{6$*ctr?;!z;30`W5L5oYus zh)03A3dGB}2c6M>ARYzcDiAN@Jt%T zHWBjo|86|U-~YQj$lw2CJXC?af0Orb^8W3~eg7usznuSa{>%AaKi_@zjrPc{S?q6-@7E^1IGK-(IO`T3n^zE~*r@o3;D) zpkLo?`KWo+{=Ju8e&xmXEjCnsC35U@RNKM2vNEp|zO6XeUYtM`hjDFJn!gtuC_Urw>p;k;IBGH zh(|u&<>Ctc`24*;;o_zLuq;hFs(=4C)W`9>or71~&o8QcIkqoE8V7#d`#h*>FTY=0 z&&QcukKNY~#Y5${F>i1?@4lU<-)~PJpQ3(zoco!t`+k|Vk?pyR@$ISBxOnCG@OGmg zUoQNy&Eqd>9_-GUUzpE%KYd(bJo|F*>YI`6q7sLzG{xbnjpF3KpHG`n#;v#X`;LB| zj&{I%h@VFu!0r7d z0re{FeZ5(7MmxX0&x(G#=Gy<{cf0bL{#@U%>%eyx;O4)HY(3Arf9+gtEZeweG(O#3 zq5h3}K20I2Rn-35aL;vAPV4vUsc-}8gMPi0a%#`z-jD8&lzE-_BS-JP-19K%={wy_ z-)Hq^!ETB2tNg0d?TwoA{`fZwU-mrAcR#=E`}d@8U*$^Y4|IrriVcHC?l&*&%ek?9Ps$JO_55s8`DKk^|Fl`+Hw)cvJ4*a!!M~aC=M~fM_v~4Dv%pP!*FNrD-Yocxe75xY zp6goX_GZD4Y<{!g<4nJG9G|cIemy^}{(L{&uYYs!SZ@}5Cql`9WI!??8ITM}2EH^I zsHw4=@)>=khYz9s4aHZT{;17$Kjb65U+m|0a~&6-ce`1Cr01VQ`+*;#e;?*qtsa;A zcAi`w^3eAJd_SI{_FNuvA7^rYxZH1_kE4DG-&d*nkzS?$$nL@CJx*kOU(9?}8LoKP z%*Ubf{2$}Xmn%QsFm5VwKaS|5I{#6gYMMU2y!-9&{e0Ry(dX8ya6Tyiyx)J+hj_`y z{dvUn8#;Sd{YZ~jAs^|@KDqO$>GM9HTixb{w4V8K Date: Tue, 18 Sep 2018 13:18:08 +0100 Subject: [PATCH 62/86] DOC: improve doc string for .aggregate and .transform (#22641) --- ci/doctests.sh | 4 +- pandas/core/frame.py | 7 ++- pandas/core/generic.py | 102 +++++++++++++++++++++++++---------------- pandas/core/series.py | 12 +++-- 4 files changed, 76 insertions(+), 49 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..654bd57107904 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata -transform" + -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" if [ $? -ne "0" ]; then RET=1 @@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transform -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5261d0ea94c7e..8daef91849773 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -109,10 +109,9 @@ _shared_doc_kwargs = dict( axes='index, columns', klass='DataFrame', axes_single_arg="{0 or 'index', 1 or 'columns'}", - axis=""" - axis : {0 or 'index', 1 or 'columns'}, default 0 - - 0 or 'index': apply function to each column. - - 1 or 'columns': apply function to each row.""", + axis="""axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row.""", optional_by=""" by : str or list of str Name or list of names to sort by. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index cdc5b4310bce2..96a956764ce06 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4545,17 +4545,16 @@ def pipe(self, func, *args, **kwargs):
 
     Parameters
     ----------
-    func : function, string, dictionary, or list of string/functions
+    func : function, str, list or dict
         Function to use for aggregating the data. If a function, must either
-        work when passed a %(klass)s or when passed to %(klass)s.apply. For
-        a DataFrame, can pass a dict, if the keys are DataFrame column names.
+        work when passed a %(klass)s or when passed to %(klass)s.apply.
 
         Accepted combinations are:
 
-        - string function name.
-        - function.
-        - list of functions.
-        - dict of column names -> functions (or list of functions).
+        - function
+        - string function name
+        - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
+        - dict of axis labels -> functions, function names or list of such.
     %(axis)s
     *args
         Positional arguments to pass to `func`.
     **kwargs
         Keyword arguments to pass to `func`.
@@ -4564,7 +4563,11 @@ def pipe(self, func, *args, **kwargs):
 
     Returns
     -------
-    aggregated : %(klass)s
+    DataFrame, Series or scalar
+        if DataFrame.agg is called with a single function, returns a Series
+        if DataFrame.agg is called with several functions, returns a DataFrame
+        if Series.agg is called with a single function, returns a scalar
+        if Series.agg is called with several functions, returns a Series
 
     Notes
     -----
@@ -4574,50 +4577,71 @@ def pipe(self, func, *args, **kwargs):
     """)
 
     _shared_docs['transform'] = ("""
-    Call function producing a like-indexed %(klass)s
-    and return a %(klass)s with the transformed values
+    Call ``func`` on self producing a %(klass)s with transformed values
+    and that has the same axis length as self.
 
     .. versionadded:: 0.20.0
 
     Parameters
     ----------
-    func : callable, string, dictionary, or list of string/callables
-        To apply to column
+    func : function, str, list or dict
+        Function to use for transforming the data. If a function, must either
+        work when passed a %(klass)s or when passed to %(klass)s.apply.
 
-        Accepted Combinations are:
+        Accepted combinations are:
 
-        - string function name
         - function
-        - list of functions
-        - dict of column names -> functions (or list of functions)
+        - string function name
+        - list of functions and/or function names, e.g. ``[np.exp, 'sqrt']``
+        - dict of axis labels -> functions, function names or list of such.
+    %(axis)s
+    *args
+        Positional arguments to pass to `func`.
+    **kwargs
+        Keyword arguments to pass to `func`.
 
     Returns
     -------
-    transformed : %(klass)s
+    %(klass)s
+        A %(klass)s that must have the same length as self.
 
-    Examples
+    Raises
+    ------
+    ValueError : If the returned %(klass)s has a different length than self.
+
+    See Also
     --------
-    >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'],
-    ...                   index=pd.date_range('1/1/2000', periods=10))
-    df.iloc[3:7] = np.nan
-
-    >>> df.transform(lambda x: (x - x.mean()) / x.std())
-    A B C
-    2000-01-01 0.579457 1.236184 0.123424
-    2000-01-02 0.370357 -0.605875 -1.231325
-    2000-01-03 1.455756 -0.277446 0.288967
-    2000-01-04 NaN NaN NaN
-    2000-01-05 NaN NaN NaN
-    2000-01-06 NaN NaN NaN
-    2000-01-07 NaN NaN NaN
-    2000-01-08 -0.498658 1.274522 1.642524
-    2000-01-09 -0.540524 -1.012676 -0.828968
-    2000-01-10 -1.366388 -0.614710 0.005378
-
-    See also
+    %(klass)s.agg : Only perform aggregating type operations.
+    %(klass)s.apply : Invoke function on a %(klass)s.
+ + Examples -------- - pandas.%(klass)s.aggregate - pandas.%(klass)s.apply + >>> df = pd.DataFrame({'A': range(3), 'B': range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting %(klass)s must have the same length as the + input %(klass)s, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 """) # ---------------------------------------------------------------------- @@ -9401,7 +9425,7 @@ def ewm(self, com=None, span=None, halflife=None, alpha=None, cls.ewm = ewm - @Appender(_shared_docs['transform'] % _shared_doc_kwargs) + @Appender(_shared_docs['transform'] % dict(axis="", **_shared_doc_kwargs)) def transform(self, func, *args, **kwargs): result = self.agg(func, *args, **kwargs) if is_scalar(result) or len(result) != len(self): diff --git a/pandas/core/series.py b/pandas/core/series.py index ba34a3e95e5d3..0268b8e9c3149 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -89,10 +89,8 @@ _shared_doc_kwargs = dict( axes='index', klass='Series', axes_single_arg="{0 or 'index'}", - axis=""" - axis : {0 or 'index'} - Parameter needed for compatibility with DataFrame. - """, + axis="""axis : {0 or 'index'} + Parameter needed for compatibility with DataFrame.""", inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", unique='np.ndarray', duplicated='Series', @@ -3097,6 +3095,12 @@ def aggregate(self, func, axis=0, *args, **kwargs): agg = aggregate + @Appender(generic._shared_docs['transform'] % _shared_doc_kwargs) + def transform(self, func, axis=0, *args, **kwargs): + # Validate the axis parameter + self._get_axis_number(axis) + return super(Series, self).transform(func, *args, **kwargs) + def apply(self, func, convert_dtype=True, args=(), **kwds): """ Invoke function on values of Series. 
Can be ufunc (a NumPy function From 48de0db2abc38185ace96b163b12507cda195779 Mon Sep 17 00:00:00 2001 From: Hannah Ferchland <32065449+HannahFerch@users.noreply.github.com> Date: Tue, 18 Sep 2018 14:28:59 +0200 Subject: [PATCH 63/86] BUG: DataFrame.apply not adding a frequency if freq=None (#22150) (#22561) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/indexes/datetimes.py | 2 -- pandas/tests/frame/test_apply.py | 23 +++++++++++++++++++++++ 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cccbe47073fbd..8ae7f06352510 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -636,7 +636,7 @@ Datetimelike - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`, :issue:`22163`) - Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`) - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) -- +- Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 46741ab15aa31..9b00f21668bf5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -860,8 +860,6 @@ def union_many(self, others): if isinstance(this, DatetimeIndex): this._tz = timezones.tz_standardize(tz) - if this.freq is None: - this.freq = to_offset(this.inferred_freq) return this def join(self, other, how='left', level=None, return_indexers=False, diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 8beab3fb816df..1452e1ab8d98d 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -11,6 +11,8 @@ import warnings import numpy as np +from hypothesis import given +from hypothesis.strategies import composite, dates, integers, sampled_from from pandas import (notna, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) @@ -1155,3 +1157,24 @@ def test_agg_cython_table_raises(self, df, func, expected, axis): # GH21224 with pytest.raises(expected): df.agg(func, axis=axis) + + @composite + def indices(draw, max_length=5): + date = draw( + dates( + min_value=Timestamp.min.ceil("D").to_pydatetime().date(), + max_value=Timestamp.max.floor("D").to_pydatetime().date(), + ).map(Timestamp) + ) + periods = draw(integers(0, max_length)) + freq = draw(sampled_from(list("BDHTS"))) + dr = date_range(date, periods=periods, freq=freq) + return pd.DatetimeIndex(list(dr)) + + @given(index=indices(5), num_columns=integers(0, 5)) + def test_frequency_is_original(self, index, num_columns): + # GH22150 + original = index.copy() + df = DataFrame(True, index=index, columns=range(num_columns)) + df.apply(lambda x: x) + assert index.freq == original.freq From 3c6ad7d43b6d92127cf8a78d03c03cf04719a8b0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 18 Sep 2018 05:29:48 -0700 Subject: [PATCH 64/86] [ENH] pull in warning for dialect change from pandas-gbq. 
(#22557)

---
 doc/source/whatsnew/v0.24.0.txt |  6 +++---
 pandas/io/gbq.py                | 19 ++++++++++++++++++-
 pandas/tests/io/test_gbq.py     | 18 +++++++++++++++++-
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 8ae7f06352510..1dd8bd401face 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -170,9 +170,9 @@ Other Enhancements
 - :meth:`Series.droplevel` and :meth:`DataFrame.droplevel` are now implemented (:issue:`20342`)
 - Added support for reading from Google Cloud Storage via the ``gcsfs`` library (:issue:`19454`)
 - :func:`to_gbq` and :func:`read_gbq` signature and documentation updated to
-  reflect changes from the `Pandas-GBQ library version 0.5.0
-  `__.
-  (:issue:`21627`)
+  reflect changes from the `Pandas-GBQ library version 0.6.0
+  `__.
+  (:issue:`21627`, :issue:`22557`)
 - New method :meth:`HDFStore.walk` will recursively walk the group hierarchy of an HDF5 file (:issue:`10932`)
 - :func:`read_html` copies cell data across ``colspan`` and ``rowspan``, and it treats all-``th`` table rows as headers if ``header`` kwarg is not given and there is no ``thead`` (:issue:`17054`)
 - :meth:`Series.nlargest`, :meth:`Series.nsmallest`, :meth:`DataFrame.nlargest`, and :meth:`DataFrame.nsmallest` now accept the value ``"all"`` for the ``keep`` argument. This keeps all ties for the nth largest/smallest value (:issue:`16818`)
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 87a0e4d5d1747..46e1b13631f07 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -1,5 +1,7 @@
 """ Google BigQuery support """
 
+import warnings
+
 
 def _try_import():
     # since pandas is a dependency of pandas-gbq
@@ -23,7 +25,7 @@ def _try_import():
 
 def read_gbq(query, project_id=None, index_col=None, col_order=None,
              reauth=False, private_key=None, auth_local_webserver=False,
-             dialect='legacy', location=None, configuration=None,
+             dialect=None, location=None, configuration=None,
              verbose=None):
     """
     Load data from Google BigQuery.
@@ -65,6 +67,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
 
         *New in version 0.2.0 of pandas-gbq*.
     dialect : str, default 'legacy'
+        Note: The default value is changing to 'standard' in a future version.
+
         SQL syntax dialect to use. Value can be one of:
 
         ``'legacy'``
@@ -76,6 +80,8 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
             compliant with the SQL 2011 standard. For more information
             see `BigQuery Standard SQL Reference
             `__.
+
+        .. versionchanged:: 0.24.0
     location : str, optional
         Location where the query job should run. See the `BigQuery locations
         documentation
@@ -108,6 +114,17 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
     pandas.DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
     """
     pandas_gbq = _try_import()
+
+    if dialect is None:
+        dialect = "legacy"
+        warnings.warn(
+            'The default value for dialect is changing to "standard" in a '
+            'future version of pandas-gbq.
Pass in dialect="legacy" to ' + "disable this warning.", + FutureWarning, + stacklevel=2, + ) + return pandas_gbq.read_gbq( query, project_id=project_id, index_col=index_col, col_order=col_order, reauth=reauth, verbose=verbose, diff --git a/pandas/tests/io/test_gbq.py b/pandas/tests/io/test_gbq.py index dc6c319bb3366..68413d610e615 100644 --- a/pandas/tests/io/test_gbq.py +++ b/pandas/tests/io/test_gbq.py @@ -4,11 +4,17 @@ import platform import os +try: + from unittest import mock +except ImportError: + mock = pytest.importorskip("mock") + import numpy as np import pandas as pd from pandas import compat, DataFrame - from pandas.compat import range +import pandas.util.testing as tm + pandas_gbq = pytest.importorskip('pandas_gbq') @@ -93,6 +99,16 @@ def make_mixed_dataframe_v2(test_size): index=range(test_size)) +def test_read_gbq_without_dialect_warns_future_change(monkeypatch): + # Default dialect is changing to standard SQL. See: + # https://github.com/pydata/pandas-gbq/issues/195 + mock_read_gbq = mock.Mock() + mock_read_gbq.return_value = DataFrame([[1.0]]) + monkeypatch.setattr(pandas_gbq, 'read_gbq', mock_read_gbq) + with tm.assert_produces_warning(FutureWarning): + pd.read_gbq("SELECT 1") + + @pytest.mark.single class TestToGBQIntegrationWithServiceAccountKeyPath(object): From 43106715232204be7f219ccf8613cfab0f772e74 Mon Sep 17 00:00:00 2001 From: Jesper Dramsch Date: Tue, 18 Sep 2018 14:40:45 +0200 Subject: [PATCH 65/86] DOC: Updating str_repeat docstring (#22571) --- pandas/core/strings.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 08709d15c48bf..b46c6a4557ff3 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -678,20 +678,42 @@ def str_replace(arr, pat, repl, n=-1, case=None, flags=0, regex=True): def str_repeat(arr, repeats): """ - Duplicate each string in the Series/Index by indicated number - of times. + Duplicate each string in the Series or Index. Parameters ---------- - repeats : int or array - Same value for all (int) or different value per (array) + repeats : int or sequence of int + Same value for all (int) or different value per (sequence). Returns ------- - repeated : Series/Index of objects + Series or Index of object + Series or Index of repeated string objects specified by + input parameter repeats. 
+
+        Examples
+        --------
+        >>> s = pd.Series(['a', 'b', 'c'])
+        >>> s
+        0    a
+        1    b
+        2    c
+        dtype: object
+
+        Single int repeats string in Series
+
+        >>> s.str.repeat(repeats=2)
+        0    aa
+        1    bb
+        2    cc
+        dtype: object
+
+        Sequence of int repeats corresponding string in Series
+
+        >>> s.str.repeat(repeats=[1, 2, 3])
+        0      a
+        1     bb
+        2    ccc
+        dtype: object
     """
     if is_scalar(repeats):
-
         def rep(x):
             try:
                 return compat.binary_type.__mul__(x, repeats)

From 49f7fc73c9ea4286812e0b33260fe3f26a7486bc Mon Sep 17 00:00:00 2001
From: jbrockmendel 
Date: Tue, 18 Sep 2018 05:43:14 -0700
Subject: [PATCH 66/86] use fused types for reshape (#22454)

---
 pandas/_libs/reshape.pyx           | 96 +++++++++++++++++++++++++++---
 pandas/_libs/reshape_helper.pxi.in | 81 ------------------------
 setup.py                           |  3 +-
 3 files changed, 89 insertions(+), 91 deletions(-)
 delete mode 100644 pandas/_libs/reshape_helper.pxi.in

diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 8d7e314517ed8..9f4e67ca4e256 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -1,15 +1,95 @@
 # -*- coding: utf-8 -*-
-cimport cython
-from cython cimport Py_ssize_t
+import cython
+from cython import Py_ssize_t

-import numpy as np
-from numpy cimport (ndarray,
-                    int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
+from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t,
                     uint32_t, uint64_t, float32_t, float64_t)

-cdef double NaN = np.NaN
-cdef double nan = NaN

+ctypedef fused reshape_t:
+    uint8_t
+    uint16_t
+    uint32_t
+    uint64_t
+    int8_t
+    int16_t
+    int32_t
+    int64_t
+    float32_t
+    float64_t
+    object

-include "reshape_helper.pxi"
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def unstack(reshape_t[:, :] values, uint8_t[:] mask,
+            Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width,
+            reshape_t[:, :] new_values, uint8_t[:, :] new_mask):
+    """
+    transform long sorted_values to wide new_values
+
+    Parameters
+    ----------
+    values : typed ndarray
+    mask : boolean ndarray
+    stride : int
+    length : int
+    width : int
+    new_values : typed ndarray
+        result array
+    new_mask : boolean ndarray
+        result mask
+    """
+    cdef:
+        Py_ssize_t i, j, w, nulls, s, offset
+
+    if reshape_t is not object:
+        # evaluated at compile-time
+        with nogil:
+            for i in range(stride):
+
+                nulls = 0
+                for j in range(length):
+
+                    for w in range(width):
+
+                        offset = j * width + w
+
+                        if mask[offset]:
+                            s = i * width + w
+                            new_values[j, s] = values[offset - nulls, i]
+                            new_mask[j, s] = 1
+                        else:
+                            nulls += 1
+
+    else:
+        # object-dtype, identical to above but we cannot use nogil
+        for i in range(stride):
+
+            nulls = 0
+            for j in range(length):
+
+                for w in range(width):
+
+                    offset = j * width + w
+
+                    if mask[offset]:
+                        s = i * width + w
+                        new_values[j, s] = values[offset - nulls, i]
+                        new_mask[j, s] = 1
+                    else:
+                        nulls += 1
+
+
+unstack_uint8 = unstack["uint8_t"]
+unstack_uint16 = unstack["uint16_t"]
+unstack_uint32 = unstack["uint32_t"]
+unstack_uint64 = unstack["uint64_t"]
+unstack_int8 = unstack["int8_t"]
+unstack_int16 = unstack["int16_t"]
+unstack_int32 = unstack["int32_t"]
+unstack_int64 = unstack["int64_t"]
+unstack_float32 = unstack["float32_t"]
+unstack_float64 = unstack["float64_t"]
+unstack_object = unstack["object"]
diff --git a/pandas/_libs/reshape_helper.pxi.in b/pandas/_libs/reshape_helper.pxi.in
deleted file mode 100644
index bb9a5977f8b45..0000000000000
--- a/pandas/_libs/reshape_helper.pxi.in
+++ /dev/null
@@ -1,81 +0,0 @@
-"""
-Template for each `dtype` helper function for take
-
-WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
-"""
-
-# 
---------------------------------------------------------------------- -# reshape -# ---------------------------------------------------------------------- - -{{py: - -# name, c_type -dtypes = [('uint8', 'uint8_t'), - ('uint16', 'uint16_t'), - ('uint32', 'uint32_t'), - ('uint64', 'uint64_t'), - ('int8', 'int8_t'), - ('int16', 'int16_t'), - ('int32', 'int32_t'), - ('int64', 'int64_t'), - ('float32', 'float32_t'), - ('float64', 'float64_t'), - ('object', 'object')] -}} - -{{for dtype, c_type in dtypes}} - - -@cython.wraparound(False) -@cython.boundscheck(False) -def unstack_{{dtype}}(ndarray[{{c_type}}, ndim=2] values, - ndarray[uint8_t, ndim=1] mask, - Py_ssize_t stride, - Py_ssize_t length, - Py_ssize_t width, - ndarray[{{c_type}}, ndim=2] new_values, - ndarray[uint8_t, ndim=2] new_mask): - """ - transform long sorted_values to wide new_values - - Parameters - ---------- - values : typed ndarray - mask : boolean ndarray - stride : int - length : int - width : int - new_values : typed ndarray - result array - new_mask : boolean ndarray - result mask - - """ - - cdef: - Py_ssize_t i, j, w, nulls, s, offset - - {{if dtype == 'object'}} - if True: - {{else}} - with nogil: - {{endif}} - - for i in range(stride): - - nulls = 0 - for j in range(length): - - for w in range(width): - - offset = j * width + w - - if mask[offset]: - s = i * width + w - new_values[j, s] = values[offset - nulls, i] - new_mask[j, s] = 1 - else: - nulls += 1 - -{{endfor}} diff --git a/setup.py b/setup.py index 19438d950e8a7..2aca048dcd4fb 100755 --- a/setup.py +++ b/setup.py @@ -77,7 +77,6 @@ def is_platform_windows(): '_libs/algos_rank_helper.pxi.in'], 'groupby': ['_libs/groupby_helper.pxi.in'], 'join': ['_libs/join_helper.pxi.in', '_libs/join_func_helper.pxi.in'], - 'reshape': ['_libs/reshape_helper.pxi.in'], 'hashtable': ['_libs/hashtable_class_helper.pxi.in', '_libs/hashtable_func_helper.pxi.in'], 'index': ['_libs/index_class_helper.pxi.in'], @@ -558,7 +557,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'): 'include': []}, '_libs.reshape': { 'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, + 'depends': []}, '_libs.skiplist': { 'pyxfile': '_libs/skiplist', 'depends': ['pandas/_libs/src/skiplist.h']}, From c15d8c052b5365e4338bc8aa5928b5692be4c244 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 18 Sep 2018 05:44:59 -0700 Subject: [PATCH 67/86] use fused types for parts of algos_common_helper (#22452) --- pandas/_libs/algos.pyx | 517 ++++++++++++++++++++++++ pandas/_libs/algos_common_helper.pxi.in | 437 -------------------- 2 files changed, 517 insertions(+), 437 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 415e7026e09c8..d2914dc8ac751 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -353,6 +353,523 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1): return result +# ---------------------------------------------------------------------- + +ctypedef fused algos_t: + float64_t + float32_t + object + int32_t + int64_t + uint64_t + uint8_t + + +# TODO: unused; needed? +@cython.wraparound(False) +@cython.boundscheck(False) +cpdef map_indices(ndarray[algos_t] index): + """ + Produce a dict mapping the values of the input array to their respective + locations. + + Example: + array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} + + Better to do this with Cython because of the enormous speed boost. 
+    """
+    cdef:
+        Py_ssize_t i, length
+        dict result = {}
+
+    length = len(index)
+
+    for i in range(length):
+        result[index[i]] = i
+
+    return result
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+    cdef:
+        Py_ssize_t i, j, nleft, nright
+        ndarray[int64_t, ndim=1] indexer
+        algos_t cur, next
+        int lim, fill_count = 0
+
+    nleft = len(old)
+    nright = len(new)
+    indexer = np.empty(nright, dtype=np.int64)
+    indexer.fill(-1)
+
+    if limit is None:
+        lim = nright
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    if nleft == 0 or nright == 0 or new[nright - 1] < old[0]:
+        return indexer
+
+    i = j = 0
+
+    cur = old[0]
+
+    while j <= nright - 1 and new[j] < cur:
+        j += 1
+
+    while True:
+        if j == nright:
+            break
+
+        if i == nleft - 1:
+            while j < nright:
+                if new[j] == cur:
+                    indexer[j] = i
+                elif new[j] > cur and fill_count < lim:
+                    indexer[j] = i
+                    fill_count += 1
+                j += 1
+            break
+
+        next = old[i + 1]
+
+        while j < nright and cur <= new[j] < next:
+            if new[j] == cur:
+                indexer[j] = i
+            elif fill_count < lim:
+                indexer[j] = i
+                fill_count += 1
+            j += 1
+
+        fill_count = 0
+        i += 1
+        cur = next
+
+    return indexer
+
+
+pad_float64 = pad["float64_t"]
+pad_float32 = pad["float32_t"]
+pad_object = pad["object"]
+pad_int64 = pad["int64_t"]
+pad_int32 = pad["int32_t"]
+pad_uint64 = pad["uint64_t"]
+pad_bool = pad["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad_inplace(ndarray[algos_t] values,
+                ndarray[uint8_t, cast=True] mask,
+                limit=None):
+    cdef:
+        Py_ssize_t i, N
+        algos_t val
+        int lim, fill_count = 0
+
+    N = len(values)
+
+    # GH#2778
+    if N == 0:
+        return
+
+    if limit is None:
+        lim = N
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    val = values[0]
+    for i in range(N):
+        if mask[i]:
+            if fill_count >= lim:
+                continue
+            fill_count += 1
+            values[i] = val
+        else:
+            fill_count = 0
+            val = values[i]
+
+
+pad_inplace_float64 = pad_inplace["float64_t"]
+pad_inplace_float32 = pad_inplace["float32_t"]
+pad_inplace_object = pad_inplace["object"]
+pad_inplace_int64 = pad_inplace["int64_t"]
+pad_inplace_int32 = pad_inplace["int32_t"]
+pad_inplace_uint64 = pad_inplace["uint64_t"]
+pad_inplace_bool = pad_inplace["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def pad_2d_inplace(ndarray[algos_t, ndim=2] values,
+                   ndarray[uint8_t, ndim=2] mask,
+                   limit=None):
+    cdef:
+        Py_ssize_t i, j, N, K
+        algos_t val
+        int lim, fill_count = 0
+
+    K, N = (<object> values).shape
+
+    # GH#2778
+    if N == 0:
+        return
+
+    if limit is None:
+        lim = N
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    for j in range(K):
+        fill_count = 0
+        val = values[j, 0]
+        for i in range(N):
+            if mask[j, i]:
+                if fill_count >= lim:
+                    continue
+                fill_count += 1
+                values[j, i] = val
+            else:
+                fill_count = 0
+                val = values[j, i]
+
+
+pad_2d_inplace_float64 = pad_2d_inplace["float64_t"]
+pad_2d_inplace_float32 = pad_2d_inplace["float32_t"]
+pad_2d_inplace_object = pad_2d_inplace["object"]
+pad_2d_inplace_int64 = pad_2d_inplace["int64_t"]
+pad_2d_inplace_int32 = pad_2d_inplace["int32_t"]
+pad_2d_inplace_uint64 = pad_2d_inplace["uint64_t"]
+pad_2d_inplace_bool = pad_2d_inplace["uint8_t"]
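As a reading aid, a minimal sketch of what the fused-type ``pad`` specializations above compute once the extension is built: a forward-fill indexer that maps each element of ``new`` to the position of the last ``old`` value less than or equal to it, with ``-1`` where nothing qualifies. The arrays here are hypothetical and the exact repr may differ:

    >>> import numpy as np
    >>> from pandas._libs import algos
    >>> old = np.array([1.0, 3.0, 5.0])
    >>> new = np.array([0.5, 1.0, 2.0, 4.0, 6.0])
    >>> algos.pad_float64(old, new)  # forward-fill indexer into `old`
    array([-1,  0,  0,  1,  2])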
+
+
+"""
+Backfilling logic for generating fill vector
+
+Diagram of what's going on
+
+Old      New    Fill vector    Mask
+         .        0               1
+         .        0               1
+         .        0               1
+A        A        0               1
+         .        1               1
+         .        1               1
+         .        1               1
+         .        1               1
+         .        1               1
+B        B        1               1
+         .        2               1
+         .        2               1
+         .        2               1
+C        C        2               1
+         .                        0
+         .                        0
+D
+"""
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None):
+    cdef:
+        Py_ssize_t i, j, nleft, nright
+        ndarray[int64_t, ndim=1] indexer
+        algos_t cur, prev
+        int lim, fill_count = 0
+
+    nleft = len(old)
+    nright = len(new)
+    indexer = np.empty(nright, dtype=np.int64)
+    indexer.fill(-1)
+
+    if limit is None:
+        lim = nright
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
+        return indexer
+
+    i = nleft - 1
+    j = nright - 1
+
+    cur = old[nleft - 1]
+
+    while j >= 0 and new[j] > cur:
+        j -= 1
+
+    while True:
+        if j < 0:
+            break
+
+        if i == 0:
+            while j >= 0:
+                if new[j] == cur:
+                    indexer[j] = i
+                elif new[j] < cur and fill_count < lim:
+                    indexer[j] = i
+                    fill_count += 1
+                j -= 1
+            break
+
+        prev = old[i - 1]
+
+        while j >= 0 and prev < new[j] <= cur:
+            if new[j] == cur:
+                indexer[j] = i
+            elif new[j] < cur and fill_count < lim:
+                indexer[j] = i
+                fill_count += 1
+            j -= 1
+
+        fill_count = 0
+        i -= 1
+        cur = prev
+
+    return indexer
+
+
+backfill_float64 = backfill["float64_t"]
+backfill_float32 = backfill["float32_t"]
+backfill_object = backfill["object"]
+backfill_int64 = backfill["int64_t"]
+backfill_int32 = backfill["int32_t"]
+backfill_uint64 = backfill["uint64_t"]
+backfill_bool = backfill["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill_inplace(ndarray[algos_t] values,
+                     ndarray[uint8_t, cast=True] mask,
+                     limit=None):
+    cdef:
+        Py_ssize_t i, N
+        algos_t val
+        int lim, fill_count = 0
+
+    N = len(values)
+
+    # GH#2778
+    if N == 0:
+        return
+
+    if limit is None:
+        lim = N
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    val = values[N - 1]
+    for i in range(N - 1, -1, -1):
+        if mask[i]:
+            if fill_count >= lim:
+                continue
+            fill_count += 1
+            values[i] = val
+        else:
+            fill_count = 0
+            val = values[i]
+
+
+backfill_inplace_float64 = backfill_inplace["float64_t"]
+backfill_inplace_float32 = backfill_inplace["float32_t"]
+backfill_inplace_object = backfill_inplace["object"]
+backfill_inplace_int64 = backfill_inplace["int64_t"]
+backfill_inplace_int32 = backfill_inplace["int32_t"]
+backfill_inplace_uint64 = backfill_inplace["uint64_t"]
+backfill_inplace_bool = backfill_inplace["uint8_t"]
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def backfill_2d_inplace(ndarray[algos_t, ndim=2] values,
+                        ndarray[uint8_t, ndim=2] mask,
+                        limit=None):
+    cdef:
+        Py_ssize_t i, j, N, K
+        algos_t val
+        int lim, fill_count = 0
+
+    K, N = (<object> values).shape
+
+    # GH#2778
+    if N == 0:
+        return
+
+    if limit is None:
+        lim = N
+    else:
+        if not util.is_integer_object(limit):
+            raise ValueError('Limit must be an integer')
+        if limit < 1:
+            raise ValueError('Limit must be greater than 0')
+        lim = limit
+
+    for j in range(K):
+        fill_count = 0
+        val = values[j, N - 1]
+        for i in range(N - 1, -1, -1):
+            if mask[j, i]:
+                if fill_count >= lim:
+                    continue
+                fill_count += 1
+                values[j, i] = val
+            else:
+                fill_count 
= 0 + val = values[j, i] + + +backfill_2d_inplace_float64 = backfill_2d_inplace["float64_t"] +backfill_2d_inplace_float32 = backfill_2d_inplace["float32_t"] +backfill_2d_inplace_object = backfill_2d_inplace["object"] +backfill_2d_inplace_int64 = backfill_2d_inplace["int64_t"] +backfill_2d_inplace_int32 = backfill_2d_inplace["int32_t"] +backfill_2d_inplace_uint64 = backfill_2d_inplace["uint64_t"] +backfill_2d_inplace_bool = backfill_2d_inplace["uint8_t"] + + +@cython.wraparound(False) +@cython.boundscheck(False) +def arrmap(ndarray[algos_t] index, object func): + cdef: + Py_ssize_t length = index.shape[0] + Py_ssize_t i = 0 + ndarray[object] result = np.empty(length, dtype=np.object_) + + from pandas._libs.lib import maybe_convert_objects + + for i in range(length): + result[i] = func(index[i]) + + return maybe_convert_objects(result) + + +arrmap_float64 = arrmap["float64_t"] +arrmap_float32 = arrmap["float32_t"] +arrmap_object = arrmap["object"] +arrmap_int64 = arrmap["int64_t"] +arrmap_int32 = arrmap["int32_t"] +arrmap_uint64 = arrmap["uint64_t"] +arrmap_bool = arrmap["uint8_t"] + + +@cython.boundscheck(False) +@cython.wraparound(False) +def is_monotonic(ndarray[algos_t] arr, bint timelike): + """ + Returns + ------- + is_monotonic_inc, is_monotonic_dec, is_unique + """ + cdef: + Py_ssize_t i, n + algos_t prev, cur + bint is_monotonic_inc = 1 + bint is_monotonic_dec = 1 + bint is_unique = 1 + bint is_strict_monotonic = 1 + + n = len(arr) + + if n == 1: + if arr[0] != arr[0] or (timelike and arr[0] == iNaT): + # single value is NaN + return False, False, True + else: + return True, True, True + elif n < 2: + return True, True, True + + if timelike and arr[0] == iNaT: + return False, False, True + + if algos_t is not object: + with nogil: + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == iNaT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + else: + # object-dtype, identical to above except we cannot use `with nogil` + prev = arr[0] + for i in range(1, n): + cur = arr[i] + if timelike and cur == iNaT: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if cur < prev: + is_monotonic_inc = 0 + elif cur > prev: + is_monotonic_dec = 0 + elif cur == prev: + is_unique = 0 + else: + # cur or prev is NaN + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + if not is_monotonic_inc and not is_monotonic_dec: + is_monotonic_inc = 0 + is_monotonic_dec = 0 + break + prev = cur + + is_strict_monotonic = is_unique and (is_monotonic_inc or is_monotonic_dec) + return is_monotonic_inc, is_monotonic_dec, is_strict_monotonic + + +is_monotonic_float64 = is_monotonic["float64_t"] +is_monotonic_float32 = is_monotonic["float32_t"] +is_monotonic_object = is_monotonic["object"] +is_monotonic_int64 = is_monotonic["int64_t"] +is_monotonic_int32 = is_monotonic["int32_t"] +is_monotonic_uint64 = is_monotonic["uint64_t"] +is_monotonic_bool = is_monotonic["uint8_t"] + + # generated from template include "algos_common_helper.pxi" include "algos_rank_helper.pxi" diff --git a/pandas/_libs/algos_common_helper.pxi.in b/pandas/_libs/algos_common_helper.pxi.in index ed4c0e4c59609..40b1b1a282670 100644 --- a/pandas/_libs/algos_common_helper.pxi.in +++ 
b/pandas/_libs/algos_common_helper.pxi.in @@ -15,443 +15,6 @@ Template for each `dtype` helper function using 1-d template WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- -# 1-d template -#---------------------------------------------------------------------- - -{{py: - -# name, c_type, dtype, can_hold_na, nogil -dtypes = [('float64', 'float64_t', 'np.float64', True, True), - ('float32', 'float32_t', 'np.float32', True, True), - ('object', 'object', 'object', True, False), - ('int32', 'int32_t', 'np.int32', False, True), - ('int64', 'int64_t', 'np.int64', False, True), - ('uint64', 'uint64_t', 'np.uint64', False, True), - ('bool', 'uint8_t', 'np.bool', False, True)] - -def get_dispatch(dtypes): - - for name, c_type, dtype, can_hold_na, nogil in dtypes: - - nogil_str = 'with nogil:' if nogil else '' - tab = ' ' if nogil else '' - yield name, c_type, dtype, can_hold_na, nogil_str, tab -}} - -{{for name, c_type, dtype, can_hold_na, nogil_str, tab - in get_dispatch(dtypes)}} - - -@cython.wraparound(False) -@cython.boundscheck(False) -def map_indices_{{name}}(ndarray[{{c_type}}] index): - """ - Produce a dict mapping the values of the input array to their respective - locations. - - Example: - array(['hi', 'there']) --> {'hi' : 0 , 'there' : 1} - - Better to do this with Cython because of the enormous speed boost. - """ - cdef: - Py_ssize_t i, length - dict result = {} - - length = len(index) - - for i in range(length): - result[index[i]] = i - - return result - - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None): - cdef: - Py_ssize_t i, j, nleft, nright - ndarray[int64_t, ndim=1] indexer - {{c_type}} cur, next - int lim, fill_count = 0 - - nleft = len(old) - nright = len(new) - indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) - - if limit is None: - lim = nright - else: - if not util.is_integer_object(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - lim = limit - - if nleft == 0 or nright == 0 or new[nright - 1] < old[0]: - return indexer - - i = j = 0 - - cur = old[0] - - while j <= nright - 1 and new[j] < cur: - j += 1 - - while True: - if j == nright: - break - - if i == nleft - 1: - while j < nright: - if new[j] == cur: - indexer[j] = i - elif new[j] > cur and fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - break - - next = old[i + 1] - - while j < nright and cur <= new[j] < next: - if new[j] == cur: - indexer[j] = i - elif fill_count < lim: - indexer[j] = i - fill_count += 1 - j += 1 - - fill_count = 0 - i += 1 - cur = next - - return indexer - - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_inplace_{{name}}(ndarray[{{c_type}}] values, - ndarray[uint8_t, cast=True] mask, - limit=None): - cdef: - Py_ssize_t i, N - {{c_type}} val - int lim, fill_count = 0 - - N = len(values) - - # GH 2778 - if N == 0: - return - - if limit is None: - lim = N - else: - if not util.is_integer_object(limit): - raise ValueError('Limit must be an integer') - if limit < 1: - raise ValueError('Limit must be greater than 0') - lim = limit - - val = values[0] - for i in range(N): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - else: - fill_count = 0 - val = values[i] - - -@cython.boundscheck(False) -@cython.wraparound(False) -def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, 
ndim=2] values,
-                            ndarray[uint8_t, ndim=2] mask,
-                            limit=None):
-    cdef:
-        Py_ssize_t i, j, N, K
-        {{c_type}} val
-        int lim, fill_count = 0
-
-    K, N = (<object> values).shape
-
-    # GH 2778
-    if N == 0:
-        return
-
-    if limit is None:
-        lim = N
-    else:
-        if not util.is_integer_object(limit):
-            raise ValueError('Limit must be an integer')
-        if limit < 1:
-            raise ValueError('Limit must be greater than 0')
-        lim = limit
-
-    for j in range(K):
-        fill_count = 0
-        val = values[j, 0]
-        for i in range(N):
-            if mask[j, i]:
-                if fill_count >= lim:
-                    continue
-                fill_count += 1
-                values[j, i] = val
-            else:
-                fill_count = 0
-                val = values[j, i]
-
-"""
-Backfilling logic for generating fill vector
-
-Diagram of what's going on
-
-Old      New    Fill vector    Mask
-         .        0               1
-         .        0               1
-         .        0               1
-A        A        0               1
-         .        1               1
-         .        1               1
-         .        1               1
-         .        1               1
-         .        1               1
-B        B        1               1
-         .        2               1
-         .        2               1
-         .        2               1
-C        C        2               1
-         .                        0
-         .                        0
-D
-"""
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
-                      limit=None):
-    cdef:
-        Py_ssize_t i, j, nleft, nright
-        ndarray[int64_t, ndim=1] indexer
-        {{c_type}} cur, prev
-        int lim, fill_count = 0
-
-    nleft = len(old)
-    nright = len(new)
-    indexer = np.empty(nright, dtype=np.int64)
-    indexer.fill(-1)
-
-    if limit is None:
-        lim = nright
-    else:
-        if not util.is_integer_object(limit):
-            raise ValueError('Limit must be an integer')
-        if limit < 1:
-            raise ValueError('Limit must be greater than 0')
-        lim = limit
-
-    if nleft == 0 or nright == 0 or new[0] > old[nleft - 1]:
-        return indexer
-
-    i = nleft - 1
-    j = nright - 1
-
-    cur = old[nleft - 1]
-
-    while j >= 0 and new[j] > cur:
-        j -= 1
-
-    while True:
-        if j < 0:
-            break
-
-        if i == 0:
-            while j >= 0:
-                if new[j] == cur:
-                    indexer[j] = i
-                elif new[j] < cur and fill_count < lim:
-                    indexer[j] = i
-                    fill_count += 1
-                j -= 1
-            break
-
-        prev = old[i - 1]
-
-        while j >= 0 and prev < new[j] <= cur:
-            if new[j] == cur:
-                indexer[j] = i
-            elif new[j] < cur and fill_count < lim:
-                indexer[j] = i
-                fill_count += 1
-            j -= 1
-
-        fill_count = 0
-        i -= 1
-        cur = prev
-
-    return indexer
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
-                              ndarray[uint8_t, cast=True] mask,
-                              limit=None):
-    cdef:
-        Py_ssize_t i, N
-        {{c_type}} val
-        int lim, fill_count = 0
-
-    N = len(values)
-
-    # GH 2778
-    if N == 0:
-        return
-
-    if limit is None:
-        lim = N
-    else:
-        if not util.is_integer_object(limit):
-            raise ValueError('Limit must be an integer')
-        if limit < 1:
-            raise ValueError('Limit must be greater than 0')
-        lim = limit
-
-    val = values[N - 1]
-    for i in range(N - 1, -1, -1):
-        if mask[i]:
-            if fill_count >= lim:
-                continue
-            fill_count += 1
-            values[i] = val
-        else:
-            fill_count = 0
-            val = values[i]
-
-
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
-                                 ndarray[uint8_t, ndim=2] mask,
-                                 limit=None):
-    cdef:
-        Py_ssize_t i, j, N, K
-        {{c_type}} val
-        int lim, fill_count = 0
-
-    K, N = (<object> values).shape
-
-    # GH 2778
-    if N == 0:
-        return
-
-    if limit is None:
-        lim = N
-    else:
-        if not util.is_integer_object(limit):
-            raise ValueError('Limit must be an integer')
-        if limit < 1:
-            raise ValueError('Limit must be greater than 0')
-        lim = limit
-
-    for j in range(K):
-        fill_count = 0
-        val = values[j, N - 1]
-        for i in range(N - 1, -1, -1):
-            if mask[j, i]:
-                if fill_count >= lim:
-                    continue
-                fill_count += 1
-                values[j, i] = val
-            else:
-                fill_count = 0
-                val = values[j, i]
-
- 
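For symmetry, a sketch of the ``backfill`` counterpart (deleted here as a template and re-added above as a fused-type function): each element of ``new`` maps to the first ``old`` value greater than or equal to it, the mirror image of ``pad``. Again the arrays are hypothetical and the repr is indicative only:

    >>> algos.backfill_float64(np.array([1.0, 3.0, 5.0]),
    ...                        np.array([0.5, 2.0, 4.0, 6.0]))
    array([ 0,  1,  2, -1])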
-@cython.boundscheck(False)
-@cython.wraparound(False)
-def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
-    """
-    Returns
-    -------
-    is_monotonic_inc, is_monotonic_dec, is_unique
-    """
-    cdef:
-        Py_ssize_t i, n
-        {{c_type}} prev, cur
-        bint is_monotonic_inc = 1
-        bint is_monotonic_dec = 1
-        bint is_unique = 1
-
-    n = len(arr)
-
-    if n == 1:
-        if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
-            # single value is NaN
-            return False, False, True
-        else:
-            return True, True, True
-    elif n < 2:
-        return True, True, True
-
-    if timelike and arr[0] == iNaT:
-        return False, False, True
-
-    {{nogil_str}}
-    {{tab}}prev = arr[0]
-    {{tab}}for i in range(1, n):
-    {{tab}}    cur = arr[i]
-    {{tab}}    if timelike and cur == iNaT:
-    {{tab}}        is_monotonic_inc = 0
-    {{tab}}        is_monotonic_dec = 0
-    {{tab}}        break
-    {{tab}}    if cur < prev:
-    {{tab}}        is_monotonic_inc = 0
-    {{tab}}    elif cur > prev:
-    {{tab}}        is_monotonic_dec = 0
-    {{tab}}    elif cur == prev:
-    {{tab}}        is_unique = 0
-    {{tab}}    else:
-    {{tab}}        # cur or prev is NaN
-    {{tab}}        is_monotonic_inc = 0
-    {{tab}}        is_monotonic_dec = 0
-    {{tab}}        break
-    {{tab}}    if not is_monotonic_inc and not is_monotonic_dec:
-    {{tab}}        is_monotonic_inc = 0
-    {{tab}}        is_monotonic_dec = 0
-    {{tab}}        break
-    {{tab}}    prev = cur
-    return is_monotonic_inc, is_monotonic_dec, \
-        is_unique and (is_monotonic_inc or is_monotonic_dec)
-
-
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
-    cdef:
-        Py_ssize_t length = index.shape[0]
-        Py_ssize_t i = 0
-        ndarray[object] result = np.empty(length, dtype=np.object_)
-
-    from pandas._libs.lib import maybe_convert_objects
-
-    for i in range(length):
-        result[i] = func(index[i])
-
-    return maybe_convert_objects(result)
-
-{{endfor}}
-
-#----------------------------------------------------------------------
-# put template
-#----------------------------------------------------------------------
-
 {{py:

 # name, c_type, dest_type, dest_dtype

From d03ef77284c2b177d9ac888521bf67abe88de40a Mon Sep 17 00:00:00 2001
From: Luca Donini 
Date: Tue, 18 Sep 2018 13:46:36 +0100
Subject: [PATCH 68/86] DOC: Updating the docstring of Series.str.extractall
 (#22565)

---
 pandas/core/strings.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index b46c6a4557ff3..ed091ce4956bc 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -957,19 +957,23 @@ def str_extractall(arr, pat, flags=0):

     Parameters
     ----------
-    pat : string
-        Regular expression pattern with capturing groups
+    pat : str
+        Regular expression pattern with capturing groups.
     flags : int, default 0 (no flags)
-        re module flags, e.g. re.IGNORECASE
+        A ``re`` module flag, for example ``re.IGNORECASE``. These allow you
+        to modify regular expression matching for things like case, spaces,
+        etc. Multiple flags can be combined with the bitwise OR operator,
+        for example ``re.IGNORECASE | re.MULTILINE``.

     Returns
     -------
-    A DataFrame with one row for each match, and one column for each
-    group. Its rows have a MultiIndex with first levels that come from
-    the subject Series. The last level is named 'match' and indicates
-    the order in the subject. Any capture group names in regular
-    expression pat will be used for column names; otherwise capture
-    group numbers will be used.
+    DataFrame
+        A ``DataFrame`` with one row for each match, and one column for each
+        group. 
Its rows have a ``MultiIndex`` with first levels that come from + the subject ``Series``. The last level is named 'match' and indexes the + matches in each item of the ``Series``. Any capture group names in + regular expression pat will be used for column names; otherwise capture + group numbers will be used. See Also -------- @@ -1015,7 +1019,6 @@ def str_extractall(arr, pat, flags=0): 1 a 2 B 0 b 1 C 0 NaN 1 - """ regex = re.compile(pat, flags=flags) From 52a480d9377f36e98ae364ecc56f7fd084799367 Mon Sep 17 00:00:00 2001 From: realead Date: Tue, 18 Sep 2018 14:51:15 +0200 Subject: [PATCH 69/86] BUG: don't mangle NaN-float-values and pd.NaT (GH 22295) (#22296) --- doc/source/whatsnew/v0.24.0.txt | 5 ++- pandas/_libs/hashtable_class_helper.pxi.in | 52 +++------------------- pandas/conftest.py | 12 +++++ pandas/core/indexes/base.py | 5 --- pandas/core/indexes/numeric.py | 8 ++++ pandas/tests/indexes/test_base.py | 20 ++++++++- pandas/tests/test_algos.py | 30 +++++++++++++ 7 files changed, 79 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1dd8bd401face..30745f186edcc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -721,13 +721,16 @@ Indexing - Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`) - ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`) - Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`) +- :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys. However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`) Missing ^^^^^^^ - Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`) - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`) -- :func:`Series.isin` now treats all nans as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) +- :func:`Series.isin` now treats all NaN-floats as equal also for `np.object`-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`) +- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for `np.object`-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. 
(:issue:`22295`)
+

 MultiIndex
 ^^^^^^^^^^
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 550cabd5e3192..f294fd141a9f1 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -470,7 +470,6 @@ cdef class {{name}}HashTable(HashTable):
             int ret = 0
             {{dtype}}_t val
             khiter_t k
-            bint seen_na = 0
             {{name}}Vector uniques = {{name}}Vector()
             {{name}}VectorData *ud

@@ -479,22 +478,6 @@ cdef class {{name}}HashTable(HashTable):
         with nogil:
             for i in range(n):
                 val = values[i]
-                {{if float_group}}
-                if val == val:
-                    k = kh_get_{{dtype}}(self.table, val)
-                    if k == self.table.n_buckets:
-                        kh_put_{{dtype}}(self.table, val, &ret)
-                        if needs_resize(ud):
-                            with gil:
-                                uniques.resize()
-                        append_data_{{dtype}}(ud, val)
-                elif not seen_na:
-                    seen_na = 1
-                    if needs_resize(ud):
-                        with gil:
-                            uniques.resize()
-                    append_data_{{dtype}}(ud, NAN)
-                {{else}}
                 k = kh_get_{{dtype}}(self.table, val)
                 if k == self.table.n_buckets:
                     kh_put_{{dtype}}(self.table, val, &ret)
@@ -502,7 +485,6 @@ cdef class {{name}}HashTable(HashTable):
                     with gil:
                         uniques.resize()
                     append_data_{{dtype}}(ud, val)
-                {{endif}}
         return uniques.to_array()

 {{endfor}}
@@ -747,9 +729,6 @@ cdef class StringHashTable(HashTable):
         return np.asarray(labels)


-na_sentinel = object
-
-
 cdef class PyObjectHashTable(HashTable):

     def __init__(self, size_hint=1):
@@ -767,8 +746,7 @@ cdef class PyObjectHashTable(HashTable):
     def __contains__(self, object key):
         cdef khiter_t k
         hash(key)
-        if key != key or key is None:
-            key = na_sentinel
+
        k = kh_get_pymap(self.table, <PyObject*>key)
         return k != self.table.n_buckets

@@ -780,8 +758,7 @@ cdef class PyObjectHashTable(HashTable):
     cpdef get_item(self, object val):
         cdef khiter_t k
-        if val != val or val is None:
-            val = na_sentinel
+
        k = kh_get_pymap(self.table, <PyObject*>val)
         if k != self.table.n_buckets:
             return self.table.vals[k]

@@ -795,8 +772,7 @@ cdef class PyObjectHashTable(HashTable):
             char* buf

         hash(key)
-        if key != key or key is None:
-            key = na_sentinel
+
        k = kh_put_pymap(self.table, <PyObject*>key, &ret)
         # self.table.keys[k] = key
         if kh_exist_pymap(self.table, k):
@@ -814,8 +790,6 @@ cdef class PyObjectHashTable(HashTable):
         for i in range(n):
             val = values[i]
             hash(val)
-            if val != val or val is None:
-                val = na_sentinel

             k = kh_put_pymap(self.table, <PyObject*>val, &ret)
             self.table.vals[k] = i
@@ -831,8 +805,6 @@ cdef class PyObjectHashTable(HashTable):
         for i in range(n):
             val = values[i]
             hash(val)
-            if val != val or val is None:
-                val = na_sentinel

             k = kh_get_pymap(self.table, <PyObject*>val)
             if k != self.table.n_buckets:
@@ -849,24 +821,14 @@ cdef class PyObjectHashTable(HashTable):
             object val
             khiter_t k
             ObjectVector uniques = ObjectVector()
-            bint seen_na = 0

         for i in range(n):
             val = values[i]
             hash(val)
-
-            # `val is None` below is exception to prevent mangling of None and
-            # other NA values; note however that other NA values (ex: pd.NaT
-            # and np.nan) will still get mangled, so many not be a permanent
-            # solution; see GH 20866
-            if not checknull(val) or val is None:
-                k = kh_get_pymap(self.table, <PyObject*>val)
-                if k == self.table.n_buckets:
-                    kh_put_pymap(self.table, <PyObject*>val, &ret)
-                    uniques.append(val)
-            elif not seen_na:
-                seen_na = 1
-                uniques.append(nan)
+            k = kh_get_pymap(self.table, <PyObject*>val)
+            if k == self.table.n_buckets:
+                kh_put_pymap(self.table, <PyObject*>val, &ret)
+                uniques.append(val)

         return uniques.to_array()

diff --git a/pandas/conftest.py b/pandas/conftest.py
index 28c24fc8c0640..621de3ffd4b12 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -286,6 +286,18 @@ def 
nulls_fixture(request): nulls_fixture2 = nulls_fixture # Generate cartesian product of nulls_fixture +@pytest.fixture(params=[None, np.nan, pd.NaT]) +def unique_nulls_fixture(request): + """ + Fixture for each null type in pandas, each null type exactly once + """ + return request.param + + +# Generate cartesian product of unique_nulls_fixture: +unique_nulls_fixture2 = unique_nulls_fixture + + TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific', 'dateutil/Asia/Singapore'] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ca381160de0df..487d3975a6219 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3109,7 +3109,6 @@ def get_loc(self, key, method=None, tolerance=None): return self._engine.get_loc(key) except KeyError: return self._engine.get_loc(self._maybe_cast_indexer(key)) - indexer = self.get_indexer([key], method=method, tolerance=tolerance) if indexer.ndim > 1 or indexer.size > 1: raise TypeError('get_loc requires scalar valued input') @@ -4475,10 +4474,6 @@ def insert(self, loc, item): ------- new_index : Index """ - if is_scalar(item) and isna(item): - # GH 18295 - item = self._na_value - _self = np.asarray(self) item = self._coerce_scalar_to_index(item)._ndarray_values idx = np.concatenate((_self[:loc], item, _self[loc:])) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index e0627432cbc2e..8d616468a87d9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -9,6 +9,7 @@ is_bool, is_bool_dtype, is_scalar) +from pandas.core.dtypes.missing import isna from pandas import compat from pandas.core import algorithms @@ -114,6 +115,13 @@ def is_all_dates(self): """ return False + @Appender(Index.insert.__doc__) + def insert(self, loc, item): + # treat NA values as nans: + if is_scalar(item) and isna(item): + item = self._na_value + return super(NumericIndex, self).insert(loc, item) + _num_index_shared_docs['class_descr'] = """ Immutable ndarray implementing an ordered, sliceable set. 
The basic object diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 755b3cc7f1dca..eab04419fe939 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -560,8 +560,9 @@ def test_insert(self): tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a')) def test_insert_missing(self, nulls_fixture): - # GH 18295 (test missing) - expected = Index(['a', np.nan, 'b', 'c']) + # GH 22295 + # test there is no mangling of NA values + expected = Index(['a', nulls_fixture, 'b', 'c']) result = Index(list('abc')).insert(1, nulls_fixture) tm.assert_index_equal(result, expected) @@ -1364,6 +1365,21 @@ def test_get_indexer_numeric_index_boolean_target(self): expected = np.array([-1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_with_NA_values(self, unique_nulls_fixture, + unique_nulls_fixture2): + # GH 22332 + # check pairwise, that no pair of na values + # is mangled + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values are not unique + arr = np.array([unique_nulls_fixture, + unique_nulls_fixture2], dtype=np.object) + index = pd.Index(arr, dtype=np.object) + result = index.get_indexer([unique_nulls_fixture, + unique_nulls_fixture2, 'Unknown']) + expected = np.array([0, 1, -1], dtype=np.int64) + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize("method", [None, 'pad', 'backfill', 'nearest']) def test_get_loc(self, method): index = pd.Index([0, 1, 2]) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 64d2e155aa9a9..b2ddbf715b480 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -520,6 +520,36 @@ def test_different_nans(self): expected = np.array([np.nan]) tm.assert_numpy_array_equal(result, expected) + def test_first_nan_kept(self): + # GH 22295 + # create different nans from bit-patterns: + bits_for_nan1 = 0xfff8000000000001 + bits_for_nan2 = 0x7ff8000000000001 + NAN1 = struct.unpack("d", struct.pack("=Q", bits_for_nan1))[0] + NAN2 = struct.unpack("d", struct.pack("=Q", bits_for_nan2))[0] + assert NAN1 != NAN1 + assert NAN2 != NAN2 + for el_type in [np.float64, np.object]: + a = np.array([NAN1, NAN2], dtype=el_type) + result = pd.unique(a) + assert result.size == 1 + # use bit patterns to identify which nan was kept: + result_nan_bits = struct.unpack("=Q", + struct.pack("d", result[0]))[0] + assert result_nan_bits == bits_for_nan1 + + def test_do_not_mangle_na_values(self, unique_nulls_fixture, + unique_nulls_fixture2): + # GH 22295 + if unique_nulls_fixture is unique_nulls_fixture2: + return # skip it, values not unique + a = np.array([unique_nulls_fixture, + unique_nulls_fixture2], dtype=np.object) + result = pd.unique(a) + assert result.size == 2 + assert a[0] is unique_nulls_fixture + assert a[1] is unique_nulls_fixture2 + class TestIsin(object): From 99353050321e33f7a7750b282179ed2ecad53daa Mon Sep 17 00:00:00 2001 From: "SEUNG HOON, SHIN" Date: Tue, 18 Sep 2018 21:54:31 +0900 Subject: [PATCH 70/86] DOC: Expose ExcelWriter as part of the Generated API (#22359) --- doc/source/api.rst | 6 ++++++ pandas/core/generic.py | 14 ++++++++++---- pandas/io/excel.py | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 4 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 9c3770a497cf8..e4b055c14ec27 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -61,6 +61,12 @@ Excel read_excel ExcelFile.parse +.. 
autosummary::
+   :toctree: generated/
+   :template: autosummary/class_without_autosummary.rst
+
+   ExcelWriter
+

 JSON
 ~~~~

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 96a956764ce06..373830ec7892e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1979,11 +1979,17 @@ def _repr_latex_(self):
     If you wish to write to more than one sheet in the workbook, it is
     necessary to specify an ExcelWriter object:

-    >>> writer = pd.ExcelWriter('output2.xlsx', engine='xlsxwriter')
-    >>> df1.to_excel(writer, sheet_name='Sheet1')
     >>> df2 = df1.copy()
-    >>> df2.to_excel(writer, sheet_name='Sheet2')
-    >>> writer.save()
+    >>> with pd.ExcelWriter('output.xlsx') as writer:
+    ...     df1.to_excel(writer, sheet_name='Sheet_name_1')
+    ...     df2.to_excel(writer, sheet_name='Sheet_name_2')
+
+    To set the library that is used to write the Excel file,
+    you can pass the `engine` keyword (the default engine is
+    automatically chosen depending on the file extension):
+
+    >>> df1.to_excel('output1.xlsx', engine='xlsxwriter')
+
     """

     def to_json(self, path_or_buf=None, orient=None, date_format=None,
diff --git a/pandas/io/excel.py b/pandas/io/excel.py
index e2db6643c5ef0..00b4c704c681b 100644
--- a/pandas/io/excel.py
+++ b/pandas/io/excel.py
@@ -824,8 +824,43 @@ class ExcelWriter(object):

     Notes
     -----
+    None of the methods and properties are considered public.
+
     For compatibility with CSV writers, ExcelWriter serializes lists
     and dicts to strings before writing.
+
+    Examples
+    --------
+    Default usage:
+
+    >>> with ExcelWriter('path_to_file.xlsx') as writer:
+    ...     df.to_excel(writer)
+
+    To write to separate sheets in a single file:
+
+    >>> with ExcelWriter('path_to_file.xlsx') as writer:
+    ...     df1.to_excel(writer, sheet_name='Sheet1')
+    ...     df2.to_excel(writer, sheet_name='Sheet2')
+
+    You can set the date format or datetime format:
+
+    >>> with ExcelWriter('path_to_file.xlsx',
+    ...                  date_format='YYYY-MM-DD',
+    ...                  datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
+    ...     df.to_excel(writer)
+
+    You can also append to an existing Excel file:
+
+    >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer:
+    ...     df.to_excel(writer, sheet_name='Sheet3')
+
+    Attributes
+    ----------
+    None
+
+    Methods
+    -------
+    None
     """

     # Defining an ExcelWriter implementation (see abstract methods for more...)

From bada2779632d3d5767827cfce4e10b41910b813c Mon Sep 17 00:00:00 2001
From: Thierry Moisan 
Date: Tue, 18 Sep 2018 09:00:17 -0400
Subject: [PATCH 71/86] Test in scripts/validate_docstrings.py that the short
 summary is always one line long (#22617)

---
 scripts/tests/test_validate_docstrings.py | 13 ++++++++++++-
 scripts/validate_docstrings.py            |  8 ++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
index 0c0757c6963d7..00496f771570b 100644
--- a/scripts/tests/test_validate_docstrings.py
+++ b/scripts/tests/test_validate_docstrings.py
@@ -362,6 +362,15 @@ def multi_line(self):
         which is not correct.
         """

+    def two_paragraph_multi_line(self):
+        """
+        Extends beyond one line
+        which is not correct.
+
+        Extends beyond one line, which in itself is correct but the
+        previous short summary should still be an issue. 
+ """ + class BadParameters(object): """ @@ -556,7 +565,9 @@ def test_bad_generic_functions(self, func): ('BadSummaries', 'no_capitalization', ('Summary must start with infinitive verb',)), ('BadSummaries', 'multi_line', - ('a short summary in a single line should be present',)), + ('Summary should fit in a single line.',)), + ('BadSummaries', 'two_paragraph_multi_line', + ('Summary should fit in a single line.',)), # Parameters tests ('BadParameters', 'missing_params', ('Parameters {**kwargs} not documented',)), diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 83bb382480eaa..790a62b53845b 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -163,10 +163,12 @@ def double_blank_lines(self): @property def summary(self): - if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1: - return '' return ' '.join(self.doc['Summary']) + @property + def num_summary_lines(self): + return len(self.doc['Summary']) + @property def extended_summary(self): if not self.doc['Extended Summary'] and len(self.doc['Summary']) > 1: @@ -452,6 +454,8 @@ def validate_one(func_name): errs.append('Summary must start with infinitive verb, ' 'not third person (e.g. use "Generate" instead of ' '"Generates")') + if doc.num_summary_lines > 1: + errs.append("Summary should fit in a single line.") if not doc.extended_summary: wrns.append('No extended summary found') From 4f000f5b7d3cda287d708bb07cac11fac95ac2e1 Mon Sep 17 00:00:00 2001 From: Ben Nelson Date: Tue, 18 Sep 2018 06:25:25 -0700 Subject: [PATCH 72/86] fix raise of TypeError when subtracting timedelta array (#22054) closes #21980 --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/tslibs/timedeltas.pyx | 8 ++- .../tests/scalar/timedelta/test_arithmetic.py | 65 +++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 30745f186edcc..1b8e5757a15fd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -646,6 +646,7 @@ Timedelta - Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) +- Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) - - diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 9b13ef5982396..9c8be1901d1dc 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -541,10 +541,12 @@ def _binary_op_method_timedeltalike(op, name): elif hasattr(other, 'dtype'): # nd-array like - if other.dtype.kind not in ['m', 'M']: - # raise rathering than letting numpy return wrong answer + if other.dtype.kind in ['m', 'M']: + return op(self.to_timedelta64(), other) + elif other.dtype.kind == 'O': + return np.array([op(self, x) for x in other]) + else: return NotImplemented - return op(self.to_timedelta64(), other) elif not _validate_ops_compat(other): return NotImplemented diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 9636c92ec22d5..fce1ef29235cc 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ 
b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -200,6 +200,57 @@ def test_td_rsub_numeric_raises(self): with pytest.raises(TypeError): 2.0 - td + def test_td_sub_timedeltalike_object_dtype_array(self): + # GH 21980 + arr = np.array([Timestamp('20130101 9:01'), + Timestamp('20121230 9:02')]) + exp = np.array([Timestamp('20121231 9:01'), + Timestamp('20121229 9:02')]) + res = arr - pd.Timedelta('1D') + tm.assert_numpy_array_equal(res, exp) + + def test_td_sub_mixed_most_timedeltalike_object_dtype_array(self): + # GH 21980 + now = pd.Timestamp.now() + arr = np.array([now, + pd.Timedelta('1D'), + np.timedelta64(2, 'h')]) + exp = np.array([now - pd.Timedelta('1D'), + pd.Timedelta('0D'), + np.timedelta64(2, 'h') - pd.Timedelta('1D')]) + res = arr - pd.Timedelta('1D') + tm.assert_numpy_array_equal(res, exp) + + def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self): + # GH 21980 + now = pd.Timestamp.now() + arr = np.array([now, + pd.Timedelta('1D'), + np.timedelta64(2, 'h')]) + with pytest.raises(TypeError): + pd.Timedelta('1D') - arr + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_timedeltalike_object_dtype_array(self, op): + # GH 21980 + arr = np.array([Timestamp('20130101 9:01'), + Timestamp('20121230 9:02')]) + exp = np.array([Timestamp('20130102 9:01'), + Timestamp('20121231 9:02')]) + res = op(arr, pd.Timedelta('1D')) + tm.assert_numpy_array_equal(res, exp) + + @pytest.mark.parametrize('op', [operator.add, ops.radd]) + def test_td_add_mixed_timedeltalike_object_dtype_array(self, op): + # GH 21980 + now = pd.Timestamp.now() + arr = np.array([now, + pd.Timedelta('1D')]) + exp = np.array([now + pd.Timedelta('1D'), + pd.Timedelta('2D')]) + res = op(arr, pd.Timedelta('1D')) + tm.assert_numpy_array_equal(res, exp) + class TestTimedeltaMultiplicationDivision(object): """ @@ -616,3 +667,17 @@ def test_rdivmod_invalid(self): with pytest.raises(TypeError): divmod(np.array([22, 24]), td) + + @pytest.mark.parametrize('op', [ + operator.mul, + ops.rmul, + operator.truediv, + ops.rdiv, + ops.rsub]) + @pytest.mark.parametrize('arr', [ + np.array([Timestamp('20130101 9:01'), Timestamp('20121230 9:02')]), + np.array([pd.Timestamp.now(), pd.Timedelta('1D')]) + ]) + def test_td_op_timedelta_timedeltalike_array(self, op, arr): + with pytest.raises(TypeError): + op(arr, pd.Timedelta('1D')) From 79b8763ac0994228105f840b7f60bc840e0d776b Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Tue, 18 Sep 2018 21:54:14 +0800 Subject: [PATCH 73/86] Bug: Logical operator of Series with Index (#22092) (#22293) * Fix bug #GH22092 * Update v0.24.0.txt * Update v0.24.0.txt * Update ops.py * Update test_operators.py * Update v0.24.0.txt * Update test_operators.py --- doc/source/whatsnew/v0.24.0.txt | 3 ++- pandas/core/ops.py | 2 +- pandas/tests/series/test_operators.py | 24 ++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 1b8e5757a15fd..39ed5d968707b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -803,7 +803,8 @@ Other - :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. 
(:issue:`21258`) - Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`) -- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax``. ``NaN`` values are also handled properly. (:issue:`21548`, :issue:`21526`) +- :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. +- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) - - - diff --git a/pandas/core/ops.py b/pandas/core/ops.py index ca9c2528f0aef..a7fc2839ea101 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1533,7 +1533,7 @@ def na_op(x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) - if isinstance(y, (np.ndarray, ABCSeries)): + if isinstance(y, (np.ndarray, ABCSeries, ABCIndexClass)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? else: diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 5e5e9c0895ccf..615f0c9247bd8 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -425,6 +425,30 @@ def test_comparison_flex_alignment_fill(self): exp = pd.Series([True, True, False, False], index=list('abcd')) assert_series_equal(left.gt(right, fill_value=0), exp) + def test_logical_ops_with_index(self): + # GH22092 + ser = Series([True, True, False, False]) + idx1 = Index([True, False, True, False]) + idx2 = Index([1, 0, 1, 0]) + + expected = Series([True, False, False, False]) + result1 = ser & idx1 + assert_series_equal(result1, expected) + result2 = ser & idx2 + assert_series_equal(result2, expected) + + expected = Series([True, True, True, False]) + result1 = ser | idx1 + assert_series_equal(result1, expected) + result2 = ser | idx2 + assert_series_equal(result2, expected) + + expected = Series([False, True, True, False]) + result1 = ser ^ idx1 + assert_series_equal(result1, expected) + result2 = ser ^ idx2 + assert_series_equal(result2, expected) + def test_ne(self): ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float) expected = [True, True, False, True, True] From 1aaefe5b042c8727ad257a44dcfe0b8c2d93d364 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Tue, 18 Sep 2018 09:58:22 -0400 Subject: [PATCH 74/86] DOC: Fix Series nsmallest and nlargest docstring/doctests (#22731) --- ci/doctests.sh | 2 +- pandas/core/series.py | 187 ++++++++++++++++++++++++++++++++---------- 2 files changed, 144 insertions(+), 45 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 654bd57107904..a941515fde4ae 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/series.py \ - -k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict" + -k"-nonzero -reindex -searchsorted -to_dict" if [ $? 
-ne "0" ]; then RET=1 diff --git a/pandas/core/series.py b/pandas/core/series.py index 0268b8e9c3149..8f69de973e7a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2741,17 +2741,20 @@ def nlargest(self, n=5, keep='first'): Parameters ---------- - n : int - Return this many descending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + - ``first`` : take the first occurrences based on the index order + - ``last`` : take the last occurrences based on the index order + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. Returns ------- - top_n : Series - The n largest values in the Series, in sorted order + Series + The `n` largest values in the Series, sorted in decreasing order. Notes ----- @@ -2760,23 +2763,70 @@ def nlargest(self, n=5, keep='first'): See Also -------- - Series.nsmallest + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. Examples -------- - >>> s = pd.Series(np.random.randn(10**6)) - >>> s.nlargest(10) # only sorts up to the N requested - 219921 4.644710 - 82124 4.608745 - 421689 4.564644 - 425277 4.447014 - 718691 4.414137 - 43154 4.403520 - 283187 4.313922 - 595519 4.273635 - 503969 4.250236 - 121637 4.240952 - dtype: float64 + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. + + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() @@ -2786,17 +2836,20 @@ def nsmallest(self, n=5, keep='first'): Parameters ---------- - n : int - Return this many ascending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + n : int, default 5 + Return this many ascending sorted values. 
+ keep : {'first', 'last', 'all'}, default 'first'
+ When there are duplicate values that cannot all fit in a
+ Series of `n` elements:
+ - ``first`` : take the first occurrences based on the index order
+ - ``last`` : take the last occurrences based on the index order
+ - ``all`` : keep all occurrences. This can result in a Series of
+ size larger than `n`.

 Returns
 -------
- bottom_n : Series
- The n smallest values in the Series, in sorted order
+ Series
+ The `n` smallest values in the Series, sorted in increasing order.

 Notes
 -----
@@ -2805,23 +2858,69 @@ def nsmallest(self, n=5, keep='first'):

 See Also
 --------
- Series.nlargest
+ Series.nlargest: Get the `n` largest elements.
+ Series.sort_values: Sort Series by values.
+ Series.head: Return the first `n` rows.

 Examples
 --------
- >>> s = pd.Series(np.random.randn(10**6))
- >>> s.nsmallest(10) # only sorts up to the N requested
- 288532 -4.954580
- 732345 -4.835960
- 64803 -4.812550
- 446457 -4.609998
- 501225 -4.483945
- 669476 -4.472935
- 973615 -4.401699
- 621279 -4.355126
- 773916 -4.347355
- 359919 -4.331927
- dtype: float64
+ >>> countries_population = {"Italy": 59000000, "France": 65000000,
+ ... "Brunei": 434000, "Malta": 434000,
+ ... "Maldives": 434000, "Iceland": 337000,
+ ... "Nauru": 11300, "Tuvalu": 11300,
+ ... "Anguilla": 11300, "Monserat": 5200}
+ >>> s = pd.Series(countries_population)
+ >>> s
+ Italy 59000000
+ France 65000000
+ Brunei 434000
+ Malta 434000
+ Maldives 434000
+ Iceland 337000
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ Monserat 5200
+ dtype: int64
+
+ The `n` smallest elements where ``n=5`` by default.
+
+ >>> s.nsmallest()
+ Monserat 5200
+ Nauru 11300
+ Tuvalu 11300
+ Anguilla 11300
+ Iceland 337000
+ dtype: int64
+
+ The `n` smallest elements where ``n=3``. Default `keep` value is
+ 'first' so Nauru and Tuvalu will be kept.
+
+ >>> s.nsmallest(3)
+ Monserat 5200
+ Nauru 11300
+ Tuvalu 11300
+ dtype: int64
+
+ The `n` smallest elements where ``n=3`` and keeping the last
+ duplicates. Anguilla and Tuvalu will be kept since they are the last
+ with value 11300 based on the index order.
+
+ >>> s.nsmallest(3, keep='last')
+ Monserat 5200
+ Anguilla 11300
+ Tuvalu 11300
+ dtype: int64
+
+ The `n` smallest elements where ``n=3`` with all duplicates kept. Note
+ that the returned Series has four elements due to the three duplicates.
+ + >>> s.nsmallest(3, keep='all') + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() From 9fe0fbcd5f1ef1ff776faab72ad257e1f194d5d2 Mon Sep 17 00:00:00 2001 From: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Tue, 18 Sep 2018 16:33:55 +0200 Subject: [PATCH 75/86] Fixturize tests/frame/test_api and tests/sparse/frame/test_frame (#22738) --- pandas/tests/frame/test_api.py | 183 +++++---- pandas/tests/sparse/frame/conftest.py | 116 ++++++ pandas/tests/sparse/frame/test_frame.py | 477 ++++++++++++------------ 3 files changed, 439 insertions(+), 337 deletions(-) create mode 100644 pandas/tests/sparse/frame/conftest.py diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 78a19029db567..35f2f566ef85e 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -24,8 +24,6 @@ import pandas.util.testing as tm -from pandas.tests.frame.common import TestData - class SharedWithSparse(object): """ @@ -43,57 +41,57 @@ def _assert_series_equal(self, left, right): """Dispatch to series class dependent assertion""" raise NotImplementedError - def test_copy_index_name_checking(self): + def test_copy_index_name_checking(self, float_frame): # don't want to be able to modify the index stored elsewhere after # making a copy for attr in ('index', 'columns'): - ind = getattr(self.frame, attr) + ind = getattr(float_frame, attr) ind.name = None - cp = self.frame.copy() + cp = float_frame.copy() getattr(cp, attr).name = 'foo' - assert getattr(self.frame, attr).name is None + assert getattr(float_frame, attr).name is None - def test_getitem_pop_assign_name(self): - s = self.frame['A'] + def test_getitem_pop_assign_name(self, float_frame): + s = float_frame['A'] assert s.name == 'A' - s = self.frame.pop('A') + s = float_frame.pop('A') assert s.name == 'A' - s = self.frame.loc[:, 'B'] + s = float_frame.loc[:, 'B'] assert s.name == 'B' s2 = s.loc[:] assert s2.name == 'B' - def test_get_value(self): - for idx in self.frame.index: - for col in self.frame.columns: + def test_get_value(self, float_frame): + for idx in float_frame.index: + for col in float_frame.columns: with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = self.frame.get_value(idx, col) - expected = self.frame[col][idx] + result = float_frame.get_value(idx, col) + expected = float_frame[col][idx] tm.assert_almost_equal(result, expected) - def test_add_prefix_suffix(self): - with_prefix = self.frame.add_prefix('foo#') - expected = pd.Index(['foo#%s' % c for c in self.frame.columns]) + def test_add_prefix_suffix(self, float_frame): + with_prefix = float_frame.add_prefix('foo#') + expected = pd.Index(['foo#%s' % c for c in float_frame.columns]) tm.assert_index_equal(with_prefix.columns, expected) - with_suffix = self.frame.add_suffix('#foo') - expected = pd.Index(['%s#foo' % c for c in self.frame.columns]) + with_suffix = float_frame.add_suffix('#foo') + expected = pd.Index(['%s#foo' % c for c in float_frame.columns]) tm.assert_index_equal(with_suffix.columns, expected) - with_pct_prefix = self.frame.add_prefix('%') - expected = pd.Index(['%{}'.format(c) for c in self.frame.columns]) + with_pct_prefix = float_frame.add_prefix('%') + expected = pd.Index(['%{}'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_prefix.columns, expected) - with_pct_suffix = self.frame.add_suffix('%') - expected = pd.Index(['{}%'.format(c) for c in 
self.frame.columns]) + with_pct_suffix = float_frame.add_suffix('%') + expected = pd.Index(['{}%'.format(c) for c in float_frame.columns]) tm.assert_index_equal(with_pct_suffix.columns, expected) - def test_get_axis(self): - f = self.frame + def test_get_axis(self, float_frame): + f = float_frame assert f._get_axis_number(0) == 0 assert f._get_axis_number(1) == 1 assert f._get_axis_number('index') == 0 @@ -118,13 +116,13 @@ def test_get_axis(self): tm.assert_raises_regex(ValueError, 'No axis named', f._get_axis_number, None) - def test_keys(self): - getkeys = self.frame.keys - assert getkeys() is self.frame.columns + def test_keys(self, float_frame): + getkeys = float_frame.keys + assert getkeys() is float_frame.columns - def test_column_contains_typeerror(self): + def test_column_contains_typeerror(self, float_frame): try: - self.frame.columns in self.frame + float_frame.columns in float_frame except TypeError: pass @@ -146,10 +144,10 @@ def test_tab_completion(self): assert key not in dir(df) assert isinstance(df.__getitem__('A'), pd.DataFrame) - def test_not_hashable(self): + def test_not_hashable(self, empty_frame): df = self.klass([1]) pytest.raises(TypeError, hash, df) - pytest.raises(TypeError, hash, self.empty) + pytest.raises(TypeError, hash, empty_frame) def test_new_empty_index(self): df1 = self.klass(randn(0, 3)) @@ -157,29 +155,29 @@ def test_new_empty_index(self): df1.index.name = 'foo' assert df2.index.name is None - def test_array_interface(self): + def test_array_interface(self, float_frame): with np.errstate(all='ignore'): - result = np.sqrt(self.frame) - assert isinstance(result, type(self.frame)) - assert result.index is self.frame.index - assert result.columns is self.frame.columns + result = np.sqrt(float_frame) + assert isinstance(result, type(float_frame)) + assert result.index is float_frame.index + assert result.columns is float_frame.columns - self._assert_frame_equal(result, self.frame.apply(np.sqrt)) + self._assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_get_agg_axis(self): - cols = self.frame._get_agg_axis(0) - assert cols is self.frame.columns + def test_get_agg_axis(self, float_frame): + cols = float_frame._get_agg_axis(0) + assert cols is float_frame.columns - idx = self.frame._get_agg_axis(1) - assert idx is self.frame.index + idx = float_frame._get_agg_axis(1) + assert idx is float_frame.index - pytest.raises(ValueError, self.frame._get_agg_axis, 2) + pytest.raises(ValueError, float_frame._get_agg_axis, 2) - def test_nonzero(self): - assert self.empty.empty + def test_nonzero(self, float_frame, float_string_frame, empty_frame): + assert empty_frame.empty - assert not self.frame.empty - assert not self.mixed_frame.empty + assert not float_frame.empty + assert not float_string_frame.empty # corner case df = DataFrame({'A': [1., 2., 3.], @@ -202,16 +200,16 @@ def test_items(self): assert isinstance(v, Series) assert (df[k] == v).all() - def test_iter(self): - assert tm.equalContents(list(self.frame), self.frame.columns) + def test_iter(self, float_frame): + assert tm.equalContents(list(float_frame), float_frame.columns) - def test_iterrows(self): - for k, v in self.frame.iterrows(): - exp = self.frame.loc[k] + def test_iterrows(self, float_frame, float_string_frame): + for k, v in float_frame.iterrows(): + exp = float_frame.loc[k] self._assert_series_equal(v, exp) - for k, v in self.mixed_frame.iterrows(): - exp = self.mixed_frame.loc[k] + for k, v in float_string_frame.iterrows(): + exp = float_string_frame.loc[k] 
self._assert_series_equal(v, exp) def test_iterrows_iso8601(self): @@ -226,11 +224,11 @@ def test_iterrows_iso8601(self): exp = s.loc[k] self._assert_series_equal(v, exp) - def test_itertuples(self): - for i, tup in enumerate(self.frame.itertuples()): + def test_itertuples(self, float_frame): + for i, tup in enumerate(float_frame.itertuples()): s = self.klass._constructor_sliced(tup[1:]) s.name = tup[0] - expected = self.frame.iloc[i, :].reset_index(drop=True) + expected = float_frame.iloc[i, :].reset_index(drop=True) self._assert_series_equal(s, expected) df = self.klass({'floats': np.random.randn(5), @@ -289,11 +287,11 @@ def test_sequence_like_with_categorical(self): for c, col in df.iteritems(): str(s) - def test_len(self): - assert len(self.frame) == len(self.frame.index) + def test_len(self, float_frame): + assert len(float_frame) == len(float_frame.index) - def test_values(self): - frame = self.frame + def test_values(self, float_frame, float_string_frame): + frame = float_frame arr = frame.values frame_cols = frame.columns @@ -306,20 +304,20 @@ def test_values(self): assert value == frame[col][i] # mixed type - arr = self.mixed_frame[['foo', 'A']].values + arr = float_string_frame[['foo', 'A']].values assert arr[0, 0] == 'bar' - df = self.klass({'real': [1, 2, 3], 'complex': [1j, 2j, 3j]}) + df = self.klass({'complex': [1j, 2j, 3j], 'real': [1, 2, 3]}) arr = df.values assert arr[0, 0] == 1j # single block corner case - arr = self.frame[['A', 'B']].values - expected = self.frame.reindex(columns=['A', 'B']).values + arr = float_frame[['A', 'B']].values + expected = float_frame.reindex(columns=['A', 'B']).values assert_almost_equal(arr, expected) - def test_transpose(self): - frame = self.frame + def test_transpose(self, float_frame): + frame = float_frame dft = frame.T for idx, series in compat.iteritems(dft): for col, value in compat.iteritems(series): @@ -343,8 +341,8 @@ def test_swapaxes(self): self._assert_frame_equal(df, df.swapaxes(0, 0)) pytest.raises(ValueError, df.swapaxes, 2, 5) - def test_axis_aliases(self): - f = self.frame + def test_axis_aliases(self, float_frame): + f = float_frame # reg name expected = f.sum(axis=0) @@ -361,23 +359,23 @@ def test_class_axis(self): assert pydoc.getdoc(DataFrame.index) assert pydoc.getdoc(DataFrame.columns) - def test_more_values(self): - values = self.mixed_frame.values - assert values.shape[1] == len(self.mixed_frame.columns) + def test_more_values(self, float_string_frame): + values = float_string_frame.values + assert values.shape[1] == len(float_string_frame.columns) - def test_repr_with_mi_nat(self): + def test_repr_with_mi_nat(self, float_string_frame): df = self.klass({'X': [1, 2]}, index=[[pd.NaT, pd.Timestamp('20130101')], ['a', 'b']]) res = repr(df) exp = ' X\nNaT a 1\n2013-01-01 b 2' assert res == exp - def test_iteritems_names(self): - for k, v in compat.iteritems(self.mixed_frame): + def test_iteritems_names(self, float_string_frame): + for k, v in compat.iteritems(float_string_frame): assert v.name == k - def test_series_put_names(self): - series = self.mixed_frame._series + def test_series_put_names(self, float_string_frame): + series = float_string_frame._series for k, v in compat.iteritems(series): assert v.name == k @@ -408,36 +406,37 @@ def test_with_datetimelikes(self): tm.assert_series_equal(result, expected) -class TestDataFrameMisc(SharedWithSparse, TestData): +class TestDataFrameMisc(SharedWithSparse): klass = DataFrame # SharedWithSparse tests use generic, klass-agnostic assertion _assert_frame_equal = 
staticmethod(assert_frame_equal) _assert_series_equal = staticmethod(assert_series_equal) - def test_values(self): - self.frame.values[:, 0] = 5. - assert (self.frame.values[:, 0] == 5).all() + def test_values(self, float_frame): + float_frame.values[:, 0] = 5. + assert (float_frame.values[:, 0] == 5).all() - def test_as_matrix_deprecated(self): + def test_as_matrix_deprecated(self, float_frame): # GH18458 with tm.assert_produces_warning(FutureWarning): - result = self.frame.as_matrix(columns=self.frame.columns.tolist()) - expected = self.frame.values + cols = float_frame.columns.tolist() + result = float_frame.as_matrix(columns=cols) + expected = float_frame.values tm.assert_numpy_array_equal(result, expected) - def test_deepcopy(self): - cp = deepcopy(self.frame) + def test_deepcopy(self, float_frame): + cp = deepcopy(float_frame) series = cp['A'] series[:] = 10 for idx, value in compat.iteritems(series): - assert self.frame['A'][idx] != value + assert float_frame['A'][idx] != value - def test_transpose_get_view(self): - dft = self.frame.T + def test_transpose_get_view(self, float_frame): + dft = float_frame.T dft.values[:, 5:10] = 5 - assert (self.frame.values[5:10] == 5).all() + assert (float_frame.values[5:10] == 5).all() def test_inplace_return_self(self): # re #1893 diff --git a/pandas/tests/sparse/frame/conftest.py b/pandas/tests/sparse/frame/conftest.py new file mode 100644 index 0000000000000..f36b4e643d10b --- /dev/null +++ b/pandas/tests/sparse/frame/conftest.py @@ -0,0 +1,116 @@ +import pytest + +import numpy as np + +from pandas import SparseDataFrame, SparseArray, DataFrame, bdate_range + +data = {'A': [np.nan, np.nan, np.nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, np.nan, np.nan, np.nan, 3, 4, 5, 6], + 'C': np.arange(10, dtype=np.float64), + 'D': [0, 1, 2, 3, 4, 5, np.nan, np.nan, np.nan, np.nan]} +dates = bdate_range('1/1/2011', periods=10) + + +# fixture names must be compatible with the tests in +# tests/frame/test_api.SharedWithSparse + +@pytest.fixture +def float_frame_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + return DataFrame(data, index=dates) + + +@pytest.fixture +def float_frame(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; some entries are missing + """ + # default_kind='block' is the default + return SparseDataFrame(data, index=dates, default_kind='block') + + +@pytest.fixture +def float_frame_int_kind(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D'] and default_kind='integer'. + Some entries are missing. 
+ """ + return SparseDataFrame(data, index=dates, default_kind='integer') + + +@pytest.fixture +def float_string_frame(): + """ + Fixture for sparse DataFrame of floats and strings with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D', 'foo']; some entries are missing + """ + sdf = SparseDataFrame(data, index=dates) + sdf['foo'] = SparseArray(['bar'] * len(dates)) + return sdf + + +@pytest.fixture +def float_frame_fill0_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 0 + return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates) + + +@pytest.fixture +def float_frame_fill0(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 0 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 0 + return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], + default_fill_value=0, index=dates) + + +@pytest.fixture +def float_frame_fill2_dense(): + """ + Fixture for dense DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 2 + return DataFrame(values, columns=['A', 'B', 'C', 'D'], index=dates) + + +@pytest.fixture +def float_frame_fill2(): + """ + Fixture for sparse DataFrame of floats with DatetimeIndex + + Columns are ['A', 'B', 'C', 'D']; missing entries have been filled with 2 + """ + values = SparseDataFrame(data).values + values[np.isnan(values)] = 2 + return SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], + default_fill_value=2, index=dates) + + +@pytest.fixture +def empty_frame(): + """ + Fixture for empty SparseDataFrame + """ + return SparseDataFrame() diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index be5a1710119ee..30938966b5d1a 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -28,42 +28,6 @@ class TestSparseDataFrame(SharedWithSparse): _assert_frame_equal = staticmethod(tm.assert_sp_frame_equal) _assert_series_equal = staticmethod(tm.assert_sp_series_equal) - def setup_method(self, method): - self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10, dtype=np.float64), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - - self.dates = bdate_range('1/1/2011', periods=10) - - self.orig = pd.DataFrame(self.data, index=self.dates) - self.iorig = pd.DataFrame(self.data, index=self.dates) - - self.frame = SparseDataFrame(self.data, index=self.dates) - self.iframe = SparseDataFrame(self.data, index=self.dates, - default_kind='integer') - self.mixed_frame = self.frame.copy(False) - self.mixed_frame['foo'] = pd.SparseArray(['bar'] * len(self.dates)) - - values = self.frame.values.copy() - values[np.isnan(values)] = 0 - - self.zorig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], - index=self.dates) - self.zframe = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], - default_fill_value=0, index=self.dates) - - values = self.frame.values.copy() - values[np.isnan(values)] = 2 - - self.fill_orig = pd.DataFrame(values, columns=['A', 'B', 'C', 'D'], - index=self.dates) - self.fill_frame = SparseDataFrame(values, columns=['A', 'B', 'C', 'D'], - default_fill_value=2, - index=self.dates) - - self.empty 
= SparseDataFrame() - def test_fill_value_when_combine_const(self): # GH12723 dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float') @@ -73,8 +37,8 @@ def test_fill_value_when_combine_const(self): res = df.add(2, fill_value=0) tm.assert_sp_frame_equal(res, exp) - def test_values(self): - empty = self.empty.values + def test_values(self, empty_frame, float_frame): + empty = empty_frame.values assert empty.shape == (0, 0) no_cols = SparseDataFrame(index=np.arange(10)) @@ -85,28 +49,29 @@ def test_values(self): mat = no_index.values assert mat.shape == (0, 10) - def test_copy(self): - cp = self.frame.copy() + def test_copy(self, float_frame): + cp = float_frame.copy() assert isinstance(cp, SparseDataFrame) - tm.assert_sp_frame_equal(cp, self.frame) + tm.assert_sp_frame_equal(cp, float_frame) # as of v0.15.0 # this is now identical (but not is_a ) - assert cp.index.identical(self.frame.index) + assert cp.index.identical(float_frame.index) - def test_constructor(self): - for col, series in compat.iteritems(self.frame): + def test_constructor(self, float_frame, float_frame_int_kind, + float_frame_fill0): + for col, series in compat.iteritems(float_frame): assert isinstance(series, SparseSeries) - assert isinstance(self.iframe['A'].sp_index, IntIndex) + assert isinstance(float_frame_int_kind['A'].sp_index, IntIndex) # constructed zframe from matrix above - assert self.zframe['A'].fill_value == 0 + assert float_frame_fill0['A'].fill_value == 0 tm.assert_numpy_array_equal(pd.SparseArray([1., 2., 3., 4., 5., 6.]), - self.zframe['A'].values) + float_frame_fill0['A'].values) tm.assert_numpy_array_equal(np.array([0., 0., 0., 0., 1., 2., 3., 4., 5., 6.]), - self.zframe['A'].to_dense().values) + float_frame_fill0['A'].to_dense().values) # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) @@ -115,29 +80,29 @@ def test_constructor(self): # construct from nested dict data = {} - for c, s in compat.iteritems(self.frame): + for c, s in compat.iteritems(float_frame): data[c] = s.to_dict() sdf = SparseDataFrame(data) - tm.assert_sp_frame_equal(sdf, self.frame) + tm.assert_sp_frame_equal(sdf, float_frame) # TODO: test data is copied from inputs # init dict with different index - idx = self.frame.index[:5] + idx = float_frame.index[:5] cons = SparseDataFrame( - self.frame, index=idx, columns=self.frame.columns, - default_fill_value=self.frame.default_fill_value, - default_kind=self.frame.default_kind, copy=True) - reindexed = self.frame.reindex(idx) + float_frame, index=idx, columns=float_frame.columns, + default_fill_value=float_frame.default_fill_value, + default_kind=float_frame.default_kind, copy=True) + reindexed = float_frame.reindex(idx) tm.assert_sp_frame_equal(cons, reindexed, exact_indices=False) # assert level parameter breaks reindex with pytest.raises(TypeError): - self.frame.reindex(idx, level=0) + float_frame.reindex(idx, level=0) - repr(self.frame) + repr(float_frame) def test_constructor_dict_order(self): # GH19018 @@ -151,24 +116,26 @@ def test_constructor_dict_order(self): expected = SparseDataFrame(data=d, columns=list('ab')) tm.assert_sp_frame_equal(frame, expected) - def test_constructor_ndarray(self): + def test_constructor_ndarray(self, float_frame): # no index or columns - sp = SparseDataFrame(self.frame.values) + sp = SparseDataFrame(float_frame.values) # 1d - sp = SparseDataFrame(self.data['A'], index=self.dates, columns=['A']) - tm.assert_sp_frame_equal(sp, self.frame.reindex(columns=['A'])) + sp = SparseDataFrame(float_frame['A'].values, 
index=float_frame.index, + columns=['A']) + tm.assert_sp_frame_equal(sp, float_frame.reindex(columns=['A'])) # raise on level argument - pytest.raises(TypeError, self.frame.reindex, columns=['A'], + pytest.raises(TypeError, float_frame.reindex, columns=['A'], level=1) # wrong length index / columns with tm.assert_raises_regex(ValueError, "^Index length"): - SparseDataFrame(self.frame.values, index=self.frame.index[:-1]) + SparseDataFrame(float_frame.values, index=float_frame.index[:-1]) with tm.assert_raises_regex(ValueError, "^Column length"): - SparseDataFrame(self.frame.values, columns=self.frame.columns[:-1]) + SparseDataFrame(float_frame.values, + columns=float_frame.columns[:-1]) # GH 9272 def test_constructor_empty(self): @@ -176,10 +143,10 @@ def test_constructor_empty(self): assert len(sp.index) == 0 assert len(sp.columns) == 0 - def test_constructor_dataframe(self): - dense = self.frame.to_dense() + def test_constructor_dataframe(self, float_frame): + dense = float_frame.to_dense() sp = SparseDataFrame(dense) - tm.assert_sp_frame_equal(sp, self.frame) + tm.assert_sp_frame_equal(sp, float_frame) def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) @@ -292,12 +259,13 @@ def test_dtypes(self): expected = Series({'float64': 4}) tm.assert_series_equal(result, expected) - def test_shape(self): + def test_shape(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): # see gh-10452 - assert self.frame.shape == (10, 4) - assert self.iframe.shape == (10, 4) - assert self.zframe.shape == (10, 4) - assert self.fill_frame.shape == (10, 4) + assert float_frame.shape == (10, 4) + assert float_frame_int_kind.shape == (10, 4) + assert float_frame_fill0.shape == (10, 4) + assert float_frame_fill2.shape == (10, 4) def test_str(self): df = DataFrame(np.random.randn(10000, 4)) @@ -306,12 +274,14 @@ def test_str(self): sdf = df.to_sparse() str(sdf) - def test_array_interface(self): - res = np.sqrt(self.frame) - dres = np.sqrt(self.frame.to_dense()) + def test_array_interface(self, float_frame): + res = np.sqrt(float_frame) + dres = np.sqrt(float_frame.to_dense()) tm.assert_frame_equal(res.to_dense(), dres) - def test_pickle(self): + def test_pickle(self, float_frame, float_frame_int_kind, float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _test_roundtrip(frame, orig): result = tm.round_trip_pickle(frame) @@ -319,7 +289,10 @@ def _test_roundtrip(frame, orig): tm.assert_frame_equal(result.to_dense(), orig, check_dtype=False) _test_roundtrip(SparseDataFrame(), DataFrame()) - self._check_all(_test_roundtrip) + _test_roundtrip(float_frame, float_frame_dense) + _test_roundtrip(float_frame_int_kind, float_frame_dense) + _test_roundtrip(float_frame_fill0, float_frame_fill0_dense) + _test_roundtrip(float_frame_fill2, float_frame_fill2_dense) def test_dense_to_sparse(self): df = DataFrame({'A': [nan, nan, nan, 1, 2], @@ -353,17 +326,17 @@ def test_density(self): def test_sparse_to_dense(self): pass - def test_sparse_series_ops(self): - self._check_frame_ops(self.frame) + def test_sparse_series_ops(self, float_frame): + self._check_frame_ops(float_frame) - def test_sparse_series_ops_i(self): - self._check_frame_ops(self.iframe) + def test_sparse_series_ops_i(self, float_frame_int_kind): + self._check_frame_ops(float_frame_int_kind) - def test_sparse_series_ops_z(self): - self._check_frame_ops(self.zframe) + def test_sparse_series_ops_z(self, float_frame_fill0): + 
self._check_frame_ops(float_frame_fill0) - def test_sparse_series_ops_fill(self): - self._check_frame_ops(self.fill_frame) + def test_sparse_series_ops_fill(self, float_frame_fill2): + self._check_frame_ops(float_frame_fill2) def _check_frame_ops(self, frame): @@ -417,18 +390,18 @@ def _compare_to_dense(a, b, da, db, op): _compare_to_dense(s, frame, s, frame.to_dense(), op) # it works! - result = self.frame + self.frame.loc[:, ['A', 'B']] # noqa + result = frame + frame.loc[:, ['A', 'B']] # noqa - def test_op_corners(self): - empty = self.empty + self.empty + def test_op_corners(self, float_frame, empty_frame): + empty = empty_frame + empty_frame assert empty.empty - foo = self.frame + self.empty + foo = float_frame + empty_frame assert isinstance(foo.index, DatetimeIndex) - tm.assert_frame_equal(foo, self.frame * np.nan) + tm.assert_frame_equal(foo, float_frame * np.nan) - foo = self.empty + self.frame - tm.assert_frame_equal(foo, self.frame * np.nan) + foo = empty_frame + float_frame + tm.assert_frame_equal(foo, float_frame * np.nan) def test_scalar_ops(self): pass @@ -443,12 +416,12 @@ def test_getitem(self): pytest.raises(Exception, sdf.__getitem__, ['a', 'd']) - def test_iloc(self): + def test_iloc(self, float_frame): - # 2227 - result = self.frame.iloc[:, 0] + # GH 2227 + result = float_frame.iloc[:, 0] assert isinstance(result, SparseSeries) - tm.assert_sp_series_equal(result, self.frame['A']) + tm.assert_sp_series_equal(result, float_frame['A']) # preserve sparse index type. #2251 data = {'A': [0, 1]} @@ -456,22 +429,22 @@ def test_iloc(self): tm.assert_class_equal(iframe['A'].sp_index, iframe.iloc[:, 0].sp_index) - def test_set_value(self): + def test_set_value(self, float_frame): # ok, as the index gets converted to object - frame = self.frame.copy() + frame = float_frame.copy() with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): res = frame.set_value('foobar', 'B', 1.5) assert res.index.dtype == 'object' - res = self.frame + res = float_frame res.index = res.index.astype(object) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - res = self.frame.set_value('foobar', 'B', 1.5) - assert res is not self.frame + res = float_frame.set_value('foobar', 'B', 1.5) + assert res is not float_frame assert res.index[-1] == 'foobar' with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -482,38 +455,42 @@ def test_set_value(self): res2 = res.set_value('foobar', 'qux', 1.5) assert res2 is not res tm.assert_index_equal(res2.columns, - pd.Index(list(self.frame.columns) + ['qux'])) + pd.Index(list(float_frame.columns) + ['qux'])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): assert res2.get_value('foobar', 'qux') == 1.5 - def test_fancy_index_misc(self): + def test_fancy_index_misc(self, float_frame): # axis = 0 - sliced = self.frame.iloc[-2:, :] - expected = self.frame.reindex(index=self.frame.index[-2:]) + sliced = float_frame.iloc[-2:, :] + expected = float_frame.reindex(index=float_frame.index[-2:]) tm.assert_sp_frame_equal(sliced, expected) # axis = 1 - sliced = self.frame.iloc[:, -2:] - expected = self.frame.reindex(columns=self.frame.columns[-2:]) + sliced = float_frame.iloc[:, -2:] + expected = float_frame.reindex(columns=float_frame.columns[-2:]) tm.assert_sp_frame_equal(sliced, expected) - def test_getitem_overload(self): + def test_getitem_overload(self, float_frame): # slicing - sl = self.frame[:20] - tm.assert_sp_frame_equal(sl, self.frame.reindex(self.frame.index[:20])) + sl = float_frame[:20] + 
tm.assert_sp_frame_equal(sl, + float_frame.reindex(float_frame.index[:20])) # boolean indexing - d = self.frame.index[5] - indexer = self.frame.index > d + d = float_frame.index[5] + indexer = float_frame.index > d - subindex = self.frame.index[indexer] - subframe = self.frame[indexer] + subindex = float_frame.index[indexer] + subframe = float_frame[indexer] tm.assert_index_equal(subindex, subframe.index) - pytest.raises(Exception, self.frame.__getitem__, indexer[:-1]) + pytest.raises(Exception, float_frame.__getitem__, indexer[:-1]) - def test_setitem(self): + def test_setitem(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check_frame(frame, orig): N = len(frame) @@ -566,24 +543,27 @@ def _check_frame(frame, orig): frame['K'] = frame.default_fill_value assert len(frame['K'].sp_values) == 0 - self._check_all(_check_frame) + _check_frame(float_frame, float_frame_dense) + _check_frame(float_frame_int_kind, float_frame_dense) + _check_frame(float_frame_fill0, float_frame_fill0_dense) + _check_frame(float_frame_fill2, float_frame_fill2_dense) - def test_setitem_corner(self): - self.frame['a'] = self.frame['B'] - tm.assert_sp_series_equal(self.frame['a'], self.frame['B'], + def test_setitem_corner(self, float_frame): + float_frame['a'] = float_frame['B'] + tm.assert_sp_series_equal(float_frame['a'], float_frame['B'], check_names=False) - def test_setitem_array(self): - arr = self.frame['B'] + def test_setitem_array(self, float_frame): + arr = float_frame['B'] - self.frame['E'] = arr - tm.assert_sp_series_equal(self.frame['E'], self.frame['B'], + float_frame['E'] = arr + tm.assert_sp_series_equal(float_frame['E'], float_frame['B'], check_names=False) - self.frame['F'] = arr[:-1] - index = self.frame.index[:-1] - tm.assert_sp_series_equal(self.frame['E'].reindex(index), - self.frame['F'].reindex(index), + float_frame['F'] = arr[:-1] + index = float_frame.index[:-1] + tm.assert_sp_series_equal(float_frame['E'].reindex(index), + float_frame['F'].reindex(index), check_names=False) def test_setitem_chained_no_consolidate(self): @@ -595,44 +575,44 @@ def test_setitem_chained_no_consolidate(self): sdf[0][1] = 2 assert len(sdf._data.blocks) == 2 - def test_delitem(self): - A = self.frame['A'] - C = self.frame['C'] + def test_delitem(self, float_frame): + A = float_frame['A'] + C = float_frame['C'] - del self.frame['B'] - assert 'B' not in self.frame - tm.assert_sp_series_equal(self.frame['A'], A) - tm.assert_sp_series_equal(self.frame['C'], C) + del float_frame['B'] + assert 'B' not in float_frame + tm.assert_sp_series_equal(float_frame['A'], A) + tm.assert_sp_series_equal(float_frame['C'], C) - del self.frame['D'] - assert 'D' not in self.frame + del float_frame['D'] + assert 'D' not in float_frame - del self.frame['A'] - assert 'A' not in self.frame + del float_frame['A'] + assert 'A' not in float_frame - def test_set_columns(self): - self.frame.columns = self.frame.columns - pytest.raises(Exception, setattr, self.frame, 'columns', - self.frame.columns[:-1]) + def test_set_columns(self, float_frame): + float_frame.columns = float_frame.columns + pytest.raises(Exception, setattr, float_frame, 'columns', + float_frame.columns[:-1]) - def test_set_index(self): - self.frame.index = self.frame.index - pytest.raises(Exception, setattr, self.frame, 'index', - self.frame.index[:-1]) + def test_set_index(self, float_frame): + float_frame.index = float_frame.index + pytest.raises(Exception, setattr, 
float_frame, 'index', + float_frame.index[:-1]) - def test_append(self): - a = self.frame[:5] - b = self.frame[5:] + def test_append(self, float_frame): + a = float_frame[:5] + b = float_frame[5:] appended = a.append(b) - tm.assert_sp_frame_equal(appended, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(appended, float_frame, exact_indices=False) - a = self.frame.iloc[:5, :3] - b = self.frame.iloc[5:] + a = float_frame.iloc[:5, :3] + b = float_frame.iloc[5:] with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # Stacklevel is set for pd.concat, not append appended = a.append(b) - tm.assert_sp_frame_equal(appended.iloc[:, :3], self.frame.iloc[:, :3], + tm.assert_sp_frame_equal(appended.iloc[:, :3], float_frame.iloc[:, :3], exact_indices=False) a = a[['B', 'C', 'A']].head(2) @@ -713,9 +693,9 @@ def test_astype_bool(self): assert res['A'].dtype == np.bool assert res['B'].dtype == np.bool - def test_fillna(self): - df = self.zframe.reindex(lrange(5)) - dense = self.zorig.reindex(lrange(5)) + def test_fillna(self, float_frame_fill0, float_frame_fill0_dense): + df = float_frame_fill0.reindex(lrange(5)) + dense = float_frame_fill0_dense.reindex(lrange(5)) result = df.fillna(0) expected = dense.fillna(0) @@ -795,45 +775,48 @@ def test_sparse_frame_fillna_limit(self): expected = expected.to_sparse() tm.assert_frame_equal(result, expected) - def test_rename(self): - result = self.frame.rename(index=str) - expected = SparseDataFrame(self.data, index=self.dates.strftime( - "%Y-%m-%d %H:%M:%S")) + def test_rename(self, float_frame): + result = float_frame.rename(index=str) + expected = SparseDataFrame(float_frame.values, + index=float_frame.index.strftime( + "%Y-%m-%d %H:%M:%S"), + columns=list('ABCD')) tm.assert_sp_frame_equal(result, expected) - result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + result = float_frame.rename(columns=lambda x: '%s%d' % (x, 1)) data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C1': np.arange(10, dtype=np.float64), 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - expected = SparseDataFrame(data, index=self.dates) + expected = SparseDataFrame(data, index=float_frame.index) tm.assert_sp_frame_equal(result, expected) - def test_corr(self): - res = self.frame.corr() - tm.assert_frame_equal(res, self.frame.to_dense().corr()) + def test_corr(self, float_frame): + res = float_frame.corr() + tm.assert_frame_equal(res, float_frame.to_dense().corr()) - def test_describe(self): - self.frame['foo'] = np.nan - self.frame.get_dtype_counts() - str(self.frame) - desc = self.frame.describe() # noqa + def test_describe(self, float_frame): + float_frame['foo'] = np.nan + float_frame.get_dtype_counts() + str(float_frame) + desc = float_frame.describe() # noqa - def test_join(self): - left = self.frame.loc[:, ['A', 'B']] - right = self.frame.loc[:, ['C', 'D']] + def test_join(self, float_frame): + left = float_frame.loc[:, ['A', 'B']] + right = float_frame.loc[:, ['C', 'D']] joined = left.join(right) - tm.assert_sp_frame_equal(joined, self.frame, exact_indices=False) + tm.assert_sp_frame_equal(joined, float_frame, exact_indices=False) - right = self.frame.loc[:, ['B', 'D']] + right = float_frame.loc[:, ['B', 'D']] pytest.raises(Exception, left.join, right) with tm.assert_raises_regex(ValueError, 'Other Series must have a name'): - self.frame.join(Series( - np.random.randn(len(self.frame)), index=self.frame.index)) + float_frame.join(Series( + np.random.randn(len(float_frame)), 
index=float_frame.index)) - def test_reindex(self): + def test_reindex(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): def _check_frame(frame): index = frame.index @@ -876,26 +859,27 @@ def _check_frame(frame): frame.default_fill_value) assert np.isnan(reindexed['Z'].sp_values).all() - _check_frame(self.frame) - _check_frame(self.iframe) - _check_frame(self.zframe) - _check_frame(self.fill_frame) + _check_frame(float_frame) + _check_frame(float_frame_int_kind) + _check_frame(float_frame_fill0) + _check_frame(float_frame_fill2) # with copy=False - reindexed = self.frame.reindex(self.frame.index, copy=False) + reindexed = float_frame.reindex(float_frame.index, copy=False) reindexed['F'] = reindexed['A'] - assert 'F' in self.frame + assert 'F' in float_frame - reindexed = self.frame.reindex(self.frame.index) + reindexed = float_frame.reindex(float_frame.index) reindexed['G'] = reindexed['A'] - assert 'G' not in self.frame + assert 'G' not in float_frame - def test_reindex_fill_value(self): + def test_reindex_fill_value(self, float_frame_fill0, + float_frame_fill0_dense): rng = bdate_range('20110110', periods=20) - result = self.zframe.reindex(rng, fill_value=0) - exp = self.zorig.reindex(rng, fill_value=0) - exp = exp.to_sparse(self.zframe.default_fill_value) + result = float_frame_fill0.reindex(rng, fill_value=0) + exp = float_frame_fill0_dense.reindex(rng, fill_value=0) + exp = exp.to_sparse(float_frame_fill0.default_fill_value) tm.assert_sp_frame_equal(result, exp) def test_reindex_method(self): @@ -968,20 +952,27 @@ def test_reindex_method(self): with pytest.raises(NotImplementedError): sparse.reindex(columns=range(6), method='ffill') - def test_take(self): - result = self.frame.take([1, 0, 2], axis=1) - expected = self.frame.reindex(columns=['B', 'A', 'C']) + def test_take(self, float_frame): + result = float_frame.take([1, 0, 2], axis=1) + expected = float_frame.reindex(columns=['B', 'A', 'C']) tm.assert_sp_frame_equal(result, expected) - def test_to_dense(self): + def test_to_dense(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): dense_dm = frame.to_dense() tm.assert_frame_equal(frame, dense_dm) tm.assert_frame_equal(dense_dm, orig, check_dtype=False) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_stack_sparse_frame(self): + def test_stack_sparse_frame(self, float_frame, float_frame_int_kind, + float_frame_fill0, float_frame_fill2): with catch_warnings(record=True): def _check(frame): @@ -995,14 +986,17 @@ def _check(frame): tm.assert_numpy_array_equal(from_dense_lp.values, from_sparse_lp.values) - _check(self.frame) - _check(self.iframe) + _check(float_frame) + _check(float_frame_int_kind) # for now - pytest.raises(Exception, _check, self.zframe) - pytest.raises(Exception, _check, self.fill_frame) + pytest.raises(Exception, _check, float_frame_fill0) + pytest.raises(Exception, _check, float_frame_fill2) - def test_transpose(self): + def test_transpose(self, float_frame, float_frame_int_kind, + float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): transposed = frame.T @@ -1013,9 +1007,14 @@ def _check(frame, orig): tm.assert_frame_equal(frame.T.T.to_dense(), 
orig.T.T) tm.assert_sp_frame_equal(frame, frame.T.T, exact_indices=False) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_shift(self): + def test_shift(self, float_frame, float_frame_int_kind, float_frame_dense, + float_frame_fill0, float_frame_fill0_dense, + float_frame_fill2, float_frame_fill2_dense): def _check(frame, orig): shifted = frame.shift(0) @@ -1042,32 +1041,29 @@ def _check(frame, orig): kind=frame.default_kind) tm.assert_frame_equal(shifted, exp) - self._check_all(_check) + _check(float_frame, float_frame_dense) + _check(float_frame_int_kind, float_frame_dense) + _check(float_frame_fill0, float_frame_fill0_dense) + _check(float_frame_fill2, float_frame_fill2_dense) - def test_count(self): - dense_result = self.frame.to_dense().count() + def test_count(self, float_frame): + dense_result = float_frame.to_dense().count() - result = self.frame.count() + result = float_frame.count() tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=None) + result = float_frame.count(axis=None) tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=0) + result = float_frame.count(axis=0) tm.assert_series_equal(result, dense_result) - result = self.frame.count(axis=1) - dense_result = self.frame.to_dense().count(axis=1) + result = float_frame.count(axis=1) + dense_result = float_frame.to_dense().count(axis=1) # win32 don't check dtype tm.assert_series_equal(result, dense_result, check_dtype=False) - def _check_all(self, check_func): - check_func(self.frame, self.orig) - check_func(self.iframe, self.iorig) - check_func(self.zframe, self.zorig) - check_func(self.fill_frame, self.fill_orig) - def test_numpy_transpose(self): sdf = SparseDataFrame([1, 2, 3], index=[1, 2, 3], columns=['a']) result = np.transpose(np.transpose(sdf)) @@ -1076,8 +1072,8 @@ def test_numpy_transpose(self): msg = "the 'axes' parameter is not supported" tm.assert_raises_regex(ValueError, msg, np.transpose, sdf, axes=1) - def test_combine_first(self): - df = self.frame + def test_combine_first(self, float_frame): + df = float_frame result = df[::2].combine_first(df) result2 = df[::2].combine_first(df.to_dense()) @@ -1088,8 +1084,8 @@ def test_combine_first(self): tm.assert_sp_frame_equal(result, result2) tm.assert_sp_frame_equal(result, expected) - def test_combine_add(self): - df = self.frame.to_dense() + def test_combine_add(self, float_frame): + df = float_frame.to_dense() df2 = df.copy() df2['C'][:3] = np.nan df['A'][:3] = 5.7 @@ -1214,51 +1210,42 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(object): - def setup_method(self, method): - self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10, dtype=float), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - - self.dates = bdate_range('1/1/2011', periods=10) - - self.frame = SparseDataFrame(self.data, index=self.dates) - def test_cumsum(self): - expected = SparseDataFrame(self.frame.to_dense().cumsum()) + def test_cumsum(self, float_frame): + expected = SparseDataFrame(float_frame.to_dense().cumsum()) - result = self.frame.cumsum() + result = float_frame.cumsum() tm.assert_sp_frame_equal(result, expected) - result = self.frame.cumsum(axis=None) + result = float_frame.cumsum(axis=None) tm.assert_sp_frame_equal(result, expected) - result = 
self.frame.cumsum(axis=0)
+ result = float_frame.cumsum(axis=0)
 tm.assert_sp_frame_equal(result, expected)

- def test_numpy_cumsum(self):
- result = np.cumsum(self.frame)
- expected = SparseDataFrame(self.frame.to_dense().cumsum())
+ def test_numpy_cumsum(self, float_frame):
+ result = np.cumsum(float_frame)
+ expected = SparseDataFrame(float_frame.to_dense().cumsum())
 tm.assert_sp_frame_equal(result, expected)

 msg = "the 'dtype' parameter is not supported"
 tm.assert_raises_regex(ValueError, msg, np.cumsum,
- self.frame, dtype=np.int64)
+ float_frame, dtype=np.int64)

 msg = "the 'out' parameter is not supported"
 tm.assert_raises_regex(ValueError, msg, np.cumsum,
- self.frame, out=result)
+ float_frame, out=result)

- def test_numpy_func_call(self):
+ def test_numpy_func_call(self, float_frame):
 # no exception should be raised even though
 # numpy passes in 'axis=None' or `axis=-1'
 funcs = ['sum', 'cumsum', 'var', 'mean',
 'prod', 'cumprod', 'std', 'min', 'max']
 for func in funcs:
- getattr(np, func)(self.frame)
+ getattr(np, func)(float_frame)

- @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)',
+ @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)',
 strict=True)
 def test_quantile(self):
 # GH 17386
@@ -1275,7 +1262,7 @@ def test_quantile(self):
 tm.assert_series_equal(result, dense_expected)
 tm.assert_sp_series_equal(result, sparse_expected)

- @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH#17386)',
+ @pytest.mark.xfail(reason='Wrong SparseBlock initialization (GH 17386)',
 strict=True)
 def test_quantile_multi(self):
 # GH 17386

From d64c0a802aae2705605db1f6f98ce0c493480b66 Mon Sep 17 00:00:00 2001
From: Troels Nielsen
Date: Tue, 18 Sep 2018 16:47:31 +0200
Subject: [PATCH 76/86] BUG SeriesGroupBy.mean() overflowed on some integer array (#22653)

---
 doc/source/whatsnew/v0.24.0.txt | 1 +
 pandas/core/dtypes/common.py | 27 +++++++++++++++++++++++++++
 pandas/core/groupby/ops.py | 3 ++-
 pandas/tests/groupby/test_function.py | 9 +++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 39ed5d968707b..3a44b0260153c 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -768,6 +768,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`Resampler.apply` when passing postiional arguments to applied func (:issue:`14615`).
 - Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
 - Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
+- Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside int64, overflowing instead (:issue:`22487`)

 Sparse
 ^^^^^^

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index b8cbb41501dd1..f6e7e87f1043b 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -90,6 +90,33 @@ def ensure_categorical(arr):
 return arr

+def ensure_int64_or_float64(arr, copy=False):
+ """
+ Ensure that an array of some integer dtype
+ has an int64 dtype if possible.
+ If it's not possible, potentially because of overflow,
+ convert the array to float64 instead.
+
+ Parameters
+ ----------
+ arr : array-like
+ The array whose data type we want to enforce.
+ copy : boolean, default False
+ Whether to copy the original array or reuse
+ it in place, if possible.
+
+ Returns
+ -------
+ out_arr : The input array cast as int64 if
+ possible without overflow.
+ Otherwise the input array cast to float64. + """ + try: + return arr.astype('int64', copy=copy, casting='safe') + except TypeError: + return arr.astype('float64', copy=copy) + + def is_object_dtype(arr_or_dtype): """ Check whether an array-like or dtype is of the object dtype. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index ba04ff3a3d3ee..d9f7b4d9c31c3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -23,6 +23,7 @@ ensure_float64, ensure_platform_int, ensure_int64, + ensure_int64_or_float64, ensure_object, needs_i8_conversion, is_integer_dtype, @@ -471,7 +472,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, if (values == iNaT).any(): values = ensure_float64(values) else: - values = values.astype('int64', copy=False) + values = ensure_int64_or_float64(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(values) else: diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index f8a0f1688c64e..775747ce0c6c1 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1125,3 +1125,12 @@ def h(df, arg3): expected = pd.Series([4, 8, 12], index=pd.Int64Index([1, 2, 3])) tm.assert_series_equal(result, expected) + + +def test_groupby_mean_no_overflow(): + # Regression test for (#22487) + df = pd.DataFrame({ + "user": ["A", "A", "A", "A", "A"], + "connections": [4970, 4749, 4719, 4704, 18446744073699999744] + }) + assert df.groupby('user')['connections'].mean()['A'] == 3689348814740003840 From 0ba7b162cdcc089cd4f6c135a122ccac5d7e6ab1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 11:51:59 -0500 Subject: [PATCH 77/86] TST: Fail on warning (#22699) --- .travis.yml | 4 +- ...-numpydev.yaml => travis-37-numpydev.yaml} | 2 +- doc/source/contributing.rst | 57 + pandas/compat/__init__.py | 12 + pandas/compat/chainmap_impl.py | 9 +- pandas/core/algorithms.py | 5 +- pandas/core/arrays/datetimelike.py | 1 + pandas/core/arrays/integer.py | 10 +- pandas/core/common.py | 2 +- pandas/core/computation/eval.py | 1 + pandas/core/dtypes/inference.py | 7 +- pandas/core/frame.py | 6 +- pandas/core/groupby/generic.py | 2 +- pandas/core/indexes/base.py | 1 + pandas/core/internals/blocks.py | 1 + pandas/core/series.py | 5 +- pandas/core/window.py | 2 + pandas/io/common.py | 2 + pandas/io/html.py | 4 +- pandas/io/pickle.py | 3 +- pandas/tests/api/test_api.py | 13 +- pandas/tests/api/test_types.py | 13 +- pandas/tests/arithmetic/test_datetime64.py | 4 + pandas/tests/arithmetic/test_numeric.py | 3 +- pandas/tests/computation/test_eval.py | 20 +- pandas/tests/dtypes/test_generic.py | 3 +- pandas/tests/dtypes/test_inference.py | 8 +- pandas/tests/dtypes/test_missing.py | 3 +- pandas/tests/extension/base/dtype.py | 9 +- pandas/tests/extension/json/array.py | 7 +- pandas/tests/frame/test_analytics.py | 8 +- pandas/tests/frame/test_apply.py | 7 +- pandas/tests/frame/test_constructors.py | 3 +- pandas/tests/frame/test_convert_to.py | 3 +- pandas/tests/frame/test_indexing.py | 80 +- pandas/tests/frame/test_operators.py | 6 +- pandas/tests/frame/test_query_eval.py | 1 + pandas/tests/frame/test_reshape.py | 3 +- pandas/tests/frame/test_subclass.py | 40 +- pandas/tests/generic/test_generic.py | 13 +- pandas/tests/generic/test_panel.py | 4 +- pandas/tests/groupby/aggregate/test_cython.py | 7 +- pandas/tests/groupby/test_groupby.py | 83 +- pandas/tests/groupby/test_grouping.py | 37 +- pandas/tests/groupby/test_whitelist.py | 12 +- 
.../tests/indexes/datetimes/test_datetime.py | 3 +- pandas/tests/indexes/datetimes/test_ops.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 37 +- pandas/tests/indexes/multi/test_duplicates.py | 5 +- pandas/tests/indexes/test_base.py | 2 + pandas/tests/indexes/timedeltas/test_ops.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 4 +- pandas/tests/indexing/common.py | 57 +- .../indexing/test_chaining_and_caching.py | 18 +- pandas/tests/indexing/test_floats.py | 14 +- pandas/tests/indexing/test_iloc.py | 12 +- pandas/tests/indexing/test_indexing.py | 16 +- pandas/tests/indexing/test_indexing_slow.py | 1 + pandas/tests/indexing/test_ix.py | 16 +- pandas/tests/indexing/test_loc.py | 3 +- pandas/tests/indexing/test_multiindex.py | 179 +- pandas/tests/indexing/test_panel.py | 2 + pandas/tests/indexing/test_partial.py | 3 + pandas/tests/internals/test_internals.py | 2 +- pandas/tests/io/formats/test_to_excel.py | 4 +- .../tests/io/generate_legacy_storage_files.py | 3 +- pandas/tests/io/parser/compression.py | 26 +- pandas/tests/io/sas/test_sas7bdat.py | 2 + pandas/tests/io/test_common.py | 2 + pandas/tests/io/test_compression.py | 6 +- pandas/tests/io/test_excel.py | 3 + pandas/tests/io/test_packers.py | 11 +- pandas/tests/io/test_pickle.py | 12 +- pandas/tests/io/test_pytables.py | 47 +- pandas/tests/io/test_sql.py | 9 +- pandas/tests/io/test_stata.py | 24 +- pandas/tests/plotting/test_frame.py | 9 +- pandas/tests/plotting/test_hist_method.py | 17 +- pandas/tests/plotting/test_misc.py | 2 + pandas/tests/reshape/merge/test_join.py | 1 + pandas/tests/reshape/test_concat.py | 115 +- pandas/tests/reshape/test_reshape.py | 9 +- pandas/tests/series/indexing/test_datetime.py | 2 + pandas/tests/series/indexing/test_indexing.py | 2 + pandas/tests/series/test_analytics.py | 39 +- pandas/tests/series/test_api.py | 9 +- pandas/tests/series/test_constructors.py | 2 + pandas/tests/series/test_dtypes.py | 6 +- pandas/tests/sparse/frame/test_frame.py | 28 +- .../tests/sparse/frame/test_to_from_scipy.py | 12 +- pandas/tests/sparse/series/test_series.py | 3 + pandas/tests/test_downstream.py | 12 + pandas/tests/test_errors.py | 3 +- pandas/tests/test_expressions.py | 6 +- pandas/tests/test_multilevel.py | 10 +- pandas/tests/test_nanops.py | 7 +- pandas/tests/test_panel.py | 3553 ++++++++--------- pandas/tests/test_resample.py | 28 +- pandas/tests/test_window.py | 9 + pandas/tests/tseries/offsets/test_offsets.py | 3 + .../offsets/test_offsets_properties.py | 10 +- pandas/tests/tslibs/test_parsing.py | 3 + pandas/tests/util/test_hashing.py | 5 +- pandas/tseries/holiday.py | 4 +- pandas/util/testing.py | 10 +- setup.cfg | 3 +- 106 files changed, 2677 insertions(+), 2298 deletions(-) rename ci/{travis-36-numpydev.yaml => travis-37-numpydev.yaml} (95%) diff --git a/.travis.yml b/.travis.yml index 32e6d2eae90a7..76f4715a4abb2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -64,7 +64,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" + - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: @@ -79,7 +79,7 @@ matrix: - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true - dist: trusty env: - - JOB="3.6, NumPy dev" ENV_FILE="ci/travis-36-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" + - JOB="3.7, NumPy dev" 
ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
 addons:
 apt:
 packages:
diff --git a/ci/travis-36-numpydev.yaml b/ci/travis-37-numpydev.yaml
similarity index 95%
rename from ci/travis-36-numpydev.yaml
rename to ci/travis-37-numpydev.yaml
index aba28634edd0d..82c75b7c91b1f 100644
--- a/ci/travis-36-numpydev.yaml
+++ b/ci/travis-37-numpydev.yaml
@@ -2,7 +2,7 @@ name: pandas
 channels:
 - defaults
 dependencies:
- - python=3.6*
+ - python=3.7*
 - pytz
 - Cython>=0.28.2
 # universal
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 60bfd07961b38..65e151feeba67 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -632,6 +632,14 @@ Otherwise, you need to do it manually:
 warnings.warn('Use new_func instead.', FutureWarning, stacklevel=2)
 new_func()

+You'll also need to
+
+1. Write a new test that asserts a warning is issued when calling with the deprecated argument
+2. Update all of pandas' existing tests and code to use the new argument
+
+See :ref:`contributing.warnings` for more.
+
+
 .. _contributing.ci:

 Testing With Continuous Integration
@@ -859,6 +867,55 @@ preferred if the inputs or logic are simple, with Hypothesis tests reserved
 for cases with complex logic or where there are too many combinations of
 options or subtle interactions to test (or think of!) all of them.

+.. _contributing.warnings:
+
+Testing Warnings
+~~~~~~~~~~~~~~~~
+
+By default, one of pandas' CI workers will fail if any unhandled warnings are emitted.
+
+If your change involves checking that a warning is actually emitted, use
+``tm.assert_produces_warning(ExpectedWarning)``.
+
+
+.. code-block:: python
+
+    with tm.assert_produces_warning(FutureWarning):
+        df.some_operation()
+
+We prefer this to the ``pytest.warns`` context manager because ours checks that the warning's
+stacklevel is set correctly. The stacklevel is what ensures the *user's* file name and line number
+are printed in the warning, rather than something internal to pandas. It represents the number of
+function calls from user code (e.g. ``df.some_operation()``) to the function that actually emits
+the warning. Our linter will fail the build if you use ``pytest.warns`` in a test.
+
+If you have a test that would emit a warning, but you aren't actually testing the
+warning itself (say because it's going to be removed in the future, or because we're
+matching a 3rd-party library's behavior), then use ``pytest.mark.filterwarnings`` to
+ignore the warning.
+
+.. code-block:: python
+
+    @pytest.mark.filterwarnings("ignore:msg:category")
+    def test_thing(self):
+        ...
+
+If the test generates a warning of class ``category`` whose message starts
+with ``msg``, the warning will be ignored and the test will pass.
+
+If you need finer-grained control, you can use Python's usual
+`warnings module <https://docs.python.org/3/library/warnings.html>`__
+to control whether a warning is ignored / raised at different places within
+a single test.
+
+.. code-block:: python
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", FutureWarning)
+        # Or use warnings.filterwarnings(...)
+
+Alternatively, consider breaking up the unit test.
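+For example, a test that checks both the behavior and its deprecation warning
+can often be split in two, so that only one small test deals with the warning.
+A minimal sketch, reusing the ``df.some_operation()`` placeholder from above
+(the test names here are illustrative, not a real pandas API):
+
+.. code-block:: python
+
+    @pytest.mark.filterwarnings("ignore::FutureWarning")
+    def test_some_operation_result(self):
+        # Exercise the behavior with the deprecation warning ignored.
+        result = df.some_operation()
+        ...
+
+    def test_some_operation_warns(self):
+        # Assert the warning itself in a dedicated, smaller test.
+        with tm.assert_produces_warning(FutureWarning):
+            df.some_operation()
+
+This keeps each test focused: one on the result, one on the warning.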
+
 
 Running the test suite
 ----------------------
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index 28a55133e68aa..1453725225e7d 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -38,6 +38,7 @@
 import struct
 import inspect
 from collections import namedtuple
+import collections
 
 PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] >= 3
@@ -135,6 +136,11 @@ def lfilter(*args, **kwargs):
     from importlib import reload
     reload = reload
 
+    Hashable = collections.abc.Hashable
+    Iterable = collections.abc.Iterable
+    Mapping = collections.abc.Mapping
+    Sequence = collections.abc.Sequence
+    Sized = collections.abc.Sized
 
 else:
     # Python 2
@@ -190,6 +196,12 @@ def get_range_parameters(data):
 
     reload = builtins.reload
 
+    Hashable = collections.Hashable
+    Iterable = collections.Iterable
+    Mapping = collections.Mapping
+    Sequence = collections.Sequence
+    Sized = collections.Sized
+
 if PY2:
     def iteritems(obj, **kw):
         return obj.iteritems(**kw)
diff --git a/pandas/compat/chainmap_impl.py b/pandas/compat/chainmap_impl.py
index c4aa8c8d6ab30..3ea5414cc41eb 100644
--- a/pandas/compat/chainmap_impl.py
+++ b/pandas/compat/chainmap_impl.py
@@ -1,4 +1,11 @@
-from collections import MutableMapping
+import sys
+
+PY3 = sys.version_info[0] >= 3
+
+if PY3:
+    from collections.abc import MutableMapping
+else:
+    from collections import MutableMapping
 
 try:
     from thread import get_ident
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index e5b6c84d37541..d39e9e08e2947 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -3,7 +3,7 @@ intended for public consumption
 """
 from __future__ import division
-from warnings import warn, catch_warnings
+from warnings import warn, catch_warnings, simplefilter
 from textwrap import dedent
 
 import numpy as np
@@ -91,7 +91,8 @@ def _ensure_data(values, dtype=None):
 
             # ignore the fact that we are casting to float
             # which discards complex parts
-            with catch_warnings(record=True):
+            with catch_warnings():
+                simplefilter("ignore", np.ComplexWarning)
                 values = ensure_float64(values)
             return values, 'float64', 'float64'
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 12e1dd1052e0b..69925ce1c520e 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -59,6 +59,7 @@ def cmp_method(self, other):
             # numpy will show a DeprecationWarning on invalid elementwise
             # comparisons, this will raise in the future
             with warnings.catch_warnings(record=True):
+                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                 with np.errstate(all='ignore'):
                     result = op(self.values, np.asarray(other))
 
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index aebc7a6a04ffc..e58109a25e1a5 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -5,7 +5,7 @@
 from pandas._libs.lib import infer_dtype
 from pandas.util._decorators import cache_readonly
-from pandas.compat import u, range
+from pandas.compat import u, range, string_types
 from pandas.compat import set_function_name
 
 from pandas.core.dtypes.cast import astype_nansafe
@@ -147,6 +147,11 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
             dtype = values.dtype
 
     if dtype is not None:
+        if (isinstance(dtype, string_types) and
+                (dtype.startswith("Int") or dtype.startswith("UInt"))):
+            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
+            # https://github.com/numpy/numpy/pull/7476
+            dtype = dtype.lower()
         if not issubclass(type(dtype),
_IntegerDtype): try: dtype = _dtypes[str(np.dtype(dtype))] @@ -507,7 +512,8 @@ def cmp_method(self, other): # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future - with warnings.catch_warnings(record=True): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", "elementwise", FutureWarning) with np.errstate(all='ignore'): result = op(self._data, other) diff --git a/pandas/core/common.py b/pandas/core/common.py index 92e4e23ce958e..a6b05daf1d85d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -356,7 +356,7 @@ def standardize_mapping(into): return partial( collections.defaultdict, into.default_factory) into = type(into) - if not issubclass(into, collections.Mapping): + if not issubclass(into, compat.Mapping): raise TypeError('unsupported type: {into}'.format(into=into)) elif into == collections.defaultdict: raise TypeError( diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 434d7f6ccfe13..7025f3000eb5f 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -323,6 +323,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, # to use a non-numeric indexer try: with warnings.catch_warnings(record=True): + # TODO: Filter the warnings we actually care about here. target[assigner] = ret except (TypeError, IndexError): raise ValueError("Cannot assign expression output to target") diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index ed416c3ef857d..67f391615eedb 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -1,10 +1,9 @@ """ basic inference routines """ -import collections import re import numpy as np -from collections import Iterable from numbers import Number +from pandas import compat from pandas.compat import (PY2, string_types, text_type, string_and_binary_types, re_type) from pandas._libs import lib @@ -112,7 +111,7 @@ def _iterable_not_string(obj): False """ - return (isinstance(obj, collections.Iterable) and + return (isinstance(obj, compat.Iterable) and not isinstance(obj, string_types)) @@ -284,7 +283,7 @@ def is_list_like(obj): False """ - return (isinstance(obj, Iterable) and + return (isinstance(obj, compat.Iterable) and # we do not count strings/unicode/bytes as list-like not isinstance(obj, string_and_binary_types) and # exclude zero-dimensional numpy arrays, effectively scalars diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8daef91849773..0e0fada8e4d7f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -417,9 +417,9 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=copy) # For data is list-like, or Iterable (will consume into list) - elif (isinstance(data, collections.Iterable) + elif (isinstance(data, compat.Iterable) and not isinstance(data, string_and_binary_types)): - if not isinstance(data, collections.Sequence): + if not isinstance(data, compat.Sequence): data = list(data) if len(data) > 0: if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1: @@ -7640,7 +7640,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): if isinstance(data[0], (list, tuple)): return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) - elif isinstance(data[0], collections.Mapping): + elif isinstance(data[0], compat.Mapping): return _list_of_dict_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) elif isinstance(data[0], Series): diff --git 
a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 685635fb6854d..f15b1203a334e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -758,7 +758,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) - if isinstance(func_or_funcs, collections.Iterable): + if isinstance(func_or_funcs, compat.Iterable): # Catch instances of lists / tuples # but not the class list / tuple itself. ret = self._aggregate_multiple_funcs(func_or_funcs, diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 487d3975a6219..b42bbdafcab45 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -98,6 +98,7 @@ def cmp_method(self, other): # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", "elementwise", FutureWarning) with np.errstate(all='ignore'): result = op(self.values, np.asarray(other)) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e735b35653cd4..6576db9f642a6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -3490,6 +3490,7 @@ def _putmask_smart(v, m, n): # we ignore ComplexWarning here with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", np.ComplexWarning) nn_at = nn.astype(v.dtype) # avoid invalid dtype comparisons diff --git a/pandas/core/series.py b/pandas/core/series.py index 8f69de973e7a3..fdb9ef59c1d3e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6,7 +6,6 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -import collections import warnings from textwrap import dedent @@ -240,8 +239,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, raise TypeError("{0!r} type is unordered" "".format(data.__class__.__name__)) # If data is Iterable but not list-like, consume into list. - elif (isinstance(data, collections.Iterable) - and not isinstance(data, collections.Sized)): + elif (isinstance(data, compat.Iterable) + and not isinstance(data, compat.Sized)): data = list(data) else: diff --git a/pandas/core/window.py b/pandas/core/window.py index eed0e97f30dc9..66f48f403c941 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2387,11 +2387,13 @@ def dataframe_from_int_dict(data, frame_template): if not arg2.columns.is_unique: raise ValueError("'arg2' columns are not unique") with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) X, Y = arg1.align(arg2, join='outer') X = X + 0 * Y Y = Y + 0 * X with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) res_columns = arg1.columns.union(arg2.columns) for col in res_columns: if col in X and col in Y: diff --git a/pandas/io/common.py b/pandas/io/common.py index 69cb9ed46419c..405911eda7e9e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -386,6 +386,8 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, # ZIP Compression elif compression == 'zip': zf = BytesZipFile(path_or_buf, mode) + # Ensure the container is closed as well. 
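+        # (Sketch of the caller contract this relies on: everything
+        # appended to ``handles`` is closed by the callers of
+        # ``_get_handle`` when they finish with ``f``, so the ZipFile
+        # container itself is released, not just the member file.)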
+ handles.append(zf) if zf.mode == 'w': f = zf elif zf.mode == 'r': diff --git a/pandas/io/html.py b/pandas/io/html.py index cca27db00f48d..04534ff591a2c 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -6,7 +6,6 @@ import os import re import numbers -import collections from distutils.version import LooseVersion @@ -14,6 +13,7 @@ from pandas.errors import EmptyDataError from pandas.io.common import _is_url, urlopen, _validate_header_arg from pandas.io.parsers import TextParser +from pandas import compat from pandas.compat import (lrange, lmap, u, string_types, iteritems, raise_with_traceback, binary_type) from pandas import Series @@ -859,7 +859,7 @@ def _validate_flavor(flavor): flavor = 'lxml', 'bs4' elif isinstance(flavor, string_types): flavor = flavor, - elif isinstance(flavor, collections.Iterable): + elif isinstance(flavor, compat.Iterable): if not all(isinstance(flav, string_types) for flav in flavor): raise TypeError('Object of type {typ!r} is not an iterable of ' 'strings' diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 6738daec9397c..9c219d7fd6997 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -160,7 +160,8 @@ def try_read(path, encoding=None): # GH 6899 try: with warnings.catch_warnings(record=True): - # We want to silencce any warnings about, e.g. moved modules. + # We want to silence any warnings about, e.g. moved modules. + warnings.simplefilter("ignore", Warning) return read_wrapper(lambda f: pkl.load(f)) except Exception: # reg/patched pickle diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 199700b304a4e..4033d46e161ad 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- import sys -from warnings import catch_warnings import pytest import pandas as pd @@ -175,23 +174,23 @@ def test_get_store(self): class TestParser(object): + @pytest.mark.filterwarnings("ignore") def test_deprecation_access_func(self): - with catch_warnings(record=True): - pd.parser.na_values + pd.parser.na_values class TestLib(object): + @pytest.mark.filterwarnings("ignore") def test_deprecation_access_func(self): - with catch_warnings(record=True): - pd.lib.infer_dtype('foo') + pd.lib.infer_dtype('foo') class TestTSLib(object): + @pytest.mark.filterwarnings("ignore") def test_deprecation_access_func(self): - with catch_warnings(record=True): - pd.tslib.Timestamp('20160101') + pd.tslib.Timestamp('20160101') class TestTypes(object): diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bd4891326c751..ed80c1414dbaa 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -1,10 +1,7 @@ # -*- coding: utf-8 -*- - +import sys import pytest -from warnings import catch_warnings - -import pandas from pandas.api import types from pandas.util import testing as tm @@ -59,7 +56,13 @@ def test_deprecated_from_api_types(self): def test_moved_infer_dtype(): + # del from sys.modules to ensure we try to freshly load. + # if this was imported from another test previously, we would + # not see the warning, since the import is otherwise cached. 
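+    # (sys.modules is Python's import cache: a module body, and any
+    # module-level FutureWarning it emits, runs only on the first
+    # import, so the cached entry has to be dropped to re-trigger it.)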
+ sys.modules.pop("pandas.lib", None) + + with tm.assert_produces_warning(FutureWarning): + import pandas.lib - with catch_warnings(record=True): e = pandas.lib.infer_dtype('foo') assert e is not None diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b19cc61a2999e..36bb0aca066fb 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -1803,6 +1803,10 @@ def test_dt64_with_DateOffsets(klass, normalize, cls_and_kwargs): offset_cls = getattr(pd.offsets, cls_name) with warnings.catch_warnings(record=True): + # pandas.errors.PerformanceWarning: Non-vectorized DateOffset being + # applied to Series or DatetimeIndex + # we aren't testing that here, so ignore. + warnings.simplefilter("ignore", PerformanceWarning) for n in [0, 5]: if (cls_name in ['WeekOfMonth', 'LastWeekOfMonth', 'FY5253Quarter', 'FY5253'] and n == 0): diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index fcfc3994a88c8..0449212713048 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -4,7 +4,6 @@ # Specifically for numeric dtypes from decimal import Decimal import operator -from collections import Iterable import pytest import numpy as np @@ -12,7 +11,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas.compat import PY3 +from pandas.compat import PY3, Iterable from pandas.core import ops from pandas import Timedelta, Series, Index, TimedeltaIndex diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index 118b05d16ab09..eef8646e4d6d2 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1,5 +1,4 @@ import warnings -from warnings import catch_warnings import operator from itertools import product @@ -924,12 +923,18 @@ def testit(r_idx_type, c_idx_type, index_name): # only test dt with dt, otherwise weird joins result args = product(['i', 'u', 's'], ['i', 'u', 's'], ('index', 'columns')) with warnings.catch_warnings(record=True): + # avoid warning about comparing strings and ints + warnings.simplefilter("ignore", RuntimeWarning) + for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) # dt with dt args = product(['dt'], ['dt'], ('index', 'columns')) with warnings.catch_warnings(record=True): + # avoid warning about comparing strings and ints + warnings.simplefilter("ignore", RuntimeWarning) + for r_idx_type, c_idx_type, index_name in args: testit(r_idx_type, c_idx_type, index_name) @@ -1112,13 +1117,13 @@ def test_bool_ops_with_constants(self): exp = eval(ex) assert res == exp + @pytest.mark.filterwarnings("ignore::FutureWarning") def test_panel_fails(self): - with catch_warnings(record=True): - x = Panel(randn(3, 4, 5)) - y = Series(randn(10)) - with pytest.raises(NotImplementedError): - self.eval('x + y', - local_dict={'x': x, 'y': y}) + x = Panel(randn(3, 4, 5)) + y = Series(randn(10)) + with pytest.raises(NotImplementedError): + self.eval('x + y', + local_dict={'x': x, 'y': y}) def test_4d_ndarray_fails(self): x = randn(3, 4, 5, 6) @@ -1382,6 +1387,7 @@ def test_query_inplace(self): @pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)]) + @pytest.mark.filterwarnings("ignore::FutureWarning") def test_cannot_item_assign(self, invalid_target): msg = "Cannot assign expression output to target" expression = "a = 1 + 2" diff --git a/pandas/tests/dtypes/test_generic.py 
b/pandas/tests/dtypes/test_generic.py index 53f92b98f022e..38d1143f3838b 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import numpy as np import pandas as pd from pandas.core.dtypes import generic as gt @@ -35,6 +35,7 @@ def test_abc_types(self): assert isinstance(pd.Series([1, 2, 3]), gt.ABCSeries) assert isinstance(self.df, gt.ABCDataFrame) with catch_warnings(record=True): + simplefilter('ignore', FutureWarning) assert isinstance(self.df.to_panel(), gt.ABCPanel) assert isinstance(self.sparse_series, gt.ABCSparseSeries) assert isinstance(self.sparse_array, gt.ABCSparseArray) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index dc330666b4b6c..76cd6aabb93ae 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -5,7 +5,7 @@ related to inference and not otherwise tested in types/test_common.py """ -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import collections import re from datetime import datetime, date, timedelta, time @@ -20,6 +20,7 @@ DatetimeIndex, TimedeltaIndex, Timestamp, Panel, Period, Categorical, isna, Interval, DateOffset) +from pandas import compat from pandas.compat import u, PY2, StringIO, lrange from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( @@ -226,7 +227,7 @@ class OldStyleClass(): pass c = OldStyleClass() - assert not isinstance(c, collections.Hashable) + assert not isinstance(c, compat.Hashable) assert inference.is_hashable(c) hash(c) # this will not raise @@ -1158,6 +1159,7 @@ def test_is_scalar_numpy_zerodim_arrays(self): assert not is_scalar(zerodim) assert is_scalar(lib.item_from_zerodim(zerodim)) + @pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_is_scalar_numpy_arrays(self): assert not is_scalar(np.array([])) assert not is_scalar(np.array([[]])) @@ -1176,6 +1178,7 @@ def test_is_scalar_pandas_containers(self): assert not is_scalar(DataFrame()) assert not is_scalar(DataFrame([[1]])) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) assert not is_scalar(Panel()) assert not is_scalar(Panel([[[1]]])) assert not is_scalar(Index([])) @@ -1210,6 +1213,7 @@ def test_nan_to_nat_conversions(): @td.skip_if_no_scipy +@pytest.mark.filterwarnings("ignore::PendingDeprecationWarning") def test_is_scipy_sparse(spmatrix): # noqa: F811 assert is_scipy_sparse(spmatrix([[0, 1]])) assert not is_scipy_sparse(np.array([1])) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index ca9a2dc81fcc6..8f82db69a9213 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pytest -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import numpy as np from datetime import datetime from pandas.util import testing as tm @@ -94,6 +94,7 @@ def test_isna_isnull(self, isna_f): # panel with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())]: result = isna_f(p) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 02b7c9527769f..8d1f1cadcc23f 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,3 
+1,5 @@ +import warnings + import numpy as np import pandas as pd @@ -67,7 +69,12 @@ def test_check_dtype(self, data): expected = pd.Series([True, True, False, False], index=list('ABCD')) - result = df.dtypes == str(dtype) + # XXX: This should probably be *fixed* not ignored. + # See libops.scalar_compare + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + result = df.dtypes == str(dtype) + self.assert_series_equal(result, expected) expected = pd.Series([True, True, False, False], diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 980c245d55711..6ce0d63eb63ec 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -17,12 +17,13 @@ import numpy as np +from pandas import compat from pandas.core.dtypes.base import ExtensionDtype from pandas.core.arrays import ExtensionArray class JSONDtype(ExtensionDtype): - type = collections.Mapping + type = compat.Mapping name = 'json' try: na_value = collections.UserDict() @@ -79,7 +80,7 @@ def __getitem__(self, item): return self.data[item] elif isinstance(item, np.ndarray) and item.dtype == 'bool': return self._from_sequence([x for x, m in zip(self, item) if m]) - elif isinstance(item, collections.Iterable): + elif isinstance(item, compat.Iterable): # fancy indexing return type(self)([self.data[i] for i in item]) else: @@ -91,7 +92,7 @@ def __setitem__(self, key, value): self.data[key] = value else: if not isinstance(value, (type(self), - collections.Sequence)): + compat.Sequence)): # broadcast value value = itertools.cycle([value]) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index f06c8336373ca..52a52a1fd8752 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -116,8 +116,8 @@ def test_corr_int_and_boolean(self): 'a', 'b'], columns=['a', 'b']) for meth in ['pearson', 'kendall', 'spearman']: - # RuntimeWarning with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) result = df.corr(meth) tm.assert_frame_equal(result, expected) @@ -549,6 +549,8 @@ def test_mean(self): def test_product(self): self._check_stat_op('product', np.prod) + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") def test_median(self): def wrapper(x): if isna(x).any(): @@ -559,6 +561,7 @@ def wrapper(x): def test_min(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) self._check_stat_op('min', np.min, check_dates=True) self._check_stat_op('min', np.min, frame=self.intframe) @@ -610,6 +613,7 @@ def test_cummax(self): def test_max(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) self._check_stat_op('max', np.max, check_dates=True) self._check_stat_op('max', np.max, frame=self.intframe) @@ -1123,6 +1127,8 @@ def test_stats_mixed_type(self): self.mixed_frame.mean(1) self.mixed_frame.skew(1) + # TODO: Ensure warning isn't emitted in the first place + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") def test_median_corner(self): def wrapper(x): if isna(x).any(): diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 1452e1ab8d98d..7b71240a34b5c 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -108,9 +108,9 @@ def test_apply_with_reduce_empty(self): assert x == [] def test_apply_deprecate_reduce(self): 
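+        # 'reduce' is the keyword deprecated in favour of 'result_type',
+        # so calling with reduce=True should raise a FutureWarning,
+        # which this test now asserts explicitly.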
- with warnings.catch_warnings(record=True): - x = [] - self.empty.apply(x.append, axis=1, result_type='reduce') + x = [] + with tm.assert_produces_warning(FutureWarning): + self.empty.apply(x.append, axis=1, reduce=True) def test_apply_standard_nonunique(self): df = DataFrame( @@ -261,6 +261,7 @@ def test_apply_empty_infer_type(self): def _check(df, f): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) test_res = f(np.array([], dtype='f8')) is_reduction = not isinstance(test_res, np.ndarray) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6c84beb64e196..2f1c9e05a01b0 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -916,9 +916,8 @@ def test_constructor_list_of_lists(self): def test_constructor_sequence_like(self): # GH 3783 # collections.Squence like - import collections - class DummyContainer(collections.Sequence): + class DummyContainer(compat.Sequence): def __init__(self, lst): self._lst = lst diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 2472022b862bc..a0e23d256c25b 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -110,9 +110,8 @@ def test_to_records_with_multindex(self): def test_to_records_with_Mapping_type(self): import email from email.parser import Parser - import collections - collections.Mapping.register(email.message.Message) + compat.Mapping.register(email.message.Message) headers = Parser().parsestr('From: \n' 'To: \n' diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 96b2e98dd7e8d..2b93af357481a 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import print_function -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter from datetime import datetime, date, timedelta, time @@ -364,6 +364,7 @@ def test_getitem_ix_mixed_integer(self): assert_frame_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[[1, 10]] expected = df.ix[Index([1, 10], dtype=object)] assert_frame_equal(result, expected) @@ -383,37 +384,45 @@ def test_getitem_ix_mixed_integer(self): def test_getitem_setitem_ix_negative_integers(self): with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = self.frame.ix[:, -1] assert_series_equal(result, self.frame['D']) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = self.frame.ix[:, [-1]] assert_frame_equal(result, self.frame[['D']]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = self.frame.ix[:, [-1, -2]] assert_frame_equal(result, self.frame[['D', 'C']]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) self.frame.ix[:, [-1]] = 0 assert (self.frame['D'] == 0).all() df = DataFrame(np.random.randn(8, 4)) # ix does label-based indexing when having an integer index with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) with pytest.raises(KeyError): df.ix[[-1]] with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) with pytest.raises(KeyError): df.ix[:, [-1]] # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) 
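+            # (.ix is deprecated and emits a DeprecationWarning on every
+            # access; the filter above keeps these legacy assertions quiet.)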
a.ix[-1] = a.ix[-2] with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_series_equal(a.ix[-1], a.ix[-2], check_names=False) assert a.ix[-1].name == 'T' assert a.ix[-2].name == 'S' @@ -790,16 +799,19 @@ def test_getitem_fancy_2d(self): f = self.frame with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(f.ix[:, ['B', 'A']], f.reindex(columns=['B', 'A'])) subidx = self.frame.index[[5, 4, 1]] with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(f.ix[subidx, ['B', 'A']], f.reindex(index=subidx, columns=['B', 'A'])) # slicing rows, etc. with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(f.ix[5:10], f[5:10]) assert_frame_equal(f.ix[5:10, :], f[5:10]) assert_frame_equal(f.ix[:5, ['A', 'B']], @@ -808,22 +820,26 @@ def test_getitem_fancy_2d(self): # slice rows with labels, inclusive! with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) expected = f.ix[5:11] result = f.ix[f.index[5]:f.index[10]] assert_frame_equal(expected, result) # slice columns with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(f.ix[:, :2], f.reindex(columns=['A', 'B'])) # get view with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) exp = f.copy() f.ix[5:10].values[:] = 5 exp.values[5:10] = 5 assert_frame_equal(f, exp) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) pytest.raises(ValueError, f.ix.__getitem__, f > 0.5) def test_slice_floats(self): @@ -879,6 +895,7 @@ def test_setitem_fancy_2d(self): expected = frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[:, ['B', 'A']] = 1 expected['B'] = 1. expected['A'] = 1. @@ -894,6 +911,7 @@ def test_setitem_fancy_2d(self): values = randn(3, 2) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[subidx, ['B', 'A']] = values frame2.ix[[5, 4, 1], ['B', 'A']] = values @@ -907,12 +925,14 @@ def test_setitem_fancy_2d(self): frame = self.frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) expected1 = self.frame.copy() frame.ix[5:10] = 1. expected1.values[5:10] = 1. assert_frame_equal(frame, expected1) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) expected2 = self.frame.copy() arr = randn(5, len(frame.columns)) frame.ix[5:10] = arr @@ -921,6 +941,7 @@ def test_setitem_fancy_2d(self): # case 4 with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame = self.frame.copy() frame.ix[5:10, :] = 1. assert_frame_equal(frame, expected1) @@ -929,6 +950,7 @@ def test_setitem_fancy_2d(self): # case 5 with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame = self.frame.copy() frame2 = self.frame.copy() @@ -941,11 +963,13 @@ def test_setitem_fancy_2d(self): assert_frame_equal(frame, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame2.ix[:5, [0, 1]] = values assert_frame_equal(frame2, expected) # case 6: slice rows with labels, inclusive! 
with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame = self.frame.copy() expected = self.frame.copy() @@ -955,6 +979,7 @@ def test_setitem_fancy_2d(self): # case 7: slice columns with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame = self.frame.copy() frame2 = self.frame.copy() expected = self.frame.copy() @@ -997,6 +1022,7 @@ def test_fancy_setitem_int_labels(self): df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) tmp = df.copy() exp = df.copy() tmp.ix[[0, 2, 4]] = 5 @@ -1004,6 +1030,7 @@ def test_fancy_setitem_int_labels(self): assert_frame_equal(tmp, exp) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) tmp = df.copy() exp = df.copy() tmp.ix[6] = 5 @@ -1011,6 +1038,7 @@ def test_fancy_setitem_int_labels(self): assert_frame_equal(tmp, exp) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) tmp = df.copy() exp = df.copy() tmp.ix[:, 2] = 5 @@ -1024,21 +1052,25 @@ def test_fancy_getitem_int_labels(self): df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[[4, 2, 0], [2, 0]] expected = df.reindex(index=[4, 2, 0], columns=[2, 0]) assert_frame_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[[4, 2, 0]] expected = df.reindex(index=[4, 2, 0]) assert_frame_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[4] expected = df.xs(4) assert_series_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[:, 3] expected = df[3] assert_series_equal(result, expected) @@ -1047,6 +1079,7 @@ def test_fancy_index_int_labels_exceptions(self): df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) # labels that aren't contained pytest.raises(KeyError, df.ix.__setitem__, @@ -1065,6 +1098,7 @@ def test_fancy_index_int_labels_exceptions(self): def test_setitem_fancy_mixed_2d(self): with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5 result = self.mixed_frame.ix[:5, ['C', 'B', 'A']] assert (result.values == 5).all() @@ -1078,6 +1112,7 @@ def test_setitem_fancy_mixed_2d(self): # #1432 with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = DataFrame({1: [1., 2., 3.], 2: [3, 4, 5]}) assert df._is_mixed_type @@ -1095,27 +1130,32 @@ def test_ix_align(self): df = df_orig.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df.ix[:, 0] = b assert_series_equal(df.ix[:, 0].reindex(b.index), b) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) dft = df_orig.T dft.ix[0, :] = b assert_series_equal(dft.ix[0, :].reindex(b.index), b) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = df_orig.copy() df.ix[:5, 0] = b s = df.ix[:5, 0] assert_series_equal(s, b.reindex(s.index)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) dft = df_orig.T dft.ix[0, :5] = b s = dft.ix[0, :5] assert_series_equal(s, b.reindex(s.index)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = 
df_orig.copy() idx = [0, 1, 3, 5] df.ix[idx, 0] = b @@ -1123,6 +1163,7 @@ def test_ix_align(self): assert_series_equal(s, b.reindex(s.index)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) dft = df_orig.T dft.ix[0, idx] = b s = dft.ix[0, idx] @@ -1134,6 +1175,7 @@ def test_ix_frame_align(self): df = df_orig.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df.ix[:3] = b out = b.ix[:3] assert_frame_equal(out, b) @@ -1141,12 +1183,14 @@ def test_ix_frame_align(self): b.sort_index(inplace=True) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = df_orig.copy() df.ix[[0, 1, 2]] = b out = df.ix[[0, 1, 2]].reindex(b.index) assert_frame_equal(out, b) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = df_orig.copy() df.ix[:3] = b out = df.ix[:3] @@ -1189,6 +1233,7 @@ def test_ix_multi_take_nonint_index(self): df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'], columns=['a', 'b']) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) rs = df.ix[[0], [0]] xp = df.reindex(['x'], columns=['a']) assert_frame_equal(rs, xp) @@ -1197,6 +1242,7 @@ def test_ix_multi_take_multiindex(self): df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'], columns=[['a', 'b'], ['1', '2']]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) rs = df.ix[[0], [0]] xp = df.reindex(['x'], columns=[('a', '1')]) assert_frame_equal(rs, xp) @@ -1206,14 +1252,17 @@ def test_ix_dup(self): df = DataFrame(np.random.randn(len(idx), 3), idx) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) sub = df.ix[:'d'] assert_frame_equal(sub, df) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) sub = df.ix['a':'c'] assert_frame_equal(sub, df.ix[0:4]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) sub = df.ix['b':'d'] assert_frame_equal(sub, df.ix[2:]) @@ -1222,48 +1271,57 @@ def test_getitem_fancy_1d(self): # return self if no slicing...for now with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert f.ix[:, :] is f # low dimensional slice with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) xs1 = f.ix[2, ['C', 'B', 'A']] xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A']) tm.assert_series_equal(xs1, xs2) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) ts1 = f.ix[5:10, 2] ts2 = f[f.columns[2]][5:10] tm.assert_series_equal(ts1, ts2) # positional xs with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) xs1 = f.ix[0] xs2 = f.xs(f.index[0]) tm.assert_series_equal(xs1, xs2) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) xs1 = f.ix[f.index[5]] xs2 = f.xs(f.index[5]) tm.assert_series_equal(xs1, xs2) # single column with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_series_equal(f.ix[:, 'A'], f['A']) # return view with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) exp = f.copy() exp.values[5] = 4 f.ix[5][:] = 4 tm.assert_frame_equal(exp, f) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) exp.values[:, 1] = 6 f.ix[:, 1][:] = 6 tm.assert_frame_equal(exp, f) # slice of mixed-frame with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) xs = self.mixed_frame.ix[5] exp = self.mixed_frame.xs(self.mixed_frame.index[5]) 
tm.assert_series_equal(xs, exp) @@ -1275,6 +1333,7 @@ def test_setitem_fancy_1d(self): expected = self.frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[2, ['C', 'B', 'A']] = [1., 2., 3.] expected['C'][2] = 1. expected['B'][2] = 2. @@ -1282,6 +1341,7 @@ def test_setitem_fancy_1d(self): assert_frame_equal(frame, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame2 = self.frame.copy() frame2.ix[2, [3, 2, 1]] = [1., 2., 3.] assert_frame_equal(frame, expected) @@ -1291,12 +1351,14 @@ def test_setitem_fancy_1d(self): expected = self.frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) vals = randn(5) expected.values[5:10, 2] = vals frame.ix[5:10, 2] = vals assert_frame_equal(frame, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame2 = self.frame.copy() frame2.ix[5:10, 'B'] = vals assert_frame_equal(frame, expected) @@ -1306,11 +1368,13 @@ def test_setitem_fancy_1d(self): expected = self.frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[4] = 5. expected.values[4] = 5. assert_frame_equal(frame, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[frame.index[4]] = 6. expected.values[4] = 6. assert_frame_equal(frame, expected) @@ -1320,6 +1384,7 @@ def test_setitem_fancy_1d(self): expected = self.frame.copy() with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) frame.ix[:, 'A'] = 7. expected['A'] = 7. assert_frame_equal(frame, expected) @@ -1830,6 +1895,7 @@ def test_single_element_ix_dont_upcast(self): assert issubclass(self.frame['E'].dtype.type, (int, np.integer)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = self.frame.ix[self.frame.index[5], 'E'] assert is_integer(result) @@ -1841,6 +1907,7 @@ def test_single_element_ix_dont_upcast(self): df["b"] = 666 with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[0, "b"] assert is_integer(result) result = df.loc[0, "b"] @@ -1848,6 +1915,7 @@ def test_single_element_ix_dont_upcast(self): expected = Series([666], [0], name='b') with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[[0], "b"] assert_series_equal(result, expected) result = df.loc[[0], "b"] @@ -1919,12 +1987,14 @@ def test_iloc_duplicates(self): result = df.iloc[0] with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result2 = df.ix[0] assert isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.T.iloc[:, 0] result2 = df.T.ix[:, 0] assert isinstance(result, Series) @@ -1937,16 +2007,19 @@ def test_iloc_duplicates(self): index=[['i', 'i', 'j'], ['X', 'X', 'Y']]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) rs = df.iloc[0] xp = df.ix[0] assert_series_equal(rs, xp) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) rs = df.iloc[:, 0] xp = df.T.ix[0] assert_series_equal(rs, xp) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) rs = df.iloc[:, [0]] xp = df.ix[:, [0]] assert_frame_equal(rs, xp) @@ -2168,6 +2241,7 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) 
with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] @@ -2177,6 +2251,7 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[1:] assert_frame_equal(df.loc[0.2], expect) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:, 0] @@ -2187,6 +2262,7 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[1:-1] assert_frame_equal(df.loc[0.2], expect) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[1:-1, 0] @@ -2196,6 +2272,7 @@ def test_getitem_ix_float_duplicates(self): expect = df.iloc[[1, -1]] assert_frame_equal(df.loc[0.2], expect) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) assert_frame_equal(df.ix[0.2], expect) expect = df.iloc[[1, -1], 0] @@ -2411,6 +2488,7 @@ def test_index_namedtuple(self): df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[IndexType("foo", "bar")]["A"] assert result == 1 diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index da4424b1ae626..97c94e1134cc8 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -209,6 +209,8 @@ def _check_unary_op(op): @pytest.mark.parametrize('op,res', [('__eq__', False), ('__ne__', True)]) + # not sure what's correct here. + @pytest.mark.filterwarnings("ignore:elementwise:FutureWarning") def test_logical_typeerror_with_non_valid(self, op, res): # we are comparing floats vs a string result = getattr(self.frame, op)('foo') @@ -278,7 +280,9 @@ def test_pos_numeric(self, df): assert_series_equal(+df['a'], df['a']) @pytest.mark.parametrize('df', [ - pd.DataFrame({'a': ['a', 'b']}), + # numpy changing behavior in the future + pytest.param(pd.DataFrame({'a': ['a', 'b']}), + marks=[pytest.mark.filterwarnings("ignore")]), pd.DataFrame({'a': np.array([-1, 2], dtype=object)}), pd.DataFrame({'a': [Decimal('-1.0'), Decimal('2.0')]}), ]) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 3be7ad12db883..3c6f0f0b2ab94 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -360,6 +360,7 @@ def to_series(mi, level): else: raise AssertionError("object must be a Series or Index") + @pytest.mark.filterwarnings("ignore::FutureWarning") def test_raise_on_panel_with_multiindex(self, parser, engine): p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 2f90d24f652ca..9f6735c7ba2bf 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -2,7 +2,7 @@ from __future__ import print_function -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter from datetime import datetime import itertools @@ -56,6 +56,7 @@ def test_pivot(self): with catch_warnings(record=True): # pivot multiple columns + simplefilter("ignore", FutureWarning) wp = tm.makePanel() lp = wp.to_frame() df = lp.reset_index() diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index caaa311e9ee96..07289d897be62 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -2,7 
+2,7 @@ from __future__ import print_function -from warnings import catch_warnings +import pytest import numpy as np from pandas import DataFrame, Series, MultiIndex, Panel, Index @@ -126,28 +126,28 @@ def test_indexing_sliced(self): tm.assert_series_equal(res, exp) assert isinstance(res, tm.SubclassedSeries) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_to_panel_expanddim(self): # GH 9762 - with catch_warnings(record=True): - class SubclassedFrame(DataFrame): - - @property - def _constructor_expanddim(self): - return SubclassedPanel - - class SubclassedPanel(Panel): - pass - - index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) - df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) - result = df.to_panel() - assert isinstance(result, SubclassedPanel) - expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], - items=['X', 'Y'], major_axis=[0], - minor_axis=[0, 1, 2], - dtype='int64') - tm.assert_panel_equal(result, expected) + class SubclassedFrame(DataFrame): + + @property + def _constructor_expanddim(self): + return SubclassedPanel + + class SubclassedPanel(Panel): + pass + + index = MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)]) + df = SubclassedFrame({'X': [1, 2, 3], 'Y': [4, 5, 6]}, index=index) + result = df.to_panel() + assert isinstance(result, SubclassedPanel) + expected = SubclassedPanel([[[1, 2, 3]], [[4, 5, 6]]], + items=['X', 'Y'], major_axis=[0], + minor_axis=[0, 1, 2], + dtype='int64') + tm.assert_panel_equal(result, expected) def test_subclass_attr_err_propagation(self): # GH 11808 diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 533bff0384ad9..1652835de8228 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -2,7 +2,7 @@ # pylint: disable-msg=E1101,W0612 from copy import copy, deepcopy -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import pytest import numpy as np @@ -638,6 +638,7 @@ def test_sample(sel): s.sample(n=3, weights='weight_column') with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4], minor_axis=[3, 4, 5]) with pytest.raises(ValueError): @@ -705,6 +706,7 @@ def test_sample(sel): # Test default axes with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], minor_axis=[1, 3, 5]) assert_panel_equal( @@ -743,6 +745,7 @@ def test_squeeze(self): for df in [tm.makeTimeDataFrame()]: tm.assert_frame_equal(df.squeeze(), df) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) for p in [tm.makePanel()]: tm.assert_panel_equal(p.squeeze(), p) @@ -751,6 +754,7 @@ def test_squeeze(self): tm.assert_series_equal(df.squeeze(), df['A']) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) p = tm.makePanel().reindex(items=['ItemA']) tm.assert_frame_equal(p.squeeze(), p['ItemA']) @@ -761,6 +765,7 @@ def test_squeeze(self): empty_series = Series([], name='five') empty_frame = DataFrame([empty_series]) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) empty_panel = Panel({'six': empty_frame}) [tm.assert_series_equal(empty_series, higher_dim.squeeze()) @@ -798,6 +803,7 @@ def test_transpose(self): tm.assert_frame_equal(df.transpose().transpose(), df) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) for p in [tm.makePanel()]: tm.assert_panel_equal(p.transpose(2, 0, 1) 
.transpose(1, 2, 0), p) @@ -820,6 +826,7 @@ def test_numpy_transpose(self): np.transpose, df, axes=1) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) p = tm.makePanel() tm.assert_panel_equal(np.transpose( np.transpose(p, axes=(2, 0, 1)), @@ -842,6 +849,7 @@ def test_take(self): indices = [-3, 2, 0, 1] with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) for p in [tm.makePanel()]: out = p.take(indices) expected = Panel(data=p.values.take(indices, axis=0), @@ -856,6 +864,7 @@ def test_take_invalid_kwargs(self): df = tm.makeTimeDataFrame() with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) p = tm.makePanel() for obj in (s, df, p): @@ -963,6 +972,7 @@ def test_equals(self): def test_describe_raises(self): with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) with pytest.raises(NotImplementedError): tm.makePanel().describe() @@ -996,6 +1006,7 @@ def test_pipe_tuple_error(self): def test_pipe_panel(self): with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) f = lambda x, y: x + y result = wp.pipe(f, 2) diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py index 49cb773a1bd10..fe80b2af5ea63 100644 --- a/pandas/tests/generic/test_panel.py +++ b/pandas/tests/generic/test_panel.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=E1101,W0612 -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter from pandas import Panel from pandas.util.testing import (assert_panel_equal, @@ -21,6 +21,7 @@ def test_to_xarray(self): from xarray import DataArray with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) p = tm.makePanel() result = p.to_xarray() @@ -51,6 +52,7 @@ def f(): def tester(self): f = getattr(super(TestPanel, self), t) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) f() return tester diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 48a45e93e1e8e..d8a545b323674 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -25,7 +25,12 @@ 'var', 'sem', 'mean', - 'median', + pytest.param('median', + # ignore mean of empty slice + # and all-NaN + marks=[pytest.mark.filterwarnings( + "ignore::RuntimeWarning" + )]), 'prod', 'min', 'max', diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 9affd0241d028..483f814bc8383 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3,7 +3,6 @@ import pytest -from warnings import catch_warnings from datetime import datetime from decimal import Decimal @@ -508,30 +507,30 @@ def test_frame_multi_key_function_list(): @pytest.mark.parametrize('op', [lambda x: x.sum(), lambda x: x.mean()]) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_groupby_multiple_columns(df, op): data = df grouped = data.groupby(['A', 'B']) - with catch_warnings(record=True): - result1 = op(grouped) - - expected = defaultdict(dict) - for n1, gp1 in data.groupby('A'): - for n2, gp2 in gp1.groupby('B'): - expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) - expected = {k: DataFrame(v) - for k, v in compat.iteritems(expected)} - expected = Panel.fromDict(expected).swapaxes(0, 1) - expected.major_axis.name, expected.minor_axis.name = 'A', 'B' - - # a little bit crude - for col in ['C', 'D']: - result_col = 
op(grouped[col]) - exp = expected[col] - pivoted = result1[col].unstack() - pivoted2 = result_col.unstack() - assert_frame_equal(pivoted.reindex_like(exp), exp) - assert_frame_equal(pivoted2.reindex_like(exp), exp) + result1 = op(grouped) + + expected = defaultdict(dict) + for n1, gp1 in data.groupby('A'): + for n2, gp2 in gp1.groupby('B'): + expected[n1][n2] = op(gp2.loc[:, ['C', 'D']]) + expected = {k: DataFrame(v) + for k, v in compat.iteritems(expected)} + expected = Panel.fromDict(expected).swapaxes(0, 1) + expected.major_axis.name, expected.minor_axis.name = 'A', 'B' + + # a little bit crude + for col in ['C', 'D']: + result_col = op(grouped[col]) + exp = expected[col] + pivoted = result1[col].unstack() + pivoted2 = result_col.unstack() + assert_frame_equal(pivoted.reindex_like(exp), exp) + assert_frame_equal(pivoted2.reindex_like(exp), exp) # test single series works the same result = data['C'].groupby([data['A'], data['B']]).mean() @@ -1032,6 +1031,8 @@ def test_groupby_mixed_type_columns(): tm.assert_frame_equal(result, expected) +# TODO: Ensure warning isn't emitted in the first place +@pytest.mark.filterwarnings("ignore:Mean of:RuntimeWarning") def test_cython_grouper_series_bug_noncontig(): arr = np.empty((100, 100)) arr.fill(np.nan) @@ -1181,11 +1182,11 @@ def test_groupby_nat_exclude(): pytest.raises(KeyError, grouped.get_group, pd.NaT) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_sparse_friendly(df): sdf = df[['C', 'D']].to_sparse() - with catch_warnings(record=True): - panel = tm.makePanel() - tm.add_nans(panel) + panel = tm.makePanel() + tm.add_nans(panel) def _check_work(gp): gp.mean() @@ -1201,29 +1202,29 @@ def _check_work(gp): # _check_work(panel.groupby(lambda x: x.month, axis=1)) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_panel_groupby(): - with catch_warnings(record=True): - panel = tm.makePanel() - tm.add_nans(panel) - grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, - axis='items') - agged = grouped.mean() - agged2 = grouped.agg(lambda x: x.mean('items')) + panel = tm.makePanel() + tm.add_nans(panel) + grouped = panel.groupby({'ItemA': 0, 'ItemB': 0, 'ItemC': 1}, + axis='items') + agged = grouped.mean() + agged2 = grouped.agg(lambda x: x.mean('items')) - tm.assert_panel_equal(agged, agged2) + tm.assert_panel_equal(agged, agged2) - tm.assert_index_equal(agged.items, Index([0, 1])) + tm.assert_index_equal(agged.items, Index([0, 1])) - grouped = panel.groupby(lambda x: x.month, axis='major') - agged = grouped.mean() + grouped = panel.groupby(lambda x: x.month, axis='major') + agged = grouped.mean() - exp = Index(sorted(list(set(panel.major_axis.month)))) - tm.assert_index_equal(agged.major_axis, exp) + exp = Index(sorted(list(set(panel.major_axis.month)))) + tm.assert_index_equal(agged.major_axis, exp) - grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, - axis='minor') - agged = grouped.mean() - tm.assert_index_equal(agged.minor_axis, Index([0, 1])) + grouped = panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, + axis='minor') + agged = grouped.mean() + tm.assert_index_equal(agged.minor_axis, Index([0, 1])) def test_groupby_2d_malformed(): diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 737e8a805f3ce..e7c0881b11871 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -4,7 +4,6 @@ import pytest -from warnings import catch_warnings from pandas import (date_range, Timestamp, Index, MultiIndex, DataFrame, 
Series, CategoricalIndex) from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -557,15 +556,15 @@ def test_list_grouper_with_nat(self): class TestGetGroup(): + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_get_group(self): - with catch_warnings(record=True): - wp = tm.makePanel() - grouped = wp.groupby(lambda x: x.month, axis='major') + wp = tm.makePanel() + grouped = wp.groupby(lambda x: x.month, axis='major') - gp = grouped.get_group(1) - expected = wp.reindex( - major=[x for x in wp.major_axis if x.month == 1]) - assert_panel_equal(gp, expected) + gp = grouped.get_group(1) + expected = wp.reindex( + major=[x for x in wp.major_axis if x.month == 1]) + assert_panel_equal(gp, expected) # GH 5267 # be datelike friendly @@ -743,18 +742,18 @@ def test_multi_iter_frame(self, three_group): for key, group in grouped: pass + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_multi_iter_panel(self): - with catch_warnings(record=True): - wp = tm.makePanel() - grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], - axis=1) - - for (month, wd), group in grouped: - exp_axis = [x - for x in wp.major_axis - if x.month == month and x.weekday() == wd] - expected = wp.reindex(major=exp_axis) - assert_panel_equal(group, expected) + wp = tm.makePanel() + grouped = wp.groupby([lambda x: x.month, lambda x: x.weekday()], + axis=1) + + for (month, wd), group in grouped: + exp_axis = [x + for x in wp.major_axis + if x.month == month and x.weekday() == wd] + expected = wp.reindex(major=exp_axis) + assert_panel_equal(group, expected) def test_dictify(self, df): dict(iter(df.groupby('A'))) diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 3afc278f9bc93..ae033f7b3f251 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -133,11 +133,15 @@ def df_letters(): return df -@pytest.mark.parametrize( - "obj, whitelist", zip((df_letters(), df_letters().floats), - (df_whitelist, s_whitelist))) -def test_groupby_whitelist(df_letters, obj, whitelist): +@pytest.mark.parametrize("whitelist", [df_whitelist, s_whitelist]) +def test_groupby_whitelist(df_letters, whitelist): df = df_letters + if whitelist == df_whitelist: + # dataframe + obj = df_letters + else: + obj = df_letters['floats'] + gb = obj.groupby(df.letters) assert set(whitelist) == set(gb._apply_whitelist) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index db3de0ceced0c..5ab32ee3863ae 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -1,4 +1,3 @@ -import warnings import sys import pytest @@ -201,7 +200,7 @@ def test_get_duplicates(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-03', '2000-01-03', '2000-01-04']) - with warnings.catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 result = idx.get_duplicates() diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 6ccd310f33bbd..24d99abaf44a8 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -534,8 +534,8 @@ def test_shift(self): assert shifted[0] == self.rng[0] assert shifted.freq == self.rng.freq - # PerformanceWarning with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", pd.errors.PerformanceWarning) rng = 
date_range(START, END, freq=BMonthEnd()) shifted = rng.shift(1, freq=CDay()) assert shifted[0] == rng[0] + CDay() diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bef9b73773f46..cc6db8f5854c8 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1175,6 +1175,8 @@ def test_dayfirst(self, cache): class TestGuessDatetimeFormat(object): @td.skip_if_not_us_locale + @pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning") + # https://github.com/pandas-dev/pandas/issues/21322 def test_guess_datetime_format_for_array(self): expected_format = '%Y-%m-%d %H:%M:%S.%f' dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format) @@ -1573,12 +1575,20 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache, dt_string, @pytest.fixture(params=['D', 's', 'ms', 'us', 'ns']) def units(request): + """Day and some time units. + + * D + * s + * ms + * us + * ns + """ return request.param @pytest.fixture def epoch_1960(): - # for origin as 1960-01-01 + """Timestamp at 1960-01-01.""" return Timestamp('1960-01-01') @@ -1587,12 +1597,25 @@ def units_from_epochs(): return list(range(5)) -@pytest.fixture(params=[epoch_1960(), - epoch_1960().to_pydatetime(), - epoch_1960().to_datetime64(), - str(epoch_1960())]) -def epochs(request): - return request.param +@pytest.fixture(params=['timestamp', 'pydatetime', 'datetime64', 'str_1960']) +def epochs(epoch_1960, request): + """Timestamp at 1960-01-01 in various forms. + + * pd.Timestamp + * datetime.datetime + * numpy.datetime64 + * str + """ + assert request.param in {'timestamp', 'pydatetime', 'datetime64', + "str_1960"} + if request.param == 'timestamp': + return epoch_1960 + elif request.param == 'pydatetime': + return epoch_1960.to_pydatetime() + elif request.param == "datetime64": + return epoch_1960.to_datetime64() + else: + return str(epoch_1960) @pytest.fixture diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 1cdf0ca6e013e..54a12137c9457 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- -import warnings from itertools import product import pytest @@ -241,7 +240,7 @@ def test_get_duplicates(): mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) assert not mi.has_duplicates - with warnings.catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 assert mi.get_duplicates().equals(MultiIndex.from_arrays([[], []])) @@ -257,7 +256,7 @@ def test_get_duplicates(): assert len(mi) == (n + 1) * (m + 1) assert not mi.has_duplicates - with warnings.catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 assert mi.get_duplicates().equals(MultiIndex.from_arrays( [[], []])) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index eab04419fe939..99a909849822b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -715,6 +715,8 @@ def test_empty_fancy_raises(self, attr): pytest.raises(IndexError, index.__getitem__, empty_farr) @pytest.mark.parametrize("itm", [101, 'no_int']) + # FutureWarning from non-tuple sequence of nd indexing + @pytest.mark.filterwarnings("ignore::FutureWarning") def test_getitem_error(self, indices, itm): with pytest.raises(IndexError): indices[itm] diff --git 
a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 2e257bb8a500a..d7bdd18f48523 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -334,7 +334,7 @@ def test_freq_setter_errors(self): idx.freq = '5D' # setting with a non-fixed frequency - msg = '<2 \* BusinessDays> is a non-fixed frequency' + msg = r'<2 \* BusinessDays> is a non-fixed frequency' with tm.assert_raises_regex(ValueError, msg): idx.freq = '2B' diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d7745ffd94cd9..c329d8d15d729 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -1,5 +1,3 @@ -import warnings - import pytest import numpy as np @@ -147,7 +145,7 @@ def test_get_duplicates(self): idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', '4day']) - with warnings.catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 result = idx.get_duplicates() diff --git a/pandas/tests/indexing/common.py b/pandas/tests/indexing/common.py index cbf1bdbce9574..127548bdaf106 100644 --- a/pandas/tests/indexing/common.py +++ b/pandas/tests/indexing/common.py @@ -2,6 +2,7 @@ import itertools from warnings import catch_warnings, filterwarnings +import pytest import numpy as np from pandas.compat import lrange @@ -25,6 +26,7 @@ def _axify(obj, key, axis): return tuple(axes) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class Base(object): """ indexing comprehensive base class """ @@ -49,22 +51,20 @@ def setup_method(self, method): self.frame_uints = DataFrame(np.random.randn(4, 4), index=UInt64Index(lrange(0, 8, 2)), columns=UInt64Index(lrange(0, 12, 3))) - with catch_warnings(record=True): - self.panel_uints = Panel(np.random.rand(4, 4, 4), - items=UInt64Index(lrange(0, 8, 2)), - major_axis=UInt64Index(lrange(0, 12, 3)), - minor_axis=UInt64Index(lrange(0, 16, 4))) + self.panel_uints = Panel(np.random.rand(4, 4, 4), + items=UInt64Index(lrange(0, 8, 2)), + major_axis=UInt64Index(lrange(0, 12, 3)), + minor_axis=UInt64Index(lrange(0, 16, 4))) self.series_floats = Series(np.random.rand(4), index=Float64Index(range(0, 8, 2))) self.frame_floats = DataFrame(np.random.randn(4, 4), index=Float64Index(range(0, 8, 2)), columns=Float64Index(range(0, 12, 3))) - with catch_warnings(record=True): - self.panel_floats = Panel(np.random.rand(4, 4, 4), - items=Float64Index(range(0, 8, 2)), - major_axis=Float64Index(range(0, 12, 3)), - minor_axis=Float64Index(range(0, 16, 4))) + self.panel_floats = Panel(np.random.rand(4, 4, 4), + items=Float64Index(range(0, 8, 2)), + major_axis=Float64Index(range(0, 12, 3)), + minor_axis=Float64Index(range(0, 16, 4))) m_idces = [MultiIndex.from_product([[1, 2], [3, 4]]), MultiIndex.from_product([[5, 6], [7, 8]]), @@ -75,35 +75,31 @@ def setup_method(self, method): self.frame_multi = DataFrame(np.random.randn(4, 4), index=m_idces[0], columns=m_idces[1]) - with catch_warnings(record=True): - self.panel_multi = Panel(np.random.rand(4, 4, 4), - items=m_idces[0], - major_axis=m_idces[1], - minor_axis=m_idces[2]) + self.panel_multi = Panel(np.random.rand(4, 4, 4), + items=m_idces[0], + major_axis=m_idces[1], + minor_axis=m_idces[2]) self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) - with 
catch_warnings(record=True): - self.panel_labels = Panel(np.random.randn(4, 4, 4), - items=list('abcd'), - major_axis=list('ABCD'), - minor_axis=list('ZYXW')) + self.panel_labels = Panel(np.random.randn(4, 4, 4), + items=list('abcd'), + major_axis=list('ABCD'), + minor_axis=list('ZYXW')) self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, 'null', 8]) - with catch_warnings(record=True): - self.panel_mixed = Panel(np.random.randn(4, 4, 4), - items=[2, 4, 'null', 8]) + self.panel_mixed = Panel(np.random.randn(4, 4, 4), + items=[2, 4, 'null', 8]) self.series_ts = Series(np.random.randn(4), index=date_range('20130101', periods=4)) self.frame_ts = DataFrame(np.random.randn(4, 4), index=date_range('20130101', periods=4)) - with catch_warnings(record=True): - self.panel_ts = Panel(np.random.randn(4, 4, 4), - items=date_range('20130101', periods=4)) + self.panel_ts = Panel(np.random.randn(4, 4, 4), + items=date_range('20130101', periods=4)) dates_rev = (date_range('20130101', periods=4) .sort_values(ascending=False)) @@ -111,14 +107,12 @@ def setup_method(self, method): index=dates_rev) self.frame_ts_rev = DataFrame(np.random.randn(4, 4), index=dates_rev) - with catch_warnings(record=True): - self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), - items=dates_rev) + self.panel_ts_rev = Panel(np.random.randn(4, 4, 4), + items=dates_rev) self.frame_empty = DataFrame({}) self.series_empty = Series({}) - with catch_warnings(record=True): - self.panel_empty = Panel({}) + self.panel_empty = Panel({}) # form agglomerates for o in self._objs: @@ -175,6 +169,7 @@ def get_value(self, f, i, values=False): # v = v.__getitem__(a) # return v with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) return f.ix[i] def check_values(self, f, func, values=False): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 0e396a3248e3f..a7e55cdf9936e 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -1,5 +1,3 @@ -from warnings import catch_warnings - import pytest import numpy as np @@ -366,22 +364,22 @@ def check(result, expected): result4 = df['A'].iloc[2] check(result4, expected) + @pytest.mark.filterwarnings("ignore::DeprecationWarning") + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_cache_updating(self): # GH 4939, make sure to update the cache on setitem df = tm.makeDataFrame() df['A'] # cache series - with catch_warnings(record=True): - df.ix["Hello Friend"] = df.ix[0] + df.ix["Hello Friend"] = df.ix[0] assert "Hello Friend" in df['A'].index assert "Hello Friend" in df['B'].index - with catch_warnings(record=True): - panel = tm.makePanel() - panel.ix[0] # get first item into cache - panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - assert "A+1" in panel.ix[0].columns - assert "A+1" in panel.ix[1].columns + panel = tm.makePanel() + panel.ix[0] # get first item into cache + panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 + assert "A+1" in panel.ix[0].columns + assert "A+1" in panel.ix[1].columns # 5216 # make sure that we don't try to set a dead cache diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index ba1f1de21871f..3773b432135b9 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -10,6 +10,9 @@ import pandas.util.testing as tm +ignore_ix = 
pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") + + class TestFloatIndexers(object): def check(self, result, original, indexer, getitem): @@ -57,6 +60,7 @@ def f(): s.iloc[3.0] = 0 pytest.raises(TypeError, f) + @ignore_ix def test_scalar_non_numeric(self): # GH 4892 @@ -145,6 +149,7 @@ def f(): s[3] pytest.raises(TypeError, lambda: s[3.0]) + @ignore_ix def test_scalar_with_mixed(self): s2 = Series([1, 2, 3], index=['a', 'b', 'c']) @@ -202,6 +207,7 @@ def f(): expected = 3 assert result == expected + @ignore_ix def test_scalar_integer(self): # test how scalar float indexers work on int indexes @@ -254,6 +260,7 @@ def compare(x, y): # coerce to equal int assert 3.0 in s + @ignore_ix def test_scalar_float(self): # scalar float indexers work on a float index @@ -269,8 +276,7 @@ def test_scalar_float(self): (lambda x: x, True)]: # getting - with catch_warnings(record=True): - result = idxr(s)[indexer] + result = idxr(s)[indexer] self.check(result, s, 3, getitem) # setting @@ -305,6 +311,7 @@ def g(): s2.iloc[3.0] = 0 pytest.raises(TypeError, g) + @ignore_ix def test_slice_non_numeric(self): # GH 4892 @@ -356,6 +363,7 @@ def f(): idxr(s)[l] = 0 pytest.raises(TypeError, f) + @ignore_ix def test_slice_integer(self): # same as above, but for Integer based indexes @@ -483,6 +491,7 @@ def f(): pytest.raises(TypeError, f) + @ignore_ix def test_slice_integer_frame_getitem(self): # similar to above, but on the getitem dim (of a DataFrame) @@ -554,6 +563,7 @@ def f(): with catch_warnings(record=True): f(lambda x: x.ix) + @ignore_ix def test_slice_float(self): # same as above, but for floats diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 3dcfe6a68ad9f..538d9706d54d6 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -2,7 +2,7 @@ import pytest -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings, simplefilter import numpy as np import pandas as pd @@ -388,45 +388,53 @@ def test_iloc_getitem_frame(self): result = df.iloc[2] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) exp = df.ix[4] tm.assert_series_equal(result, exp) result = df.iloc[2, 2] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) exp = df.ix[4, 4] assert result == exp # slice result = df.iloc[4:8] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[8:14] tm.assert_frame_equal(result, expected) result = df.iloc[:, 2:3] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[:, 4:5] tm.assert_frame_equal(result, expected) # list of integers result = df.iloc[[0, 1, 3]] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[[0, 2, 6]] tm.assert_frame_equal(result, expected) result = df.iloc[[0, 1, 3], [0, 1]] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[[0, 2, 6], [0, 2]] tm.assert_frame_equal(result, expected) # neg indices result = df.iloc[[-1, 1, 3], [-1, 1]] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[[18, 2, 6], [6, 2]] tm.assert_frame_equal(result, expected) # dups indices result = df.iloc[[-1, -1, 1, 3], [-1, 1]] with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) expected = df.ix[[18, 18, 2, 6], [6, 2]] 
            tm.assert_frame_equal(result, expected)
@@ -434,6 +442,7 @@ def test_iloc_getitem_frame(self):
         s = Series(index=lrange(1, 5))
         result = df.iloc[s.index]
         with catch_warnings(record=True):
+            filterwarnings("ignore", "\\n.ix", DeprecationWarning)
             expected = df.ix[[2, 4, 6, 8]]
             tm.assert_frame_equal(result, expected)
@@ -609,6 +618,7 @@ def test_iloc_mask(self):
 
         # UserWarnings from reindex of a boolean mask
         with catch_warnings(record=True):
+            simplefilter("ignore", UserWarning)
             result = dict()
             for idx in [None, 'index', 'locs']:
                 mask = (df.nums > 2).values
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index f64c50699461f..33b7c1b8154c7 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -6,7 +6,7 @@
 import pytest
 import weakref
-from warnings import catch_warnings
+from warnings import catch_warnings, simplefilter
 from datetime import datetime
 
 from pandas.core.dtypes.common import (
@@ -419,11 +419,13 @@ def test_setitem_list(self):
 
         # ix with a list
         df = DataFrame(index=[0, 1], columns=[0])
         with catch_warnings(record=True):
+            simplefilter("ignore")
             df.ix[1, 0] = [1, 2, 3]
             df.ix[1, 0] = [1, 2]
 
         result = DataFrame(index=[0, 1], columns=[0])
         with catch_warnings(record=True):
+            simplefilter("ignore")
             result.ix[1, 0] = [1, 2]
         tm.assert_frame_equal(result, df)
 
@@ -447,11 +449,13 @@ def view(self):
 
         df = DataFrame(index=[0, 1], columns=[0])
         with catch_warnings(record=True):
+            simplefilter("ignore")
             df.ix[1, 0] = TO(1)
             df.ix[1, 0] = TO(2)
 
         result = DataFrame(index=[0, 1], columns=[0])
         with catch_warnings(record=True):
+            simplefilter("ignore")
             result.ix[1, 0] = TO(2)
 
         tm.assert_frame_equal(result, df)
 
@@ -459,6 +463,7 @@ def view(self):
         # remains object dtype even after setting it back
         df = DataFrame(index=[0, 1], columns=[0])
         with catch_warnings(record=True):
+            simplefilter("ignore")
             df.ix[1, 0] = TO(1)
             df.ix[1, 0] = np.nan
         result = DataFrame(index=[0, 1], columns=[0])
@@ -629,6 +634,7 @@ def test_mixed_index_not_contains(self, index, val):
 
     def test_index_type_coercion(self):
 
         with catch_warnings(record=True):
+            simplefilter("ignore")
 
             # GH 11836
 
             # if we have an index type and set it with something that looks
@@ -760,16 +766,20 @@ def run_tests(df, rhs, right):
 
         left = df.copy()
         with catch_warnings(record=True):
+            # XXX: replace this blanket ignore with a finer-grained filter.
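+            # (the .ix setitem calls below emit DeprecationWarning; other
+            # warning types may surface here as well)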
+ simplefilter("ignore") left.ix[s, l] = rhs tm.assert_frame_equal(left, right) left = df.copy() with catch_warnings(record=True): + simplefilter("ignore") left.ix[i, j] = rhs tm.assert_frame_equal(left, right) left = df.copy() with catch_warnings(record=True): + simplefilter("ignore") left.ix[r, c] = rhs tm.assert_frame_equal(left, right) @@ -821,6 +831,7 @@ def test_slice_with_zero_step_raises(self): tm.assert_raises_regex(ValueError, 'slice step cannot be zero', lambda: s.loc[::0]) with catch_warnings(record=True): + simplefilter("ignore") tm.assert_raises_regex(ValueError, 'slice step cannot be zero', lambda: s.ix[::0]) @@ -839,11 +850,13 @@ def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) with catch_warnings(record=True): + simplefilter("ignore") df2 = df.ix[[], :] assert df2.loc[:, 'a'].dtype == np.int64 tm.assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) with catch_warnings(record=True): + simplefilter("ignore") tm.assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): @@ -917,6 +930,7 @@ def test_no_reference_cycle(self): for name in ('loc', 'iloc', 'at', 'iat'): getattr(df, name) with catch_warnings(record=True): + simplefilter("ignore") getattr(df, 'ix') wr = weakref.ref(df) del df diff --git a/pandas/tests/indexing/test_indexing_slow.py b/pandas/tests/indexing/test_indexing_slow.py index f4d581f450363..61e5fdd7b9562 100644 --- a/pandas/tests/indexing/test_indexing_slow.py +++ b/pandas/tests/indexing/test_indexing_slow.py @@ -12,6 +12,7 @@ class TestIndexingSlow(object): @pytest.mark.slow + @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") def test_multiindex_get_loc(self): # GH7724, GH2646 with warnings.catch_warnings(record=True): diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index c84576c984525..04d0e04b5651e 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -14,15 +14,17 @@ from pandas.errors import PerformanceWarning -class TestIX(object): +def test_ix_deprecation(): + # GH 15114 + + df = DataFrame({'A': [1, 2, 3]}) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + df.ix[1, 'A'] - def test_ix_deprecation(self): - # GH 15114 - df = DataFrame({'A': [1, 2, 3]}) - with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): - df.ix[1, 'A'] +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") +class TestIX(object): def test_ix_loc_setitem_consistency(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2e52154d7679b..9fa705f923c88 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -3,7 +3,7 @@ import itertools import pytest -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings import numpy as np import pandas as pd @@ -699,6 +699,7 @@ def test_loc_name(self): assert result == 'index_name' with catch_warnings(record=True): + filterwarnings("ignore", "\\n.ix", DeprecationWarning) result = df.ix[[0, 1]].index.name assert result == 'index_name' diff --git a/pandas/tests/indexing/test_multiindex.py b/pandas/tests/indexing/test_multiindex.py index d2c4c8f5e149b..9e66dfad3ddc7 100644 --- a/pandas/tests/indexing/test_multiindex.py +++ b/pandas/tests/indexing/test_multiindex.py @@ -9,6 +9,7 @@ from pandas.tests.indexing.common import _mklbl 
+@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") class TestMultiIndexBasic(object): def test_iloc_getitem_multiindex2(self): @@ -1232,101 +1233,99 @@ def f(): tm.assert_frame_equal(df, expected) +@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning') class TestMultiIndexPanel(object): def test_iloc_getitem_panel_multiindex(self): - with catch_warnings(record=True): + # GH 7199 + # Panel with multi-index + multi_index = MultiIndex.from_tuples([('ONE', 'one'), + ('TWO', 'two'), + ('THREE', 'three')], + names=['UPPER', 'lower']) + + simple_index = [x[0] for x in multi_index] + wd1 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=multi_index) + + wd2 = Panel(items=['First', 'Second'], + major_axis=['a', 'b', 'c', 'd'], + minor_axis=simple_index) + + expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] + result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG + tm.assert_frame_equal(result1, expected1) + + expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] + result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] + tm.assert_frame_equal(result2, expected2) + + expected1 = DataFrame(index=['a'], columns=multi_index, + dtype='float64') + result1 = wd1.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result1, expected1) + + expected2 = DataFrame(index=['a'], columns=simple_index, + dtype='float64') + result2 = wd2.iloc[0, [0], [0, 1, 2]] + tm.assert_frame_equal(result2, expected2) + + # GH 7516 + mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) + p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), + items=['a', 'b', 'c'], major_axis=mi, + minor_axis=['u', 'v', 'w']) + result = p.iloc[:, 1, 0] + expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') + tm.assert_series_equal(result, expected) - # GH 7199 - # Panel with multi-index - multi_index = MultiIndex.from_tuples([('ONE', 'one'), - ('TWO', 'two'), - ('THREE', 'three')], - names=['UPPER', 'lower']) - - simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], - major_axis=['a', 'b', 'c', 'd'], - minor_axis=multi_index) - - wd2 = Panel(items=['First', 'Second'], - major_axis=['a', 'b', 'c', 'd'], - minor_axis=simple_index) - - expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] - result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - tm.assert_frame_equal(result1, expected1) - - expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] - result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - tm.assert_frame_equal(result2, expected2) - - expected1 = DataFrame(index=['a'], columns=multi_index, - dtype='float64') - result1 = wd1.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result1, expected1) - - expected2 = DataFrame(index=['a'], columns=simple_index, - dtype='float64') - result2 = wd2.iloc[0, [0], [0, 1, 2]] - tm.assert_frame_equal(result2, expected2) - - # GH 7516 - mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) - p = Panel(np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), - items=['a', 'b', 'c'], major_axis=mi, - minor_axis=['u', 'v', 'w']) - result = p.iloc[:, 1, 0] - expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') - tm.assert_series_equal(result, expected) - - result = p.loc[:, (1, 'y'), 'u'] - tm.assert_series_equal(result, expected) + result = p.loc[:, (1, 'y'), 'u'] + tm.assert_series_equal(result, expected) def test_panel_setitem_with_multiindex(self): - with catch_warnings(record=True): - # 10360 - # failing with a 
multi-index - arr = np.array([[[1, 2, 3], [0, 0, 0]], - [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - - # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], - minor_axis=['X', 'Y', 'Z']) - p1 = Panel(0., **axes) - p1.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p1, expected) - - # multi-indexes - axes['items'] = MultiIndex.from_tuples( - [('A', 'a'), ('B', 'b')]) - p2 = Panel(0., **axes) - p2.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p2, expected) - - axes['major_axis'] = MultiIndex.from_tuples( - [('A', 1), ('A', 2)]) - p3 = Panel(0., **axes) - p3.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p3, expected) - - axes['minor_axis'] = MultiIndex.from_product( - [['X'], range(3)]) - p4 = Panel(0., **axes) - p4.iloc[0, 0, :] = [1, 2, 3] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p4, expected) - - arr = np.array( - [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], - dtype=np.float64) - p5 = Panel(0., **axes) - p5.iloc[0, :, 0] = [1, 2] - expected = Panel(arr, **axes) - tm.assert_panel_equal(p5, expected) + # 10360 + # failing with a multi-index + arr = np.array([[[1, 2, 3], [0, 0, 0]], + [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + + # reg index + axes = dict(items=['A', 'B'], major_axis=[0, 1], + minor_axis=['X', 'Y', 'Z']) + p1 = Panel(0., **axes) + p1.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p1, expected) + + # multi-indexes + axes['items'] = MultiIndex.from_tuples( + [('A', 'a'), ('B', 'b')]) + p2 = Panel(0., **axes) + p2.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p2, expected) + + axes['major_axis'] = MultiIndex.from_tuples( + [('A', 1), ('A', 2)]) + p3 = Panel(0., **axes) + p3.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p3, expected) + + axes['minor_axis'] = MultiIndex.from_product( + [['X'], range(3)]) + p4 = Panel(0., **axes) + p4.iloc[0, 0, :] = [1, 2, 3] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p4, expected) + + arr = np.array( + [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], + dtype=np.float64) + p5 = Panel(0., **axes) + p5.iloc[0, :, 0] = [1, 2] + expected = Panel(arr, **axes) + tm.assert_panel_equal(p5, expected) diff --git a/pandas/tests/indexing/test_panel.py b/pandas/tests/indexing/test_panel.py index 1085e2a61be48..2cd05b5779f30 100644 --- a/pandas/tests/indexing/test_panel.py +++ b/pandas/tests/indexing/test_panel.py @@ -6,6 +6,7 @@ from pandas import Panel, date_range, DataFrame +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestPanel(object): def test_iloc_getitem_panel(self): @@ -110,6 +111,7 @@ def test_iloc_panel_issue(self): assert p.iloc[1, :3, 1].shape == (3, ) assert p.iloc[:3, 1, 1].shape == (3, ) + @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") def test_panel_getitem(self): with catch_warnings(record=True): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 3c7a7f070805d..5910f462cb3df 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -16,6 +16,8 @@ class TestPartialSetting(object): + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") + @pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") def test_partial_setting(self): # GH2578, allow ix and friends to partially set @@ -404,6 +406,7 @@ def test_series_partial_set_with_name(self): result = 
ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) + @pytest.mark.filterwarnings("ignore:\\n.ix") def test_partial_set_invalid(self): # GH 4940 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 34f22513106ba..86251ad7529d5 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1285,7 +1285,7 @@ def test_deprecated_fastpath(): def test_validate_ndim(): values = np.array([1.0, 2.0]) placement = slice(2) - msg = "Wrong number of dimensions. values.ndim != ndim \[1 != 2\]" + msg = r"Wrong number of dimensions. values.ndim != ndim \[1 != 2\]" with tm.assert_raises_regex(ValueError, msg): make_block(values, placement, ndim=2) diff --git a/pandas/tests/io/formats/test_to_excel.py b/pandas/tests/io/formats/test_to_excel.py index 9fc16c43f5c1d..7d54f93c9831e 100644 --- a/pandas/tests/io/formats/test_to_excel.py +++ b/pandas/tests/io/formats/test_to_excel.py @@ -6,8 +6,8 @@ import pytest import pandas.util.testing as tm -from warnings import catch_warnings from pandas.io.formats.excel import CSSToExcelConverter +from pandas.io.formats.css import CSSWarning @pytest.mark.parametrize('css,expected', [ @@ -272,6 +272,6 @@ def test_css_to_excel_bad_colors(input_color): "patternType": "solid" } - with catch_warnings(record=True): + with tm.assert_produces_warning(CSSWarning): convert = CSSToExcelConverter() assert expected == convert(css) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index aa020ba4c0623..4ebf435f7d75f 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -35,7 +35,7 @@ """ from __future__ import print_function -from warnings import catch_warnings +from warnings import catch_warnings, filterwarnings from distutils.version import LooseVersion from pandas import (Series, DataFrame, Panel, SparseSeries, SparseDataFrame, @@ -187,6 +187,7 @@ def create_data(): ) with catch_warnings(record=True): + filterwarnings("ignore", "\\nPanel", FutureWarning) mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] diff --git a/pandas/tests/io/parser/compression.py b/pandas/tests/io/parser/compression.py index e4950af19ea95..5a28b6263f20f 100644 --- a/pandas/tests/io/parser/compression.py +++ b/pandas/tests/io/parser/compression.py @@ -30,9 +30,8 @@ def test_zip(self): expected = self.read_csv(self.csv1) with tm.ensure_clean('test_file.zip') as path: - tmp = zipfile.ZipFile(path, mode='w') - tmp.writestr('test_file', data) - tmp.close() + with zipfile.ZipFile(path, mode='w') as tmp: + tmp.writestr('test_file', data) result = self.read_csv(path, compression='zip') tm.assert_frame_equal(result, expected) @@ -47,10 +46,9 @@ def test_zip(self): with tm.ensure_clean('combined_zip.zip') as path: inner_file_names = ['test_file', 'second_file'] - tmp = zipfile.ZipFile(path, mode='w') - for file_name in inner_file_names: - tmp.writestr(file_name, data) - tmp.close() + with zipfile.ZipFile(path, mode='w') as tmp: + for file_name in inner_file_names: + tmp.writestr(file_name, data) tm.assert_raises_regex(ValueError, 'Multiple files', self.read_csv, path, compression='zip') @@ -60,8 +58,8 @@ def test_zip(self): compression='infer') with tm.ensure_clean() as path: - tmp = zipfile.ZipFile(path, mode='w') - tmp.close() + with zipfile.ZipFile(path, mode='w') as tmp: + pass 
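+                # nothing is written, so the archive is valid but empty,
+                # triggering the 'Zero files' error checked below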
tm.assert_raises_regex(ValueError, 'Zero files', self.read_csv, path, compression='zip') @@ -84,9 +82,8 @@ def test_other_compression(self, compress_type, compress_method, ext): expected = self.read_csv(self.csv1) with tm.ensure_clean() as path: - tmp = compress_method(path, mode='wb') - tmp.write(data) - tmp.close() + with compress_method(path, mode='wb') as tmp: + tmp.write(data) result = self.read_csv(path, compression=compress_type) tm.assert_frame_equal(result, expected) @@ -100,9 +97,8 @@ def test_other_compression(self, compress_type, compress_method, ext): tm.assert_frame_equal(result, expected) with tm.ensure_clean('test.{}'.format(ext)) as path: - tmp = compress_method(path, mode='wb') - tmp.write(data) - tmp.close() + with compress_method(path, mode='wb') as tmp: + tmp.write(data) result = self.read_csv(path, compression='infer') tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index f4b14241ed80e..705387188438f 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -9,6 +9,8 @@ import pytest +# https://github.com/cython/cython/issues/1720 +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestSAS7BDAT(object): @pytest.fixture(autouse=True) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 991b8ee508760..73e29e6eb9a6a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -44,6 +44,8 @@ def __fspath__(self): HERE = os.path.abspath(os.path.dirname(__file__)) +# https://github.com/cython/cython/issues/1720 +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestCommonIOCapabilities(object): data1 = """index,A,B,C,D foo,2,3,4,5 diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 1806ddd2bbcc6..b62a1e6c4933e 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -1,5 +1,6 @@ import os import warnings +import contextlib import pytest @@ -8,12 +9,15 @@ import pandas.util.testing as tm +@contextlib.contextmanager def catch_to_csv_depr(): # Catching warnings because Series.to_csv has # been deprecated. Remove this context when # Series.to_csv has been aligned. 
- return warnings.catch_warnings(record=True) + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", FutureWarning) + yield @pytest.mark.parametrize('obj', [ diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 6741645e466f3..a639556eb07d6 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -611,6 +611,8 @@ def test_read_from_s3_url(self, ext): tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow + # ignore warning from old xlrd + @pytest.mark.filterwarnings("ignore:This metho:PendingDeprecationWarning") def test_read_from_file_url(self, ext): # FILE @@ -2189,6 +2191,7 @@ def test_ExcelWriter_dispatch_raises(self): with tm.assert_raises_regex(ValueError, 'No engine'): ExcelWriter('nothing') + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_register_writer(self): # some awkward mocking to test out dispatch and such actually works called_save = [] diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 412e218f95c6f..ee45f8828d85e 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -91,6 +91,7 @@ def check_arbitrary(a, b): assert(a == b) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestPackers(object): def setup_method(self, method): @@ -105,6 +106,7 @@ def encode_decode(self, x, compress=None, **kwargs): return read_msgpack(p, **kwargs) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestAPI(TestPackers): def test_string_io(self): @@ -464,6 +466,7 @@ def test_basic(self): assert_categorical_equal(i, i_rec) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestNDFrame(TestPackers): def setup_method(self, method): @@ -486,10 +489,9 @@ def setup_method(self, method): 'int': DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)), 'mixed': DataFrame(data)} - with catch_warnings(record=True): - self.panel = { - 'float': Panel(dict(ItemA=self.frame['float'], - ItemB=self.frame['float'] + 1))} + self.panel = { + 'float': Panel(dict(ItemA=self.frame['float'], + ItemB=self.frame['float'] + 1))} def test_basic_frame(self): @@ -846,6 +848,7 @@ def legacy_packer(request, datapath): return datapath(request.param) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestMsgpack(object): """ How to add msgpack tests: diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index 77b4a3c7cac5f..a47c3c01fc80e 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -14,7 +14,7 @@ """ import glob import pytest -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import os from distutils.version import LooseVersion @@ -202,6 +202,7 @@ def test_pickles(current_pickle_data, legacy_pickle): version = os.path.basename(os.path.dirname(legacy_pickle)) with catch_warnings(record=True): + simplefilter("ignore") compare(current_pickle_data, legacy_pickle, version) @@ -332,9 +333,9 @@ def compress_file(self, src_path, dest_path, compression): f = bz2.BZ2File(dest_path, "w") elif compression == 'zip': import zipfile - f = zipfile.ZipFile(dest_path, "w", - compression=zipfile.ZIP_DEFLATED) - f.write(src_path, os.path.basename(src_path)) + with zipfile.ZipFile(dest_path, "w", + compression=zipfile.ZIP_DEFLATED) as f: + f.write(src_path, os.path.basename(src_path)) elif compression == 'xz': lzma = pandas.compat.import_lzma() f = lzma.LZMAFile(dest_path, "w") @@ -343,9 +344,8 @@ def 
compress_file(self, src_path, dest_path, compression): raise ValueError(msg) if compression != "zip": - with open(src_path, "rb") as fh: + with open(src_path, "rb") as fh, f: f.write(fh.read()) - f.close() def test_write_explicit(self, compression, get_random_path): base = get_random_path diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ddcfcc0842d1a..ea5f1684c0695 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2,7 +2,7 @@ import os import tempfile from contextlib import contextmanager -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter from distutils.version import LooseVersion import datetime @@ -40,6 +40,10 @@ LooseVersion('2.2') else 'zlib') +ignore_natural_naming_warning = pytest.mark.filterwarnings( + "ignore:object name:tables.exceptions.NaturalNameWarning" +) + # contextmanager to ensure the file cleanup @@ -139,12 +143,14 @@ def teardown_method(self, method): @pytest.mark.single +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestHDFStore(Base): def test_factory_fun(self): path = create_tempfile(self.path) try: - with catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): with get_store(path) as tbl: raise ValueError('blah') except ValueError: @@ -153,11 +159,13 @@ def test_factory_fun(self): safe_remove(path) try: - with catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): with get_store(path) as tbl: tbl['a'] = tm.makeDataFrame() - with catch_warnings(record=True): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): with get_store(path) as tbl: assert len(tbl) == 1 assert type(tbl['a']) == DataFrame @@ -425,8 +433,8 @@ def test_repr(self): df.loc[3:6, ['obj1']] = np.nan df = df._consolidate()._convert(datetime=True) - # PerformanceWarning with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) store['df'] = df # make a random group in hdf space @@ -446,6 +454,7 @@ def test_repr(self): repr(s) str(s) + @ignore_natural_naming_warning def test_contains(self): with ensure_clean_store(self.path) as store: @@ -912,11 +921,15 @@ def test_put_mixed_type(self): # PerformanceWarning with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) store.put('df', df) expected = store.get('df') tm.assert_frame_equal(expected, df) + @pytest.mark.filterwarnings( + "ignore:object name:tables.exceptions.NaturalNameWarning" + ) def test_append(self): with ensure_clean_store(self.path) as store: @@ -1075,6 +1088,7 @@ def check(format, index): # PerformanceWarning with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) check('fixed', index) @pytest.mark.skipif(not is_platform_little_endian(), @@ -1355,6 +1369,7 @@ def test_append_with_strings(self): with ensure_clean_store(self.path) as store: with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) wp = tm.makePanel() wp2 = wp.rename_axis( {x: "%s_extra" % x for x in wp.minor_axis}, axis=2) @@ -2553,6 +2568,7 @@ def test_terms(self): with ensure_clean_store(self.path) as store: with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) wp = tm.makePanel() wpneg = Panel.fromDict({-1: tm.makeDataFrame(), @@ -2758,8 +2774,10 @@ def test_tuple_index(self): DF = DataFrame(data, index=idx, columns=col) with catch_warnings(record=True): + simplefilter("ignore", 
pd.errors.PerformanceWarning) self._check_roundtrip(DF, tm.assert_frame_equal) + @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning") def test_index_types(self): with catch_warnings(record=True): @@ -2988,6 +3006,9 @@ def test_wide(self): wp = tm.makePanel() self._check_roundtrip(wp, assert_panel_equal) + @pytest.mark.filterwarnings( + "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" + ) def test_select_with_dups(self): # single dtypes @@ -3047,6 +3068,9 @@ def test_select_with_dups(self): result = store.select('df', columns=['B', 'A']) assert_frame_equal(result, expected, by_blocks=True) + @pytest.mark.filterwarnings( + "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" + ) def test_wide_table_dups(self): with ensure_clean_store(self.path) as store: with catch_warnings(record=True): @@ -3589,6 +3613,9 @@ def test_select_iterator_many_empty_frames(self): # should be [] assert len(results) == 0 + @pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" + ) def test_retain_index_attributes(self): # GH 3499, losing frequency info on index recreation @@ -3631,6 +3658,9 @@ def test_retain_index_attributes(self): freq='D')))) store.append('df2', df3) + @pytest.mark.filterwarnings( + "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning" + ) def test_retain_index_attributes2(self): with ensure_clean_path(self.path) as path: @@ -4533,7 +4563,8 @@ def test_legacy_table_read(self, datapath): datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'), mode='r') as store: - with catch_warnings(record=True): + with catch_warnings(): + simplefilter("ignore", pd.io.pytables.IncompatibilityWarning) store.select('df1') store.select('df2') store.select('wp1') @@ -4665,6 +4696,7 @@ def test_unicode_index(self): # PerformanceWarning with catch_warnings(record=True): + simplefilter("ignore", pd.errors.PerformanceWarning) s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) @@ -4933,6 +4965,7 @@ def test_columns_multiindex_modified(self): df_loaded = read_hdf(path, 'df', columns=cols2load) # noqa assert cols2load_original == cols2load + @ignore_natural_naming_warning def test_to_hdf_with_object_column_names(self): # GH9057 # Writing HDF5 table format should only work for string-like @@ -5277,6 +5310,7 @@ def test_complex_mixed_table(self): reread = read_hdf(path, 'df') assert_frame_equal(df, reread) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_complex_across_dimensions_fixed(self): with catch_warnings(record=True): complex128 = np.array( @@ -5294,6 +5328,7 @@ def test_complex_across_dimensions_fixed(self): reread = read_hdf(path, 'obj') comp(obj, reread) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_complex_across_dimensions(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index e4df7043919ae..237cc2936919e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -18,7 +18,6 @@ """ from __future__ import print_function -from warnings import catch_warnings import pytest import sqlite3 import csv @@ -582,11 +581,11 @@ def test_to_sql_series(self): s2 = sql.read_sql_query("SELECT * FROM test_series", self.conn) tm.assert_frame_equal(s.to_frame(), s2) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_to_sql_panel(self): - with 
catch_warnings(record=True): - panel = tm.makePanel() - pytest.raises(NotImplementedError, sql.to_sql, panel, - 'test_panel', self.conn) + panel = tm.makePanel() + pytest.raises(NotImplementedError, sql.to_sql, panel, + 'test_panel', self.conn) def test_roundtrip(self): sql.to_sql(self.test_frame1, 'test_frame_roundtrip', diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index cfe47cae7e5e1..303d3a3d8dbe9 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -120,7 +120,7 @@ def test_read_empty_dta(self, version): def test_data_method(self): # Minimal testing of legacy data method with StataReader(self.dta1_114) as rdr: - with warnings.catch_warnings(record=True) as w: # noqa + with tm.assert_produces_warning(UserWarning): parsed_114_data = rdr.data() with StataReader(self.dta1_114) as rdr: @@ -388,10 +388,8 @@ def test_read_write_dta11(self): formatted = formatted.astype(np.int32) with tm.ensure_clean() as path: - with warnings.catch_warnings(record=True) as w: + with tm.assert_produces_warning(pd.io.stata.InvalidColumnName): original.to_stata(path, None) - # should get a warning for that format. - assert len(w) == 1 written_and_read_again = self.read_dta(path) tm.assert_frame_equal( @@ -871,6 +869,9 @@ def test_drop_column(self): read_stata(self.dta15_117, convert_dates=True, columns=columns) @pytest.mark.parametrize('version', [114, 117]) + @pytest.mark.filterwarnings( + "ignore:\\nStata value:pandas.io.stata.ValueLabelTypeMismatch" + ) def test_categorical_writing(self, version): original = DataFrame.from_records( [ @@ -901,12 +902,10 @@ def test_categorical_writing(self, version): expected.index.name = 'index' with tm.ensure_clean() as path: - with warnings.catch_warnings(record=True) as w: # noqa - # Silence warnings - original.to_stata(path, version=version) - written_and_read_again = self.read_dta(path) - res = written_and_read_again.set_index('index') - tm.assert_frame_equal(res, expected, check_categorical=False) + original.to_stata(path, version=version) + written_and_read_again = self.read_dta(path) + res = written_and_read_again.set_index('index') + tm.assert_frame_equal(res, expected, check_categorical=False) def test_categorical_warnings_and_errors(self): # Warning for non-string labels @@ -933,10 +932,9 @@ def test_categorical_warnings_and_errors(self): original = pd.concat([original[col].astype('category') for col in original], axis=1) - with warnings.catch_warnings(record=True) as w: + with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch): original.to_stata(path) # should get a warning for mixed content - assert len(w) == 1 @pytest.mark.parametrize('version', [114, 117]) def test_categorical_with_stata_missing_values(self, version): @@ -1445,7 +1443,7 @@ def test_convert_strl_name_swap(self): columns=['long1' * 10, 'long', 1]) original.index.name = 'index' - with warnings.catch_warnings(record=True) as w: # noqa + with tm.assert_produces_warning(pd.io.stata.InvalidColumnName): with tm.ensure_clean() as path: original.to_stata(path, convert_strl=['long', 1], version=117) reread = self.read_dta(path) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 772989231e9a7..cd297c356d60e 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -628,6 +628,7 @@ def test_subplots_multiple_axes(self): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes fig, axes = self.plt.subplots(2, 2) with warnings.catch_warnings(): + 
warnings.simplefilter("ignore", UserWarning) df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) @@ -1574,7 +1575,11 @@ def test_hist_df(self): self._check_ticks_props(axes, xrot=40, yrot=0) tm.close() - ax = series.plot.hist(normed=True, cumulative=True, bins=4) + if plotting._compat._mpl_ge_2_2_0(): + kwargs = {"density": True} + else: + kwargs = {"normed": True} + ax = series.plot.hist(cumulative=True, bins=4, **kwargs) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] tm.assert_almost_equal(rects[-1].get_height(), 1.0) @@ -1850,7 +1855,7 @@ def test_line_colors(self): tm.close() - ax2 = df.plot(colors=custom_colors) + ax2 = df.plot(color=custom_colors) lines2 = ax2.get_lines() for l1, l2 in zip(ax.get_lines(), lines2): diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 864d39eba29c5..2864877550bac 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -12,6 +12,7 @@ from numpy.random import randn from pandas.plotting._core import grouped_hist +from pandas.plotting._compat import _mpl_ge_2_2_0 from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -193,7 +194,11 @@ def test_hist_df_legacy(self): tm.close() # make sure kwargs to hist are handled - ax = ser.hist(normed=True, cumulative=True, bins=4) + if _mpl_ge_2_2_0(): + kwargs = {"density": True} + else: + kwargs = {"normed": True} + ax = ser.hist(cumulative=True, bins=4, **kwargs) # height of last bin (index 5) must be 1.0 rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] tm.assert_almost_equal(rects[-1].get_height(), 1.0) @@ -279,9 +284,15 @@ def test_grouped_hist_legacy(self): # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True, + + if _mpl_ge_2_2_0(): + kwargs = {"density": True} + else: + kwargs = {"normed": True} + + axes = grouped_hist(df.A, by=df.C, cumulative=True, bins=4, xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot) + ylabelsize=yf, yrot=yrot, **kwargs) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index e80443954a434..8c84b785c88e4 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -212,6 +212,8 @@ def test_parallel_coordinates(self, iris): with tm.assert_produces_warning(FutureWarning): parallel_coordinates(df, 'Name', colors=colors) + # not sure if this is indicative of a problem + @pytest.mark.filterwarnings("ignore:Attempting to set:UserWarning") def test_parallel_coordinates_with_sorted_labels(self): """ For #15908 """ from pandas.plotting import parallel_coordinates diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 09f511886583c..e965ff7a78a39 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -19,6 +19,7 @@ a_ = np.array +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestJoin(object): def setup_method(self, method): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 762b04cc3bd4f..2aaa04d571e69 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1,5 +1,6 @@ -from warnings import 
catch_warnings +from warnings import catch_warnings, simplefilter from itertools import combinations +from collections import deque import datetime as dt import dateutil @@ -13,6 +14,7 @@ read_csv, isna, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex, Categorical) +from pandas.compat import Iterable from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.util import testing as tm from pandas.util.testing import (assert_frame_equal, @@ -1465,6 +1467,7 @@ def test_concat_mixed_objs(self): # invalid concatente of mixed dims with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) panel = tm.makePanel() pytest.raises(ValueError, lambda: concat([panel, s1], axis=1)) @@ -1503,59 +1506,61 @@ def test_dtype_coerceion(self): result = concat([df.iloc[[0]], df.iloc[[1]]]) tm.assert_series_equal(result.dtypes, df.dtypes) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_panel_concat_other_axes(self): - with catch_warnings(record=True): - panel = tm.makePanel() + panel = tm.makePanel() - p1 = panel.iloc[:, :5, :] - p2 = panel.iloc[:, 5:, :] + p1 = panel.iloc[:, :5, :] + p2 = panel.iloc[:, 5:, :] - result = concat([p1, p2], axis=1) - tm.assert_panel_equal(result, panel) + result = concat([p1, p2], axis=1) + tm.assert_panel_equal(result, panel) - p1 = panel.iloc[:, :, :2] - p2 = panel.iloc[:, :, 2:] + p1 = panel.iloc[:, :, :2] + p2 = panel.iloc[:, :, 2:] - result = concat([p1, p2], axis=2) - tm.assert_panel_equal(result, panel) + result = concat([p1, p2], axis=2) + tm.assert_panel_equal(result, panel) - # if things are a bit misbehaved - p1 = panel.iloc[:2, :, :2] - p2 = panel.iloc[:, :, 2:] - p1['ItemC'] = 'baz' + # if things are a bit misbehaved + p1 = panel.iloc[:2, :, :2] + p2 = panel.iloc[:, :, 2:] + p1['ItemC'] = 'baz' - result = concat([p1, p2], axis=2) + result = concat([p1, p2], axis=2) - expected = panel.copy() - expected['ItemC'] = expected['ItemC'].astype('O') - expected.loc['ItemC', :, :2] = 'baz' - tm.assert_panel_equal(result, expected) + expected = panel.copy() + expected['ItemC'] = expected['ItemC'].astype('O') + expected.loc['ItemC', :, :2] = 'baz' + tm.assert_panel_equal(result, expected) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") + # Panel.rename warning we don't care about + @pytest.mark.filterwarnings("ignore:Using:FutureWarning") def test_panel_concat_buglet(self, sort): - with catch_warnings(record=True): - # #2257 - def make_panel(): - index = 5 - cols = 3 + # #2257 + def make_panel(): + index = 5 + cols = 3 - def df(): - return DataFrame(np.random.randn(index, cols), - index=["I%s" % i for i in range(index)], - columns=["C%s" % i for i in range(cols)]) - return Panel({"Item%s" % x: df() for x in ['A', 'B', 'C']}) + def df(): + return DataFrame(np.random.randn(index, cols), + index=["I%s" % i for i in range(index)], + columns=["C%s" % i for i in range(cols)]) + return Panel({"Item%s" % x: df() for x in ['A', 'B', 'C']}) - panel1 = make_panel() - panel2 = make_panel() + panel1 = make_panel() + panel2 = make_panel() - panel2 = panel2.rename_axis({x: "%s_1" % x - for x in panel2.major_axis}, - axis=1) + panel2 = panel2.rename_axis({x: "%s_1" % x + for x in panel2.major_axis}, + axis=1) - panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) - panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) + panel3 = panel2.rename_axis(lambda x: '%s_1' % x, axis=1) + panel3 = panel3.rename_axis(lambda x: '%s_1' % x, axis=2) - # it works! 
- concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort) + # it works! + concat([panel1, panel3], axis=1, verify_integrity=True, sort=sort) def test_concat_series(self): @@ -1722,8 +1727,6 @@ def test_concat_series_axis1_same_names_ignore_index(self): tm.assert_index_equal(result.columns, expected) def test_concat_iterables(self): - from collections import deque, Iterable - # GH8645 check concat works with tuples, list, generators, and weird # stuff like deque and custom iterables df1 = DataFrame([1, 2, 3]) @@ -2351,30 +2354,30 @@ def test_concat_datetime_timezone(self): tm.assert_frame_equal(result, expected) # GH 13783: Concat after resample - with catch_warnings(record=True): - result = pd.concat([df1.resample('H').mean(), - df2.resample('H').mean()]) - expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3, - 'b': [np.nan] * 3 + [1, 2, 3]}, - index=idx1.append(idx1)) - tm.assert_frame_equal(result, expected) + result = pd.concat([df1.resample('H').mean(), + df2.resample('H').mean()], sort=True) + expected = pd.DataFrame({'a': [1, 2, 3] + [np.nan] * 3, + 'b': [np.nan] * 3 + [1, 2, 3]}, + index=idx1.append(idx1)) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_concat_no_unnecessary_upcast(dt, pdt): - with catch_warnings(record=True): - # GH 13247 - dims = pdt().ndim - dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), - pdt(np.array([np.nan], dtype=dt, ndmin=dims)), - pdt(np.array([5], dtype=dt, ndmin=dims))] - x = pd.concat(dfs) - assert x.values.dtype == dt + # GH 13247 + dims = pdt().ndim + dfs = [pdt(np.array([1], dtype=dt, ndmin=dims)), + pdt(np.array([np.nan], dtype=dt, ndmin=dims)), + pdt(np.array([5], dtype=dt, ndmin=dims))] + x = pd.concat(dfs) + assert x.values.dtype == dt @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['int']) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_concat_will_upcast(dt, pdt): with catch_warnings(record=True): dims = pdt().ndim diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 3f4ccd7693a8f..ed9ad06a9b371 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -from warnings import catch_warnings import pytest from collections import OrderedDict @@ -501,12 +500,12 @@ def test_get_dummies_duplicate_columns(self, df): class TestCategoricalReshape(object): + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_reshaping_panel_categorical(self): - with catch_warnings(record=True): - p = tm.makePanel() - p['str'] = 'foo' - df = p.to_frame() + p = tm.makePanel() + p['str'] = 'foo' + df = p.to_frame() df['category'] = df['str'].astype('category') result = df['category'].unstack() diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index bcea47f42056b..d1f022ef982c0 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -383,6 +383,8 @@ def test_getitem_setitem_periodindex(): assert_series_equal(result, ts) +# FutureWarning from NumPy. 
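+# (NumPy's message begins "Using a non-tuple sequence for multidimensional
+# indexing", which the filters below match by prefix)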
+@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_getitem_median_slice_bug(): index = date_range('20090415', '20090519', freq='2B') s = Series(np.random.randn(13), index=index) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 25bc394e312a0..aa4f58089a933 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -390,6 +390,8 @@ def test_setslice(test_data): assert sl.index.is_unique +# FutureWarning from NumPy about [slice(None, 5). +@pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_basic_getitem_setitem_corner(test_data): # invalid tuples, e.g. td.ts[:, None] vs. td.ts[:, 2] with tm.assert_raises_regex(ValueError, 'tuple-index'): diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index d5d9e5f4f14de..9acd6501c3825 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1640,8 +1640,35 @@ def test_value_counts_categorical_not_ordered(self): tm.assert_series_equal(idx.value_counts(normalize=True), exp) +main_dtypes = [ + 'datetime', + 'datetimetz', + 'timedelta', + 'int8', + 'int16', + 'int32', + 'int64', + 'float32', + 'float64', + 'uint8', + 'uint16', + 'uint32', + 'uint64' +] + + @pytest.fixture def s_main_dtypes(): + """A DataFrame with many dtypes + + * datetime + * datetimetz + * timedelta + * [u]int{8,16,32,64} + * float{32,64} + + The columns are the name of the dtype. + """ df = pd.DataFrame( {'datetime': pd.to_datetime(['2003', '2002', '2001', '2002', @@ -1661,6 +1688,12 @@ def s_main_dtypes(): return df +@pytest.fixture(params=main_dtypes) +def s_main_dtypes_split(request, s_main_dtypes): + """Each series in s_main_dtypes.""" + return s_main_dtypes[request.param] + + class TestMode(object): @pytest.mark.parametrize('dropna, expected', [ @@ -1864,12 +1897,10 @@ def test_error(self, r): with tm.assert_raises_regex(TypeError, msg): method(arg) - @pytest.mark.parametrize( - "s", - [v for k, v in s_main_dtypes().iteritems()]) - def test_nsmallest_nlargest(self, s): + def test_nsmallest_nlargest(self, s_main_dtypes_split): # float, int, datetime64 (use i8), timedelts64 (same), # object that are numbers, object that are strings + s = s_main_dtypes_split assert_series_equal(s.nsmallest(2), s.iloc[[2, 1]]) assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index da9b03e81994d..3b82242626c20 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -1,6 +1,7 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 from collections import OrderedDict +import warnings import pydoc import pytest @@ -728,8 +729,12 @@ def test_dt_accessor_api_for_categorical(self): func_defs.append(f_def) for func, args, kwargs in func_defs: - res = getattr(c.dt, func)(*args, **kwargs) - exp = getattr(s.dt, func)(*args, **kwargs) + with warnings.catch_warnings(): + if func == 'to_period': + # dropping TZ + warnings.simplefilter("ignore", UserWarning) + res = getattr(c.dt, func)(*args, **kwargs) + exp = getattr(s.dt, func)(*args, **kwargs) if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 9faf47ace242d..4817f5bdccc29 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -957,6 
+957,8 @@ def test_constructor_set(self): values = frozenset(values) pytest.raises(TypeError, Series, values) + # https://github.com/pandas-dev/pandas/issues/22698 + @pytest.mark.filterwarnings("ignore:elementwise comparison:FutureWarning") def test_fromDict(self): data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index dd1b623f0f7ff..7aecaf340a3e0 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -428,8 +428,10 @@ def test_astype_empty_constructor_equality(self, dtype): if dtype not in ('S', 'V'): # poor support (if any) currently with warnings.catch_warnings(record=True): - # Generic timestamp dtypes ('M' and 'm') are deprecated, - # but we test that already in series/test_constructors.py + if dtype in ('M', 'm'): + # Generic timestamp dtypes ('M' and 'm') are deprecated, + # but we test that already in series/test_constructors.py + warnings.simplefilter("ignore", FutureWarning) init_empty = Series([], dtype=dtype) as_type_empty = Series([]).astype(dtype) diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index 30938966b5d1a..5e5a341ca76d6 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -3,7 +3,6 @@ import operator import pytest -from warnings import catch_warnings from numpy import nan import numpy as np import pandas as pd @@ -971,27 +970,26 @@ def _check(frame, orig): _check(float_frame_fill0, float_frame_fill0_dense) _check(float_frame_fill2, float_frame_fill2_dense) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_stack_sparse_frame(self, float_frame, float_frame_int_kind, float_frame_fill0, float_frame_fill2): - with catch_warnings(record=True): + def _check(frame): + dense_frame = frame.to_dense() # noqa - def _check(frame): - dense_frame = frame.to_dense() # noqa + wp = Panel.from_dict({'foo': frame}) + from_dense_lp = wp.to_frame() - wp = Panel.from_dict({'foo': frame}) - from_dense_lp = wp.to_frame() + from_sparse_lp = spf.stack_sparse_frame(frame) - from_sparse_lp = spf.stack_sparse_frame(frame) + tm.assert_numpy_array_equal(from_dense_lp.values, + from_sparse_lp.values) - tm.assert_numpy_array_equal(from_dense_lp.values, - from_sparse_lp.values) + _check(float_frame) + _check(float_frame_int_kind) - _check(float_frame) - _check(float_frame_int_kind) - - # for now - pytest.raises(Exception, _check, float_frame_fill0) - pytest.raises(Exception, _check, float_frame_fill2) + # for now + pytest.raises(Exception, _check, float_frame_fill0) + pytest.raises(Exception, _check, float_frame_fill2) def test_transpose(self, float_frame, float_frame_int_kind, float_frame_dense, diff --git a/pandas/tests/sparse/frame/test_to_from_scipy.py b/pandas/tests/sparse/frame/test_to_from_scipy.py index aef49c84fc2ad..a7f64bbe9a49f 100644 --- a/pandas/tests/sparse/frame/test_to_from_scipy.py +++ b/pandas/tests/sparse/frame/test_to_from_scipy.py @@ -1,6 +1,5 @@ import pytest import numpy as np -from warnings import catch_warnings from pandas.util import testing as tm from pandas import SparseDataFrame, SparseSeries from distutils.version import LooseVersion @@ -12,12 +11,16 @@ scipy = pytest.importorskip('scipy') +ignore_matrix_warning = pytest.mark.filterwarnings( + "ignore:the matrix subclass:PendingDeprecationWarning" +) @pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('def')]) 
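 # (A note on ignore_matrix_warning above: it is an ordinary mark object
 # stored in a variable, so stacking @ignore_matrix_warning on a test is
 # equivalent to repeating the full filterwarnings line inline. The string
 # follows the same "action:message:category" spec as Python's -W option,
 # with the message field matched against the start of the warning text.)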
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) +@ignore_matrix_warning def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): # GH 4343 # Make one ndarray and from it one sparse matrix, both to be used for @@ -69,6 +72,8 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +@ignore_matrix_warning +@pytest.mark.filterwarnings("ignore:object dtype is not supp:UserWarning") def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object @@ -108,8 +113,7 @@ def test_from_to_scipy_object(spmatrix, fill_value): tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense()) # Assert spmatrices equal - with catch_warnings(record=True): - assert dict(sdf.to_coo().todok()) == dict(spm.todok()) + assert dict(sdf.to_coo().todok()) == dict(spm.todok()) # Ensure dtype is preserved if possible res_dtype = object @@ -117,6 +121,7 @@ def test_from_to_scipy_object(spmatrix, fill_value): assert sdf.to_coo().dtype == res_dtype +@ignore_matrix_warning def test_from_scipy_correct_ordering(spmatrix): # GH 16179 arr = np.arange(1, 5).reshape(2, 2) @@ -135,6 +140,7 @@ def test_from_scipy_correct_ordering(spmatrix): tm.assert_frame_equal(sdf.to_dense(), expected.to_dense()) +@ignore_matrix_warning def test_from_scipy_fillna(spmatrix): # GH 16112 arr = np.eye(3) diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py index 921c30234660f..5b50606bf37bd 100644 --- a/pandas/tests/sparse/series/test_series.py +++ b/pandas/tests/sparse/series/test_series.py @@ -1022,6 +1022,9 @@ def test_round_trip_preserve_multiindex_names(self): @td.skip_if_no_scipy +@pytest.mark.filterwarnings( + "ignore:the matrix subclass:PendingDeprecationWarning" +) class TestSparseSeriesScipyInteraction(object): # Issue 8048: add SparseSeries coo methods diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 70973801d7cda..abcfa4b320b22 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -62,6 +62,8 @@ def test_oo_optimizable(): @tm.network +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't:ImportWarning") def test_statsmodels(): statsmodels = import_module('statsmodels') # noqa @@ -71,6 +73,8 @@ def test_statsmodels(): smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=df).fit() +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't:ImportWarning") def test_scikit_learn(df): sklearn = import_module('sklearn') # noqa @@ -82,7 +86,9 @@ def test_scikit_learn(df): clf.predict(digits.data[-1:]) +# Cython import warning and traitlets @tm.network +@pytest.mark.filterwarnings("ignore") def test_seaborn(): seaborn = import_module('seaborn') @@ -104,6 +110,10 @@ def test_pandas_datareader(): 'F', 'quandl', '2017-01-01', '2017-02-01') +# importing from pandas, Cython import warning +@pytest.mark.filterwarnings("ignore:The 'warn':DeprecationWarning") +@pytest.mark.filterwarnings("ignore:pandas.util:DeprecationWarning") +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") def test_geopandas(): geopandas = import_module('geopandas') # noqa @@ -111,6 +121,8 @@ def test_geopandas(): assert geopandas.read_file(fp) is not None +# Cython import warning +@pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") def test_pyarrow(df): pyarrow = import_module('pyarrow') # noqa diff --git a/pandas/tests/test_errors.py 
b/pandas/tests/test_errors.py index 7f9cddf9859a5..76e003c463e7d 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import pytest -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import pandas # noqa import pandas as pd from pandas.errors import AbstractMethodError @@ -48,6 +48,7 @@ def test_error_rename(): pass with catch_warnings(record=True): + simplefilter("ignore") try: raise ParserError() except pd.parser.CParserError: diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 468463d3eba5f..c101fd25ce5e5 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -2,7 +2,7 @@ from __future__ import print_function # pylint: disable-msg=W0612,E1101 -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import re import operator import pytest @@ -38,6 +38,7 @@ columns=list('ABCD'), dtype='int64') with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) _frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=(_frame.copy() + 3), ItemC=_frame.copy(), @@ -191,6 +192,7 @@ def test_integer_arithmetic_series(self): self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0]) @pytest.mark.slow + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_integer_panel(self): self.run_panel(_integer2_panel, np.random.randint(1, 100)) @@ -201,6 +203,7 @@ def test_float_arithmetic_series(self): self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0]) @pytest.mark.slow + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_float_panel(self): self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8) @@ -215,6 +218,7 @@ def test_mixed_arithmetic_series(self): self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4) @pytest.mark.slow + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_mixed_panel(self): self.run_panel(_mixed2_panel, np.random.randint(1, 100), binary_comp=-2) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ecd0af9c13d34..1718c6beaef55 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101,W0141 -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter import datetime import itertools import pytest @@ -194,6 +194,7 @@ def test_reindex(self): tm.assert_frame_equal(reindexed, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) reindexed = self.frame.ix[[('foo', 'one'), ('bar', 'one')]] tm.assert_frame_equal(reindexed, expected) @@ -206,6 +207,7 @@ def test_reindex_preserve_levels(self): assert chunk.index is new_index with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) chunk = self.ymd.ix[new_index] assert chunk.index is new_index @@ -269,6 +271,7 @@ def test_series_getitem(self): tm.assert_series_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] tm.assert_series_equal(result, expected) @@ -348,6 +351,7 @@ def test_frame_getitem_setitem_multislice(self): tm.assert_series_equal(df['value'], result) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[:, 'value'] tm.assert_series_equal(df['value'], result) @@ -423,6 +427,7 @@ def 
test_getitem_tuple_plus_slice(self): expected = idf.loc[0, 0] expected2 = idf.xs((0, 0)) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) expected3 = idf.ix[0, 0] tm.assert_series_equal(result, expected) @@ -684,6 +689,7 @@ def test_frame_setitem_ix(self): assert df.loc[('bar', 'two'), 1] == 7 with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) df = self.frame.copy() df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 @@ -713,6 +719,7 @@ def test_getitem_partial_column_select(self): tm.assert_frame_equal(result, expected) with catch_warnings(record=True): + simplefilter("ignore", DeprecationWarning) result = df.ix[('a', 'y'), [1, 0]] tm.assert_frame_equal(result, expected) @@ -1294,6 +1301,7 @@ def test_swaplevel(self): def test_swaplevel_panel(self): with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) expected = panel.copy() expected.major_axis = expected.major_axis.swaplevel(0, 1) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index a70ee80aee180..b6c2c65fb6dce 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -359,6 +359,7 @@ def test_returned_dtype(self): def test_nanmedian(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) self.check_funs(nanops.nanmedian, np.median, allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=True, allow_obj='convert') @@ -394,12 +395,14 @@ def _minmax_wrap(self, value, axis=None, func=None): def test_nanmin(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._minmax_wrap, func=np.min) self.check_funs(nanops.nanmin, func, allow_str=False, allow_obj=False) def test_nanmax(self): - with warnings.catch_warnings(record=True): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._minmax_wrap, func=np.max) self.check_funs(nanops.nanmax, func, allow_str=False, allow_obj=False) @@ -417,6 +420,7 @@ def _argminmax_wrap(self, value, axis=None, func=None): def test_nanargmax(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmax) self.check_funs(nanops.nanargmax, func, allow_str=False, allow_obj=False, @@ -424,6 +428,7 @@ def test_nanargmax(self): def test_nanargmin(self): with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) func = partial(self._argminmax_wrap, func=np.argmin) self.check_funs(nanops.nanargmin, func, allow_str=False, allow_obj=False) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index b968c52ce3dfd..51c779c6a97a3 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=W0612,E1101 -from warnings import catch_warnings +from warnings import catch_warnings, simplefilter from datetime import datetime import operator import pytest @@ -30,49 +30,47 @@ def make_test_panel(): with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) _panel = tm.makePanel() tm.add_nans(_panel) _panel = _panel.copy() return _panel +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class PanelTests(object): panel = None def test_pickle(self): - with catch_warnings(record=True): - unpickled = tm.round_trip_pickle(self.panel) - 
assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) + unpickled = tm.round_trip_pickle(self.panel) + assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) def test_rank(self): - with catch_warnings(record=True): - pytest.raises(NotImplementedError, lambda: self.panel.rank()) + pytest.raises(NotImplementedError, lambda: self.panel.rank()) def test_cumsum(self): - with catch_warnings(record=True): - cumsum = self.panel.cumsum() - assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) + cumsum = self.panel.cumsum() + assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum()) def not_hashable(self): - with catch_warnings(record=True): - c_empty = Panel() - c = Panel(Panel([[[1]]])) - pytest.raises(TypeError, hash, c_empty) - pytest.raises(TypeError, hash, c) + c_empty = Panel() + c = Panel(Panel([[[1]]])) + pytest.raises(TypeError, hash, c_empty) + pytest.raises(TypeError, hash, c) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class SafeForLongAndSparse(object): def test_repr(self): repr(self.panel) def test_copy_names(self): - with catch_warnings(record=True): - for attr in ('major_axis', 'minor_axis'): - getattr(self.panel, attr).name = None - cp = self.panel.copy() - getattr(cp, attr).name = 'foo' - assert getattr(self.panel, attr).name is None + for attr in ('major_axis', 'minor_axis'): + getattr(self.panel, attr).name = None + cp = self.panel.copy() + getattr(cp, attr).name = 'foo' + assert getattr(self.panel, attr).name is None def test_iter(self): tm.equalContents(list(self.panel), self.panel.items) @@ -91,6 +89,8 @@ def test_mean(self): def test_prod(self): self._check_stat_op('prod', np.prod, skipna_alternative=np.nanprod) + @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning") + @pytest.mark.filterwarnings("ignore:All-NaN:RuntimeWarning") def test_median(self): def wrapper(x): if isna(x).any(): @@ -99,13 +99,13 @@ def wrapper(x): self._check_stat_op('median', wrapper) + @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning") def test_min(self): - with catch_warnings(record=True): - self._check_stat_op('min', np.min) + self._check_stat_op('min', np.min) + @pytest.mark.filterwarnings("ignore:Invalid value:RuntimeWarning") def test_max(self): - with catch_warnings(record=True): - self._check_stat_op('max', np.max) + self._check_stat_op('max', np.max) @td.skip_if_no_scipy def test_skew(self): @@ -181,6 +181,7 @@ def wrapper(x): numeric_only=True) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class SafeForSparse(object): def test_get_axis(self): @@ -240,48 +241,46 @@ def test_get_plane_axes(self): index, columns = self.panel._get_plane_axes(0) def test_truncate(self): - with catch_warnings(record=True): - dates = self.panel.major_axis - start, end = dates[1], dates[5] + dates = self.panel.major_axis + start, end = dates[1], dates[5] - trunced = self.panel.truncate(start, end, axis='major') - expected = self.panel['ItemA'].truncate(start, end) + trunced = self.panel.truncate(start, end, axis='major') + expected = self.panel['ItemA'].truncate(start, end) - assert_frame_equal(trunced['ItemA'], expected) + assert_frame_equal(trunced['ItemA'], expected) - trunced = self.panel.truncate(before=start, axis='major') - expected = self.panel['ItemA'].truncate(before=start) + trunced = self.panel.truncate(before=start, axis='major') + expected = self.panel['ItemA'].truncate(before=start) - assert_frame_equal(trunced['ItemA'], expected) + assert_frame_equal(trunced['ItemA'], expected) - trunced = 
self.panel.truncate(after=end, axis='major') - expected = self.panel['ItemA'].truncate(after=end) + trunced = self.panel.truncate(after=end, axis='major') + expected = self.panel['ItemA'].truncate(after=end) - assert_frame_equal(trunced['ItemA'], expected) + assert_frame_equal(trunced['ItemA'], expected) def test_arith(self): - with catch_warnings(record=True): - self._test_op(self.panel, operator.add) - self._test_op(self.panel, operator.sub) - self._test_op(self.panel, operator.mul) - self._test_op(self.panel, operator.truediv) - self._test_op(self.panel, operator.floordiv) - self._test_op(self.panel, operator.pow) - - self._test_op(self.panel, lambda x, y: y + x) - self._test_op(self.panel, lambda x, y: y - x) - self._test_op(self.panel, lambda x, y: y * x) - self._test_op(self.panel, lambda x, y: y / x) - self._test_op(self.panel, lambda x, y: y ** x) - - self._test_op(self.panel, lambda x, y: x + y) # panel + 1 - self._test_op(self.panel, lambda x, y: x - y) # panel - 1 - self._test_op(self.panel, lambda x, y: x * y) # panel * 1 - self._test_op(self.panel, lambda x, y: x / y) # panel / 1 - self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 - - pytest.raises(Exception, self.panel.__add__, - self.panel['ItemA']) + self._test_op(self.panel, operator.add) + self._test_op(self.panel, operator.sub) + self._test_op(self.panel, operator.mul) + self._test_op(self.panel, operator.truediv) + self._test_op(self.panel, operator.floordiv) + self._test_op(self.panel, operator.pow) + + self._test_op(self.panel, lambda x, y: y + x) + self._test_op(self.panel, lambda x, y: y - x) + self._test_op(self.panel, lambda x, y: y * x) + self._test_op(self.panel, lambda x, y: y / x) + self._test_op(self.panel, lambda x, y: y ** x) + + self._test_op(self.panel, lambda x, y: x + y) # panel + 1 + self._test_op(self.panel, lambda x, y: x - y) # panel - 1 + self._test_op(self.panel, lambda x, y: x * y) # panel * 1 + self._test_op(self.panel, lambda x, y: x / y) # panel / 1 + self._test_op(self.panel, lambda x, y: x ** y) # panel ** 1 + + pytest.raises(Exception, self.panel.__add__, + self.panel['ItemA']) @staticmethod def _test_op(panel, op): @@ -300,100 +299,99 @@ def test_iteritems(self): assert len(list(self.panel.iteritems())) == len(self.panel.items) def test_combineFrame(self): - with catch_warnings(record=True): - def check_op(op, name): - # items - df = self.panel['ItemA'] + def check_op(op, name): + # items + df = self.panel['ItemA'] - func = getattr(self.panel, name) + func = getattr(self.panel, name) - result = func(df, axis='items') + result = func(df, axis='items') - assert_frame_equal( - result['ItemB'], op(self.panel['ItemB'], df)) + assert_frame_equal( + result['ItemB'], op(self.panel['ItemB'], df)) - # major - xs = self.panel.major_xs(self.panel.major_axis[0]) - result = func(xs, axis='major') + # major + xs = self.panel.major_xs(self.panel.major_axis[0]) + result = func(xs, axis='major') - idx = self.panel.major_axis[1] + idx = self.panel.major_axis[1] - assert_frame_equal(result.major_xs(idx), - op(self.panel.major_xs(idx), xs)) + assert_frame_equal(result.major_xs(idx), + op(self.panel.major_xs(idx), xs)) - # minor - xs = self.panel.minor_xs(self.panel.minor_axis[0]) - result = func(xs, axis='minor') + # minor + xs = self.panel.minor_xs(self.panel.minor_axis[0]) + result = func(xs, axis='minor') - idx = self.panel.minor_axis[1] + idx = self.panel.minor_axis[1] - assert_frame_equal(result.minor_xs(idx), - op(self.panel.minor_xs(idx), xs)) + assert_frame_equal(result.minor_xs(idx), + 
op(self.panel.minor_xs(idx), xs)) - ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] - if not compat.PY3: - ops.append('div') + ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'pow', 'mod'] + if not compat.PY3: + ops.append('div') - for op in ops: - try: - check_op(getattr(operator, op), op) - except: - pprint_thing("Failing operation: %r" % op) - raise - if compat.PY3: - try: - check_op(operator.truediv, 'div') - except: - pprint_thing("Failing operation: %r" % 'div') - raise + for op in ops: + try: + check_op(getattr(operator, op), op) + except: + pprint_thing("Failing operation: %r" % op) + raise + if compat.PY3: + try: + check_op(operator.truediv, 'div') + except: + pprint_thing("Failing operation: %r" % 'div') + raise def test_combinePanel(self): - with catch_warnings(record=True): - result = self.panel.add(self.panel) - assert_panel_equal(result, self.panel * 2) + result = self.panel.add(self.panel) + assert_panel_equal(result, self.panel * 2) def test_neg(self): - with catch_warnings(record=True): - assert_panel_equal(-self.panel, self.panel * -1) + assert_panel_equal(-self.panel, self.panel * -1) # issue 7692 def test_raise_when_not_implemented(self): - with catch_warnings(record=True): - p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), - items=['ItemA', 'ItemB', 'ItemC'], - major_axis=date_range('20130101', periods=4), - minor_axis=list('ABCDE')) - d = p.sum(axis=1).iloc[0] - ops = ['add', 'sub', 'mul', 'truediv', - 'floordiv', 'div', 'mod', 'pow'] - for op in ops: - with pytest.raises(NotImplementedError): - getattr(p, op)(d, axis=0) + p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), + items=['ItemA', 'ItemB', 'ItemC'], + major_axis=date_range('20130101', periods=4), + minor_axis=list('ABCDE')) + d = p.sum(axis=1).iloc[0] + ops = ['add', 'sub', 'mul', 'truediv', + 'floordiv', 'div', 'mod', 'pow'] + for op in ops: + with pytest.raises(NotImplementedError): + getattr(p, op)(d, axis=0) def test_select(self): - with catch_warnings(record=True): - p = self.panel + p = self.panel - # select items + # select items + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = p.select(lambda x: x in ('ItemA', 'ItemC'), axis='items') - expected = p.reindex(items=['ItemA', 'ItemC']) - assert_panel_equal(result, expected) + expected = p.reindex(items=['ItemA', 'ItemC']) + assert_panel_equal(result, expected) - # select major_axis + # select major_axis + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = p.select(lambda x: x >= datetime( 2000, 1, 15), axis='major') - new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] - expected = p.reindex(major=new_major) - assert_panel_equal(result, expected) + new_major = p.major_axis[p.major_axis >= datetime(2000, 1, 15)] + expected = p.reindex(major=new_major) + assert_panel_equal(result, expected) - # select minor_axis + # select minor_axis + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = p.select(lambda x: x in ('D', 'A'), axis=2) - expected = p.reindex(minor=['A', 'D']) - assert_panel_equal(result, expected) + expected = p.reindex(minor=['A', 'D']) + assert_panel_equal(result, expected) - # corner case, empty thing + # corner case, empty thing + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = p.select(lambda x: x in ('foo', ), axis='items') - assert_panel_equal(result, p.reindex(items=[])) + assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): for item in self.panel.items: @@ 
-407,211 +405,204 @@ def test_get_value(self): def test_abs(self): - with catch_warnings(record=True): - result = self.panel.abs() - result2 = abs(self.panel) - expected = np.abs(self.panel) - assert_panel_equal(result, expected) - assert_panel_equal(result2, expected) + result = self.panel.abs() + result2 = abs(self.panel) + expected = np.abs(self.panel) + assert_panel_equal(result, expected) + assert_panel_equal(result2, expected) - df = self.panel['ItemA'] - result = df.abs() - result2 = abs(df) - expected = np.abs(df) - assert_frame_equal(result, expected) - assert_frame_equal(result2, expected) - - s = df['A'] - result = s.abs() - result2 = abs(s) - expected = np.abs(s) - assert_series_equal(result, expected) - assert_series_equal(result2, expected) - assert result.name == 'A' - assert result2.name == 'A' + df = self.panel['ItemA'] + result = df.abs() + result2 = abs(df) + expected = np.abs(df) + assert_frame_equal(result, expected) + assert_frame_equal(result2, expected) + + s = df['A'] + result = s.abs() + result2 = abs(s) + expected = np.abs(s) + assert_series_equal(result, expected) + assert_series_equal(result2, expected) + assert result.name == 'A' + assert result2.name == 'A' +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class CheckIndexing(object): def test_getitem(self): pytest.raises(Exception, self.panel.__getitem__, 'ItemQ') def test_delitem_and_pop(self): - with catch_warnings(record=True): - expected = self.panel['ItemA'] - result = self.panel.pop('ItemA') - assert_frame_equal(expected, result) - assert 'ItemA' not in self.panel.items + expected = self.panel['ItemA'] + result = self.panel.pop('ItemA') + assert_frame_equal(expected, result) + assert 'ItemA' not in self.panel.items - del self.panel['ItemB'] - assert 'ItemB' not in self.panel.items - pytest.raises(Exception, self.panel.__delitem__, 'ItemB') + del self.panel['ItemB'] + assert 'ItemB' not in self.panel.items + pytest.raises(Exception, self.panel.__delitem__, 'ItemB') - values = np.empty((3, 3, 3)) - values[0] = 0 - values[1] = 1 - values[2] = 2 + values = np.empty((3, 3, 3)) + values[0] = 0 + values[1] = 1 + values[2] = 2 - panel = Panel(values, lrange(3), lrange(3), lrange(3)) + panel = Panel(values, lrange(3), lrange(3), lrange(3)) - # did we delete the right row? + # did we delete the right row? 
- panelc = panel.copy() - del panelc[0] - tm.assert_frame_equal(panelc[1], panel[1]) - tm.assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[0] + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[1] - tm.assert_frame_equal(panelc[0], panel[0]) - tm.assert_frame_equal(panelc[2], panel[2]) + panelc = panel.copy() + del panelc[1] + tm.assert_frame_equal(panelc[0], panel[0]) + tm.assert_frame_equal(panelc[2], panel[2]) - panelc = panel.copy() - del panelc[2] - tm.assert_frame_equal(panelc[1], panel[1]) - tm.assert_frame_equal(panelc[0], panel[0]) + panelc = panel.copy() + del panelc[2] + tm.assert_frame_equal(panelc[1], panel[1]) + tm.assert_frame_equal(panelc[0], panel[0]) def test_setitem(self): - with catch_warnings(record=True): - lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() - with pytest.raises(ValueError): - self.panel['ItemE'] = lp - - # DataFrame - df = self.panel['ItemA'][2:].filter(items=['A', 'B']) - self.panel['ItemF'] = df - self.panel['ItemE'] = df - - df2 = self.panel['ItemF'] - - assert_frame_equal(df, df2.reindex( - index=df.index, columns=df.columns)) - - # scalar - self.panel['ItemG'] = 1 - self.panel['ItemE'] = True - assert self.panel['ItemG'].values.dtype == np.int64 - assert self.panel['ItemE'].values.dtype == np.bool_ - - # object dtype - self.panel['ItemQ'] = 'foo' - assert self.panel['ItemQ'].values.dtype == np.object_ - - # boolean dtype - self.panel['ItemP'] = self.panel['ItemA'] > 0 - assert self.panel['ItemP'].values.dtype == np.bool_ - - pytest.raises(TypeError, self.panel.__setitem__, 'foo', - self.panel.loc[['ItemP']]) - - # bad shape - p = Panel(np.random.randn(4, 3, 2)) - with tm.assert_raises_regex(ValueError, - r"shape of value must be " - r"\(3, 2\), shape of given " - r"object was \(4, 2\)"): - p[0] = np.random.randn(4, 2) + lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() + with pytest.raises(ValueError): + self.panel['ItemE'] = lp + + # DataFrame + df = self.panel['ItemA'][2:].filter(items=['A', 'B']) + self.panel['ItemF'] = df + self.panel['ItemE'] = df + + df2 = self.panel['ItemF'] + + assert_frame_equal(df, df2.reindex( + index=df.index, columns=df.columns)) + + # scalar + self.panel['ItemG'] = 1 + self.panel['ItemE'] = True + assert self.panel['ItemG'].values.dtype == np.int64 + assert self.panel['ItemE'].values.dtype == np.bool_ + + # object dtype + self.panel['ItemQ'] = 'foo' + assert self.panel['ItemQ'].values.dtype == np.object_ + + # boolean dtype + self.panel['ItemP'] = self.panel['ItemA'] > 0 + assert self.panel['ItemP'].values.dtype == np.bool_ + + pytest.raises(TypeError, self.panel.__setitem__, 'foo', + self.panel.loc[['ItemP']]) + + # bad shape + p = Panel(np.random.randn(4, 3, 2)) + with tm.assert_raises_regex(ValueError, + r"shape of value must be " + r"\(3, 2\), shape of given " + r"object was \(4, 2\)"): + p[0] = np.random.randn(4, 2) def test_setitem_ndarray(self): - with catch_warnings(record=True): - timeidx = date_range(start=datetime(2009, 1, 1), - end=datetime(2009, 12, 31), - freq=MonthEnd()) - lons_coarse = np.linspace(-177.5, 177.5, 72) - lats_coarse = np.linspace(-87.5, 87.5, 36) - P = Panel(items=timeidx, major_axis=lons_coarse, - minor_axis=lats_coarse) - data = np.random.randn(72 * 36).reshape((72, 36)) - key = datetime(2009, 2, 28) - P[key] = data - - assert_almost_equal(P[key].values, data) + timeidx = date_range(start=datetime(2009, 1, 1), + end=datetime(2009, 12, 31), + freq=MonthEnd()) + lons_coarse = 
np.linspace(-177.5, 177.5, 72) + lats_coarse = np.linspace(-87.5, 87.5, 36) + P = Panel(items=timeidx, major_axis=lons_coarse, + minor_axis=lats_coarse) + data = np.random.randn(72 * 36).reshape((72, 36)) + key = datetime(2009, 2, 28) + P[key] = data + + assert_almost_equal(P[key].values, data) def test_set_minor_major(self): - with catch_warnings(record=True): - # GH 11014 - df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) - df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) - panel = Panel({'Item1': df1, 'Item2': df2}) - - newminor = notna(panel.iloc[:, :, 0]) - panel.loc[:, :, 'NewMinor'] = newminor - assert_frame_equal(panel.loc[:, :, 'NewMinor'], - newminor.astype(object)) - - newmajor = notna(panel.iloc[:, 0, :]) - panel.loc[:, 'NewMajor', :] = newmajor - assert_frame_equal(panel.loc[:, 'NewMajor', :], - newmajor.astype(object)) + # GH 11014 + df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) + df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) + panel = Panel({'Item1': df1, 'Item2': df2}) + + newminor = notna(panel.iloc[:, :, 0]) + panel.loc[:, :, 'NewMinor'] = newminor + assert_frame_equal(panel.loc[:, :, 'NewMinor'], + newminor.astype(object)) + + newmajor = notna(panel.iloc[:, 0, :]) + panel.loc[:, 'NewMajor', :] = newmajor + assert_frame_equal(panel.loc[:, 'NewMajor', :], + newmajor.astype(object)) def test_major_xs(self): - with catch_warnings(record=True): - ref = self.panel['ItemA'] + ref = self.panel['ItemA'] - idx = self.panel.major_axis[5] - xs = self.panel.major_xs(idx) + idx = self.panel.major_axis[5] + xs = self.panel.major_xs(idx) - result = xs['ItemA'] - assert_series_equal(result, ref.xs(idx), check_names=False) - assert result.name == 'ItemA' + result = xs['ItemA'] + assert_series_equal(result, ref.xs(idx), check_names=False) + assert result.name == 'ItemA' - # not contained - idx = self.panel.major_axis[0] - BDay() - pytest.raises(Exception, self.panel.major_xs, idx) + # not contained + idx = self.panel.major_axis[0] - BDay() + pytest.raises(Exception, self.panel.major_xs, idx) def test_major_xs_mixed(self): - with catch_warnings(record=True): - self.panel['ItemD'] = 'foo' - xs = self.panel.major_xs(self.panel.major_axis[0]) - assert xs['ItemA'].dtype == np.float64 - assert xs['ItemD'].dtype == np.object_ + self.panel['ItemD'] = 'foo' + xs = self.panel.major_xs(self.panel.major_axis[0]) + assert xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_minor_xs(self): - with catch_warnings(record=True): - ref = self.panel['ItemA'] + ref = self.panel['ItemA'] - idx = self.panel.minor_axis[1] - xs = self.panel.minor_xs(idx) + idx = self.panel.minor_axis[1] + xs = self.panel.minor_xs(idx) - assert_series_equal(xs['ItemA'], ref[idx], check_names=False) + assert_series_equal(xs['ItemA'], ref[idx], check_names=False) - # not contained - pytest.raises(Exception, self.panel.minor_xs, 'E') + # not contained + pytest.raises(Exception, self.panel.minor_xs, 'E') def test_minor_xs_mixed(self): - with catch_warnings(record=True): - self.panel['ItemD'] = 'foo' + self.panel['ItemD'] = 'foo' - xs = self.panel.minor_xs('D') - assert xs['ItemA'].dtype == np.float64 - assert xs['ItemD'].dtype == np.object_ + xs = self.panel.minor_xs('D') + assert xs['ItemA'].dtype == np.float64 + assert xs['ItemD'].dtype == np.object_ def test_xs(self): - with catch_warnings(record=True): - itemA = self.panel.xs('ItemA', axis=0) - expected = self.panel['ItemA'] - tm.assert_frame_equal(itemA, expected) + itemA = self.panel.xs('ItemA', axis=0) + expected = 
self.panel['ItemA'] + tm.assert_frame_equal(itemA, expected) - # Get a view by default. - itemA_view = self.panel.xs('ItemA', axis=0) - itemA_view.values[:] = np.nan + # Get a view by default. + itemA_view = self.panel.xs('ItemA', axis=0) + itemA_view.values[:] = np.nan - assert np.isnan(self.panel['ItemA'].values).all() + assert np.isnan(self.panel['ItemA'].values).all() - # Mixed-type yields a copy. - self.panel['strings'] = 'foo' - result = self.panel.xs('D', axis=2) - assert result._is_copy is not None + # Mixed-type yields a copy. + self.panel['strings'] = 'foo' + result = self.panel.xs('D', axis=2) + assert result._is_copy is not None def test_getitem_fancy_labels(self): - with catch_warnings(record=True): - p = self.panel + p = self.panel - items = p.items[[1, 0]] - dates = p.major_axis[::2] - cols = ['D', 'C', 'F'] + items = p.items[[1, 0]] + dates = p.major_axis[::2] + cols = ['D', 'C', 'F'] - # all 3 specified + # all 3 specified + with catch_warnings(): + simplefilter("ignore", FutureWarning) + # XXX: warning in _validate_read_indexer assert_panel_equal(p.loc[items, dates, cols], p.reindex(items=items, major=dates, minor=cols)) @@ -670,132 +661,127 @@ def test_getitem_fancy_xs(self): assert_series_equal(p.loc[:, date, col], p.major_xs(date).loc[col]) def test_getitem_fancy_xs_check_view(self): - with catch_warnings(record=True): - item = 'ItemB' - date = self.panel.major_axis[5] - - # make sure it's always a view - NS = slice(None, None) - - # DataFrames - comp = assert_frame_equal - self._check_view(item, comp) - self._check_view((item, NS), comp) - self._check_view((item, NS, NS), comp) - self._check_view((NS, date), comp) - self._check_view((NS, date, NS), comp) - self._check_view((NS, NS, 'C'), comp) - - # Series - comp = assert_series_equal - self._check_view((item, date), comp) - self._check_view((item, date, NS), comp) - self._check_view((item, NS, 'C'), comp) - self._check_view((NS, date, 'C'), comp) + item = 'ItemB' + date = self.panel.major_axis[5] + + # make sure it's always a view + NS = slice(None, None) + + # DataFrames + comp = assert_frame_equal + self._check_view(item, comp) + self._check_view((item, NS), comp) + self._check_view((item, NS, NS), comp) + self._check_view((NS, date), comp) + self._check_view((NS, date, NS), comp) + self._check_view((NS, NS, 'C'), comp) + + # Series + comp = assert_series_equal + self._check_view((item, date), comp) + self._check_view((item, date, NS), comp) + self._check_view((item, NS, 'C'), comp) + self._check_view((NS, date, 'C'), comp) def test_getitem_callable(self): - with catch_warnings(record=True): - p = self.panel - # GH 12533 + p = self.panel + # GH 12533 - assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) - assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], - p.loc[['ItemB', 'ItemC']]) + assert_frame_equal(p[lambda x: 'ItemB'], p.loc['ItemB']) + assert_panel_equal(p[lambda x: ['ItemB', 'ItemC']], + p.loc[['ItemB', 'ItemC']]) def test_ix_setitem_slice_dataframe(self): - with catch_warnings(record=True): - a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], - minor_axis=[111, 222, 333]) - b = DataFrame(np.random.randn(2, 3), index=[111, 333], - columns=[1, 2, 3]) + a = Panel(items=[1, 2, 3], major_axis=[11, 22, 33], + minor_axis=[111, 222, 333]) + b = DataFrame(np.random.randn(2, 3), index=[111, 333], + columns=[1, 2, 3]) - a.loc[:, 22, [111, 333]] = b + a.loc[:, 22, [111, 333]] = b - assert_frame_equal(a.loc[:, 22, [111, 333]], b) + assert_frame_equal(a.loc[:, 22, [111, 333]], b) def test_ix_align(self): - with 
catch_warnings(record=True): - from pandas import Series - b = Series(np.random.randn(10), name=0) - b.sort_values() - df_orig = Panel(np.random.randn(3, 10, 2)) - df = df_orig.copy() + from pandas import Series + b = Series(np.random.randn(10), name=0) + b.sort_values() + df_orig = Panel(np.random.randn(3, 10, 2)) + df = df_orig.copy() - df.loc[0, :, 0] = b - assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) + df.loc[0, :, 0] = b + assert_series_equal(df.loc[0, :, 0].reindex(b.index), b) - df = df_orig.swapaxes(0, 1) - df.loc[:, 0, 0] = b - assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) + df = df_orig.swapaxes(0, 1) + df.loc[:, 0, 0] = b + assert_series_equal(df.loc[:, 0, 0].reindex(b.index), b) - df = df_orig.swapaxes(1, 2) - df.loc[0, 0, :] = b - assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) + df = df_orig.swapaxes(1, 2) + df.loc[0, 0, :] = b + assert_series_equal(df.loc[0, 0, :].reindex(b.index), b) def test_ix_frame_align(self): - with catch_warnings(record=True): - p_orig = tm.makePanel() - df = p_orig.iloc[0].copy() - assert_frame_equal(p_orig['ItemA'], df) - - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0, :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.loc['ItemA'] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.loc['ItemA', :, :] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p['ItemA'] = df - assert_panel_equal(p, p_orig) - - p = p_orig.copy() - p.iloc[0, [0, 1, 3, 5], -2:] = df - out = p.iloc[0, [0, 1, 3, 5], -2:] - assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]]) - - # GH3830, panel assignent by values/frame - for dtype in ['float64', 'int64']: - - panel = Panel(np.arange(40).reshape((2, 4, 5)), - items=['a1', 'a2'], dtype=dtype) - df1 = panel.iloc[0] - df2 = panel.iloc[1] - - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) - - # Assignment by Value Passes for 'a2' - panel.loc['a2'] = df1.values - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df1) - - # Assignment by DataFrame Ok w/o loc 'a2' - panel['a2'] = df2 - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) - - # Assignment by DataFrame Fails for 'a2' - panel.loc['a2'] = df2 - tm.assert_frame_equal(panel.loc['a1'], df1) - tm.assert_frame_equal(panel.loc['a2'], df2) + p_orig = tm.makePanel() + df = p_orig.iloc[0].copy() + assert_frame_equal(p_orig['ItemA'], df) + + p = p_orig.copy() + p.iloc[0, :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0, :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.loc['ItemA'] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.loc['ItemA', :, :] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p['ItemA'] = df + assert_panel_equal(p, p_orig) + + p = p_orig.copy() + p.iloc[0, [0, 1, 3, 5], -2:] = df + out = p.iloc[0, [0, 1, 3, 5], -2:] + assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]]) + + # GH3830, panel assignent by values/frame + for dtype in ['float64', 'int64']: + + panel = Panel(np.arange(40).reshape((2, 4, 5)), + items=['a1', 'a2'], dtype=dtype) + df1 = panel.iloc[0] + df2 = 
panel.iloc[1] + + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df2) + + # Assignment by Value Passes for 'a2' + panel.loc['a2'] = df1.values + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df1) + + # Assignment by DataFrame Ok w/o loc 'a2' + panel['a2'] = df2 + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df2) + + # Assignment by DataFrame Fails for 'a2' + panel.loc['a2'] = df2 + tm.assert_frame_equal(panel.loc['a1'], df1) + tm.assert_frame_equal(panel.loc['a2'], df2) def _check_view(self, indexer, comp): cp = self.panel.copy() @@ -805,83 +791,85 @@ def _check_view(self, indexer, comp): comp(cp.loc[indexer].reindex_like(obj), obj) def test_logical_with_nas(self): - with catch_warnings(record=True): - d = Panel({'ItemA': {'a': [np.nan, False]}, - 'ItemB': {'a': [True, True]}}) + d = Panel({'ItemA': {'a': [np.nan, False]}, + 'ItemB': {'a': [True, True]}}) - result = d['ItemA'] | d['ItemB'] - expected = DataFrame({'a': [np.nan, True]}) - assert_frame_equal(result, expected) + result = d['ItemA'] | d['ItemB'] + expected = DataFrame({'a': [np.nan, True]}) + assert_frame_equal(result, expected) - # this is autodowncasted here - result = d['ItemA'].fillna(False) | d['ItemB'] - expected = DataFrame({'a': [True, True]}) - assert_frame_equal(result, expected) + # this is autodowncasted here + result = d['ItemA'].fillna(False) | d['ItemB'] + expected = DataFrame({'a': [True, True]}) + assert_frame_equal(result, expected) def test_neg(self): - with catch_warnings(record=True): - assert_panel_equal(-self.panel, -1 * self.panel) + assert_panel_equal(-self.panel, -1 * self.panel) def test_invert(self): - with catch_warnings(record=True): - assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) + assert_panel_equal(-(self.panel < 0), ~(self.panel < 0)) def test_comparisons(self): - with catch_warnings(record=True): - p1 = tm.makePanel() - p2 = tm.makePanel() + p1 = tm.makePanel() + p2 = tm.makePanel() - tp = p1.reindex(items=p1.items + ['foo']) - df = p1[p1.items[0]] + tp = p1.reindex(items=p1.items + ['foo']) + df = p1[p1.items[0]] - def test_comp(func): + def test_comp(func): - # versus same index - result = func(p1, p2) - tm.assert_numpy_array_equal(result.values, - func(p1.values, p2.values)) + # versus same index + result = func(p1, p2) + tm.assert_numpy_array_equal(result.values, + func(p1.values, p2.values)) - # versus non-indexed same objs - pytest.raises(Exception, func, p1, tp) + # versus non-indexed same objs + pytest.raises(Exception, func, p1, tp) - # versus different objs - pytest.raises(Exception, func, p1, df) + # versus different objs + pytest.raises(Exception, func, p1, df) - # versus scalar - result3 = func(self.panel, 0) - tm.assert_numpy_array_equal(result3.values, - func(self.panel.values, 0)) + # versus scalar + result3 = func(self.panel, 0) + tm.assert_numpy_array_equal(result3.values, + func(self.panel.values, 0)) - with np.errstate(invalid='ignore'): - test_comp(operator.eq) - test_comp(operator.ne) - test_comp(operator.lt) - test_comp(operator.gt) - test_comp(operator.ge) - test_comp(operator.le) + with np.errstate(invalid='ignore'): + test_comp(operator.eq) + test_comp(operator.ne) + test_comp(operator.lt) + test_comp(operator.gt) + test_comp(operator.ge) + test_comp(operator.le) def test_get_value(self): - with catch_warnings(record=True): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: + for item 
in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): result = self.panel.get_value(item, mjr, mnr) - expected = self.panel[item][mnr][mjr] - assert_almost_equal(result, expected) + expected = self.panel[item][mnr][mjr] + assert_almost_equal(result, expected) + with catch_warnings(): + simplefilter("ignore", FutureWarning) with tm.assert_raises_regex(TypeError, "There must be an argument " "for each axis"): self.panel.get_value('a') def test_set_value(self): - with catch_warnings(record=True): - for item in self.panel.items: - for mjr in self.panel.major_axis[::2]: - for mnr in self.panel.minor_axis: + for item in self.panel.items: + for mjr in self.panel.major_axis[::2]: + for mnr in self.panel.minor_axis: + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): self.panel.set_value(item, mjr, mnr, 1.) - tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.) + tm.assert_almost_equal(self.panel[item][mnr][mjr], 1.) - # resize + # resize + with catch_warnings(): + simplefilter("ignore", FutureWarning) res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) assert isinstance(res, Panel) assert res is not self.panel @@ -896,6 +884,7 @@ def test_set_value(self): self.panel.set_value('a') +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestPanel(PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): @@ -906,314 +895,298 @@ def setup_method(self, method): self.panel.items.name = None def test_constructor(self): - with catch_warnings(record=True): - # with BlockManager - wp = Panel(self.panel._data) - assert wp._data is self.panel._data - - wp = Panel(self.panel._data, copy=True) - assert wp._data is not self.panel._data - tm.assert_panel_equal(wp, self.panel) - - # strings handled prop - wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) - assert wp.values.dtype == np.object_ - - vals = self.panel.values - - # no copy - wp = Panel(vals) - assert wp.values is vals - - # copy - wp = Panel(vals, copy=True) - assert wp.values is not vals - - # GH #8285, test when scalar data is used to construct a Panel - # if dtype is not passed, it should be inferred - value_and_dtype = [(1, 'int64'), (3.14, 'float64'), - ('foo', np.object_)] - for (val, dtype) in value_and_dtype: - wp = Panel(val, items=range(2), major_axis=range(3), - minor_axis=range(4)) - vals = np.empty((2, 3, 4), dtype=dtype) - vals.fill(val) - - tm.assert_panel_equal(wp, Panel(vals, dtype=dtype)) - - # test the case when dtype is passed - wp = Panel(1, items=range(2), major_axis=range(3), - minor_axis=range(4), - dtype='float32') - vals = np.empty((2, 3, 4), dtype='float32') - vals.fill(1) - - tm.assert_panel_equal(wp, Panel(vals, dtype='float32')) + # with BlockManager + wp = Panel(self.panel._data) + assert wp._data is self.panel._data + + wp = Panel(self.panel._data, copy=True) + assert wp._data is not self.panel._data + tm.assert_panel_equal(wp, self.panel) + + # strings handled prop + wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) + assert wp.values.dtype == np.object_ + + vals = self.panel.values + + # no copy + wp = Panel(vals) + assert wp.values is vals + + # copy + wp = Panel(vals, copy=True) + assert wp.values is not vals + + # GH #8285, test when scalar data is used to construct a Panel + # if dtype is not passed, it should be inferred + value_and_dtype = [(1, 'int64'), (3.14, 'float64'), + ('foo', np.object_)] + for (val, dtype) in 
value_and_dtype: + wp = Panel(val, items=range(2), major_axis=range(3), + minor_axis=range(4)) + vals = np.empty((2, 3, 4), dtype=dtype) + vals.fill(val) + + tm.assert_panel_equal(wp, Panel(vals, dtype=dtype)) + + # test the case when dtype is passed + wp = Panel(1, items=range(2), major_axis=range(3), + minor_axis=range(4), + dtype='float32') + vals = np.empty((2, 3, 4), dtype='float32') + vals.fill(1) + + tm.assert_panel_equal(wp, Panel(vals, dtype='float32')) def test_constructor_cast(self): - with catch_warnings(record=True): - zero_filled = self.panel.fillna(0) + zero_filled = self.panel.fillna(0) - casted = Panel(zero_filled._data, dtype=int) - casted2 = Panel(zero_filled.values, dtype=int) + casted = Panel(zero_filled._data, dtype=int) + casted2 = Panel(zero_filled.values, dtype=int) - exp_values = zero_filled.values.astype(int) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(int) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - casted = Panel(zero_filled._data, dtype=np.int32) - casted2 = Panel(zero_filled.values, dtype=np.int32) + casted = Panel(zero_filled._data, dtype=np.int32) + casted2 = Panel(zero_filled.values, dtype=np.int32) - exp_values = zero_filled.values.astype(np.int32) - assert_almost_equal(casted.values, exp_values) - assert_almost_equal(casted2.values, exp_values) + exp_values = zero_filled.values.astype(np.int32) + assert_almost_equal(casted.values, exp_values) + assert_almost_equal(casted2.values, exp_values) - # can't cast - data = [[['foo', 'bar', 'baz']]] - pytest.raises(ValueError, Panel, data, dtype=float) + # can't cast + data = [[['foo', 'bar', 'baz']]] + pytest.raises(ValueError, Panel, data, dtype=float) def test_constructor_empty_panel(self): - with catch_warnings(record=True): - empty = Panel() - assert len(empty.items) == 0 - assert len(empty.major_axis) == 0 - assert len(empty.minor_axis) == 0 + empty = Panel() + assert len(empty.items) == 0 + assert len(empty.major_axis) == 0 + assert len(empty.minor_axis) == 0 def test_constructor_observe_dtype(self): - with catch_warnings(record=True): - # GH #411 - panel = Panel(items=lrange(3), major_axis=lrange(3), - minor_axis=lrange(3), dtype='O') - assert panel.values.dtype == np.object_ + # GH #411 + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') + assert panel.values.dtype == np.object_ def test_constructor_dtypes(self): - with catch_warnings(record=True): - # GH #797 - - def _check_dtype(panel, dtype): - for i in panel.items: - assert panel[i].values.dtype.name == dtype - - # only nan holding types allowed here - for dtype in ['float64', 'float32', 'object']: - panel = Panel(items=lrange(2), major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), - items=lrange(2), - major_axis=lrange(10), - minor_axis=lrange(5), dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - panel = Panel( - np.random.randn(2, 10, 5), - items=lrange(2), major_axis=lrange(10), - 
minor_axis=lrange(5), - dtype=dtype) - _check_dtype(panel, dtype) - - for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - df1 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - df2 = DataFrame(np.random.randn(2, 5), - index=lrange(2), columns=lrange(5)) - panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) - _check_dtype(panel, dtype) + # GH #797 + + def _check_dtype(panel, dtype): + for i in panel.items: + assert panel[i].values.dtype.name == dtype + + # only nan holding types allowed here + for dtype in ['float64', 'float32', 'object']: + panel = Panel(items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel( + np.random.randn(2, 10, 5), + items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), + dtype=dtype) + _check_dtype(panel, dtype) + + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + df1 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + df2 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) + _check_dtype(panel, dtype) def test_constructor_fails_with_not_3d_input(self): - with catch_warnings(record=True): - with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa - Panel(np.random.randn(10, 2)) + with tm.assert_raises_regex(ValueError, "The number of dimensions required is 3"): # noqa + Panel(np.random.randn(10, 2)) def test_consolidate(self): - with catch_warnings(record=True): - assert self.panel._data.is_consolidated() + assert self.panel._data.is_consolidated() - self.panel['foo'] = 1. - assert not self.panel._data.is_consolidated() + self.panel['foo'] = 1. + assert not self.panel._data.is_consolidated() - panel = self.panel._consolidate() - assert panel._data.is_consolidated() + panel = self.panel._consolidate() + assert panel._data.is_consolidated() def test_ctor_dict(self): - with catch_warnings(record=True): - itema = self.panel['ItemA'] - itemb = self.panel['ItemB'] + itema = self.panel['ItemA'] + itemb = self.panel['ItemB'] - d = {'A': itema, 'B': itemb[5:]} - d2 = {'A': itema._series, 'B': itemb[5:]._series} - d3 = {'A': None, - 'B': DataFrame(itemb[5:]._series), - 'C': DataFrame(itema._series)} + d = {'A': itema, 'B': itemb[5:]} + d2 = {'A': itema._series, 'B': itemb[5:]._series} + d3 = {'A': None, + 'B': DataFrame(itemb[5:]._series), + 'C': DataFrame(itema._series)} - wp = Panel.from_dict(d) - wp2 = Panel.from_dict(d2) # nested Dict + wp = Panel.from_dict(d) + wp2 = Panel.from_dict(d2) # nested Dict - # TODO: unused? - wp3 = Panel.from_dict(d3) # noqa + # TODO: unused? 
+ wp3 = Panel.from_dict(d3) # noqa - tm.assert_index_equal(wp.major_axis, self.panel.major_axis) - assert_panel_equal(wp, wp2) + tm.assert_index_equal(wp.major_axis, self.panel.major_axis) + assert_panel_equal(wp, wp2) - # intersect - wp = Panel.from_dict(d, intersect=True) - tm.assert_index_equal(wp.major_axis, itemb.index[5:]) + # intersect + wp = Panel.from_dict(d, intersect=True) + tm.assert_index_equal(wp.major_axis, itemb.index[5:]) - # use constructor - assert_panel_equal(Panel(d), Panel.from_dict(d)) - assert_panel_equal(Panel(d2), Panel.from_dict(d2)) - assert_panel_equal(Panel(d3), Panel.from_dict(d3)) + # use constructor + assert_panel_equal(Panel(d), Panel.from_dict(d)) + assert_panel_equal(Panel(d2), Panel.from_dict(d2)) + assert_panel_equal(Panel(d3), Panel.from_dict(d3)) - # a pathological case - d4 = {'A': None, 'B': None} + # a pathological case + d4 = {'A': None, 'B': None} - # TODO: unused? - wp4 = Panel.from_dict(d4) # noqa + # TODO: unused? + wp4 = Panel.from_dict(d4) # noqa - assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) + assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) - # cast - dcasted = {k: v.reindex(wp.major_axis).fillna(0) - for k, v in compat.iteritems(d)} - result = Panel(dcasted, dtype=int) - expected = Panel({k: v.astype(int) - for k, v in compat.iteritems(dcasted)}) - assert_panel_equal(result, expected) + # cast + dcasted = {k: v.reindex(wp.major_axis).fillna(0) + for k, v in compat.iteritems(d)} + result = Panel(dcasted, dtype=int) + expected = Panel({k: v.astype(int) + for k, v in compat.iteritems(dcasted)}) + assert_panel_equal(result, expected) - result = Panel(dcasted, dtype=np.int32) - expected = Panel({k: v.astype(np.int32) - for k, v in compat.iteritems(dcasted)}) - assert_panel_equal(result, expected) + result = Panel(dcasted, dtype=np.int32) + expected = Panel({k: v.astype(np.int32) + for k, v in compat.iteritems(dcasted)}) + assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - with catch_warnings(record=True): - data = {k: v.values for k, v in self.panel.iteritems()} - result = Panel(data) - exp_major = Index(np.arange(len(self.panel.major_axis))) - tm.assert_index_equal(result.major_axis, exp_major) + data = {k: v.values for k, v in self.panel.iteritems()} + result = Panel(data) + exp_major = Index(np.arange(len(self.panel.major_axis))) + tm.assert_index_equal(result.major_axis, exp_major) - result = Panel(data, items=self.panel.items, - major_axis=self.panel.major_axis, - minor_axis=self.panel.minor_axis) - assert_panel_equal(result, self.panel) + result = Panel(data, items=self.panel.items, + major_axis=self.panel.major_axis, + minor_axis=self.panel.minor_axis) + assert_panel_equal(result, self.panel) - data['ItemC'] = self.panel['ItemC'] - result = Panel(data) - assert_panel_equal(result, self.panel) + data['ItemC'] = self.panel['ItemC'] + result = Panel(data) + assert_panel_equal(result, self.panel) - # corner, blow up - data['ItemB'] = data['ItemB'][:-1] - pytest.raises(Exception, Panel, data) + # corner, blow up + data['ItemB'] = data['ItemB'][:-1] + pytest.raises(Exception, Panel, data) - data['ItemB'] = self.panel['ItemB'].values[:, :-1] - pytest.raises(Exception, Panel, data) + data['ItemB'] = self.panel['ItemB'].values[:, :-1] + pytest.raises(Exception, Panel, data) def test_ctor_orderedDict(self): - with catch_warnings(record=True): - keys = list(set(np.random.randint(0, 5000, 100)))[ - :50] # unique random int keys - d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) - p = Panel(d) - assert 
list(p.items) == keys + keys = list(set(np.random.randint(0, 5000, 100)))[ + :50] # unique random int keys + d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) + p = Panel(d) + assert list(p.items) == keys - p = Panel.from_dict(d) - assert list(p.items) == keys + p = Panel.from_dict(d) + assert list(p.items) == keys def test_constructor_resize(self): - with catch_warnings(record=True): - data = self.panel._data - items = self.panel.items[:-1] - major = self.panel.major_axis[:-1] - minor = self.panel.minor_axis[:-1] - - result = Panel(data, items=items, - major_axis=major, minor_axis=minor) - expected = self.panel.reindex( - items=items, major=major, minor=minor) - assert_panel_equal(result, expected) - - result = Panel(data, items=items, major_axis=major) - expected = self.panel.reindex(items=items, major=major) - assert_panel_equal(result, expected) - - result = Panel(data, items=items) - expected = self.panel.reindex(items=items) - assert_panel_equal(result, expected) - - result = Panel(data, minor_axis=minor) - expected = self.panel.reindex(minor=minor) - assert_panel_equal(result, expected) + data = self.panel._data + items = self.panel.items[:-1] + major = self.panel.major_axis[:-1] + minor = self.panel.minor_axis[:-1] + + result = Panel(data, items=items, + major_axis=major, minor_axis=minor) + expected = self.panel.reindex( + items=items, major=major, minor=minor) + assert_panel_equal(result, expected) + + result = Panel(data, items=items, major_axis=major) + expected = self.panel.reindex(items=items, major=major) + assert_panel_equal(result, expected) + + result = Panel(data, items=items) + expected = self.panel.reindex(items=items) + assert_panel_equal(result, expected) + + result = Panel(data, minor_axis=minor) + expected = self.panel.reindex(minor=minor) + assert_panel_equal(result, expected) def test_from_dict_mixed_orient(self): - with catch_warnings(record=True): - df = tm.makeDataFrame() - df['foo'] = 'bar' + df = tm.makeDataFrame() + df['foo'] = 'bar' - data = {'k1': df, 'k2': df} + data = {'k1': df, 'k2': df} - panel = Panel.from_dict(data, orient='minor') + panel = Panel.from_dict(data, orient='minor') - assert panel['foo'].values.dtype == np.object_ - assert panel['A'].values.dtype == np.float64 + assert panel['foo'].values.dtype == np.object_ + assert panel['A'].values.dtype == np.float64 def test_constructor_error_msgs(self): - with catch_warnings(record=True): - def testit(): - Panel(np.random.randn(3, 4, 5), - lrange(4), lrange(5), lrange(5)) - - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(4, 5, 5\)", - testit) - - def testit(): - Panel(np.random.randn(3, 4, 5), - lrange(5), lrange(4), lrange(5)) - - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 4, 5\)", - testit) - - def testit(): - Panel(np.random.randn(3, 4, 5), - lrange(5), lrange(5), lrange(4)) - - tm.assert_raises_regex(ValueError, - r"Shape of passed values is " - r"\(3, 4, 5\), indices imply " - r"\(5, 5, 4\)", - testit) + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(4), lrange(5), lrange(5)) + + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(4, 5, 5\)", + testit) + + def testit(): + Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(4), lrange(5)) + + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 4, 5\)", + testit) + + def testit(): + 
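
test_from_dict_mixed_orient above depends on Panel.from_dict's orient='minor' mode, in which the columns of the input DataFrames become the items. A compact sketch of that behavior (pandas < 0.25; the frame contents are arbitrary):

    import numpy as np
    import pandas as pd
    from pandas import Panel

    df = pd.DataFrame({'A': np.random.randn(4), 'foo': ['bar'] * 4})
    panel = Panel.from_dict({'k1': df, 'k2': df}, orient='minor')
    assert panel['foo'].values.dtype == np.object_  # object column -> object item
    assert panel['A'].values.dtype == np.float64
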
Panel(np.random.randn(3, 4, 5), + lrange(5), lrange(5), lrange(4)) + + tm.assert_raises_regex(ValueError, + r"Shape of passed values is " + r"\(3, 4, 5\), indices imply " + r"\(5, 5, 4\)", + testit) def test_conform(self): - with catch_warnings(record=True): - df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) - conformed = self.panel.conform(df) + df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) + conformed = self.panel.conform(df) - tm.assert_index_equal(conformed.index, self.panel.major_axis) - tm.assert_index_equal(conformed.columns, self.panel.minor_axis) + tm.assert_index_equal(conformed.index, self.panel.major_axis) + tm.assert_index_equal(conformed.columns, self.panel.minor_axis) def test_convert_objects(self): - with catch_warnings(record=True): - - # GH 4937 - p = Panel(dict(A=dict(a=['1', '1.0']))) - expected = Panel(dict(A=dict(a=[1, 1.0]))) - result = p._convert(numeric=True, coerce=True) - assert_panel_equal(result, expected) + # GH 4937 + p = Panel(dict(A=dict(a=['1', '1.0']))) + expected = Panel(dict(A=dict(a=[1, 1.0]))) + result = p._convert(numeric=True, coerce=True) + assert_panel_equal(result, expected) def test_dtypes(self): @@ -1222,964 +1195,933 @@ def test_dtypes(self): assert_series_equal(result, expected) def test_astype(self): - with catch_warnings(record=True): - # GH7271 - data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) - panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + # GH7271 + data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) + panel = Panel(data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - str_data = np.array([[['1', '2'], ['3', '4']], - [['5', '6'], ['7', '8']]]) - expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) - assert_panel_equal(panel.astype(str), expected) + str_data = np.array([[['1', '2'], ['3', '4']], + [['5', '6'], ['7', '8']]]) + expected = Panel(str_data, ['a', 'b'], ['c', 'd'], ['e', 'f']) + assert_panel_equal(panel.astype(str), expected) - pytest.raises(NotImplementedError, panel.astype, {0: str}) + pytest.raises(NotImplementedError, panel.astype, {0: str}) def test_apply(self): - with catch_warnings(record=True): - # GH1148 - - # ufunc - applied = self.panel.apply(np.sqrt) - with np.errstate(invalid='ignore'): - expected = np.sqrt(self.panel.values) - assert_almost_equal(applied.values, expected) - - # ufunc same shape - result = self.panel.apply(lambda x: x * 2, axis='items') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='major_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, axis='minor_axis') - expected = self.panel * 2 - assert_panel_equal(result, expected) - - # reduction to DataFrame - result = self.panel.apply(lambda x: x.dtype, axis='items') - expected = DataFrame(np.dtype('float64'), - index=self.panel.major_axis, - columns=self.panel.minor_axis) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='major_axis') - expected = DataFrame(np.dtype('float64'), - index=self.panel.minor_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') - expected = DataFrame(np.dtype('float64'), - index=self.panel.major_axis, - columns=self.panel.items) - assert_frame_equal(result, expected) - - # reductions via other dims - expected = self.panel.sum(0) - result = self.panel.apply(lambda x: x.sum(), axis='items') - assert_frame_equal(result, expected) - 
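
test_constructor_error_msgs pins down the constructor's shape validation. The same check as a standalone snippet (pandas < 0.25; pytest.raises(..., match=...) is used here in place of the file's tm.assert_raises_regex helper):

    import numpy as np
    import pytest
    from pandas import Panel

    # four item labels for data holding only three items
    with pytest.raises(ValueError, match=r"Shape of passed values"):
        Panel(np.random.randn(3, 4, 5),
              items=range(4), major_axis=range(5), minor_axis=range(5))
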
expected = self.panel.sum(1) - result = self.panel.apply(lambda x: x.sum(), axis='major_axis') - assert_frame_equal(result, expected) - expected = self.panel.sum(2) - result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') - assert_frame_equal(result, expected) - - # pass kwargs - result = self.panel.apply( - lambda x, y: x.sum() + y, axis='items', y=5) - expected = self.panel.sum(0) + 5 - assert_frame_equal(result, expected) + # GH1148 + + # ufunc + applied = self.panel.apply(np.sqrt) + with np.errstate(invalid='ignore'): + expected = np.sqrt(self.panel.values) + assert_almost_equal(applied.values, expected) + + # ufunc same shape + result = self.panel.apply(lambda x: x * 2, axis='items') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='major_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, axis='minor_axis') + expected = self.panel * 2 + assert_panel_equal(result, expected) + + # reduction to DataFrame + result = self.panel.apply(lambda x: x.dtype, axis='items') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.minor_axis) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='major_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.minor_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') + expected = DataFrame(np.dtype('float64'), + index=self.panel.major_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) + + # reductions via other dims + expected = self.panel.sum(0) + result = self.panel.apply(lambda x: x.sum(), axis='items') + assert_frame_equal(result, expected) + expected = self.panel.sum(1) + result = self.panel.apply(lambda x: x.sum(), axis='major_axis') + assert_frame_equal(result, expected) + expected = self.panel.sum(2) + result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') + assert_frame_equal(result, expected) + + # pass kwargs + result = self.panel.apply( + lambda x, y: x.sum() + y, axis='items', y=5) + expected = self.panel.sum(0) + 5 + assert_frame_equal(result, expected) def test_apply_slabs(self): - with catch_warnings(record=True): - - # same shape as original - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'major_axis']) - expected = (self.panel * 2).transpose('minor_axis', 'major_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['items', 'minor_axis']) - expected = (self.panel * 2).transpose('major_axis', 'minor_axis', - 'items') - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'items']) - assert_panel_equal(result, expected) - - result = self.panel.apply(lambda x: x * 2, - axis=['major_axis', 'minor_axis']) - expected = self.panel * 2 - assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x * 2, - axis=['minor_axis', 'major_axis']) - assert_panel_equal(result, expected) - - # reductions - result = self.panel.apply(lambda x: x.sum(0), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(1).T - assert_frame_equal(result, expected) + + # same shape as original + result = self.panel.apply(lambda x: x * 2, + axis=['items', 
'major_axis']) + expected = (self.panel * 2).transpose('minor_axis', 'major_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'items']) + assert_panel_equal(result, expected) + + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'minor_axis']) + expected = (self.panel * 2).transpose('major_axis', 'minor_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'items']) + assert_panel_equal(result, expected) + + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'minor_axis']) + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'major_axis']) + assert_panel_equal(result, expected) + + # reductions + result = self.panel.apply(lambda x: x.sum(0), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(1).T + assert_frame_equal(result, expected) + + result = self.panel.apply(lambda x: x.sum(1), axis=[ + 'items', 'major_axis' + ]) + expected = self.panel.sum(0) + assert_frame_equal(result, expected) + + # transforms + f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T # make sure that we don't trigger any warnings - with catch_warnings(record=True): - result = self.panel.apply(lambda x: x.sum(1), axis=[ - 'items', 'major_axis' - ]) - expected = self.panel.sum(0) - assert_frame_equal(result, expected) - - # transforms - f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T - - # make sure that we don't trigger any warnings - result = self.panel.apply(f, axis=['items', 'major_axis']) - expected = Panel({ax: f(self.panel.loc[:, :, ax]) - for ax in self.panel.minor_axis}) - assert_panel_equal(result, expected) - - result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) - expected = Panel({ax: f(self.panel.loc[ax]) - for ax in self.panel.items}) - assert_panel_equal(result, expected) - - result = self.panel.apply(f, axis=['minor_axis', 'items']) - expected = Panel({ax: f(self.panel.loc[:, ax]) - for ax in self.panel.major_axis}) - assert_panel_equal(result, expected) - - # with multi-indexes - # GH7469 - index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( - 'two', 'a'), ('two', 'b')]) - dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( - 4, 3), columns=list("ABC"), index=index) - p = Panel({'f': dfa, 'g': dfb}) - result = p.apply(lambda x: x.sum(), axis=0) - - # on windows this will be in32 - result = result.astype('int64') - expected = p.sum(0) - assert_frame_equal(result, expected) + result = self.panel.apply(f, axis=['items', 'major_axis']) + expected = Panel({ax: f(self.panel.loc[:, :, ax]) + for ax in self.panel.minor_axis}) + assert_panel_equal(result, expected) + + result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) + expected = Panel({ax: f(self.panel.loc[ax]) + for ax in self.panel.items}) + assert_panel_equal(result, expected) + + result = self.panel.apply(f, axis=['minor_axis', 'items']) + expected = Panel({ax: f(self.panel.loc[:, ax]) + for ax in self.panel.major_axis}) + assert_panel_equal(result, expected) + + # with multi-indexes + # GH7469 + index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( + 'two', 'a'), ('two', 'b')]) + dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + dfb = DataFrame(np.array(np.arange(10, 22, 
dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + p = Panel({'f': dfa, 'g': dfb}) + result = p.apply(lambda x: x.sum(), axis=0) + + # on Windows this will be int32 + result = result.astype('int64') + expected = p.sum(0) + assert_frame_equal(result, expected) def test_apply_no_or_zero_ndim(self): - with catch_warnings(record=True): - # GH10332 - self.panel = Panel(np.random.rand(5, 5, 5)) + # GH10332 + self.panel = Panel(np.random.rand(5, 5, 5)) - result_int = self.panel.apply(lambda df: 0, axis=[1, 2]) - result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2]) - result_int64 = self.panel.apply( - lambda df: np.int64(0), axis=[1, 2]) - result_float64 = self.panel.apply(lambda df: np.float64(0.0), - axis=[1, 2]) + result_int = self.panel.apply(lambda df: 0, axis=[1, 2]) + result_float = self.panel.apply(lambda df: 0.0, axis=[1, 2]) + result_int64 = self.panel.apply( + lambda df: np.int64(0), axis=[1, 2]) + result_float64 = self.panel.apply(lambda df: np.float64(0.0), + axis=[1, 2]) - expected_int = expected_int64 = Series([0] * 5) - expected_float = expected_float64 = Series([0.0] * 5) + expected_int = expected_int64 = Series([0] * 5) + expected_float = expected_float64 = Series([0.0] * 5) - assert_series_equal(result_int, expected_int) - assert_series_equal(result_int64, expected_int64) - assert_series_equal(result_float, expected_float) - assert_series_equal(result_float64, expected_float64) + assert_series_equal(result_int, expected_int) + assert_series_equal(result_int64, expected_int64) + assert_series_equal(result_float, expected_float) + assert_series_equal(result_float64, expected_float64) def test_reindex(self): - with catch_warnings(record=True): - ref = self.panel['ItemB'] + ref = self.panel['ItemB'] - # items - result = self.panel.reindex(items=['ItemA', 'ItemB']) - assert_frame_equal(result['ItemB'], ref) + # items + result = self.panel.reindex(items=['ItemA', 'ItemB']) + assert_frame_equal(result['ItemB'], ref) - # major - new_major = list(self.panel.major_axis[:10]) - result = self.panel.reindex(major=new_major) - assert_frame_equal(result['ItemB'], ref.reindex(index=new_major)) + # major + new_major = list(self.panel.major_axis[:10]) + result = self.panel.reindex(major=new_major) + assert_frame_equal(result['ItemB'], ref.reindex(index=new_major)) - # raise exception put both major and major_axis - pytest.raises(Exception, self.panel.reindex, - major_axis=new_major, - major=new_major) + # raise exception when both major and major_axis are passed + pytest.raises(Exception, self.panel.reindex, + major_axis=new_major, + major=new_major) - # minor - new_minor = list(self.panel.minor_axis[:2]) - result = self.panel.reindex(minor=new_minor) - assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor)) + # minor + new_minor = list(self.panel.minor_axis[:2]) + result = self.panel.reindex(minor=new_minor) + assert_frame_equal(result['ItemB'], ref.reindex(columns=new_minor)) - # raise exception put both major and major_axis - pytest.raises(Exception, self.panel.reindex, - minor_axis=new_minor, - minor=new_minor) + # raise exception when both minor and minor_axis are passed + pytest.raises(Exception, self.panel.reindex, + minor_axis=new_minor, + minor=new_minor) - # this ok - result = self.panel.reindex() - assert_panel_equal(result, self.panel) - assert result is not self.panel + # this ok + result = self.panel.reindex() + assert_panel_equal(result, self.panel) + assert result is not self.panel - # with filling - smaller_major = self.panel.major_axis[::5] - smaller = 
self.panel.reindex(major=smaller_major) + # with filling + smaller_major = self.panel.major_axis[::5] + smaller = self.panel.reindex(major=smaller_major) - larger = smaller.reindex(major=self.panel.major_axis, method='pad') + larger = smaller.reindex(major=self.panel.major_axis, method='pad') - assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), - smaller.major_xs(smaller_major[0])) + assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), + smaller.major_xs(smaller_major[0])) - # don't necessarily copy - result = self.panel.reindex( - major=self.panel.major_axis, copy=False) - assert_panel_equal(result, self.panel) - assert result is self.panel + # don't necessarily copy + result = self.panel.reindex( + major=self.panel.major_axis, copy=False) + assert_panel_equal(result, self.panel) + assert result is self.panel def test_reindex_axis_style(self): - with catch_warnings(record=True): - panel = Panel(np.random.rand(5, 5, 5)) - expected0 = Panel(panel.values).iloc[[0, 1]] - expected1 = Panel(panel.values).iloc[:, [0, 1]] - expected2 = Panel(panel.values).iloc[:, :, [0, 1]] + panel = Panel(np.random.rand(5, 5, 5)) + expected0 = Panel(panel.values).iloc[[0, 1]] + expected1 = Panel(panel.values).iloc[:, [0, 1]] + expected2 = Panel(panel.values).iloc[:, :, [0, 1]] - result = panel.reindex([0, 1], axis=0) - assert_panel_equal(result, expected0) + result = panel.reindex([0, 1], axis=0) + assert_panel_equal(result, expected0) - result = panel.reindex([0, 1], axis=1) - assert_panel_equal(result, expected1) + result = panel.reindex([0, 1], axis=1) + assert_panel_equal(result, expected1) - result = panel.reindex([0, 1], axis=2) - assert_panel_equal(result, expected2) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) - result = panel.reindex([0, 1], axis=2) - assert_panel_equal(result, expected2) + result = panel.reindex([0, 1], axis=2) + assert_panel_equal(result, expected2) def test_reindex_multi(self): - with catch_warnings(record=True): - - # with and without copy full reindexing - result = self.panel.reindex( - items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - - assert result.items is self.panel.items - assert result.major_axis is self.panel.major_axis - assert result.minor_axis is self.panel.minor_axis - - result = self.panel.reindex( - items=self.panel.items, - major=self.panel.major_axis, - minor=self.panel.minor_axis, copy=False) - assert_panel_equal(result, self.panel) - - # multi-axis indexing consistency - # GH 5900 - df = DataFrame(np.random.randn(4, 3)) - p = Panel({'Item1': df}) - expected = Panel({'Item1': df}) - expected['Item2'] = np.nan - - items = ['Item1', 'Item2'] - major_axis = np.arange(4) - minor_axis = np.arange(3) - - results = [] - results.append(p.reindex(items=items, major_axis=major_axis, - copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - copy=False)) - results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=True)) - results.append(p.reindex(items=items, minor_axis=minor_axis, - copy=False)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, - minor_axis=minor_axis, copy=False)) - - for i, r in enumerate(results): - assert_panel_equal(expected, r) + + # with and without copy full reindexing + result = self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + + 
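
test_reindex above also covers fill-on-reindex: thinning the major axis and reindexing back with method='pad' forward-fills the gaps. Sketched standalone (pandas < 0.25; the shape is arbitrary):

    import numpy as np
    from pandas import Panel

    p = Panel(np.random.randn(2, 10, 4))
    sparse = p.reindex(major=p.major_axis[::5])                 # keep every 5th row
    filled = sparse.reindex(major=p.major_axis, method='pad')   # forward-fill the rest
    # row 1 was dropped, so after padding it equals the surviving row 0
    assert filled.major_xs(p.major_axis[1]).equals(sparse.major_xs(p.major_axis[0]))
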
assert result.items is self.panel.items + assert result.major_axis is self.panel.major_axis + assert result.minor_axis is self.panel.minor_axis + + result = self.panel.reindex( + items=self.panel.items, + major=self.panel.major_axis, + minor=self.panel.minor_axis, copy=False) + assert_panel_equal(result, self.panel) + + # multi-axis indexing consistency + # GH 5900 + df = DataFrame(np.random.randn(4, 3)) + p = Panel({'Item1': df}) + expected = Panel({'Item1': df}) + expected['Item2'] = np.nan + + items = ['Item1', 'Item2'] + major_axis = np.arange(4) + minor_axis = np.arange(3) + + results = [] + results.append(p.reindex(items=items, major_axis=major_axis, + copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + copy=False)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=True)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=False)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=False)) + + for i, r in enumerate(results): + assert_panel_equal(expected, r) def test_reindex_like(self): - with catch_warnings(record=True): - # reindex_like - smaller = self.panel.reindex(items=self.panel.items[:-1], - major=self.panel.major_axis[:-1], - minor=self.panel.minor_axis[:-1]) - smaller_like = self.panel.reindex_like(smaller) - assert_panel_equal(smaller, smaller_like) + # reindex_like + smaller = self.panel.reindex(items=self.panel.items[:-1], + major=self.panel.major_axis[:-1], + minor=self.panel.minor_axis[:-1]) + smaller_like = self.panel.reindex_like(smaller) + assert_panel_equal(smaller, smaller_like) def test_take(self): - with catch_warnings(record=True): - # axis == 0 - result = self.panel.take([2, 0, 1], axis=0) - expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB']) - assert_panel_equal(result, expected) + # axis == 0 + result = self.panel.take([2, 0, 1], axis=0) + expected = self.panel.reindex(items=['ItemC', 'ItemA', 'ItemB']) + assert_panel_equal(result, expected) - # axis >= 1 - result = self.panel.take([3, 0, 1, 2], axis=2) - expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) - assert_panel_equal(result, expected) + # axis >= 1 + result = self.panel.take([3, 0, 1, 2], axis=2) + expected = self.panel.reindex(minor=['D', 'A', 'B', 'C']) + assert_panel_equal(result, expected) - # neg indices ok - expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) - result = self.panel.take([3, -1, 1, 2], axis=2) - assert_panel_equal(result, expected) + # neg indices ok + expected = self.panel.reindex(minor=['D', 'D', 'B', 'C']) + result = self.panel.take([3, -1, 1, 2], axis=2) + assert_panel_equal(result, expected) - pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2) + pytest.raises(Exception, self.panel.take, [4, 0, 1, 2], axis=2) def test_sort_index(self): - with catch_warnings(record=True): - import random - - ritems = list(self.panel.items) - rmajor = list(self.panel.major_axis) - rminor = list(self.panel.minor_axis) - random.shuffle(ritems) - random.shuffle(rmajor) - random.shuffle(rminor) - - random_order = self.panel.reindex(items=ritems) - sorted_panel = random_order.sort_index(axis=0) - assert_panel_equal(sorted_panel, self.panel) - - # descending - random_order = self.panel.reindex(items=ritems) - sorted_panel = random_order.sort_index(axis=0, ascending=False) - assert_panel_equal( - sorted_panel, - self.panel.reindex(items=self.panel.items[::-1])) 
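
test_take above is the positional counterpart of the label-based reindexing checked just before it. A minimal sketch (pandas < 0.25; default integer axes), including the negative-index case the test exercises:

    import numpy as np
    from pandas import Panel

    p = Panel(np.random.randn(3, 4, 5))      # items/major/minor default to ranges
    reordered = p.take([2, 0, 1], axis=0)    # positional reorder of the items
    assert list(reordered.items) == [2, 0, 1]
    last = p.take([-1], axis=2)              # negative indices count from the end
    assert list(last.minor_axis) == [4]
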
- - random_order = self.panel.reindex(major=rmajor) - sorted_panel = random_order.sort_index(axis=1) - assert_panel_equal(sorted_panel, self.panel) - - random_order = self.panel.reindex(minor=rminor) - sorted_panel = random_order.sort_index(axis=2) - assert_panel_equal(sorted_panel, self.panel) + import random + + ritems = list(self.panel.items) + rmajor = list(self.panel.major_axis) + rminor = list(self.panel.minor_axis) + random.shuffle(ritems) + random.shuffle(rmajor) + random.shuffle(rminor) + + random_order = self.panel.reindex(items=ritems) + sorted_panel = random_order.sort_index(axis=0) + assert_panel_equal(sorted_panel, self.panel) + + # descending + random_order = self.panel.reindex(items=ritems) + sorted_panel = random_order.sort_index(axis=0, ascending=False) + assert_panel_equal( + sorted_panel, + self.panel.reindex(items=self.panel.items[::-1])) + + random_order = self.panel.reindex(major=rmajor) + sorted_panel = random_order.sort_index(axis=1) + assert_panel_equal(sorted_panel, self.panel) + + random_order = self.panel.reindex(minor=rminor) + sorted_panel = random_order.sort_index(axis=2) + assert_panel_equal(sorted_panel, self.panel) def test_fillna(self): - with catch_warnings(record=True): - filled = self.panel.fillna(0) - assert np.isfinite(filled.values).all() - - filled = self.panel.fillna(method='backfill') - assert_frame_equal(filled['ItemA'], - self.panel['ItemA'].fillna(method='backfill')) - - panel = self.panel.copy() - panel['str'] = 'foo' - - filled = panel.fillna(method='backfill') - assert_frame_equal(filled['ItemA'], - panel['ItemA'].fillna(method='backfill')) - - empty = self.panel.reindex(items=[]) - filled = empty.fillna(0) - assert_panel_equal(filled, empty) - - pytest.raises(ValueError, self.panel.fillna) - pytest.raises(ValueError, self.panel.fillna, 5, method='ffill') - - pytest.raises(TypeError, self.panel.fillna, [1, 2]) - pytest.raises(TypeError, self.panel.fillna, (1, 2)) - - # limit not implemented when only value is specified - p = Panel(np.random.randn(3, 4, 5)) - p.iloc[0:2, 0:2, 0:2] = np.nan - pytest.raises(NotImplementedError, - lambda: p.fillna(999, limit=1)) - - # Test in place fillNA - # Expected result - expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - # method='ffill' - p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - p1.fillna(method='ffill', inplace=True) - assert_panel_equal(p1, expected) - - # method='bfill' - p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], - items=['a', 'b'], minor_axis=['x', 'y'], - dtype=np.float64) - p2.fillna(method='bfill', inplace=True) - assert_panel_equal(p2, expected) + filled = self.panel.fillna(0) + assert np.isfinite(filled.values).all() + + filled = self.panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + self.panel['ItemA'].fillna(method='backfill')) + + panel = self.panel.copy() + panel['str'] = 'foo' + + filled = panel.fillna(method='backfill') + assert_frame_equal(filled['ItemA'], + panel['ItemA'].fillna(method='backfill')) + + empty = self.panel.reindex(items=[]) + filled = empty.fillna(0) + assert_panel_equal(filled, empty) + + pytest.raises(ValueError, self.panel.fillna) + pytest.raises(ValueError, self.panel.fillna, 5, method='ffill') + + pytest.raises(TypeError, self.panel.fillna, [1, 2]) + pytest.raises(TypeError, self.panel.fillna, (1, 2)) + + # limit not implemented when only value is specified + p = 
Panel(np.random.randn(3, 4, 5)) + p.iloc[0:2, 0:2, 0:2] = np.nan + pytest.raises(NotImplementedError, + lambda: p.fillna(999, limit=1)) + + # Test in place fillNA + # Expected result + expected = Panel([[[0, 1], [2, 1]], [[10, 11], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + # method='ffill' + p1 = Panel([[[0, 1], [2, np.nan]], [[10, 11], [12, np.nan]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p1.fillna(method='ffill', inplace=True) + assert_panel_equal(p1, expected) + + # method='bfill' + p2 = Panel([[[0, np.nan], [2, 1]], [[10, np.nan], [12, 11]]], + items=['a', 'b'], minor_axis=['x', 'y'], + dtype=np.float64) + p2.fillna(method='bfill', inplace=True) + assert_panel_equal(p2, expected) def test_ffill_bfill(self): - with catch_warnings(record=True): - assert_panel_equal(self.panel.ffill(), - self.panel.fillna(method='ffill')) - assert_panel_equal(self.panel.bfill(), - self.panel.fillna(method='bfill')) + assert_panel_equal(self.panel.ffill(), + self.panel.fillna(method='ffill')) + assert_panel_equal(self.panel.bfill(), + self.panel.fillna(method='bfill')) def test_truncate_fillna_bug(self): - with catch_warnings(record=True): - # #1823 - result = self.panel.truncate(before=None, after=None, axis='items') + # #1823 + result = self.panel.truncate(before=None, after=None, axis='items') - # it works! - result.fillna(value=0.0) + # it works! + result.fillna(value=0.0) def test_swapaxes(self): - with catch_warnings(record=True): - result = self.panel.swapaxes('items', 'minor') - assert result.items is self.panel.minor_axis + result = self.panel.swapaxes('items', 'minor') + assert result.items is self.panel.minor_axis - result = self.panel.swapaxes('items', 'major') - assert result.items is self.panel.major_axis + result = self.panel.swapaxes('items', 'major') + assert result.items is self.panel.major_axis - result = self.panel.swapaxes('major', 'minor') - assert result.major_axis is self.panel.minor_axis + result = self.panel.swapaxes('major', 'minor') + assert result.major_axis is self.panel.minor_axis - panel = self.panel.copy() - result = panel.swapaxes('major', 'minor') - panel.values[0, 0, 1] = np.nan - expected = panel.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + panel = self.panel.copy() + result = panel.swapaxes('major', 'minor') + panel.values[0, 0, 1] = np.nan + expected = panel.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - # this should also work - result = self.panel.swapaxes(0, 1) - assert result.items is self.panel.major_axis + # this should also work + result = self.panel.swapaxes(0, 1) + assert result.items is self.panel.major_axis - # this works, but return a copy - result = self.panel.swapaxes('items', 'items') - assert_panel_equal(self.panel, result) - assert id(self.panel) != id(result) + # this works, but return a copy + result = self.panel.swapaxes('items', 'items') + assert_panel_equal(self.panel, result) + assert id(self.panel) != id(result) def test_transpose(self): - with catch_warnings(record=True): - result = self.panel.transpose('minor', 'major', 'items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # test kwargs - result = self.panel.transpose(items='minor', major='major', - minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # text mixture of args - result = self.panel.transpose( - 'minor', major='major', minor='items') - expected = 
self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - result = self.panel.transpose('minor', - 'major', - minor='items') - expected = self.panel.swapaxes('items', 'minor') - assert_panel_equal(result, expected) - - # duplicate axes - with tm.assert_raises_regex(TypeError, - 'not enough/duplicate arguments'): - self.panel.transpose('minor', maj='major', minor='items') - - with tm.assert_raises_regex(ValueError, - 'repeated axis in transpose'): - self.panel.transpose('minor', 'major', major='minor', - minor='items') - - result = self.panel.transpose(2, 1, 0) - assert_panel_equal(result, expected) - - result = self.panel.transpose('minor', 'items', 'major') - expected = self.panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - assert_panel_equal(result, expected) - - result = self.panel.transpose(2, 0, 1) - assert_panel_equal(result, expected) - - pytest.raises(ValueError, self.panel.transpose, 0, 0, 1) + result = self.panel.transpose('minor', 'major', 'items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) + + # test kwargs + result = self.panel.transpose(items='minor', major='major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) + + # test mixture of args + result = self.panel.transpose( + 'minor', major='major', minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) + + result = self.panel.transpose('minor', + 'major', + minor='items') + expected = self.panel.swapaxes('items', 'minor') + assert_panel_equal(result, expected) + + # duplicate axes + with tm.assert_raises_regex(TypeError, + 'not enough/duplicate arguments'): + self.panel.transpose('minor', maj='major', minor='items') + + with tm.assert_raises_regex(ValueError, + 'repeated axis in transpose'): + self.panel.transpose('minor', 'major', major='minor', + minor='items') + + result = self.panel.transpose(2, 1, 0) + assert_panel_equal(result, expected) + + result = self.panel.transpose('minor', 'items', 'major') + expected = self.panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) + + result = self.panel.transpose(2, 0, 1) + assert_panel_equal(result, expected) + + pytest.raises(ValueError, self.panel.transpose, 0, 0, 1) def test_transpose_copy(self): - with catch_warnings(record=True): - panel = self.panel.copy() - result = panel.transpose(2, 0, 1, copy=True) - expected = panel.swapaxes('items', 'minor') - expected = expected.swapaxes('major', 'minor') - assert_panel_equal(result, expected) + panel = self.panel.copy() + result = panel.transpose(2, 0, 1, copy=True) + expected = panel.swapaxes('items', 'minor') + expected = expected.swapaxes('major', 'minor') + assert_panel_equal(result, expected) - panel.values[0, 1, 1] = np.nan - assert notna(result.values[1, 0, 1]) + panel.values[0, 1, 1] = np.nan + assert notna(result.values[1, 0, 1]) def test_to_frame(self): - with catch_warnings(record=True): - # filtered - filtered = self.panel.to_frame() - expected = self.panel.to_frame().dropna(how='any') - assert_frame_equal(filtered, expected) - - # unfiltered - unfiltered = self.panel.to_frame(filter_observations=False) - assert_panel_equal(unfiltered.to_panel(), self.panel) - - # names - assert unfiltered.index.names == ('major', 'minor') - - # unsorted, round trip - df = self.panel.to_frame(filter_observations=False) - unsorted = 
df.take(np.random.permutation(len(df))) - pan = unsorted.to_panel() - assert_panel_equal(pan, self.panel) - - # preserve original index names - df = DataFrame(np.random.randn(6, 2), - index=[['a', 'a', 'b', 'b', 'c', 'c'], - [0, 1, 0, 1, 0, 1]], - columns=['one', 'two']) - df.index.names = ['foo', 'bar'] - df.columns.name = 'baz' - - rdf = df.to_panel().to_frame() - assert rdf.index.names == df.index.names - assert rdf.columns.names == df.columns.names + # filtered + filtered = self.panel.to_frame() + expected = self.panel.to_frame().dropna(how='any') + assert_frame_equal(filtered, expected) + + # unfiltered + unfiltered = self.panel.to_frame(filter_observations=False) + assert_panel_equal(unfiltered.to_panel(), self.panel) + + # names + assert unfiltered.index.names == ('major', 'minor') + + # unsorted, round trip + df = self.panel.to_frame(filter_observations=False) + unsorted = df.take(np.random.permutation(len(df))) + pan = unsorted.to_panel() + assert_panel_equal(pan, self.panel) + + # preserve original index names + df = DataFrame(np.random.randn(6, 2), + index=[['a', 'a', 'b', 'b', 'c', 'c'], + [0, 1, 0, 1, 0, 1]], + columns=['one', 'two']) + df.index.names = ['foo', 'bar'] + df.columns.name = 'baz' + + rdf = df.to_panel().to_frame() + assert rdf.index.names == df.index.names + assert rdf.columns.names == df.columns.names def test_to_frame_mixed(self): - with catch_warnings(record=True): - panel = self.panel.fillna(0) - panel['str'] = 'foo' - panel['bool'] = panel['ItemA'] > 0 - - lp = panel.to_frame() - wp = lp.to_panel() - assert wp['bool'].values.dtype == np.bool_ - # Previously, this was mutating the underlying - # index and changing its name - assert_frame_equal(wp['bool'], panel['bool'], check_names=False) - - # GH 8704 - # with categorical - df = panel.to_frame() - df['category'] = df['str'].astype('category') - - # to_panel - # TODO: this converts back to object - p = df.to_panel() - expected = panel.copy() - expected['category'] = 'foo' - assert_panel_equal(p, expected) + panel = self.panel.fillna(0) + panel['str'] = 'foo' + panel['bool'] = panel['ItemA'] > 0 + + lp = panel.to_frame() + wp = lp.to_panel() + assert wp['bool'].values.dtype == np.bool_ + # Previously, this was mutating the underlying + # index and changing its name + assert_frame_equal(wp['bool'], panel['bool'], check_names=False) + + # GH 8704 + # with categorical + df = panel.to_frame() + df['category'] = df['str'].astype('category') + + # to_panel + # TODO: this converts back to object + p = df.to_panel() + expected = panel.copy() + expected['category'] = 'foo' + assert_panel_equal(p, expected) def test_to_frame_multi_major(self): - with catch_warnings(record=True): - idx = MultiIndex.from_tuples( - [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], - [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - expected_idx = MultiIndex.from_tuples( - [ - (1, 'one', 'A'), (1, 'one', 'B'), - (1, 'one', 'C'), (1, 'two', 'A'), - (1, 'two', 'B'), (1, 'two', 'C'), - (2, 'one', 'A'), (2, 'one', 'B'), - (2, 'one', 'C'), (2, 'two', 'A'), - (2, 'two', 'B'), (2, 'two', 'C') - ], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, - 'c', 1, 4, 'd', 1], - 'i2': [1, 'a', 1, 2, 'b', - 1, 3, 'c', 1, 4, 'd', 1]}, - index=expected_idx) - result = wp.to_frame() - assert_frame_equal(result, expected) - - wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. 
GH #5773 - result = wp.to_frame() - assert_frame_equal(result, expected[1:]) - - idx = MultiIndex.from_tuples( - [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')]) - df = DataFrame([[1, 'a', 1], [2, 'b', 1], - [3, 'c', 1], [4, 'd', 1]], - columns=['A', 'B', 'C'], index=idx) - wp = Panel({'i1': df, 'i2': df}) - ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), - (1, 'two', 'C'), - (1, 'one', 'A'), - (1, 'one', 'B'), - (1, 'one', 'C'), - (2, 'one', 'A'), - (2, 'one', 'B'), - (2, 'one', 'C'), - (np.nan, 'two', 'A'), - (np.nan, 'two', 'B'), - (np.nan, 'two', 'C')], - names=[None, None, 'minor']) - expected.index = ex_idx - result = wp.to_frame() - assert_frame_equal(result, expected) + idx = MultiIndex.from_tuples( + [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + expected_idx = MultiIndex.from_tuples( + [ + (1, 'one', 'A'), (1, 'one', 'B'), + (1, 'one', 'C'), (1, 'two', 'A'), + (1, 'two', 'B'), (1, 'two', 'C'), + (2, 'one', 'A'), (2, 'one', 'B'), + (2, 'one', 'C'), (2, 'two', 'A'), + (2, 'two', 'B'), (2, 'two', 'C') + ], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, + 'c', 1, 4, 'd', 1], + 'i2': [1, 'a', 1, 2, 'b', + 1, 3, 'c', 1, 4, 'd', 1]}, + index=expected_idx) + result = wp.to_frame() + assert_frame_equal(result, expected) + + wp.iloc[0, 0].iloc[0] = np.nan # BUG on setting. GH #5773 + result = wp.to_frame() + assert_frame_equal(result, expected[1:]) + + idx = MultiIndex.from_tuples( + [(1, 'two'), (1, 'one'), (2, 'one'), (np.nan, 'two')]) + df = DataFrame([[1, 'a', 1], [2, 'b', 1], + [3, 'c', 1], [4, 'd', 1]], + columns=['A', 'B', 'C'], index=idx) + wp = Panel({'i1': df, 'i2': df}) + ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), + (1, 'two', 'C'), + (1, 'one', 'A'), + (1, 'one', 'B'), + (1, 'one', 'C'), + (2, 'one', 'A'), + (2, 'one', 'B'), + (2, 'one', 'C'), + (np.nan, 'two', 'A'), + (np.nan, 'two', 'B'), + (np.nan, 'two', 'C')], + names=[None, None, 'minor']) + expected.index = ex_idx + result = wp.to_frame() + assert_frame_equal(result, expected) def test_to_frame_multi_major_minor(self): - with catch_warnings(record=True): - cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( - 2, 'two'), (3, 'three'), (4, 'four')]) - df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], - ['a', 'b', 'w', 'x'], - ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], - [-5, -6, -7, -8]], columns=cols, index=idx) - wp = Panel({'i1': df, 'i2': df}) - - exp_idx = MultiIndex.from_tuples( - [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), - (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), - (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), - (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), - (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), - (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), - (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), - (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), - (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), - (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), - (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), - (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], - names=[None, None, None, None]) - exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], - [3, 3], [4, 4], - [13, 13], [14, 14], ['a', 'a'], - ['b', 
'b'], ['w', 'w'], - ['x', 'x'], ['c', 'c'], ['d', 'd'], [ - 'y', 'y'], ['z', 'z'], - [-1, -1], [-2, -2], [-3, -3], [-4, -4], - [-5, -5], [-6, -6], - [-7, -7], [-8, -8]] - result = wp.to_frame() - expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) - assert_frame_equal(result, expected) + cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two'), (3, 'three'), (4, 'four')]) + df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], + ['a', 'b', 'w', 'x'], + ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], + [-5, -6, -7, -8]], columns=cols, index=idx) + wp = Panel({'i1': df, 'i2': df}) + + exp_idx = MultiIndex.from_tuples( + [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), + (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), + (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), + (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), + (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), + (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), + (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), + (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), + (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), + (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), + (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), + (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], + names=[None, None, None, None]) + exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], + [3, 3], [4, 4], + [13, 13], [14, 14], ['a', 'a'], + ['b', 'b'], ['w', 'w'], + ['x', 'x'], ['c', 'c'], ['d', 'd'], [ + 'y', 'y'], ['z', 'z'], + [-1, -1], [-2, -2], [-3, -3], [-4, -4], + [-5, -5], [-6, -6], + [-7, -7], [-8, -8]] + result = wp.to_frame() + expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) + assert_frame_equal(result, expected) def test_to_frame_multi_drop_level(self): - with catch_warnings(record=True): - idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) - df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) - wp = Panel({'i1': df, 'i2': df}) - result = wp.to_frame() - exp_idx = MultiIndex.from_tuples( - [(2, 'one', 'A'), (2, 'two', 'A')], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) - assert_frame_equal(result, expected) + idx = MultiIndex.from_tuples([(1, 'one'), (2, 'one'), (2, 'two')]) + df = DataFrame({'A': [np.nan, 1, 2]}, index=idx) + wp = Panel({'i1': df, 'i2': df}) + result = wp.to_frame() + exp_idx = MultiIndex.from_tuples( + [(2, 'one', 'A'), (2, 'two', 'A')], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1., 2], 'i2': [1., 2]}, index=exp_idx) + assert_frame_equal(result, expected) def test_to_panel_na_handling(self): - with catch_warnings(record=True): - df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), - index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], - [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) + df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)), + index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1], + [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]]) - panel = df.to_panel() - assert isna(panel[0].loc[1, [0, 1]]).all() + panel = df.to_panel() + assert isna(panel[0].loc[1, [0, 1]]).all() def test_to_panel_duplicates(self): # #2441 - with catch_warnings(record=True): - df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) - idf = df.set_index(['a', 'b']) - tm.assert_raises_regex( - ValueError, 'non-uniquely indexed', idf.to_panel) + df = DataFrame({'a': [0, 0, 1], 'b': [1, 1, 1], 'c': [1, 2, 3]}) + idf = 
df.set_index(['a', 'b']) + tm.assert_raises_regex( + ValueError, 'non-uniquely indexed', idf.to_panel) def test_panel_dups(self): - with catch_warnings(record=True): - # GH 4960 - # duplicates in an index + # GH 4960 + # duplicates in an index - # items - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, items=list("ABCDE")) - panel = Panel(data, items=list("AACDE")) + # items + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, items=list("ABCDE")) + panel = Panel(data, items=list("AACDE")) - expected = no_dup_panel['A'] - result = panel.iloc[0] - assert_frame_equal(result, expected) + expected = no_dup_panel['A'] + result = panel.iloc[0] + assert_frame_equal(result, expected) - expected = no_dup_panel['E'] - result = panel.loc['E'] - assert_frame_equal(result, expected) + expected = no_dup_panel['E'] + result = panel.loc['E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[['A', 'B']] - expected.items = ['A', 'A'] - result = panel.loc['A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[['A', 'B']] + expected.items = ['A', 'A'] + result = panel.loc['A'] + assert_panel_equal(result, expected) - # major - data = np.random.randn(5, 5, 5) - no_dup_panel = Panel(data, major_axis=list("ABCDE")) - panel = Panel(data, major_axis=list("AACDE")) + # major + data = np.random.randn(5, 5, 5) + no_dup_panel = Panel(data, major_axis=list("ABCDE")) + panel = Panel(data, major_axis=list("AACDE")) - expected = no_dup_panel.loc[:, 'A'] - result = panel.iloc[:, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'A'] + result = panel.iloc[:, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, 'E'] - result = panel.loc[:, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, 'E'] + result = panel.loc[:, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, ['A', 'B']] - expected.major_axis = ['A', 'A'] - result = panel.loc[:, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, ['A', 'B']] + expected.major_axis = ['A', 'A'] + result = panel.loc[:, 'A'] + assert_panel_equal(result, expected) - # minor - data = np.random.randn(5, 100, 5) - no_dup_panel = Panel(data, minor_axis=list("ABCDE")) - panel = Panel(data, minor_axis=list("AACDE")) + # minor + data = np.random.randn(5, 100, 5) + no_dup_panel = Panel(data, minor_axis=list("ABCDE")) + panel = Panel(data, minor_axis=list("AACDE")) - expected = no_dup_panel.loc[:, :, 'A'] - result = panel.iloc[:, :, 0] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'A'] + result = panel.iloc[:, :, 0] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, 'E'] - result = panel.loc[:, :, 'E'] - assert_frame_equal(result, expected) + expected = no_dup_panel.loc[:, :, 'E'] + result = panel.loc[:, :, 'E'] + assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:, :, ['A', 'B']] - expected.minor_axis = ['A', 'A'] - result = panel.loc[:, :, 'A'] - assert_panel_equal(result, expected) + expected = no_dup_panel.loc[:, :, ['A', 'B']] + expected.minor_axis = ['A', 'A'] + result = panel.loc[:, :, 'A'] + assert_panel_equal(result, expected) def test_filter(self): pass def test_compound(self): - with catch_warnings(record=True): - compounded = self.panel.compound() + compounded = self.panel.compound() - assert_series_equal(compounded['ItemA'], - (1 + self.panel['ItemA']).product(0) - 1, - check_names=False) + 
assert_series_equal(compounded['ItemA'], + (1 + self.panel['ItemA']).product(0) - 1, + check_names=False) def test_shift(self): - with catch_warnings(record=True): - # major - idx = self.panel.major_axis[0] - idx_lag = self.panel.major_axis[1] - shifted = self.panel.shift(1) - assert_frame_equal(self.panel.major_xs(idx), - shifted.major_xs(idx_lag)) - - # minor - idx = self.panel.minor_axis[0] - idx_lag = self.panel.minor_axis[1] - shifted = self.panel.shift(1, axis='minor') - assert_frame_equal(self.panel.minor_xs(idx), - shifted.minor_xs(idx_lag)) - - # items - idx = self.panel.items[0] - idx_lag = self.panel.items[1] - shifted = self.panel.shift(1, axis='items') - assert_frame_equal(self.panel[idx], shifted[idx_lag]) - - # negative numbers, #2164 - result = self.panel.shift(-1) - expected = Panel({i: f.shift(-1)[:-1] - for i, f in self.panel.iteritems()}) - assert_panel_equal(result, expected) - - # mixed dtypes #6959 - data = [('item ' + ch, makeMixedDataFrame()) - for ch in list('abcde')] - data = dict(data) - mixed_panel = Panel.from_dict(data, orient='minor') - shifted = mixed_panel.shift(1) - assert_series_equal(mixed_panel.dtypes, shifted.dtypes) + # major + idx = self.panel.major_axis[0] + idx_lag = self.panel.major_axis[1] + shifted = self.panel.shift(1) + assert_frame_equal(self.panel.major_xs(idx), + shifted.major_xs(idx_lag)) + + # minor + idx = self.panel.minor_axis[0] + idx_lag = self.panel.minor_axis[1] + shifted = self.panel.shift(1, axis='minor') + assert_frame_equal(self.panel.minor_xs(idx), + shifted.minor_xs(idx_lag)) + + # items + idx = self.panel.items[0] + idx_lag = self.panel.items[1] + shifted = self.panel.shift(1, axis='items') + assert_frame_equal(self.panel[idx], shifted[idx_lag]) + + # negative numbers, #2164 + result = self.panel.shift(-1) + expected = Panel({i: f.shift(-1)[:-1] + for i, f in self.panel.iteritems()}) + assert_panel_equal(result, expected) + + # mixed dtypes #6959 + data = [('item ' + ch, makeMixedDataFrame()) + for ch in list('abcde')] + data = dict(data) + mixed_panel = Panel.from_dict(data, orient='minor') + shifted = mixed_panel.shift(1) + assert_series_equal(mixed_panel.dtypes, shifted.dtypes) def test_tshift(self): # PeriodIndex - with catch_warnings(record=True): - ps = tm.makePeriodPanel() - shifted = ps.tshift(1) - unshifted = shifted.tshift(-1) + ps = tm.makePeriodPanel() + shifted = ps.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(unshifted, ps) + assert_panel_equal(unshifted, ps) - shifted2 = ps.tshift(freq='B') - assert_panel_equal(shifted, shifted2) + shifted2 = ps.tshift(freq='B') + assert_panel_equal(shifted, shifted2) - shifted3 = ps.tshift(freq=BDay()) - assert_panel_equal(shifted, shifted3) + shifted3 = ps.tshift(freq=BDay()) + assert_panel_equal(shifted, shifted3) - tm.assert_raises_regex(ValueError, 'does not match', - ps.tshift, freq='M') + tm.assert_raises_regex(ValueError, 'does not match', + ps.tshift, freq='M') - # DatetimeIndex - panel = make_test_panel() - shifted = panel.tshift(1) - unshifted = shifted.tshift(-1) + # DatetimeIndex + panel = make_test_panel() + shifted = panel.tshift(1) + unshifted = shifted.tshift(-1) - assert_panel_equal(panel, unshifted) + assert_panel_equal(panel, unshifted) - shifted2 = panel.tshift(freq=panel.major_axis.freq) - assert_panel_equal(shifted, shifted2) + shifted2 = panel.tshift(freq=panel.major_axis.freq) + assert_panel_equal(shifted, shifted2) - inferred_ts = Panel(panel.values, items=panel.items, - major_axis=Index(np.asarray(panel.major_axis)), - 
minor_axis=panel.minor_axis) - shifted = inferred_ts.tshift(1) - unshifted = shifted.tshift(-1) - assert_panel_equal(shifted, panel.tshift(1)) - assert_panel_equal(unshifted, inferred_ts) + inferred_ts = Panel(panel.values, items=panel.items, + major_axis=Index(np.asarray(panel.major_axis)), + minor_axis=panel.minor_axis) + shifted = inferred_ts.tshift(1) + unshifted = shifted.tshift(-1) + assert_panel_equal(shifted, panel.tshift(1)) + assert_panel_equal(unshifted, inferred_ts) - no_freq = panel.iloc[:, [0, 5, 7], :] - pytest.raises(ValueError, no_freq.tshift) + no_freq = panel.iloc[:, [0, 5, 7], :] + pytest.raises(ValueError, no_freq.tshift) def test_pct_change(self): - with catch_warnings(record=True): - df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) - df2 = df1 + 1 - df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) - wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) - # major, 1 - result = wp.pct_change() # axis='major' - expected = Panel({'i1': df1.pct_change(), - 'i2': df2.pct_change(), - 'i3': df3.pct_change()}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=1) - assert_panel_equal(result, expected) - # major, 2 - result = wp.pct_change(periods=2) - expected = Panel({'i1': df1.pct_change(2), - 'i2': df2.pct_change(2), - 'i3': df3.pct_change(2)}) - assert_panel_equal(result, expected) - # minor, 1 - result = wp.pct_change(axis='minor') - expected = Panel({'i1': df1.pct_change(axis=1), - 'i2': df2.pct_change(axis=1), - 'i3': df3.pct_change(axis=1)}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=2) - assert_panel_equal(result, expected) - # minor, 2 - result = wp.pct_change(periods=2, axis='minor') - expected = Panel({'i1': df1.pct_change(periods=2, axis=1), - 'i2': df2.pct_change(periods=2, axis=1), - 'i3': df3.pct_change(periods=2, axis=1)}) - assert_panel_equal(result, expected) - # items, 1 - result = wp.pct_change(axis='items') - expected = Panel( - {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [1, 0.5, .2], - 'c2': [1. / 3, 0.25, 1. / 6]}), - 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], - 'c2': [.25, .2, 1. / 7]})}) - assert_panel_equal(result, expected) - result = wp.pct_change(axis=0) - assert_panel_equal(result, expected) - # items, 2 - result = wp.pct_change(periods=2, axis='items') - expected = Panel( - {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], - 'c2': [np.nan, np.nan, np.nan]}), - 'i3': DataFrame({'c1': [2, 1, .4], - 'c2': [2. / 3, .5, 1. 
/ 3]})}) - assert_panel_equal(result, expected) + df1 = DataFrame({'c1': [1, 2, 5], 'c2': [3, 4, 6]}) + df2 = df1 + 1 + df3 = DataFrame({'c1': [3, 4, 7], 'c2': [5, 6, 8]}) + wp = Panel({'i1': df1, 'i2': df2, 'i3': df3}) + # major, 1 + result = wp.pct_change() # axis='major' + expected = Panel({'i1': df1.pct_change(), + 'i2': df2.pct_change(), + 'i3': df3.pct_change()}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=1) + assert_panel_equal(result, expected) + # major, 2 + result = wp.pct_change(periods=2) + expected = Panel({'i1': df1.pct_change(2), + 'i2': df2.pct_change(2), + 'i3': df3.pct_change(2)}) + assert_panel_equal(result, expected) + # minor, 1 + result = wp.pct_change(axis='minor') + expected = Panel({'i1': df1.pct_change(axis=1), + 'i2': df2.pct_change(axis=1), + 'i3': df3.pct_change(axis=1)}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=2) + assert_panel_equal(result, expected) + # minor, 2 + result = wp.pct_change(periods=2, axis='minor') + expected = Panel({'i1': df1.pct_change(periods=2, axis=1), + 'i2': df2.pct_change(periods=2, axis=1), + 'i3': df3.pct_change(periods=2, axis=1)}) + assert_panel_equal(result, expected) + # items, 1 + result = wp.pct_change(axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [1, 0.5, .2], + 'c2': [1. / 3, 0.25, 1. / 6]}), + 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], + 'c2': [.25, .2, 1. / 7]})}) + assert_panel_equal(result, expected) + result = wp.pct_change(axis=0) + assert_panel_equal(result, expected) + # items, 2 + result = wp.pct_change(periods=2, axis='items') + expected = Panel( + {'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], + 'c2': [np.nan, np.nan, np.nan]}), + 'i3': DataFrame({'c1': [2, 1, .4], + 'c2': [2. / 3, .5, 1. 
/ 3]})}) + assert_panel_equal(result, expected) def test_round(self): - with catch_warnings(record=True): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = p.round() - assert_panel_equal(expected, result) + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = p.round() + assert_panel_equal(expected, result) def test_numpy_round(self): - with catch_warnings(record=True): - values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], - [-1566.213, 88.88], [-12, 94.5]], - [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], - [272.212, -99.99], [23, -76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] - for k in values] - p = Panel(values, items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - expected = Panel(evalues, items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B']) - result = np.round(p) - assert_panel_equal(expected, result) - - msg = "the 'out' parameter is not supported" - tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) - + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] + p = Panel(values, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + expected = Panel(evalues, items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B']) + result = np.round(p) + assert_panel_equal(expected, result) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, np.round, p, out=p) + + # removing Panel before NumPy enforces, so just ignore + @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_multiindex_get(self): - with catch_warnings(record=True): - ind = MultiIndex.from_tuples( - [('a', 1), ('a', 2), ('b', 1), ('b', 2)], - names=['first', 'second']) - wp = Panel(np.random.random((4, 5, 5)), - items=ind, - major_axis=np.arange(5), - minor_axis=np.arange(5)) - f1 = wp['a'] - f2 = wp.loc['a'] - assert_panel_equal(f1, f2) - - assert (f1.items == [1, 2]).all() - assert (f2.items == [1, 2]).all() - - MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) - + ind = MultiIndex.from_tuples( + [('a', 1), ('a', 2), ('b', 1), ('b', 2)], + names=['first', 'second']) + wp = Panel(np.random.random((4, 5, 5)), + items=ind, + major_axis=np.arange(5), + minor_axis=np.arange(5)) + f1 = wp['a'] + f2 = wp.loc['a'] + 
assert_panel_equal(f1, f2) + + assert (f1.items == [1, 2]).all() + assert (f2.items == [1, 2]).all() + + MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 'second']) + + @pytest.mark.filterwarnings("ignore:Using a non-tuple:FutureWarning") def test_multiindex_blocks(self): - with catch_warnings(record=True): - ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], - names=['first', 'second']) - wp = Panel(self.panel._data) - wp.items = ind - f1 = wp['a'] - assert (f1.items == [1, 2]).all() + ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1)], + names=['first', 'second']) + wp = Panel(self.panel._data) + wp.items = ind + f1 = wp['a'] + assert (f1.items == [1, 2]).all() - f1 = wp[('b', 1)] - assert (f1.columns == ['A', 'B', 'C', 'D']).all() + f1 = wp[('b', 1)] + assert (f1.columns == ['A', 'B', 'C', 'D']).all() def test_repr_empty(self): - with catch_warnings(record=True): - empty = Panel() - repr(empty) + empty = Panel() + repr(empty) + # ignore warning from us, because removing panel + @pytest.mark.filterwarnings("ignore:Using:FutureWarning") def test_rename(self): - with catch_warnings(record=True): - mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} + mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} - renamed = self.panel.rename_axis(mapper, axis=0) - exp = Index(['foo', 'bar', 'baz']) - tm.assert_index_equal(renamed.items, exp) + renamed = self.panel.rename_axis(mapper, axis=0) + exp = Index(['foo', 'bar', 'baz']) + tm.assert_index_equal(renamed.items, exp) - renamed = self.panel.rename_axis(str.lower, axis=2) - exp = Index(['a', 'b', 'c', 'd']) - tm.assert_index_equal(renamed.minor_axis, exp) + renamed = self.panel.rename_axis(str.lower, axis=2) + exp = Index(['a', 'b', 'c', 'd']) + tm.assert_index_equal(renamed.minor_axis, exp) - # don't copy - renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) - renamed_nocopy['foo'] = 3. - assert (self.panel['ItemA'].values == 3).all() + # don't copy + renamed_nocopy = self.panel.rename_axis(mapper, axis=0, copy=False) + renamed_nocopy['foo'] = 3. 
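+        # copy=False shares the underlying data, so the write above
+        # is expected to be visible through the original panel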
+ assert (self.panel['ItemA'].values == 3).all() def test_get_attr(self): assert_frame_equal(self.panel['ItemA'], self.panel.ItemA) @@ -2191,13 +2133,12 @@ def test_get_attr(self): assert_frame_equal(self.panel['i'], self.panel.i) def test_from_frame_level1_unsorted(self): - with catch_warnings(record=True): - tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), - ('MSFT', 1)] - midx = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.rand(5, 4), index=midx) - p = df.to_panel() - assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) + tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), + ('MSFT', 1)] + midx = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.rand(5, 4), index=midx) + p = df.to_panel() + assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) def test_to_excel(self): try: @@ -2239,194 +2180,188 @@ def test_to_excel_xlsxwriter(self): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) + @pytest.mark.filterwarnings("ignore:'.reindex:FutureWarning") def test_dropna(self): - with catch_warnings(record=True): - p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) - p.loc[:, ['b', 'd'], 0] = np.nan + p = Panel(np.random.randn(4, 5, 6), major_axis=list('abcde')) + p.loc[:, ['b', 'd'], 0] = np.nan - result = p.dropna(axis=1) - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) - inp = p.copy() - inp.dropna(axis=1, inplace=True) - assert_panel_equal(inp, exp) + result = p.dropna(axis=1) + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, exp) + inp = p.copy() + inp.dropna(axis=1, inplace=True) + assert_panel_equal(inp, exp) - result = p.dropna(axis=1, how='all') - assert_panel_equal(result, p) + result = p.dropna(axis=1, how='all') + assert_panel_equal(result, p) - p.loc[:, ['b', 'd'], :] = np.nan - result = p.dropna(axis=1, how='all') - exp = p.loc[:, ['a', 'c', 'e'], :] - assert_panel_equal(result, exp) + p.loc[:, ['b', 'd'], :] = np.nan + result = p.dropna(axis=1, how='all') + exp = p.loc[:, ['a', 'c', 'e'], :] + assert_panel_equal(result, exp) - p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) - p.loc[['b'], :, 0] = np.nan + p = Panel(np.random.randn(4, 5, 6), items=list('abcd')) + p.loc[['b'], :, 0] = np.nan - result = p.dropna() - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + result = p.dropna() + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) - result = p.dropna(how='all') - assert_panel_equal(result, p) + result = p.dropna(how='all') + assert_panel_equal(result, p) - p.loc['b'] = np.nan - result = p.dropna(how='all') - exp = p.loc[['a', 'c', 'd']] - assert_panel_equal(result, exp) + p.loc['b'] = np.nan + result = p.dropna(how='all') + exp = p.loc[['a', 'c', 'd']] + assert_panel_equal(result, exp) def test_drop(self): - with catch_warnings(record=True): - df = DataFrame({"A": [1, 2], "B": [3, 4]}) - panel = Panel({"One": df, "Two": df}) + df = DataFrame({"A": [1, 2], "B": [3, 4]}) + panel = Panel({"One": df, "Two": df}) - def check_drop(drop_val, axis_number, aliases, expected): - try: - actual = panel.drop(drop_val, axis=axis_number) + def check_drop(drop_val, axis_number, aliases, expected): + try: + actual = panel.drop(drop_val, axis=axis_number) + assert_panel_equal(actual, expected) + for alias in aliases: + actual = panel.drop(drop_val, axis=alias) assert_panel_equal(actual, expected) - for alias in aliases: - actual = panel.drop(drop_val, axis=alias) - assert_panel_equal(actual, expected) - except 
AssertionError: - pprint_thing("Failed with axis_number %d and aliases: %s" % - (axis_number, aliases)) - raise - # Items - expected = Panel({"One": df}) - check_drop('Two', 0, ['items'], expected) - - pytest.raises(KeyError, panel.drop, 'Three') - - # errors = 'ignore' - dropped = panel.drop('Three', errors='ignore') - assert_panel_equal(dropped, panel) - dropped = panel.drop(['Two', 'Three'], errors='ignore') - expected = Panel({"One": df}) - assert_panel_equal(dropped, expected) - - # Major - exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(0, 1, ['major_axis', 'major'], expected) - - exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop([1], 1, ['major_axis', 'major'], expected) - - # Minor - exp_df = df[['B']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop(["A"], 2, ['minor_axis', 'minor'], expected) - - exp_df = df[['A']] - expected = Panel({"One": exp_df, "Two": exp_df}) - check_drop("B", 2, ['minor_axis', 'minor'], expected) + except AssertionError: + pprint_thing("Failed with axis_number %d and aliases: %s" % + (axis_number, aliases)) + raise + # Items + expected = Panel({"One": df}) + check_drop('Two', 0, ['items'], expected) + + pytest.raises(KeyError, panel.drop, 'Three') + + # errors = 'ignore' + dropped = panel.drop('Three', errors='ignore') + assert_panel_equal(dropped, panel) + dropped = panel.drop(['Two', 'Three'], errors='ignore') + expected = Panel({"One": df}) + assert_panel_equal(dropped, expected) + + # Major + exp_df = DataFrame({"A": [2], "B": [4]}, index=[1]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(0, 1, ['major_axis', 'major'], expected) + + exp_df = DataFrame({"A": [1], "B": [3]}, index=[0]) + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop([1], 1, ['major_axis', 'major'], expected) + + # Minor + exp_df = df[['B']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop(["A"], 2, ['minor_axis', 'minor'], expected) + + exp_df = df[['A']] + expected = Panel({"One": exp_df, "Two": exp_df}) + check_drop("B", 2, ['minor_axis', 'minor'], expected) def test_update(self): - with catch_warnings(record=True): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - other = Panel( - [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - - pan.update(other) - - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[3.6, 2., 3], [1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + + pan.update(other) + + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[3.6, 2., 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + assert_panel_equal(pan, expected) def test_update_from_dict(self): - with catch_warnings(record=True): - pan = Panel({'one': DataFrame([[1.5, np.nan, 3], - [1.5, np.nan, 3], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]), - 'two': DataFrame([[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]])}) - - other = 
{'two': DataFrame( - [[3.6, 2., np.nan], [np.nan, np.nan, 7]])} - - pan.update(other) - - expected = Panel( - {'one': DataFrame([[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]), - 'two': DataFrame([[3.6, 2., 3], - [1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]) - } - ) - - assert_panel_equal(pan, expected) + pan = Panel({'one': DataFrame([[1.5, np.nan, 3], + [1.5, np.nan, 3], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'two': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]])}) + + other = {'two': DataFrame( + [[3.6, 2., np.nan], [np.nan, np.nan, 7]])} + + pan.update(other) + + expected = Panel( + {'one': DataFrame([[1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]), + 'two': DataFrame([[3.6, 2., 3], + [1.5, np.nan, 7], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]) + } + ) + + assert_panel_equal(pan, expected) def test_update_nooverwrite(self): - with catch_warnings(record=True): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - other = Panel( - [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - - pan.update(other, overwrite=False) - - expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, 2., 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + + pan.update(other, overwrite=False) - assert_panel_equal(pan, expected) + expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, 2., 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + assert_panel_equal(pan, expected) def test_update_filtered(self): - with catch_warnings(record=True): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) - other = Panel( - [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) + other = Panel( + [[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) - pan.update(other, filter_func=lambda x: x > 2) + pan.update(other, filter_func=lambda x: x > 2) - expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]], - [[1.5, np.nan, 3], [1.5, np.nan, 7], - [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, np.nan, 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) - assert_panel_equal(pan, expected) + assert_panel_equal(pan, expected) def test_update_raise(self): - with catch_warnings(record=True): - pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, 
np.nan, 3.], + [1.5, np.nan, 3.]]]) - pytest.raises(Exception, pan.update, *(pan, ), - **{'raise_conflict': True}) + pytest.raises(Exception, pan.update, *(pan, ), + **{'raise_conflict': True}) def test_all_any(self): assert (self.panel.all(axis=0).values == nanall( @@ -2452,6 +2387,7 @@ def test_sort_values(self): pytest.raises(NotImplementedError, self.panel.sort_values, 'ItemA') +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") class TestPanelFrame(object): """ Check that conversions to and from Panel to DataFrame work. @@ -2463,90 +2399,82 @@ def setup_method(self, method): self.unfiltered_panel = panel.to_frame(filter_observations=False) def test_ops_differently_indexed(self): - with catch_warnings(record=True): - # trying to set non-identically indexed panel - wp = self.panel.to_panel() - wp2 = wp.reindex(major=wp.major_axis[:-1]) - lp2 = wp2.to_frame() + # trying to set non-identically indexed panel + wp = self.panel.to_panel() + wp2 = wp.reindex(major=wp.major_axis[:-1]) + lp2 = wp2.to_frame() - result = self.panel + lp2 - assert_frame_equal(result.reindex(lp2.index), lp2 * 2) + result = self.panel + lp2 + assert_frame_equal(result.reindex(lp2.index), lp2 * 2) - # careful, mutation - self.panel['foo'] = lp2['ItemA'] - assert_series_equal(self.panel['foo'].reindex(lp2.index), - lp2['ItemA'], - check_names=False) + # careful, mutation + self.panel['foo'] = lp2['ItemA'] + assert_series_equal(self.panel['foo'].reindex(lp2.index), + lp2['ItemA'], + check_names=False) def test_ops_scalar(self): - with catch_warnings(record=True): - result = self.panel.mul(2) - expected = DataFrame.__mul__(self.panel, 2) - assert_frame_equal(result, expected) + result = self.panel.mul(2) + expected = DataFrame.__mul__(self.panel, 2) + assert_frame_equal(result, expected) def test_combineFrame(self): - with catch_warnings(record=True): - wp = self.panel.to_panel() - result = self.panel.add(wp['ItemA'].stack(), axis=0) - assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) + wp = self.panel.to_panel() + result = self.panel.add(wp['ItemA'].stack(), axis=0) + assert_frame_equal(result.to_panel()['ItemA'], wp['ItemA'] * 2) def test_combinePanel(self): - with catch_warnings(record=True): - wp = self.panel.to_panel() - result = self.panel.add(self.panel) - wide_result = result.to_panel() - assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) + wp = self.panel.to_panel() + result = self.panel.add(self.panel) + wide_result = result.to_panel() + assert_frame_equal(wp['ItemA'] * 2, wide_result['ItemA']) - # one item - result = self.panel.add(self.panel.filter(['ItemA'])) + # one item + result = self.panel.add(self.panel.filter(['ItemA'])) def test_combine_scalar(self): - with catch_warnings(record=True): - result = self.panel.mul(2) - expected = DataFrame(self.panel._data) * 2 - assert_frame_equal(result, expected) + result = self.panel.mul(2) + expected = DataFrame(self.panel._data) * 2 + assert_frame_equal(result, expected) def test_combine_series(self): - with catch_warnings(record=True): - s = self.panel['ItemA'][:10] - result = self.panel.add(s, axis=0) - expected = DataFrame.add(self.panel, s, axis=0) - assert_frame_equal(result, expected) + s = self.panel['ItemA'][:10] + result = self.panel.add(s, axis=0) + expected = DataFrame.add(self.panel, s, axis=0) + assert_frame_equal(result, expected) - s = self.panel.iloc[5] - result = self.panel + s - expected = DataFrame.add(self.panel, s, axis=1) - assert_frame_equal(result, expected) + s = self.panel.iloc[5] + result = self.panel + s + 
expected = DataFrame.add(self.panel, s, axis=1) + assert_frame_equal(result, expected) def test_operators(self): - with catch_warnings(record=True): - wp = self.panel.to_panel() - result = (self.panel + 1).to_panel() - assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) + wp = self.panel.to_panel() + result = (self.panel + 1).to_panel() + assert_frame_equal(wp['ItemA'] + 1, result['ItemA']) def test_arith_flex_panel(self): - with catch_warnings(record=True): - ops = ['add', 'sub', 'mul', 'div', - 'truediv', 'pow', 'floordiv', 'mod'] - if not compat.PY3: - aliases = {} - else: - aliases = {'div': 'truediv'} - self.panel = self.panel.to_panel() - - for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: - for op in ops: - alias = aliases.get(op, op) - f = getattr(operator, alias) - exp = f(self.panel, n) - result = getattr(self.panel, op)(n) - assert_panel_equal(result, exp, check_panel_type=True) - - # rops - r_f = lambda x, y: f(y, x) - exp = r_f(self.panel, n) - result = getattr(self.panel, 'r' + op)(n) - assert_panel_equal(result, exp) + ops = ['add', 'sub', 'mul', 'div', + 'truediv', 'pow', 'floordiv', 'mod'] + if not compat.PY3: + aliases = {} + else: + aliases = {'div': 'truediv'} + self.panel = self.panel.to_panel() + + for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: + for op in ops: + alias = aliases.get(op, op) + f = getattr(operator, alias) + exp = f(self.panel, n) + result = getattr(self.panel, op)(n) + assert_panel_equal(result, exp, check_panel_type=True) + + # rops + r_f = lambda x, y: f(y, x) + exp = r_f(self.panel, n) + result = getattr(self.panel, 'r' + op)(n) + assert_panel_equal(result, exp) def test_sort(self): def is_sorted(arr): @@ -2569,44 +2497,43 @@ def test_to_sparse(self): self.panel.to_sparse) def test_truncate(self): - with catch_warnings(record=True): - dates = self.panel.index.levels[0] - start, end = dates[1], dates[5] + dates = self.panel.index.levels[0] + start, end = dates[1], dates[5] - trunced = self.panel.truncate(start, end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(start, end) + trunced = self.panel.truncate(start, end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(start, end) - # TODO truncate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(before=start).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(before=start) + trunced = self.panel.truncate(before=start).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(before=start) - # TODO truncate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - trunced = self.panel.truncate(after=end).to_panel() - expected = self.panel.to_panel()['ItemA'].truncate(after=end) + trunced = self.panel.truncate(after=end).to_panel() + expected = self.panel.to_panel()['ItemA'].truncate(after=end) - # TODO truncate drops index.names - assert_frame_equal(trunced['ItemA'], expected, check_names=False) + # TODO truncate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) - # truncate on dates that aren't in there - wp = self.panel.to_panel() - new_index = wp.major_axis[::5] + # truncate on dates that aren't in there + wp = self.panel.to_panel() + new_index = wp.major_axis[::5] - wp2 = 
wp.reindex(major=new_index) + wp2 = wp.reindex(major=new_index) - lp2 = wp2.to_frame() - lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2]) + lp2 = wp2.to_frame() + lp_trunc = lp2.truncate(wp.major_axis[2], wp.major_axis[-2]) - wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) + wp_trunc = wp2.truncate(wp.major_axis[2], wp.major_axis[-2]) - assert_panel_equal(wp_trunc, lp_trunc.to_panel()) + assert_panel_equal(wp_trunc, lp_trunc.to_panel()) - # throw proper exception - pytest.raises(Exception, lp2.truncate, wp.major_axis[-2], - wp.major_axis[2]) + # throw proper exception + pytest.raises(Exception, lp2.truncate, wp.major_axis[-2], + wp.major_axis[2]) def test_axis_dummies(self): from pandas.core.reshape.reshape import make_axis_dummies @@ -2635,46 +2562,42 @@ def test_get_dummies(self): tm.assert_numpy_array_equal(dummies.values, minor_dummies.values) def test_mean(self): - with catch_warnings(record=True): - means = self.panel.mean(level='minor') + means = self.panel.mean(level='minor') - # test versus Panel version - wide_means = self.panel.to_panel().mean('major') - assert_frame_equal(means, wide_means) + # test versus Panel version + wide_means = self.panel.to_panel().mean('major') + assert_frame_equal(means, wide_means) def test_sum(self): - with catch_warnings(record=True): - sums = self.panel.sum(level='minor') + sums = self.panel.sum(level='minor') - # test versus Panel version - wide_sums = self.panel.to_panel().sum('major') - assert_frame_equal(sums, wide_sums) + # test versus Panel version + wide_sums = self.panel.to_panel().sum('major') + assert_frame_equal(sums, wide_sums) def test_count(self): - with catch_warnings(record=True): - index = self.panel.index + index = self.panel.index - major_count = self.panel.count(level=0)['ItemA'] - labels = index.labels[0] - for i, idx in enumerate(index.levels[0]): - assert major_count[i] == (labels == i).sum() + major_count = self.panel.count(level=0)['ItemA'] + labels = index.labels[0] + for i, idx in enumerate(index.levels[0]): + assert major_count[i] == (labels == i).sum() - minor_count = self.panel.count(level=1)['ItemA'] - labels = index.labels[1] - for i, idx in enumerate(index.levels[1]): - assert minor_count[i] == (labels == i).sum() + minor_count = self.panel.count(level=1)['ItemA'] + labels = index.labels[1] + for i, idx in enumerate(index.levels[1]): + assert minor_count[i] == (labels == i).sum() def test_join(self): - with catch_warnings(record=True): - lp1 = self.panel.filter(['ItemA', 'ItemB']) - lp2 = self.panel.filter(['ItemC']) + lp1 = self.panel.filter(['ItemA', 'ItemB']) + lp2 = self.panel.filter(['ItemC']) - joined = lp1.join(lp2) + joined = lp1.join(lp2) - assert len(joined.columns) == 3 + assert len(joined.columns) == 3 - pytest.raises(Exception, lp1.join, - self.panel.filter(['ItemB', 'ItemC'])) + pytest.raises(Exception, lp1.join, + self.panel.filter(['ItemB', 'ItemC'])) def test_panel_index(): @@ -2685,8 +2608,8 @@ def test_panel_index(): tm.assert_index_equal(index, expected) +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_panel_np_all(): - with catch_warnings(record=True): - wp = Panel({"A": DataFrame({'b': [1, 2]})}) + wp = Panel({"A": DataFrame({'b': [1, 2]})}) result = np.all(wp) assert result == np.bool_(True) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 669fa9742a705..377253574d2c1 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1,6 +1,6 @@ # pylint: disable=E1101 -from warnings import 
catch_warnings +from warnings import catch_warnings, simplefilter from datetime import datetime, timedelta from functools import partial from textwrap import dedent @@ -1463,6 +1463,7 @@ def test_resample_panel(self): n = len(rng) with catch_warnings(record=True): + simplefilter("ignore", FutureWarning) panel = Panel(np.random.randn(3, n, 5), items=['one', 'two', 'three'], major_axis=rng, @@ -1485,6 +1486,7 @@ def p_apply(panel, f): lambda x: x.resample('M', axis=1).mean()) tm.assert_panel_equal(result, expected) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_resample_panel_numpy(self): rng = date_range('1/1/2000', '6/30/2000') n = len(rng) @@ -3237,25 +3239,25 @@ def test_apply_iteration(self): result = grouped.apply(f) tm.assert_index_equal(result.index, df.index) + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_panel_aggregation(self): ind = pd.date_range('1/1/2000', periods=100) data = np.random.randn(2, len(ind), 4) - with catch_warnings(record=True): - wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, - minor_axis=['A', 'B', 'C', 'D']) + wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, + minor_axis=['A', 'B', 'C', 'D']) - tg = TimeGrouper('M', axis=1) - _, grouper, _ = tg._get_grouper(wp) - bingrouped = wp.groupby(grouper) - binagg = bingrouped.mean() + tg = TimeGrouper('M', axis=1) + _, grouper, _ = tg._get_grouper(wp) + bingrouped = wp.groupby(grouper) + binagg = bingrouped.mean() - def f(x): - assert (isinstance(x, Panel)) - return x.mean(1) + def f(x): + assert (isinstance(x, Panel)) + return x.mean(1) - result = bingrouped.agg(f) - tm.assert_panel_equal(result, binagg) + result = bingrouped.agg(f) + tm.assert_panel_equal(result, binagg) def test_fails_on_no_datetime_index(self): index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index ec6d83062c8b0..052bfd2b858fb 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -153,6 +153,8 @@ def test_agg(self): tm.assert_frame_equal(result, expected) with catch_warnings(record=True): + # using a dict with renaming + warnings.simplefilter("ignore", FutureWarning) result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) expected = concat([a_mean, a_sum], axis=1) expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), @@ -160,6 +162,7 @@ def test_agg(self): tm.assert_frame_equal(result, expected, check_like=True) with catch_warnings(record=True): + warnings.simplefilter("ignore", FutureWarning) result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}, 'B': {'mean2': 'mean', @@ -223,11 +226,13 @@ def f(): expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), ( 'ra', 'std'), ('rb', 'mean'), ('rb', 'std')]) with catch_warnings(record=True): + warnings.simplefilter("ignore", FutureWarning) result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']}, 'B': {'rb': ['mean', 'std']}}) tm.assert_frame_equal(result, expected, check_like=True) with catch_warnings(record=True): + warnings.simplefilter("ignore", FutureWarning) result = r.agg({'A': {'ra': ['mean', 'std']}, 'B': {'rb': ['mean', 'std']}}) expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), ( @@ -278,6 +283,7 @@ def test_count_nonnumeric_types(self): tm.assert_frame_equal(result, expected) @td.skip_if_no_scipy + @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") def test_window_with_args(self): # make sure that we are aggregating window functions correctly with arg r = 
Series(np.random.randn(100)).rolling(window=10, min_periods=1, @@ -309,6 +315,7 @@ def test_preserve_metadata(self): assert s3.name == 'foo' +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestWindow(Base): def setup_method(self, method): @@ -940,6 +947,7 @@ def _create_data(self): "datetime64[ns, UTC] is not supported ATM") +@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestMoments(Base): def setup_method(self, method): @@ -1901,6 +1909,7 @@ def test_no_pairwise_with_other(self, f): for (df, result) in zip(self.df1s, results): if result is not None: with catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) # we can have int and str columns expected_index = df.index.union(self.df2.index) expected_columns = df.columns.union(self.df2.columns) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index f9f5fc2484bda..b8fabbf52159d 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -1825,6 +1825,7 @@ def test_weekmask_and_holidays(self): xp_egypt = datetime(2013, 5, 5) assert xp_egypt == dt + 2 * bday_egypt + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") def test_calendar(self): calendar = USFederalHolidayCalendar() dt = datetime(2014, 1, 17) @@ -1987,6 +1988,7 @@ def test_holidays(self): assert dt + bm_offset == datetime(2012, 1, 30) assert dt + 2 * bm_offset == datetime(2012, 2, 27) + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") def test_datetimeindex(self): from pandas.tseries.holiday import USFederalHolidayCalendar hcal = USFederalHolidayCalendar() @@ -2105,6 +2107,7 @@ def test_holidays(self): assert dt + bm_offset == datetime(2012, 1, 2) assert dt + 2 * bm_offset == datetime(2012, 2, 3) + @pytest.mark.filterwarnings("ignore:Non:pandas.errors.PerformanceWarning") def test_datetimeindex(self): hcal = USFederalHolidayCalendar() cbmb = CBMonthBegin(calendar=hcal) diff --git a/pandas/tests/tseries/offsets/test_offsets_properties.py b/pandas/tests/tseries/offsets/test_offsets_properties.py index f19066ba76b20..07a6895d1e231 100644 --- a/pandas/tests/tseries/offsets/test_offsets_properties.py +++ b/pandas/tests/tseries/offsets/test_offsets_properties.py @@ -8,6 +8,7 @@ You may wish to consult the previous version for inspiration on further tests, or when trying to pin down the bugs exposed by the tests below. 
""" +import warnings import pytest from hypothesis import given, assume, strategies as st @@ -25,6 +26,11 @@ # ---------------------------------------------------------------- # Helpers for generating random data +with warnings.catch_warnings(): + warnings.simplefilter('ignore') + min_dt = pd.Timestamp(1900, 1, 1).to_pydatetime(), + max_dt = pd.Timestamp(1900, 1, 1).to_pydatetime(), + gen_date_range = st.builds( pd.date_range, start=st.datetimes( @@ -38,8 +44,8 @@ ) gen_random_datetime = st.datetimes( - min_value=pd.Timestamp.min.to_pydatetime(), - max_value=pd.Timestamp.max.to_pydatetime(), + min_value=min_dt, + max_value=max_dt, timezones=st.one_of(st.none(), dateutil_timezones(), pytz_timezones()) ) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 14c9ca1f6cc54..466a22e5916e9 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -92,6 +92,7 @@ def test_parsers_monthfreq(self): assert result1 == expected +@pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning") class TestGuessDatetimeFormat(object): @td.skip_if_not_us_locale @@ -160,6 +161,8 @@ def test_guess_datetime_format_invalid_inputs(self): ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'), ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'), ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S')]) + # https://github.com/pandas-dev/pandas/issues/21322 for _timelex + @pytest.mark.filterwarnings("ignore:_timelex:DeprecationWarning") def test_guess_datetime_format_nopadding(self, string, format): # GH 11142 result = parsing._guess_datetime_format(string) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 0c14dcb49c56f..b62260071d996 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -1,7 +1,6 @@ import pytest import datetime -from warnings import catch_warnings import numpy as np import pandas as pd @@ -216,12 +215,12 @@ def test_categorical_with_nan_consistency(self): assert result[0] in expected assert result[1] in expected + @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_pandas_errors(self): with pytest.raises(TypeError): hash_pandas_object(pd.Timestamp('20130101')) - with catch_warnings(record=True): - obj = tm.makePanel() + obj = tm.makePanel() with pytest.raises(TypeError): hash_pandas_object(obj) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 33dcf6d64b302..b9c89c4e314f9 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -1,6 +1,7 @@ import warnings from pandas import DateOffset, DatetimeIndex, Series, Timestamp +from pandas.errors import PerformanceWarning from pandas.compat import add_metaclass from datetime import datetime, timedelta from dateutil.relativedelta import MO, TU, WE, TH, FR, SA, SU # noqa @@ -281,7 +282,8 @@ def _apply_rule(self, dates): # if we are adding a non-vectorized value # ignore the PerformanceWarnings: - with warnings.catch_warnings(record=True): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", PerformanceWarning) dates += offset return dates diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1e8c123fa6f13..edd0b0aa82d23 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -205,8 +205,12 @@ def decompress_file(path, compression): msg = 'Unrecognized compression type: {}'.format(compression) raise ValueError(msg) - yield f - f.close() + try: + yield f + finally: + f.close() + if compression == "zip": + zip_file.close() def 
assert_almost_equal(left, right, check_dtype="equiv", @@ -1897,6 +1901,7 @@ def makePeriodFrame(nper=None): def makePanel(nper=None): with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", "\\nPanel", FutureWarning) cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] data = {c: makeTimeDataFrame(nper) for c in cols} return Panel.fromDict(data) @@ -1904,6 +1909,7 @@ def makePanel(nper=None): def makePeriodPanel(nper=None): with warnings.catch_warnings(record=True): + warnings.filterwarnings("ignore", "\\nPanel", FutureWarning) cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] data = {c: makePeriodFrame(nper) for c in cols} return Panel.fromDict(data) diff --git a/setup.cfg b/setup.cfg index 021159bad99de..fb42dfd3b6d15 100644 --- a/setup.cfg +++ b/setup.cfg @@ -40,8 +40,7 @@ markers = high_memory: mark a test as a high-memory only clipboard: mark a pd.read_clipboard test doctest_optionflags = NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL -addopts = --strict-data-files - +addopts = --strict-data-files --durations=10 [coverage:run] branch = False From 73ff71e6cd62e3816ff2f5997d28e42f3dd6a0f2 Mon Sep 17 00:00:00 2001 From: Jay Offerdahl Date: Wed, 19 Sep 2018 09:16:10 -0500 Subject: [PATCH 78/86] BUG: Allow IOErrors when attempting to retrieve default client encoding. (#21531) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/io/formats/console.py | 2 +- pandas/tests/io/formats/test_console.py | 74 +++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/formats/test_console.py diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3a44b0260153c..d1ede31fd5d1d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -749,6 +749,7 @@ I/O - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) +- Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) Plotting ^^^^^^^^ diff --git a/pandas/io/formats/console.py b/pandas/io/formats/console.py index 45d50ea3fa073..b8b28a0b0c98c 100644 --- a/pandas/io/formats/console.py +++ b/pandas/io/formats/console.py @@ -21,7 +21,7 @@ def detect_console_encoding(): encoding = None try: encoding = sys.stdout.encoding or sys.stdin.encoding - except AttributeError: + except (AttributeError, IOError): pass # try again for something better diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py new file mode 100644 index 0000000000000..055763bf62d6e --- /dev/null +++ b/pandas/tests/io/formats/test_console.py @@ -0,0 +1,74 @@ +import pytest + +from pandas.io.formats.console import detect_console_encoding + + +class MockEncoding(object): # TODO(py27): replace with mock + """ + Used to add a side effect when accessing the 'encoding' property. If the + side effect is a str in nature, the value will be returned. Otherwise, the + side effect should be an exception that will be raised. 
+ """ + def __init__(self, encoding): + super(MockEncoding, self).__init__() + self.val = encoding + + @property + def encoding(self): + return self.raise_or_return(self.val) + + @staticmethod + def raise_or_return(val): + if isinstance(val, str): + return val + else: + raise val + + +@pytest.mark.parametrize('empty,filled', [ + ['stdin', 'stdout'], + ['stdout', 'stdin'] +]) +def test_detect_console_encoding_from_stdout_stdin(monkeypatch, empty, filled): + # Ensures that when sys.stdout.encoding or sys.stdin.encoding is used when + # they have values filled. + # GH 21552 + with monkeypatch.context() as context: + context.setattr('sys.{}'.format(empty), MockEncoding('')) + context.setattr('sys.{}'.format(filled), MockEncoding(filled)) + assert detect_console_encoding() == filled + + +@pytest.mark.parametrize('encoding', [ + AttributeError, + IOError, + 'ascii' +]) +def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding): + # GH 21552 + with monkeypatch.context() as context: + context.setattr('locale.getpreferredencoding', lambda: 'foo') + context.setattr('sys.stdout', MockEncoding(encoding)) + assert detect_console_encoding() == 'foo' + + +@pytest.mark.parametrize('std,locale', [ + ['ascii', 'ascii'], + ['ascii', Exception], + [AttributeError, 'ascii'], + [AttributeError, Exception], + [IOError, 'ascii'], + [IOError, Exception] +]) +def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): + # When both the stdout/stdin encoding and locale preferred encoding checks + # fail (or return 'ascii', we should default to the sys default encoding. + # GH 21552 + with monkeypatch.context() as context: + context.setattr( + 'locale.getpreferredencoding', + lambda: MockEncoding.raise_or_return(locale) + ) + context.setattr('sys.stdout', MockEncoding(std)) + context.setattr('sys.getdefaultencoding', lambda: 'sysDefaultEncoding') + assert detect_console_encoding() == 'sysDefaultEncoding' From b7d9884d808c3bea3d50f203a304e22dc5424309 Mon Sep 17 00:00:00 2001 From: alimcmaster1 Date: Wed, 19 Sep 2018 15:23:26 +0100 Subject: [PATCH 79/86] API: Git version (#22745) --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/__init__.py | 1 + pandas/tests/test_common.py | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d1ede31fd5d1d..487d5d0d2accd 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -185,7 +185,7 @@ Other Enhancements - :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`). - :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`). - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). - +- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). .. 
_whatsnew_0240.api_breaking: Backwards incompatible API changes diff --git a/pandas/__init__.py b/pandas/__init__.py index f91d0aa84e0ff..e446782d9665e 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -80,6 +80,7 @@ from ._version import get_versions v = get_versions() __version__ = v.get('closest-tag', v['version']) +__git_version__ = v.get('full-revisionid') del get_versions, v # module level doc-string diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 868525e818b62..ae46bee901ff2 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,11 +1,13 @@ # -*- coding: utf-8 -*- import collections +import string from functools import partial import numpy as np import pytest +import pandas as pd from pandas import Series, Timestamp from pandas.core import ( common as com, @@ -110,3 +112,10 @@ def test_standardize_mapping(): dd = collections.defaultdict(list) assert isinstance(com.standardize_mapping(dd), partial) + + +def test_git_version(): + # GH 21295 + git_version = pd.__git_version__ + assert len(git_version) == 40 + assert all(c in string.hexdigits for c in git_version) From 22b2e4ab61d5a52a161430e5b1a26a7dd9a62c05 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 19 Sep 2018 15:39:46 +0100 Subject: [PATCH 80/86] DOC: add more links to the API in advanced.rst (#22746) --- doc/source/advanced.rst | 63 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 611afb3670ebc..835c4cc9d4ab3 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -15,7 +15,8 @@ MultiIndex / Advanced Indexing ****************************** -This section covers indexing with a ``MultiIndex`` and :ref:`more advanced indexing features `. +This section covers :ref:`indexing with a MultiIndex ` +and :ref:`other advanced indexing features `. See the :ref:`Indexing and Selecting Data ` for general indexing documentation. @@ -37,7 +38,7 @@ Hierarchical / Multi-level indexing is very exciting as it opens the door to som quite sophisticated data analysis and manipulation, especially for working with higher dimensional data. In essence, it enables you to store and manipulate data with an arbitrary number of dimensions in lower dimensional data -structures like Series (1d) and DataFrame (2d). +structures like ``Series`` (1d) and ``DataFrame`` (2d). In this section, we will show what exactly we mean by "hierarchical" indexing and how it integrates with all of the pandas indexing functionality @@ -83,8 +84,8 @@ to use the :meth:`MultiIndex.from_product` method: iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']] pd.MultiIndex.from_product(iterables, names=['first', 'second']) -As a convenience, you can pass a list of arrays directly into Series or -DataFrame to construct a ``MultiIndex`` automatically: +As a convenience, you can pass a list of arrays directly into ``Series`` or +``DataFrame`` to construct a ``MultiIndex`` automatically: .. ipython:: python @@ -213,8 +214,8 @@ tuples: s + s[:-2] s + s[::2] -``reindex`` can be called with another ``MultiIndex``, or even a list or array -of tuples: +The :meth:`~DataFrame.reindex` method of ``Series``/``DataFrames`` can be +called with another ``MultiIndex``, or even a list or array of tuples: .. ipython:: python @@ -413,7 +414,7 @@ selecting data at a particular level of a ``MultiIndex`` easier. 
# using the slicers df.loc[(slice(None),'one'),:] -You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by +You can also select on the columns with ``xs``, by providing the axis argument. .. ipython:: python @@ -426,7 +427,7 @@ providing the axis argument. # using the slicers df.loc[:,(slice(None),'one')] -:meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys. +``xs`` also allows selection with multiple keys. .. ipython:: python @@ -437,7 +438,7 @@ providing the axis argument. # using the slicers df.loc[:,('bar','one')] -You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain +You can pass ``drop_level=False`` to ``xs`` to retain the level that was selected. .. ipython:: python @@ -460,9 +461,9 @@ Compare the above with the result using ``drop_level=True`` (the default value). Advanced reindexing and alignment ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The parameter ``level`` has been added to the ``reindex`` and ``align`` methods -of pandas objects. This is useful to broadcast values across a level. For -instance: +Using the parameter ``level`` in the :meth:`~DataFrame.reindex` and +:meth:`~DataFrame.align` methods of pandas objects is useful to broadcast +values across a level. For instance: .. ipython:: python @@ -480,10 +481,10 @@ instance: df2_aligned -Swapping levels with :meth:`~pandas.MultiIndex.swaplevel` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Swapping levels with ``swaplevel`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``swaplevel`` function can switch the order of two levels: +The :meth:`~MultiIndex.swaplevel` method can switch the order of two levels: .. ipython:: python @@ -492,21 +493,21 @@ The ``swaplevel`` function can switch the order of two levels: .. _advanced.reorderlevels: -Reordering levels with :meth:`~pandas.MultiIndex.reorder_levels` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Reordering levels with ``reorder_levels`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``reorder_levels`` function generalizes the ``swaplevel`` function, -allowing you to permute the hierarchical index levels in one step: +The :meth:`~MultiIndex.reorder_levels` method generalizes the ``swaplevel`` +method, allowing you to permute the hierarchical index levels in one step: .. ipython:: python df[:5].reorder_levels([1,0], axis=0) -Sorting a :class:`~pandas.MultiIndex` -------------------------------------- +Sorting a ``MultiIndex`` +------------------------ -For MultiIndex-ed objects to be indexed and sliced effectively, they need -to be sorted. As with any index, you can use ``sort_index``. +For :class:`MultiIndex`-ed objects to be indexed and sliced effectively, +they need to be sorted. As with any index, you can use :meth:`~DataFrame.sort_index`. .. ipython:: python @@ -658,9 +659,9 @@ faster than fancy indexing. Index Types ----------- -We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex`` -are shown :ref:`here `, and information about -``TimedeltaIndex`` is found :ref:`here `. +We have discussed ``MultiIndex`` in the previous sections pretty extensively. +Documentation about ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `, +and documentation about ``TimedeltaIndex`` is found :ref:`here `. In the following sub-sections we will highlight some other index types. 
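
A minimal doctest-style sketch of the weak-versus-strict monotonicity
distinction that the next hunk cross-links (the index values and the bare
``pd`` import are illustrative additions, not part of the patch):

>>> import pandas as pd
>>> idx = pd.Index([1, 2, 2, 3])
>>> idx.is_monotonic_increasing  # weak check: repeated values are allowed
True
>>> idx.is_monotonic_increasing and idx.is_unique  # strict check
False
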
@@ -1004,8 +1005,8 @@ Non-monotonic indexes require exact matches If the index of a ``Series`` or ``DataFrame`` is monotonically increasing or decreasing, then the bounds of a label-based slice can be outside the range of the index, much like slice indexing a -normal Python ``list``. Monotonicity of an index can be tested with the ``is_monotonic_increasing`` and -``is_monotonic_decreasing`` attributes. +normal Python ``list``. Monotonicity of an index can be tested with the :meth:`~Index.is_monotonic_increasing` and +:meth:`~Index.is_monotonic_decreasing` attributes. .. ipython:: python @@ -1039,9 +1040,9 @@ On the other hand, if the index is not monotonic, then both slice bounds must be In [11]: df.loc[2:3, :] KeyError: 'Cannot get right slice bound for non-unique label: 3' -:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that +``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with -:meth:`Index.is_unique` +the :meth:`~Index.is_unique` attribute. .. ipython:: python @@ -1057,7 +1058,7 @@ Compared with standard Python sequence slicing in which the slice endpoint is not inclusive, label-based slicing in pandas **is inclusive**. The primary reason for this is that it is often not possible to easily determine the "successor" or next element after a particular label in an index. For example, -consider the following Series: +consider the following ``Series``: .. ipython:: python From 27ea6562d397f3a25f75b24e3b620b22a6419d68 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Wed, 19 Sep 2018 11:37:36 -0400 Subject: [PATCH 81/86] DOC: Fix DataFrame.to_xarray doctests and allow the CI to run it. (#22673) --- ci/doctests.sh | 2 +- pandas/core/generic.py | 114 +++++++++++++++++++---------------------- 2 files changed, 53 insertions(+), 63 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index a941515fde4ae..e7fe80e60eb6d 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -35,7 +35,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/generic.py \ - -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -to_xarray -transpose -values -xs" + -k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -sample -to_json -transpose -values -xs" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 373830ec7892e..3f7334131e146 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2500,80 +2500,70 @@ def to_xarray(self): Returns ------- - a DataArray for a Series - a Dataset for a DataFrame - a DataArray for higher dims + xarray.DataArray or xarray.Dataset + Data in the pandas structure converted to Dataset if the object is + a DataFrame, or a DataArray if the object is a Series. + + See Also + -------- + DataFrame.to_hdf : Write DataFrame to an HDF5 file. + DataFrame.to_parquet : Write a DataFrame to the binary parquet format. Examples -------- - >>> df = pd.DataFrame({'A' : [1, 1, 2], - 'B' : ['foo', 'bar', 'foo'], - 'C' : np.arange(4.,7)}) + >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), + ... ('parrot', 'bird', 24.0, 2), + ... ('lion', 'mammal', 80.5, 4), + ... ('monkey', 'mammal', np.nan, 4)], + ... columns=['name', 'class', 'max_speed', + ... 
'num_legs']) >>> df - A B C - 0 1 foo 4.0 - 1 1 bar 5.0 - 2 2 foo 6.0 + name class max_speed num_legs + 0 falcon bird 389.0 2 + 1 parrot bird 24.0 2 + 2 lion mammal 80.5 4 + 3 monkey mammal NaN 4 >>> df.to_xarray() - Dimensions: (index: 3) + Dimensions: (index: 4) Coordinates: - * index (index) int64 0 1 2 + * index (index) int64 0 1 2 3 Data variables: - A (index) int64 1 1 2 - B (index) object 'foo' 'bar' 'foo' - C (index) float64 4.0 5.0 6.0 - - >>> df = pd.DataFrame({'A' : [1, 1, 2], - 'B' : ['foo', 'bar', 'foo'], - 'C' : np.arange(4.,7)} - ).set_index(['B','A']) - >>> df - C - B A - foo 1 4.0 - bar 1 5.0 - foo 2 6.0 - - >>> df.to_xarray() + name (index) object 'falcon' 'parrot' 'lion' 'monkey' + class (index) object 'bird' 'bird' 'mammal' 'mammal' + max_speed (index) float64 389.0 24.0 80.5 nan + num_legs (index) int64 2 2 4 4 + + >>> df['max_speed'].to_xarray() + + array([389. , 24. , 80.5, nan]) + Coordinates: + * index (index) int64 0 1 2 3 + + >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', + ... '2018-01-02', '2018-01-02']) + >>> df_multiindex = pd.DataFrame({'date': dates, + ... 'animal': ['falcon', 'parrot', 'falcon', + ... 'parrot'], + ... 'speed': [350, 18, 361, 15]}).set_index(['date', + ... 'animal']) + >>> df_multiindex + speed + date animal + 2018-01-01 falcon 350 + parrot 18 + 2018-01-02 falcon 361 + parrot 15 + + >>> df_multiindex.to_xarray() - Dimensions: (A: 2, B: 2) + Dimensions: (animal: 2, date: 2) Coordinates: - * B (B) object 'bar' 'foo' - * A (A) int64 1 2 + * date (date) datetime64[ns] 2018-01-01 2018-01-02 + * animal (animal) object 'falcon' 'parrot' Data variables: - C (B, A) float64 5.0 nan 4.0 6.0 - - >>> p = pd.Panel(np.arange(24).reshape(4,3,2), - items=list('ABCD'), - major_axis=pd.date_range('20130101', periods=3), - minor_axis=['first', 'second']) - >>> p - - Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis) - Items axis: A to D - Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00 - Minor_axis axis: first to second - - >>> p.to_xarray() - - array([[[ 0, 1], - [ 2, 3], - [ 4, 5]], - [[ 6, 7], - [ 8, 9], - [10, 11]], - [[12, 13], - [14, 15], - [16, 17]], - [[18, 19], - [20, 21], - [22, 23]]]) - Coordinates: - * items (items) object 'A' 'B' 'C' 'D' - * major_axis (major_axis) datetime64[ns] 2013-01-01 2013-01-02 2013-01-03 # noqa - * minor_axis (minor_axis) object 'first' 'second' + speed (date, animal) int64 350 18 361 15 Notes ----- From 4a2a24c3242797d76de1982c75874843b078d60e Mon Sep 17 00:00:00 2001 From: "azure-pipelines[bot]" Date: Wed, 19 Sep 2018 10:42:24 -0500 Subject: [PATCH 82/86] Set up CI with Azure Pipelines (#22760) --- .travis.yml | 5 - appveyor.yml | 91 ------------------ azure-pipelines.yml | 25 +++++ ci/{travis-35-osx.yaml => azure-macos-35.yml} | 0 ...appveyor-27.yaml => azure-windows-27.yaml} | 0 ...appveyor-36.yaml => azure-windows-36.yaml} | 0 ci/azure/macos.yml | 39 ++++++++ ci/azure/windows-py27.yml | 41 +++++++++ ci/azure/windows.yml | 32 +++++++ ci/incremental/build.cmd | 10 ++ ci/incremental/build.sh | 18 ++++ ci/incremental/install_miniconda.sh | 19 ++++ ci/incremental/setup_conda_environment.cmd | 21 +++++ ci/incremental/setup_conda_environment.sh | 48 ++++++++++ ci/install.ps1 | 92 ------------------- 15 files changed, 253 insertions(+), 188 deletions(-) delete mode 100644 appveyor.yml create mode 100644 azure-pipelines.yml rename ci/{travis-35-osx.yaml => azure-macos-35.yml} (100%) rename ci/{appveyor-27.yaml => azure-windows-27.yaml} (100%) rename ci/{appveyor-36.yaml => azure-windows-36.yaml} 
(100%) create mode 100644 ci/azure/macos.yml create mode 100644 ci/azure/windows-py27.yml create mode 100644 ci/azure/windows.yml create mode 100644 ci/incremental/build.cmd create mode 100755 ci/incremental/build.sh create mode 100755 ci/incremental/install_miniconda.sh create mode 100644 ci/incremental/setup_conda_environment.cmd create mode 100755 ci/incremental/setup_conda_environment.sh delete mode 100644 ci/install.ps1 diff --git a/.travis.yml b/.travis.yml index 76f4715a4abb2..a180e83eeec21 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,11 +30,6 @@ matrix: exclude: # Exclude the default Python 3.5 build - python: 3.5 - include: - - os: osx - language: generic - env: - - JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index c6199c1493f22..0000000000000 --- a/appveyor.yml +++ /dev/null @@ -1,91 +0,0 @@ -# With infos from -# http://tjelvarolsson.com/blog/how-to-continuously-test-your-python-code-on-windows-using-appveyor/ -# https://packaging.python.org/en/latest/appveyor/ -# https://github.com/rmcgibbo/python-appveyor-conda-example - -# Backslashes in quotes need to be escaped: \ -> "\\" - -matrix: - fast_finish: true # immediately finish build once one of the jobs fails. - -environment: - global: - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script interpreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\ci\\run_with_env.cmd" - clone_folder: C:\projects\pandas - PANDAS_TESTING_MODE: "deprecate" - - matrix: - - - CONDA_ROOT: "C:\\Miniconda3_64" - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 - PYTHON_VERSION: "3.6" - PYTHON_ARCH: "64" - CONDA_PY: "36" - CONDA_NPY: "113" - - - CONDA_ROOT: "C:\\Miniconda3_64" - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015 - PYTHON_VERSION: "2.7" - PYTHON_ARCH: "64" - CONDA_PY: "27" - CONDA_NPY: "110" - -# We always use a 64-bit machine, but can build x86 distributions -# with the PYTHON_ARCH variable (which is used by CMD_IN_ENV). -platform: - - x64 - -# all our python builds have to happen in tests_script... -build: false - -install: - # cancel older builds for the same PR - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw "There are newer queued builds for this pull request, failing early." 
}
-
-  # this installs the appropriate Miniconda (Py2/Py3, 32/64 bit)
-  # updates conda & installs: conda-build jinja2 anaconda-client
-  - powershell .\ci\install.ps1
-  - SET PATH=%CONDA_ROOT%;%CONDA_ROOT%\Scripts;%PATH%
-  - echo "install"
-  - cd
-  - ls -ltr
-  - git tag --sort v:refname
-
-  # this can conflict with git
-  - cmd: rmdir C:\cygwin /s /q
-
-  # install our build environment
-  - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false
-  - cmd: conda update -q conda
-  - cmd: conda config --set ssl_verify false
-
-  # add the pandas channel *before* defaults to have defaults take priority
-  - cmd: conda config --add channels conda-forge
-  - cmd: conda config --add channels pandas
-  - cmd: conda config --remove channels defaults
-  - cmd: conda config --add channels defaults
-
-  # this is now the downloaded conda...
-  - cmd: conda info -a
-
-  # create our env
-  - cmd: conda env create -q -n pandas --file=ci\appveyor-%CONDA_PY%.yaml
-  - cmd: activate pandas
-  - cmd: conda list -n pandas
-  # uninstall pandas if it's present
-  - cmd: conda remove pandas -y --force & exit 0
-  - cmd: pip uninstall -y pandas & exit 0
-
-  # build em using the local source checkout in the correct windows env
-  - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace'
-
-test_script:
-  # tests
-  - cmd: activate pandas
-  - cmd: test.bat
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
new file mode 100644
index 0000000000000..c82dafa224961
--- /dev/null
+++ b/azure-pipelines.yml
@@ -0,0 +1,25 @@
+# Adapted from https://github.com/numba/numba/blob/master/azure-pipelines.yml
+jobs:
+# Mac and Linux could potentially use the same template
+# except it isn't clear how to use a different build matrix
+# for each, so for now they are separate
+- template: ci/azure/macos.yml
+  parameters:
+    name: macOS
+    vmImage: xcode9-macos10.13
+# - template: ci/azure/linux.yml
+#   parameters:
+#     name: Linux
+#     vmImage: ubuntu-16.04
+
+# Windows Python 2.7 needs VC 9.0 installed, and not sure
+# how to make that a conditional task, so for now these are
+# separate templates as well
+- template: ci/azure/windows.yml
+  parameters:
+    name: Windows
+    vmImage: vs2017-win2016
+- template: ci/azure/windows-py27.yml
+  parameters:
+    name: WindowsPy27
+    vmImage: vs2017-win2016
diff --git a/ci/travis-35-osx.yaml b/ci/azure-macos-35.yml
similarity index 100%
rename from ci/travis-35-osx.yaml
rename to ci/azure-macos-35.yml
diff --git a/ci/appveyor-27.yaml b/ci/azure-windows-27.yaml
similarity index 100%
rename from ci/appveyor-27.yaml
rename to ci/azure-windows-27.yaml
diff --git a/ci/appveyor-36.yaml b/ci/azure-windows-36.yaml
similarity index 100%
rename from ci/appveyor-36.yaml
rename to ci/azure-windows-36.yaml
diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml
new file mode 100644
index 0000000000000..25b66615dac7e
--- /dev/null
+++ b/ci/azure/macos.yml
@@ -0,0 +1,39 @@
+parameters:
+  name: ''
+  vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+  pool:
+    vmImage: ${{ parameters.vmImage }}
+  strategy:
+    maxParallel: 11
+    matrix:
+      py35_np_110:
+        ENV_FILE: ci/azure-macos-35.yml
+        CONDA_PY: "35"
+        CONDA_ENV: pandas
+        TEST_ARGS: "--skip-slow --skip-network"
+
+  steps:
+  - script: |
+      if [ "$(uname)" == "Linux" ]; then sudo apt-get install -y libc6-dev-i386; fi
+      echo "Installing Miniconda"
+      ci/incremental/install_miniconda.sh
+      export PATH=$HOME/miniconda3/bin:$PATH
+      echo "Setting up Conda environment"
+      ci/incremental/setup_conda_environment.sh
+    displayName: 'Before Install'
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      ci/incremental/build.sh
+    displayName: 'Build'
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      ci/script_single.sh
+      ci/script_multi.sh
+      echo "[Test done]"
+    displayName: 'Test'
+  - script: |
+      export PATH=$HOME/miniconda3/bin:$PATH
+      source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml
new file mode 100644
index 0000000000000..e60844896b71c
--- /dev/null
+++ b/ci/azure/windows-py27.yml
@@ -0,0 +1,41 @@
+parameters:
+  name: ''
+  vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+  pool:
+    vmImage: ${{ parameters.vmImage }}
+  strategy:
+    maxParallel: 11
+    matrix:
+      py27_np110:
+        ENV_FILE: ci/azure-windows-27.yaml
+        CONDA_PY: "27"
+        CONDA_ENV: pandas
+
+  steps:
+  - task: CondaEnvironment@1
+    inputs:
+      updateConda: no
+      packageSpecs: ''
+
+  # Need to install VC 9.0 only for Python 2.7
+  # Once we understand how to do tasks conditional on build matrix variables
+  # we could merge this into azure-windows.yml
+  - powershell: |
+      $wc = New-Object net.webclient
+      $wc.Downloadfile("https://download.microsoft.com/download/7/9/6/796EF2E4-801B-4FC4-AB28-B59FBF6D907B/VCForPython27.msi", "VCForPython27.msi")
+      Start-Process "VCForPython27.msi" /qn -Wait
+    displayName: 'Install VC 9.0'
+
+  - script: |
+      ci\\incremental\\setup_conda_environment.cmd
+    displayName: 'Before Install'
+  - script: |
+      ci\\incremental\\build.cmd
+    displayName: 'Build'
+  - script: |
+      call activate %CONDA_ENV%
+      pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %*
+    displayName: 'Test'
diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml
new file mode 100644
index 0000000000000..6090139fb4f3e
--- /dev/null
+++ b/ci/azure/windows.yml
@@ -0,0 +1,32 @@
+parameters:
+  name: ''
+  vmImage: ''
+
+jobs:
+- job: ${{ parameters.name }}
+  pool:
+    vmImage: ${{ parameters.vmImage }}
+  strategy:
+    maxParallel: 11
+    matrix:
+      py36_np14:
+        ENV_FILE: ci/azure-windows-36.yaml
+        CONDA_PY: "36"
+        CONDA_ENV: pandas
+
+  steps:
+  - task: CondaEnvironment@1
+    inputs:
+      updateConda: no
+      packageSpecs: ''
+
+  - script: |
+      ci\\incremental\\setup_conda_environment.cmd
+    displayName: 'Before Install'
+  - script: |
+      ci\\incremental\\build.cmd
+    displayName: 'Build'
+  - script: |
+      call activate %CONDA_ENV%
+      pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %*
+    displayName: 'Test'
diff --git a/ci/incremental/build.cmd b/ci/incremental/build.cmd
new file mode 100644
index 0000000000000..d2fd06d7d9e50
--- /dev/null
+++ b/ci/incremental/build.cmd
@@ -0,0 +1,10 @@
+@rem https://github.com/numba/numba/blob/master/buildscripts/incremental/build.cmd
+call activate %CONDA_ENV%
+
+@rem Build pandas extensions without silencing compile errors
+python setup.py build_ext -q --inplace
+
+@rem Install pandas locally
+python -m pip install -e .
+
+if %errorlevel% neq 0 exit /b %errorlevel%
diff --git a/ci/incremental/build.sh b/ci/incremental/build.sh
new file mode 100755
index 0000000000000..8f2301a3b7ef5
--- /dev/null
+++ b/ci/incremental/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+source activate $CONDA_ENV
+
+# Make sure any error below is reported as such
+set -v -e
+
+echo "[building extensions]"
+python setup.py build_ext -q --inplace
+python -m pip install -e .
+ +echo +echo "[show environment]" +conda list + +echo +echo "[done]" +exit 0 diff --git a/ci/incremental/install_miniconda.sh b/ci/incremental/install_miniconda.sh new file mode 100755 index 0000000000000..a47dfdb324b34 --- /dev/null +++ b/ci/incremental/install_miniconda.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -v -e + +# Install Miniconda +unamestr=`uname` +if [[ "$unamestr" == 'Linux' ]]; then + if [[ "$BITS32" == "yes" ]]; then + wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86.sh -O miniconda.sh + else + wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + fi +elif [[ "$unamestr" == 'Darwin' ]]; then + wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh +else + echo Error +fi +chmod +x miniconda.sh +./miniconda.sh -b diff --git a/ci/incremental/setup_conda_environment.cmd b/ci/incremental/setup_conda_environment.cmd new file mode 100644 index 0000000000000..b4446c49fabd3 --- /dev/null +++ b/ci/incremental/setup_conda_environment.cmd @@ -0,0 +1,21 @@ +@rem https://github.com/numba/numba/blob/master/buildscripts/incremental/setup_conda_environment.cmd +@rem The cmd /C hack circumvents a regression where conda installs a conda.bat +@rem script in non-root environments. +set CONDA_INSTALL=cmd /C conda install -q -y +set PIP_INSTALL=pip install -q + +@echo on + +@rem Deactivate any environment +call deactivate +@rem Display root environment (for debugging) +conda list +@rem Clean up any left-over from a previous build +conda remove --all -q -y -n %CONDA_ENV% +@rem Scipy, CFFI, jinja2 and IPython are optional dependencies, but exercised in the test suite +conda env create -n %CONDA_ENV% --file=ci\azure-windows-%CONDA_PY%.yaml + +call activate %CONDA_ENV% +conda list + +if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/ci/incremental/setup_conda_environment.sh b/ci/incremental/setup_conda_environment.sh new file mode 100755 index 0000000000000..c716a39138644 --- /dev/null +++ b/ci/incremental/setup_conda_environment.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +set -v -e + +CONDA_INSTALL="conda install -q -y" +PIP_INSTALL="pip install -q" + +# Deactivate any environment +source deactivate +# Display root environment (for debugging) +conda list +# Clean up any left-over from a previous build +# (note workaround for https://github.com/conda/conda/issues/2679: +# `conda env remove` issue) +conda remove --all -q -y -n $CONDA_ENV + +echo +echo "[create env]" +time conda env create -q -n "${CONDA_ENV}" --file="${ENV_FILE}" || exit 1 + +# Activate first +set +v +source activate $CONDA_ENV +set -v + +# remove any installed pandas package +# w/o removing anything else +echo +echo "[removing installed pandas]" +conda remove pandas -y --force +pip uninstall -y pandas + +echo +echo "[no installed pandas]" +conda list pandas + +# # Install the compiler toolchain +# if [[ $(uname) == Linux ]]; then +# if [[ "$CONDA_SUBDIR" == "linux-32" || "$BITS32" == "yes" ]] ; then +# $CONDA_INSTALL gcc_linux-32 gxx_linux-32 +# else +# $CONDA_INSTALL gcc_linux-64 gxx_linux-64 +# fi +# elif [[ $(uname) == Darwin ]]; then +# $CONDA_INSTALL clang_osx-64 clangxx_osx-64 +# # Install llvm-openmp and intel-openmp on OSX too +# $CONDA_INSTALL llvm-openmp intel-openmp +# fi diff --git a/ci/install.ps1 b/ci/install.ps1 deleted file mode 100644 index 64ec7f81884cd..0000000000000 --- a/ci/install.ps1 +++ /dev/null @@ -1,92 +0,0 @@ -# Sample script to install Miniconda under Windows -# Authors: Olivier 
Grisel, Jonathan Helmus and Kyle Kastner, Robert McGibbon -# License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ - -$MINICONDA_URL = "http://repo.continuum.io/miniconda/" - - -function DownloadMiniconda ($python_version, $platform_suffix) { - $webclient = New-Object System.Net.WebClient - $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" - $url = $MINICONDA_URL + $filename - - $basedir = $pwd.Path + "\" - $filepath = $basedir + $filename - if (Test-Path $filename) { - Write-Host "Reusing" $filepath - return $filepath - } - - # Download and retry up to 3 times in case of network transient errors. - Write-Host "Downloading" $filename "from" $url - $retry_attempts = 2 - for($i=0; $i -lt $retry_attempts; $i++){ - try { - $webclient.DownloadFile($url, $filepath) - break - } - Catch [Exception]{ - Start-Sleep 1 - } - } - if (Test-Path $filepath) { - Write-Host "File saved at" $filepath - } else { - # Retry once to get the error message if any at the last try - $webclient.DownloadFile($url, $filepath) - } - return $filepath -} - - -function InstallMiniconda ($python_version, $architecture, $python_home) { - Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home - if (Test-Path $python_home) { - Write-Host $python_home "already exists, skipping." - return $false - } - if ($architecture -match "32") { - $platform_suffix = "x86" - } else { - $platform_suffix = "x86_64" - } - - $filepath = DownloadMiniconda $python_version $platform_suffix - Write-Host "Installing" $filepath "to" $python_home - $install_log = $python_home + ".log" - $args = "/S /D=$python_home" - Write-Host $filepath $args - Start-Process -FilePath $filepath -ArgumentList $args -Wait -Passthru - if (Test-Path $python_home) { - Write-Host "Python $python_version ($architecture) installation complete" - } else { - Write-Host "Failed to install Python in $python_home" - Get-Content -Path $install_log - Exit 1 - } -} - - -function InstallCondaPackages ($python_home, $spec) { - $conda_path = $python_home + "\Scripts\conda.exe" - $args = "install --yes " + $spec - Write-Host ("conda " + $args) - Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru -} - -function UpdateConda ($python_home) { - $conda_path = $python_home + "\Scripts\conda.exe" - Write-Host "Updating conda..." 
- $args = "update --yes conda" - Write-Host $conda_path $args - Start-Process -FilePath "$conda_path" -ArgumentList $args -Wait -Passthru -} - - -function main () { - InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT - UpdateConda $env:CONDA_ROOT - InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client" -} - -main From 96b7d8490be9ecdc40f907ccdb328529bea337e3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Sep 2018 10:49:36 -0500 Subject: [PATCH 83/86] CI: Fix travis CI (#22765) --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a180e83eeec21..40baee2c03ea0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,6 +31,7 @@ matrix: # Exclude the default Python 3.5 build - python: 3.5 + include: - dist: trusty env: - JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" From 113ff5028448001388338f799f520e86d143af53 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Sep 2018 13:31:10 -0500 Subject: [PATCH 84/86] CI: Publish test summary (#22770) --- ci/azure/macos.yml | 4 ++++ ci/azure/windows-py27.yml | 6 +++++- ci/azure/windows.yml | 6 +++++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml index 25b66615dac7e..5bf8d18d6cbb9 100644 --- a/ci/azure/macos.yml +++ b/ci/azure/macos.yml @@ -37,3 +37,7 @@ jobs: - script: | export PATH=$HOME/miniconda3/bin:$PATH source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd + - task: PublishTestResults@2 + inputs: + testResultsFiles: '/tmp/*.xml' + testRunTitle: 'MacOS-35' diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml index e60844896b71c..3e92c96263930 100644 --- a/ci/azure/windows-py27.yml +++ b/ci/azure/windows-py27.yml @@ -37,5 +37,9 @@ jobs: displayName: 'Build' - script: | call activate %CONDA_ENV% - pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %* + pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict %* displayName: 'Test' + - task: PublishTestResults@2 + inputs: + testResultsFiles: 'test-data.xml' + testRunTitle: 'Windows 27' diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 6090139fb4f3e..2ab8c6f320188 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -28,5 +28,9 @@ jobs: displayName: 'Build' - script: | call activate %CONDA_ENV% - pytest --skip-slow --skip-network pandas -n 2 -r sxX --strict %* + pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict %* displayName: 'Test' + - task: PublishTestResults@2 + inputs: + testResultsFiles: 'test-data.xml' + testRunTitle: 'Windows 36' From 5474d324566ed5c953c474135554df3b5bc0d7e5 Mon Sep 17 00:00:00 2001 From: Yeojin Kim <38222260+yeojin-dev@users.noreply.github.com> Date: Thu, 20 Sep 2018 06:17:12 +0900 Subject: [PATCH 85/86] BUG: Check types in Index.__contains__ (#22085) (#22602) --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/numeric.py | 23 +++++++++++++++++++++-- pandas/tests/indexing/test_indexing.py | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 487d5d0d2accd..9e2c20c78f489 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -723,6 +723,7 @@ Indexing - ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. 
(:issue:`19087`)
 - Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`)
 - :class:`Index` no longer mangles ``None``, ``NaN`` and ``NaT``, i.e. they are treated as three different keys. However, for numeric Index all three are still coerced to a ``NaN`` (:issue:`22332`)
+- Bug in ``scalar in Index`` if scalar is a float while the ``Index`` is of integer dtype (:issue:`22085`)

 Missing
 ^^^^^^^
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
index 8d616468a87d9..7f64fb744c682 100644
--- a/pandas/core/indexes/numeric.py
+++ b/pandas/core/indexes/numeric.py
@@ -6,6 +6,7 @@
     pandas_dtype,
     needs_i8_conversion,
     is_integer_dtype,
+    is_float,
     is_bool,
     is_bool_dtype,
     is_scalar)
@@ -162,7 +163,25 @@ def insert(self, loc, item):
         )


-class Int64Index(NumericIndex):
+class IntegerIndex(NumericIndex):
+    """
+    This is an abstract class for Int64Index and UInt64Index.
+    """
+
+    def __contains__(self, key):
+        """
+        Check if key is a float with a fractional part; if so, return False.
+        """
+        hash(key)
+        try:
+            if is_float(key) and int(key) != key:
+                return False
+            return key in self._engine
+        except (OverflowError, TypeError, ValueError):
+            return False
+
+
+class Int64Index(IntegerIndex):
     __doc__ = _num_index_shared_docs['class_descr'] % _int64_descr_args

     _typ = 'int64index'
@@ -220,7 +239,7 @@ def _assert_safe_casting(cls, data, subarr):
         )


-class UInt64Index(NumericIndex):
+class UInt64Index(IntegerIndex):
     __doc__ = _num_index_shared_docs['class_descr'] % _uint64_descr_args

     _typ = 'uint64index'
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index 33b7c1b8154c7..761c633f89da3 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -631,6 +631,21 @@ def test_mixed_index_not_contains(self, index, val):
         # GH 19860
         assert val not in index

+    def test_contains_with_float_index(self):
+        # GH#22085
+        integer_index = pd.Int64Index([0, 1, 2, 3])
+        uinteger_index = pd.UInt64Index([0, 1, 2, 3])
+        float_index = pd.Float64Index([0.1, 1.1, 2.2, 3.3])
+
+        for index in (integer_index, uinteger_index):
+            assert 1.1 not in index
+            assert 1.0 in index
+            assert 1 in index
+
+        assert 1.1 in float_index
+        assert 1.0 not in float_index
+        assert 1 not in float_index
+
     def test_index_type_coercion(self):

         with catch_warnings(record=True):

From ecfaf47ebef2757d19c627e32b44784ac77bb47d Mon Sep 17 00:00:00 2001
From: Abeer Eltanawy
Date: Fri, 21 Sep 2018 09:06:54 -0700
Subject: [PATCH 86/86] Removing -assign from pandas/ci/doctests.sh

---
 ci/doctests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/doctests.sh b/ci/doctests.sh
index e7fe80e60eb6d..48774a1e4d00d 100755
--- a/ci/doctests.sh
+++ b/ci/doctests.sh
@@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then

     # DataFrame / Series docstrings
     pytest --doctest-modules -v pandas/core/frame.py \
-        -k"-assign -axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
+        -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"

     if [ $? -ne "0" ]; then
         RET=1
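
To make the change in PATCH 85 concrete, here is a minimal sketch of the containment semantics it establishes. This example is not part of the patch series; it assumes a pandas build that includes the ``IntegerIndex.__contains__`` change above (``Int64Index``, ``UInt64Index`` and ``Float64Index`` were public API in this era and were removed in pandas 2.0), and the variable names are illustrative only:

    import pandas as pd

    int_idx = pd.Int64Index([0, 1, 2, 3])

    # A float key matches an integer index only when it has no fractional
    # part: is_float(1.0) is true, but int(1.0) == 1.0, so the engine is
    # still consulted and 1.0 is found just like 1.
    assert 1 in int_idx
    assert 1.0 in int_idx
    # 1.1 has a fractional part, so __contains__ returns False before
    # ever touching self._engine.
    assert 1.1 not in int_idx

    # Float64Index is unaffected: membership is by float value.
    float_idx = pd.Float64Index([0.1, 1.1, 2.2, 3.3])
    assert 1.1 in float_idx
    assert 1.0 not in float_idx

These assertions mirror ``test_contains_with_float_index`` from the patch; the early ``return False`` rejects keys such as 1.1 without an engine lookup instead of relying on the ``OverflowError``/``TypeError``/``ValueError`` fallback.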