From f3b114fe63de3701135e0e107c14c555d8af3175 Mon Sep 17 00:00:00 2001
From: ilmarinen
Date: Wed, 6 Jul 2016 22:14:41 +0100
Subject: [PATCH 01/44] Fix bug in contains when looking up a string in a
 non-monotonic datetime index and the object in question is first in the
 index.

---
 pandas/tests/indexes/test_datetimelike.py | 11 +++++++++++
 pandas/tseries/base.py                    |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py
index 4a664ed3542d7..e9b5403d12afc 100644
--- a/pandas/tests/indexes/test_datetimelike.py
+++ b/pandas/tests/indexes/test_datetimelike.py
@@ -721,6 +721,17 @@ def test_fillna_datetime64(self):
                           dtype=object)
         self.assert_index_equal(idx.fillna('x'), exp)
 
+    def test_contains(self):
+        # GH13572
+        dates = ['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02']
+        monotonic = pd.to_datetime(sorted(dates))
+        non_monotonic = pd.to_datetime(dates)
+        for idx in [non_monotonic, monotonic]:
+            self.assertNotIn('2015-01-06', idx)
+            self.assertNotIn(pd.Timestamp('2015-01-06'), idx)
+            for dt in reversed(dates):
+                self.assertIn(dt, idx)
+                self.assertIn(pd.Timestamp(dt), idx)
 
 class TestPeriodIndex(DatetimeLike, tm.TestCase):
     _holder = PeriodIndex
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 42631d442a990..37f7e1f284ef5 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -202,7 +202,7 @@ def _format_with_header(self, header, **kwargs):
     def __contains__(self, key):
         try:
             res = self.get_loc(key)
-            return lib.isscalar(res) or type(res) == slice or np.any(res)
+            return lib.isscalar(res) or type(res) == slice or np.size(res)
         except (KeyError, TypeError, ValueError):
             return False
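The one-character fix above, ``np.any(res)`` becoming ``np.size(res)``, is worth spelling out: on a non-monotonic ``DatetimeIndex``, ``get_loc`` can return an array of integer positions rather than a scalar or slice. A minimal sketch of the failure mode, assuming the key matches only the element at position 0 (the case named in the commit message):

    import numpy as np

    # What get_loc can hand back on a non-monotonic index: the integer
    # positions of the matches. Here the key matched the first element.
    res = np.array([0])

    # Old membership check: truthiness of the values. array([0]) is
    # all-falsy, so `key in index` was wrongly False.
    np.any(res)    # False

    # New check: the number of matches, which is what membership means.
    np.size(res)   # 1, so `key in index` is True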
From cc0a188addb46f7b4986dce32947e66295f1bb3b Mon Sep 17 00:00:00 2001
From: adneu
Date: Wed, 6 Jul 2016 17:41:28 -0400
Subject: [PATCH 02/44] BUG: Groupby.nth includes group key inconsistently
 #12839

closes #12839

Author: adneu

Closes #13316 from adneu/12839 and squashes the following commits:

16f5cd3 [adneu] Name change
ac1851a [adneu] Added docstrings/comments, and new tests.
4d73cbf [adneu] Updated tests
9b75df4 [adneu] BUG: Groupby.nth includes group key inconsistently #12839
---
 doc/source/whatsnew/v0.18.2.txt |  2 +-
 pandas/core/groupby.py          | 35 ++++++++++++++++++++++++---------
 pandas/tests/test_groupby.py    | 31 ++++++++++++++++++++++++++++-
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
index be1f745537d05..b9afa7fcb7959 100644
--- a/doc/source/whatsnew/v0.18.2.txt
+++ b/doc/source/whatsnew/v0.18.2.txt
@@ -521,7 +521,7 @@ Bug Fixes
 - Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
 - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
-
+- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
 
 - Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`)
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 04e4db9d1fdc6..8d33c27481d93 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -95,7 +95,7 @@ def _groupby_function(name, alias, npfunc, numeric_only=True,
     @Appender(_doc_template)
     @Appender(_local_template)
     def f(self):
-        self._set_selection_from_grouper()
+        self._set_group_selection()
         try:
             return self._cython_agg_general(alias, numeric_only=numeric_only)
         except AssertionError as e:
@@ -457,8 +457,21 @@ def _selected_obj(self):
         else:
             return self.obj[self._selection]
 
-    def _set_selection_from_grouper(self):
-        """ we may need create a selection if we have non-level groupers """
+    def _reset_group_selection(self):
+        """
+        Clear group based selection. Used for methods needing to return info on
+        each group regardless of whether a group selection was previously set.
+        """
+        if self._group_selection is not None:
+            self._group_selection = None
+            # GH12839 clear cached selection too when changing group selection
+            self._reset_cache('_selected_obj')
+
+    def _set_group_selection(self):
+        """
+        Create group based selection. Used when selection is not passed
+        directly but instead via a grouper.
+        """
         grp = self.grouper
         if self.as_index and getattr(grp, 'groupings', None) is not None and \
                 self.obj.ndim > 1:
@@ -468,6 +481,8 @@ def _set_selection_from_grouper(self):
 
         if len(groupers):
             self._group_selection = ax.difference(Index(groupers)).tolist()
+            # GH12839 clear selected obj cache when group selection changes
+            self._reset_cache('_selected_obj')
 
     def _set_result_index_ordered(self, result):
         # set the result index on the passed values object and
@@ -511,7 +526,7 @@ def _make_wrapper(self, name):
 
         # need to setup the selection
         # as are not passed directly but in the grouper
-        self._set_selection_from_grouper()
+        self._set_group_selection()
 
         f = getattr(self._selected_obj, name)
         if not isinstance(f, types.MethodType):
@@ -979,7 +994,7 @@ def mean(self, *args, **kwargs):
         except GroupByError:
             raise
         except Exception:  # pragma: no cover
-            self._set_selection_from_grouper()
+            self._set_group_selection()
             f = lambda x: x.mean(axis=self.axis)
             return self._python_agg_general(f)
 
@@ -997,7 +1012,7 @@ def median(self):
             raise
         except Exception:  # pragma: no cover
 
-            self._set_selection_from_grouper()
+            self._set_group_selection()
 
             def f(x):
                 if isinstance(x, np.ndarray):
@@ -1040,7 +1055,7 @@ def var(self, ddof=1, *args, **kwargs):
         if ddof == 1:
             return self._cython_agg_general('var')
         else:
-            self._set_selection_from_grouper()
+            self._set_group_selection()
             f = lambda x: x.var(ddof=ddof)
             return self._python_agg_general(f)
 
@@ -1217,7 +1232,7 @@ def nth(self, n, dropna=None):
             raise TypeError("n needs to be an int or a list/set/tuple of ints")
 
         nth_values = np.array(nth_values, dtype=np.intp)
-        self._set_selection_from_grouper()
+        self._set_group_selection()
 
         if not dropna:
             mask = np.in1d(self._cumcount_array(), nth_values) | \
@@ -1325,7 +1340,7 @@ def cumcount(self, ascending=True):
         dtype: int64
         """
 
-        self._set_selection_from_grouper()
+        self._set_group_selection()
         index = self._selected_obj.index
         cumcounts = self._cumcount_array(ascending=ascending)
@@ -1403,6 +1418,7 @@ def head(self, n=5):
         0  1  2
         2  5  6
         """
+        self._reset_group_selection()
         mask = self._cumcount_array() < n
         return self._selected_obj[mask]
 
@@ -1429,6 +1445,7 @@ def tail(self, n=5):
         0  a  1
         2  b  1
         """
+        self._reset_group_selection()
         mask = self._cumcount_array(ascending=False) < n
         return self._selected_obj[mask]
 
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 10362cbb24888..d6d601f03d561 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -354,6 +354,35 @@ def test_nth_multi_index_as_expected(self):
                                      names=['A', 'B']))
         assert_frame_equal(result, expected)
 
+    def test_group_selection_cache(self):
+        # GH 12839 nth, head, and tail should return same result consistently
+        df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
+        expected = df.iloc[[0, 2]].set_index('A')
+
+        g = df.groupby('A')
+        result1 = g.head(n=2)
+        result2 = g.nth(0)
+        assert_frame_equal(result1, df)
+        assert_frame_equal(result2, expected)
+
+        g = df.groupby('A')
+        result1 = g.tail(n=2)
+        result2 = g.nth(0)
+        assert_frame_equal(result1, df)
+        assert_frame_equal(result2, expected)
+
+        g = df.groupby('A')
+        result1 = g.nth(0)
+        result2 = g.head(n=2)
+        assert_frame_equal(result1, expected)
+        assert_frame_equal(result2, df)
+
+        g = df.groupby('A')
+        result1 = g.nth(0)
+        result2 = g.tail(n=2)
+        assert_frame_equal(result1, expected)
+        assert_frame_equal(result2, df)
+
     def test_grouper_index_types(self):
         # related GH5375
         # groupby misbehaving when using a Floatlike index
@@ -6116,7 +6145,7 @@ def test_cython_transform(self):
# bit a of hack to make sure the cythonized shift # is equivalent to pre 0.17.1 behavior if op == 'shift': - gb._set_selection_from_grouper() + gb._set_group_selection() for (op, args), targop in ops: if op != 'shift' and 'int' not in gb_target: From 2655daef1b7346feabd00d4d40910a80386d0812 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 7 Jul 2016 03:26:12 -0400 Subject: [PATCH 03/44] In gbq, use googleapiclient instead of apiclient #13454 (#13458) closes #13454 --- doc/source/whatsnew/v0.18.2.txt | 2 ++ pandas/io/gbq.py | 38 ++++++++++++++++++++++++++------- pandas/io/tests/test_gbq.py | 19 +++++++++++++++-- 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index b9afa7fcb7959..64644bd9a7a26 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -528,3 +528,5 @@ Bug Fixes - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) - Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`) + +- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py index e706434f29dc5..140f5cc6bb6e3 100644 --- a/pandas/io/gbq.py +++ b/pandas/io/gbq.py @@ -46,8 +46,12 @@ def _test_google_api_imports(): try: import httplib2 # noqa - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa + try: + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + except: + from apiclient.discovery import build # noqa + from apiclient.errors import HttpError # noqa from oauth2client.client import AccessTokenRefreshError # noqa from oauth2client.client import OAuth2WebServerFlow # noqa from oauth2client.file import Storage # noqa @@ -266,7 +270,10 @@ def sizeof_fmt(num, suffix='b'): def get_service(self): import httplib2 - from apiclient.discovery import build + try: + from googleapiclient.discovery import build + except: + from apiclient.discovery import build http = httplib2.Http() http = self.credentials.authorize(http) @@ -315,7 +322,10 @@ def process_insert_errors(self, insert_errors): raise StreamingInsertError def run_query(self, query): - from apiclient.errors import HttpError + try: + from googleapiclient.errors import HttpError + except: + from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError _check_google_client_version() @@ -420,7 +430,10 @@ def run_query(self, query): return schema, result_pages def load_data(self, dataframe, dataset_id, table_id, chunksize): - from apiclient.errors import HttpError + try: + from googleapiclient.errors import HttpError + except: + from apiclient.errors import HttpError job_id = uuid.uuid4().hex rows = [] @@ -474,7 +487,10 @@ def load_data(self, dataframe, dataset_id, table_id, chunksize): self._print("\n") def verify_schema(self, dataset_id, table_id, schema): - from apiclient.errors import HttpError + try: + from googleapiclient.errors import HttpError + except: + from apiclient.errors import HttpError try: return (self.service.tables().get( @@ -765,7 +781,10 @@ class _Table(GbqConnector): def __init__(self, project_id, dataset_id, reauth=False, verbose=False, private_key=None): - from apiclient.errors import HttpError + 
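The ``test_group_selection_cache`` test added in PATCH 02/44 pins down the user-visible symptom: ``head()``/``tail()`` left a stale cached ``_selected_obj`` behind, so what ``nth()`` returned depended on call order. A hypothetical session mirroring the test data shows the intended, order-independent behaviour:

    import pandas as pd

    df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
    g = df.groupby('A')

    # head()/tail() return rows per group and must not disturb later calls.
    g.head(n=2)   # all three rows, original index

    # nth(0) should give the first row of each group, keyed by 'A',
    # whether or not head()/tail() ran first.
    g.nth(0)      # B column [2, 6], indexed by A [1, 5]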
try: + from googleapiclient.errors import HttpError + except: + from apiclient.errors import HttpError self.http_error = HttpError self.dataset_id = dataset_id super(_Table, self).__init__(project_id, reauth, verbose, private_key) @@ -865,7 +884,10 @@ class _Dataset(GbqConnector): def __init__(self, project_id, reauth=False, verbose=False, private_key=None): - from apiclient.errors import HttpError + try: + from googleapiclient.errors import HttpError + except: + from apiclient.errors import HttpError self.http_error = HttpError super(_Dataset, self).__init__(project_id, reauth, verbose, private_key) diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py index 5cb681f4d2e7d..278c5d7215624 100644 --- a/pandas/io/tests/test_gbq.py +++ b/pandas/io/tests/test_gbq.py @@ -73,8 +73,12 @@ def _test_imports(): if _SETUPTOOLS_INSTALLED: try: - from apiclient.discovery import build # noqa - from apiclient.errors import HttpError # noqa + try: + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + except: + from apiclient.discovery import build # noqa + from apiclient.errors import HttpError # noqa from oauth2client.client import OAuth2WebServerFlow # noqa from oauth2client.client import AccessTokenRefreshError # noqa @@ -280,6 +284,17 @@ class GBQUnitTests(tm.TestCase): def setUp(self): test_requirements() + def test_import_google_api_python_client(self): + if compat.PY2: + with tm.assertRaises(ImportError): + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + from apiclient.discovery import build # noqa + from apiclient.errors import HttpError # noqa + else: + from googleapiclient.discovery import build # noqa + from googleapiclient.errors import HttpError # noqa + def test_should_return_bigquery_integers_as_python_floats(self): result = gbq._parse_entry(1, 'INTEGER') tm.assert_equal(result, float(1)) From f11b9c1eef4bb161a35a1a5695aebb934f7c8b96 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 8 Jul 2016 17:08:02 +0200 Subject: [PATCH 04/44] RLS: switch master from 0.18.2 to 0.19.0 (#13586) --- doc/source/categorical.rst | 2 +- doc/source/merging.rst | 2 +- doc/source/text.rst | 2 +- doc/source/whatsnew.rst | 2 +- doc/source/whatsnew/v0.18.2.txt | 532 -------------------------------- doc/source/whatsnew/v0.19.0.txt | 485 +++++++++++++++++++++++++++-- doc/source/whatsnew/v0.20.0.txt | 83 +++++ pandas/computation/ops.py | 2 +- pandas/core/base.py | 4 +- pandas/core/categorical.py | 2 +- pandas/core/generic.py | 2 +- pandas/indexes/base.py | 6 +- pandas/indexes/category.py | 2 +- pandas/io/html.py | 2 +- pandas/io/pytables.py | 2 +- pandas/tools/merge.py | 4 +- pandas/tseries/base.py | 2 +- pandas/tseries/index.py | 2 +- pandas/tseries/offsets.py | 4 +- pandas/tslib.pyx | 4 +- pandas/types/concat.py | 2 +- 21 files changed, 574 insertions(+), 574 deletions(-) delete mode 100644 doc/source/whatsnew/v0.18.2.txt create mode 100644 doc/source/whatsnew/v0.20.0.txt diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index e971f1f28903f..f0e01ddc3fc2d 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -653,7 +653,7 @@ The same applies to ``df.append(df_different)``. Unioning ~~~~~~~~ -.. versionadded:: 0.18.2 +.. 
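The recurring edit in PATCH 03/44 above is one pattern applied at every import site: try the current package name first and fall back to the legacy alias, since newer ``google-api-python-client`` releases ship the module as ``googleapiclient`` and leave ``apiclient`` as an alias that an unrelated package of the same name can shadow (the conflict behind :issue:`13454`). The core of the pattern, sketched with ``except ImportError`` where the patch itself uses a bare ``except:``:

    try:
        from googleapiclient.errors import HttpError
    except ImportError:  # older google-api-python-client, or alias shadowed
        from apiclient.errors import HttpError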
versionadded:: 0.19.0 If you want to combine categoricals that do not necessarily have the same categories, the `union_categorical` function will diff --git a/doc/source/merging.rst b/doc/source/merging.rst index b69d0d8ba3015..f14e5741c6e2e 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1133,7 +1133,7 @@ fill/interpolate missing data: Merging AsOf ~~~~~~~~~~~~ -.. versionadded:: 0.18.2 +.. versionadded:: 0.19.0 A :func:`merge_asof` is similar to an ordered left-join except that we match on nearest key rather than equal keys. For each row in the ``left`` DataFrame, we select the last row in the ``right`` DataFrame whose ``on`` key is less than the left's key. Both DataFrames must be sorted by the key. diff --git a/doc/source/text.rst b/doc/source/text.rst index 3822c713d7f85..3a4a57ff4da95 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -316,7 +316,7 @@ then ``extractall(pat).xs(0, level='match')`` gives the same result as ``Index`` also supports ``.str.extractall``. It returns a ``DataFrame`` which has the same result as a ``Series.str.extractall`` with a default index (starts from 0). -.. versionadded:: 0.18.2 +.. versionadded:: 0.19.0 .. ipython:: python diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst index 685f1d2086c69..77dc249aeb788 100644 --- a/doc/source/whatsnew.rst +++ b/doc/source/whatsnew.rst @@ -18,7 +18,7 @@ What's New These are new features and improvements of note in each release. -.. include:: whatsnew/v0.18.2.txt +.. include:: whatsnew/v0.19.0.txt .. include:: whatsnew/v0.18.1.txt diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt deleted file mode 100644 index 64644bd9a7a26..0000000000000 --- a/doc/source/whatsnew/v0.18.2.txt +++ /dev/null @@ -1,532 +0,0 @@ -.. _whatsnew_0182: - -v0.18.2 (July ??, 2016) ------------------------ - -This is a minor bug-fix release from 0.18.1 and includes a large number of -bug fixes along with several new features, enhancements, and performance improvements. -We recommend that all users upgrade to this version. - -Highlights include: - -- :func:`merge_asof` for asof-style time-series joining, see :ref:`here ` - -.. contents:: What's new in v0.18.2 - :local: - :backlinks: none - -.. _whatsnew_0182.new_features: - -New features -~~~~~~~~~~~~ - -.. _whatsnew_0182.enhancements.asof_merge: - -:func:`merge_asof` for asof-style time-series joining -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A long-time requested feature has been added through the :func:`merge_asof` function, to -support asof style joining of time-series. (:issue:`1870`). Full documentation is -:ref:`here ` - -The :func:`merge_asof` performs an asof merge, which is similar to a left-join -except that we match on nearest key rather than equal keys. - -.. ipython:: python - - left = pd.DataFrame({'a': [1, 5, 10], - 'left_val': ['a', 'b', 'c']}) - right = pd.DataFrame({'a': [1, 2, 3, 6, 7], - 'right_val': [1, 2, 3, 6, 7]}) - - left - right - -We typically want to match exactly when possible, and use the most -recent value otherwise. - -.. ipython:: python - - pd.merge_asof(left, right, on='a') - -We can also match rows ONLY with prior data, and not an exact match. - -.. ipython:: python - - pd.merge_asof(left, right, on='a', allow_exact_matches=False) - - -In a typical time-series example, we have ``trades`` and ``quotes`` and we want to ``asof-join`` them. -This also illustrates using the ``by`` parameter to group data before merging. - -.. 
ipython:: python - - trades = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.038', - '20160525 13:30:00.048', - '20160525 13:30:00.048', - '20160525 13:30:00.048']), - 'ticker': ['MSFT', 'MSFT', - 'GOOG', 'GOOG', 'AAPL'], - 'price': [51.95, 51.95, - 720.77, 720.92, 98.00], - 'quantity': [75, 155, - 100, 100, 100]}, - columns=['time', 'ticker', 'price', 'quantity']) - - quotes = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.023', - '20160525 13:30:00.030', - '20160525 13:30:00.041', - '20160525 13:30:00.048', - '20160525 13:30:00.049', - '20160525 13:30:00.072', - '20160525 13:30:00.075']), - 'ticker': ['GOOG', 'MSFT', 'MSFT', - 'MSFT', 'GOOG', 'AAPL', 'GOOG', - 'MSFT'], - 'bid': [720.50, 51.95, 51.97, 51.99, - 720.50, 97.99, 720.50, 52.01], - 'ask': [720.93, 51.96, 51.98, 52.00, - 720.93, 98.01, 720.88, 52.03]}, - columns=['time', 'ticker', 'bid', 'ask']) - -.. ipython:: python - - trades - quotes - -An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and -in this case we are using a grouper in the ``by`` field. This is like a left-outer join, except -that forward filling happens automatically taking the most recent non-NaN value. - -.. ipython:: python - - pd.merge_asof(trades, quotes, - on='time', - by='ticker') - -This returns a merged DataFrame with the entries in the same order as the original left -passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. - -.. _whatsnew_0182.enhancements.read_csv_dupe_col_names_support: - -:func:`read_csv` has improved support for duplicate column names -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -:ref:`Duplicate column names ` are now supported in :func:`read_csv` whether -they are in the file or passed in as the ``names`` parameter (:issue:`7160`, :issue:`9424`) - -.. ipython :: python - - data = '0,1,2\n3,4,5' - names = ['a', 'b', 'a'] - -Previous behaviour: - -.. code-block:: ipython - - In [2]: pd.read_csv(StringIO(data), names=names) - Out[2]: - a b a - 0 2 1 2 - 1 5 4 5 - -The first 'a' column contains the same data as the second 'a' column, when it should have -contained the array ``[0, 3]``. - -New behaviour: - -.. ipython :: python - - In [2]: pd.read_csv(StringIO(data), names=names) - -.. _whatsnew_0182.enhancements.semi_month_offsets: - -Semi-Month Offsets -^^^^^^^^^^^^^^^^^^ - -Pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS'). -These provide date offsets anchored (by default) to the 15th and end of month, and 15th and 1st of month respectively. -(:issue:`1543`) - -.. ipython:: python - - from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin - -SemiMonthEnd: - -.. ipython:: python - - Timestamp('2016-01-01') + SemiMonthEnd() - - pd.date_range('2015-01-01', freq='SM', periods=4) - -SemiMonthBegin: - -.. ipython:: python - - Timestamp('2016-01-01') + SemiMonthBegin() - - pd.date_range('2015-01-01', freq='SMS', periods=4) - -Using the anchoring suffix, you can also specify the day of month to use instead of the 15th. - -.. ipython:: python - - pd.date_range('2015-01-01', freq='SMS-16', periods=4) - - pd.date_range('2015-01-01', freq='SM-14', periods=4) - -.. _whatsnew_0182.enhancements.other: - -Other enhancements -^^^^^^^^^^^^^^^^^^ - -- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. 
The default behaviour remains to raising a ``NonExistentTimeError`` (:issue:`13057`) - -- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see :ref:`documentation here ` (:issue:`10008`, :issue:`13156`) -- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) - - .. ipython:: python - - idx = pd.Index(["a1a2", "b1", "c1"]) - idx.str.extractall("[ab](?P\d)") - -- ``Timestamp`` s can now accept positional and keyword parameters like :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) - - .. ipython:: python - - pd.Timestamp(2012, 1, 1) - - pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) - -- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) -- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`) -- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`) - -- ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) -- ``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`) - - .. ipython:: python - - idx = pd.Index(['a', 'b', 'c']) - idx.where([True, False, True]) - -- ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`) -- ``DataFrame`` has gained the ``.asof()`` method to return the last non-NaN values according to the selected subset (:issue:`13358`) -- Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) -- The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) -- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) -- A ``union_categorical`` function has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) -- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`) -- ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) - -.. _whatsnew_0182.api: - -API changes -~~~~~~~~~~~ - - -- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`) -- An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`) -- Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`) -- ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. For ``axis=None``, the output must be a DataFrame with identical columns and index labels. (:issue:`13222`) - -.. _whatsnew_0182.api.tolist: - -``Series.tolist()`` will now return Python types -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -``Series.tolist()`` will now return Python types in the output, mimicking NumPy ``.tolist()`` behaviour (:issue:`10904`) - - -.. 
ipython:: python - - s = pd.Series([1,2,3]) - type(s.tolist()[0]) - -Previous Behavior: - -.. code-block:: ipython - - In [7]: type(s.tolist()[0]) - Out[7]: - - -New Behavior: - -.. ipython:: python - - type(s.tolist()[0]) - -.. _whatsnew_0182.api.promote: - -``Series`` type promotion on assignment -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A ``Series`` will now correctly promote its dtype for assignment with incompat values to the current dtype (:issue:`13234`) - - -.. ipython:: python - - s = pd.Series() - -Previous Behavior: - -.. code-block:: ipython - - In [2]: s["a"] = pd.Timestamp("2016-01-01") - - In [3]: s["b"] = 3.0 - TypeError: invalid type promotion - -New Behavior: - -.. ipython:: python - - s["a"] = pd.Timestamp("2016-01-01") - s["b"] = 3.0 - s - s.dtype - -.. _whatsnew_0182.api.to_datetime_coerce: - -``.to_datetime()`` when coercing -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A bug is fixed in ``.to_datetime()`` when passing integers or floats, and no ``unit`` and ``errors='coerce'`` (:issue:`13180`). -Previously if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes with ``errors='coerce'`` it would convert all to ``NaT``. - -Previous Behavior: - -.. code-block:: ipython - - In [2]: pd.to_datetime([1, 'foo'], errors='coerce') - Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None) - -This will now convert integers/floats with the default unit of ``ns``. - -.. ipython:: python - - pd.to_datetime([1, 'foo'], errors='coerce') - -.. _whatsnew_0182.api.merging: - -Merging changes -^^^^^^^^^^^^^^^ - -Merging will now preserve the dtype of the join keys (:issue:`8596`) - -.. ipython:: python - - df1 = pd.DataFrame({'key': [1], 'v1': [10]}) - df1 - df2 = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) - df2 - -Previous Behavior: - -.. code-block:: ipython - - In [5]: pd.merge(df1, df2, how='outer') - Out[5]: - key v1 - 0 1.0 10.0 - 1 1.0 20.0 - 2 2.0 30.0 - - In [6]: pd.merge(df1, df2, how='outer').dtypes - Out[6]: - key float64 - v1 float64 - dtype: object - -New Behavior: - -We are able to preserve the join keys - -.. ipython:: python - - pd.merge(df1, df2, how='outer') - pd.merge(df1, df2, how='outer').dtypes - -Of course if you have missing values that are introduced, then the -resulting dtype will be upcast (unchanged from previous). - -.. ipython:: python - - pd.merge(df1, df2, how='outer', on='key') - pd.merge(df1, df2, how='outer', on='key').dtypes - -.. _whatsnew_0182.describe: - -``.describe()`` changes -^^^^^^^^^^^^^^^^^^^^^^^ - -Percentile identifiers in the index of a ``.describe()`` output will now be rounded to the least precision that keeps them distinct (:issue:`13104`) - -.. ipython:: python - - s = pd.Series([0, 1, 2, 3, 4]) - df = pd.DataFrame([0, 1, 2, 3, 4]) - -Previous Behavior: - -The percentiles were rounded to at most one decimal place, which could raise ``ValueError`` for a data frame if the percentiles were duplicated. - -.. code-block:: ipython - - In [3]: s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) - Out[3]: - count 5.000000 - mean 2.000000 - std 1.581139 - min 0.000000 - 0.0% 0.000400 - 0.1% 0.002000 - 0.1% 0.004000 - 50% 2.000000 - 99.9% 3.996000 - 100.0% 3.998000 - 100.0% 3.999600 - max 4.000000 - dtype: float64 - - In [4]: df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) - Out[4]: - ... - ValueError: cannot reindex from a duplicate axis - -New Behavior: - -.. 
ipython:: python - - s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) - df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) - -Furthermore: - -- Passing duplicated ``percentiles`` will now raise a ``ValueError``. -- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) - -.. _whatsnew_0182.api.other: - -Other API changes -^^^^^^^^^^^^^^^^^ - -- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) -- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) -- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`) -- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) -- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`) - -.. _whatsnew_0182.deprecations: - -Deprecations -^^^^^^^^^^^^ - -- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`) -- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) -- ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`) -- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) - -.. _whatsnew_0182.performance: - -Performance Improvements -~~~~~~~~~~~~~~~~~~~~~~~~ - -- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`) -- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks are large, though recommended to use ``IntIndex`` in such cases (:issue:`13082`) -- increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`) - -- Improved performance of float64 hash table operations, fixing some very slow indexing and groupby operations in python 3 (:issue:`13166`, :issue:`13334`) -- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`) - - -.. 
_whatsnew_0182.bug_fixes: - -Bug Fixes -~~~~~~~~~ - -- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) -- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) -- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) -- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) -- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) -- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) -- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) -- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) - - -- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`) -- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`) - -- Bug in calling ``.memory_usage()`` on object which doesn't implement (:issue:`12924`) - -- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()`` ); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`) - -- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`) - -- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) -- Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`) - - -- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`) -- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) -- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) -- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`) -- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) -- Bug in ``.tz_convert`` on a tz-aware ``DateTimeIndex`` that relied on index being sorted for correct results (:issue:`13306`) -- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. 
(:issue:`13231`) -- Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) - -- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) -- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) -- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) - -- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) -- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) -- Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`) - - -- Bug in ``pd.read_csv()`` with ``engine='python'`` in which ``NaN`` values weren't being detected after data was converted to numeric values (:issue:`13314`) -- Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 (:issue:`13398`) -- Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) -- Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) -- Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) -- Bug in ``pd.read_csv()`` with ``engine=='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`) -- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`) - - - -- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`) - - -- Bug in ``pd.to_datetime()`` when passing invalid datatypes (e.g. 
bool); will now respect the ``errors`` keyword (:issue:`13176`) -- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, `int16`` dtypes (:issue:`13451`) -- Bug in extension dtype creation where the created types were not is/identical (:issue:`13285`) - -- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) -- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) -- Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) -- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`) -- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`) - - -- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`) -- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`) -- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`) - -- Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) - - -- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) -- Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`) - -- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another python package called apiclient (:issue:`13454`) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 42db0388ca5d9..70d54ea0d364d 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -1,7 +1,7 @@ .. _whatsnew_0190: -v0.19.0 (????, 2016) --------------------- +v0.19.0 (August ??, 2016) +------------------------- This is a major release from 0.18.2 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all @@ -9,75 +9,524 @@ users upgrade to this version. Highlights include: +- :func:`merge_asof` for asof-style time-series joining, see :ref:`here ` -Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. - -.. contents:: What's new in v0.19.0 +.. contents:: What's new in v0.18.2 :local: :backlinks: none -.. _whatsnew_0190.enhancements: +.. _whatsnew_0190.new_features: New features ~~~~~~~~~~~~ +.. _whatsnew_0190.enhancements.asof_merge: + +:func:`merge_asof` for asof-style time-series joining +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A long-time requested feature has been added through the :func:`merge_asof` function, to +support asof style joining of time-series. (:issue:`1870`). Full documentation is +:ref:`here ` + +The :func:`merge_asof` performs an asof merge, which is similar to a left-join +except that we match on nearest key rather than equal keys. + +.. ipython:: python + + left = pd.DataFrame({'a': [1, 5, 10], + 'left_val': ['a', 'b', 'c']}) + right = pd.DataFrame({'a': [1, 2, 3, 6, 7], + 'right_val': [1, 2, 3, 6, 7]}) + + left + right + +We typically want to match exactly when possible, and use the most +recent value otherwise. + +.. ipython:: python + + pd.merge_asof(left, right, on='a') + +We can also match rows ONLY with prior data, and not an exact match. + +.. 
ipython:: python + + pd.merge_asof(left, right, on='a', allow_exact_matches=False) + + +In a typical time-series example, we have ``trades`` and ``quotes`` and we want to ``asof-join`` them. +This also illustrates using the ``by`` parameter to group data before merging. + +.. ipython:: python + + trades = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', + 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100]}, + columns=['time', 'ticker', 'price', 'quantity']) + + quotes = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.023', + '20160525 13:30:00.030', + '20160525 13:30:00.041', + '20160525 13:30:00.048', + '20160525 13:30:00.049', + '20160525 13:30:00.072', + '20160525 13:30:00.075']), + 'ticker': ['GOOG', 'MSFT', 'MSFT', + 'MSFT', 'GOOG', 'AAPL', 'GOOG', + 'MSFT'], + 'bid': [720.50, 51.95, 51.97, 51.99, + 720.50, 97.99, 720.50, 52.01], + 'ask': [720.93, 51.96, 51.98, 52.00, + 720.93, 98.01, 720.88, 52.03]}, + columns=['time', 'ticker', 'bid', 'ask']) + +.. ipython:: python + + trades + quotes + +An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and +in this case we are using a grouper in the ``by`` field. This is like a left-outer join, except +that forward filling happens automatically taking the most recent non-NaN value. + +.. ipython:: python + + pd.merge_asof(trades, quotes, + on='time', + by='ticker') + +This returns a merged DataFrame with the entries in the same order as the original left +passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. + +.. _whatsnew_0190.enhancements.read_csv_dupe_col_names_support: + +:func:`read_csv` has improved support for duplicate column names +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`Duplicate column names ` are now supported in :func:`read_csv` whether +they are in the file or passed in as the ``names`` parameter (:issue:`7160`, :issue:`9424`) + +.. ipython :: python + + data = '0,1,2\n3,4,5' + names = ['a', 'b', 'a'] + +Previous behaviour: + +.. code-block:: ipython + + In [2]: pd.read_csv(StringIO(data), names=names) + Out[2]: + a b a + 0 2 1 2 + 1 5 4 5 + +The first 'a' column contains the same data as the second 'a' column, when it should have +contained the array ``[0, 3]``. + +New behaviour: + +.. ipython :: python + + In [2]: pd.read_csv(StringIO(data), names=names) + +.. _whatsnew_0190.enhancements.semi_month_offsets: +Semi-Month Offsets +^^^^^^^^^^^^^^^^^^ + +Pandas has gained new frequency offsets, ``SemiMonthEnd`` ('SM') and ``SemiMonthBegin`` ('SMS'). +These provide date offsets anchored (by default) to the 15th and end of month, and 15th and 1st of month respectively. +(:issue:`1543`) + +.. ipython:: python + + from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin + +SemiMonthEnd: + +.. ipython:: python + + Timestamp('2016-01-01') + SemiMonthEnd() + + pd.date_range('2015-01-01', freq='SM', periods=4) + +SemiMonthBegin: + +.. ipython:: python + Timestamp('2016-01-01') + SemiMonthBegin() + pd.date_range('2015-01-01', freq='SMS', periods=4) + +Using the anchoring suffix, you can also specify the day of month to use instead of the 15th. + +.. ipython:: python + + pd.date_range('2015-01-01', freq='SMS-16', periods=4) + + pd.date_range('2015-01-01', freq='SM-14', periods=4) .. 
_whatsnew_0190.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ +- The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raising a ``NonExistentTimeError`` (:issue:`13057`) + +- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see :ref:`documentation here ` (:issue:`10008`, :issue:`13156`) +- ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`) + + .. ipython:: python + + idx = pd.Index(["a1a2", "b1", "c1"]) + idx.str.extractall("[ab](?P\d)") + +- ``Timestamp`` s can now accept positional and keyword parameters like :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`) + + .. ipython:: python + pd.Timestamp(2012, 1, 1) + pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) +- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``decimal`` option (:issue:`12933`) +- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``na_filter`` option (:issue:`13321`) +- The ``pd.read_csv()`` with ``engine='python'`` has gained support for the ``memory_map`` option (:issue:`13381`) +- ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) +- ``Index`` now supports the ``.where()`` function for same shape indexing (:issue:`13170`) -.. _whatsnew_0190.api_breaking: + .. ipython:: python -Backwards incompatible API changes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + idx = pd.Index(['a', 'b', 'c']) + idx.where([True, False, True]) + +- ``Categorical.astype()`` now accepts an optional boolean argument ``copy``, effective when dtype is categorical (:issue:`13209`) +- ``DataFrame`` has gained the ``.asof()`` method to return the last non-NaN values according to the selected subset (:issue:`13358`) +- Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) +- The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) +- ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) +- A ``union_categorical`` function has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) +- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`) +- ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) .. _whatsnew_0190.api: +API changes +~~~~~~~~~~~ +- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`) +- An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`) +- Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`) +- ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. 
For ``axis=None``, the output must be a DataFrame with identical columns and index labels. (:issue:`13222`) +.. _whatsnew_0190.api.tolist: +``Series.tolist()`` will now return Python types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Other API Changes -^^^^^^^^^^^^^^^^^ +``Series.tolist()`` will now return Python types in the output, mimicking NumPy ``.tolist()`` behaviour (:issue:`10904`) -.. _whatsnew_0190.deprecations: -Deprecations -^^^^^^^^^^^^ +.. ipython:: python + + s = pd.Series([1,2,3]) + type(s.tolist()[0]) + +Previous Behavior: + +.. code-block:: ipython + + In [7]: type(s.tolist()[0]) + Out[7]: + + +New Behavior: + +.. ipython:: python + + type(s.tolist()[0]) + +.. _whatsnew_0190.api.promote: + +``Series`` type promotion on assignment +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A ``Series`` will now correctly promote its dtype for assignment with incompat values to the current dtype (:issue:`13234`) + + +.. ipython:: python + + s = pd.Series() +Previous Behavior: +.. code-block:: ipython + In [2]: s["a"] = pd.Timestamp("2016-01-01") + In [3]: s["b"] = 3.0 + TypeError: invalid type promotion -.. _whatsnew_0190.prior_deprecations: +New Behavior: -Removal of prior version deprecations/changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. ipython:: python + s["a"] = pd.Timestamp("2016-01-01") + s["b"] = 3.0 + s + s.dtype +.. _whatsnew_0190.api.to_datetime_coerce: +``.to_datetime()`` when coercing +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +A bug is fixed in ``.to_datetime()`` when passing integers or floats, and no ``unit`` and ``errors='coerce'`` (:issue:`13180`). +Previously if ``.to_datetime()`` encountered mixed integers/floats and strings, but no datetimes with ``errors='coerce'`` it would convert all to ``NaT``. + +Previous Behavior: + +.. code-block:: ipython + + In [2]: pd.to_datetime([1, 'foo'], errors='coerce') + Out[2]: DatetimeIndex(['NaT', 'NaT'], dtype='datetime64[ns]', freq=None) + +This will now convert integers/floats with the default unit of ``ns``. + +.. ipython:: python + + pd.to_datetime([1, 'foo'], errors='coerce') + +.. _whatsnew_0190.api.merging: + +Merging changes +^^^^^^^^^^^^^^^ + +Merging will now preserve the dtype of the join keys (:issue:`8596`) + +.. ipython:: python + + df1 = pd.DataFrame({'key': [1], 'v1': [10]}) + df1 + df2 = pd.DataFrame({'key': [1, 2], 'v1': [20, 30]}) + df2 + +Previous Behavior: + +.. code-block:: ipython + + In [5]: pd.merge(df1, df2, how='outer') + Out[5]: + key v1 + 0 1.0 10.0 + 1 1.0 20.0 + 2 2.0 30.0 + + In [6]: pd.merge(df1, df2, how='outer').dtypes + Out[6]: + key float64 + v1 float64 + dtype: object + +New Behavior: + +We are able to preserve the join keys + +.. ipython:: python + + pd.merge(df1, df2, how='outer') + pd.merge(df1, df2, how='outer').dtypes + +Of course if you have missing values that are introduced, then the +resulting dtype will be upcast (unchanged from previous). + +.. ipython:: python + + pd.merge(df1, df2, how='outer', on='key') + pd.merge(df1, df2, how='outer', on='key').dtypes + +.. _whatsnew_0190.describe: + +``.describe()`` changes +^^^^^^^^^^^^^^^^^^^^^^^ + +Percentile identifiers in the index of a ``.describe()`` output will now be rounded to the least precision that keeps them distinct (:issue:`13104`) + +.. ipython:: python + + s = pd.Series([0, 1, 2, 3, 4]) + df = pd.DataFrame([0, 1, 2, 3, 4]) + +Previous Behavior: + +The percentiles were rounded to at most one decimal place, which could raise ``ValueError`` for a data frame if the percentiles were duplicated. + +.. 
code-block:: ipython + + In [3]: s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[3]: + count 5.000000 + mean 2.000000 + std 1.581139 + min 0.000000 + 0.0% 0.000400 + 0.1% 0.002000 + 0.1% 0.004000 + 50% 2.000000 + 99.9% 3.996000 + 100.0% 3.998000 + 100.0% 3.999600 + max 4.000000 + dtype: float64 + + In [4]: df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + Out[4]: + ... + ValueError: cannot reindex from a duplicate axis + +New Behavior: + +.. ipython:: python + + s.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + df.describe(percentiles=[0.0001, 0.0005, 0.001, 0.999, 0.9995, 0.9999]) + +Furthermore: + +- Passing duplicated ``percentiles`` will now raise a ``ValueError``. +- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) + +.. _whatsnew_0190.api.other: + +Other API changes +^^^^^^^^^^^^^^^^^ + +- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`) +- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`) +- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`) +- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`) +- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`) + +.. _whatsnew_0190.deprecations: + +Deprecations +^^^^^^^^^^^^ + +- ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`) +- ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) +- ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`) +- top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) .. _whatsnew_0190.performance: Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Improved performance of sparse ``IntIndex.intersect`` (:issue:`13082`) +- Improved performance of sparse arithmetic with ``BlockIndex`` when the number of blocks are large, though recommended to use ``IntIndex`` in such cases (:issue:`13082`) +- increased performance of ``DataFrame.quantile()`` as it now operates per-block (:issue:`11623`) - +- Improved performance of float64 hash table operations, fixing some very slow indexing and groupby operations in python 3 (:issue:`13166`, :issue:`13334`) +- Improved performance of ``DataFrameGroupBy.transform`` (:issue:`12737`) .. 
_whatsnew_0190.bug_fixes: Bug Fixes ~~~~~~~~~ + +- Bug in ``io.json.json_normalize()``, where non-ascii keys raised an exception (:issue:`13213`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`) +- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) +- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) +- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) +- Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) +- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) +- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) + + +- Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`) +- Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`) + +- Bug in calling ``.memory_usage()`` on object which doesn't implement (:issue:`12924`) + +- Regression in ``Series.quantile`` with nans (also shows up in ``.median()`` and ``.describe()`` ); furthermore now names the ``Series`` with the quantile (:issue:`13098`, :issue:`13146`) + +- Bug in ``SeriesGroupBy.transform`` with datetime values and missing groups (:issue:`13191`) + +- Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) +- Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`) + + +- Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`) +- Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) +- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`) +- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) +- Bug in ``.tz_convert`` on a tz-aware ``DateTimeIndex`` that relied on index being sorted for correct results (:issue:`13306`) +- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. 
(:issue:`13231`) +- Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) + +- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) +- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) +- Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) + +- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quotations were specified for non-numeric values only (:issue:`12922`, :issue:`13259`) +- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`) +- Bug in ``.str.replace`` does not raise ``TypeError`` for invalid replacement (:issue:`13438`) + + +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which ``NaN`` values weren't being detected after data was converted to numeric values (:issue:`13314`) +- Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 (:issue:`13398`) +- Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) +- Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) +- Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) +- Bug in ``pd.read_csv()`` with ``engine=='c'`` in which fields were not properly cast to float when quoting was specified as non-numeric (:issue:`13411`) +- Bug in ``pd.pivot_table()`` where ``margins_name`` is ignored when ``aggfunc`` is a list (:issue:`13354`) + + + +- Bug in ``Series`` arithmetic raises ``TypeError`` if it contains datetime-like as ``object`` dtype (:issue:`13043`) + + +- Bug in ``pd.to_datetime()`` when passing invalid datatypes (e.g. 
bool); will now respect the ``errors`` keyword (:issue:`13176`)
+- Bug in ``pd.to_datetime()`` which overflowed on ``int8`` and ``int16`` dtypes (:issue:`13451`)
+- Bug in extension dtype creation where the created types were not identical (comparison via ``is`` failed) (:issue:`13285`)
+
+- Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`)
+- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
+- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
+- Bug in ``pd.set_eng_float_format()`` that would prevent ``NaN`` values from formatting (:issue:`11981`)
+- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
+
+
+- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
+- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
+- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
+
+- Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`)
+
+
+- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
+- Bug in ``groupby`` with ``as_index=False`` returns all ``NaN`` values when grouping on multiple columns including a categorical one (:issue:`13204`)
+
+- Bug where ``pd.read_gbq()`` could throw ``ImportError: No module named discovery`` as a result of a naming conflict with another Python package called ``apiclient`` (:issue:`13454`)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
new file mode 100644
index 0000000000000..695e917c76ba0
--- /dev/null
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -0,0 +1,83 @@
+.. _whatsnew_0200:
+
+v0.20.0 (????, 2016)
+--------------------
+
+This is a major release from 0.19 and includes a small number of API changes, several new features,
+enhancements, and performance improvements along with a large number of bug fixes. We recommend that all
+users upgrade to this version.
+
+Highlights include:
+
+
+Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations <whatsnew_0200.deprecations>` before updating.
+
+.. contents:: What's new in v0.20.0
+    :local:
+    :backlinks: none
+
+.. _whatsnew_0200.enhancements:
+
+New features
+~~~~~~~~~~~~
+
+
+
+
+
+.. _whatsnew_0200.enhancements.other:
+
+Other enhancements
+^^^^^^^^^^^^^^^^^^
+
+
+
+
+
+
+.. _whatsnew_0200.api_breaking:
+
+Backwards incompatible API changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_0200.api:
+
+
+
+
+
+
+Other API Changes
+^^^^^^^^^^^^^^^^^
+
+.. _whatsnew_0200.deprecations:
+
+Deprecations
+^^^^^^^^^^^^
+
+
+
+
+
+.. _whatsnew_0200.prior_deprecations:
+
+Removal of prior version deprecations/changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+
+
+
+.. _whatsnew_0200.performance:
+
+Performance Improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+
+
+.. _whatsnew_0200.bug_fixes:
+
+Bug Fixes
+~~~~~~~~~
diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py
index bf6fa35cf255f..7a0743f6b2778 100644
--- a/pandas/computation/ops.py
+++ b/pandas/computation/ops.py
@@ -286,7 +286,7 @@ def _cast_inplace(terms, acceptable_dtypes, dtype):
     acceptable_dtypes : list of acceptable numpy.dtype
         Will not cast if term's dtype in this list.
 
-       .. versionadded:: 0.18.2
+       .. versionadded:: 0.19.0
 
     dtype : str or numpy.dtype
         The dtype to cast to.
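Two entries in the 0.19.0 fix list above lend themselves to a short illustration. A minimal sketch of the repaired behaviour, with example values chosen purely for illustration (assuming pandas 0.19)::

    import pandas as pd

    # pd.to_datetime() with an invalid datatype now respects ``errors``
    pd.to_datetime(False, errors='raise')   # raises TypeError
    pd.to_datetime(False, errors='coerce')  # returns NaT
    pd.to_datetime(False, errors='ignore')  # returns the input unchanged

    # pd.to_numeric() with errors='coerce' no longer fails on
    # non-hashable elements
    s = pd.Series([[10.0, 2], 1.0, 'apple'])  # the list is not hashable
    pd.to_numeric(s, errors='coerce')         # NaN, 1.0, NaN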
diff --git a/pandas/core/base.py b/pandas/core/base.py index 96732a7140f9e..13a6b4b7b4ce0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1001,7 +1001,7 @@ def is_monotonic(self): Return boolean if values in the object are monotonic_increasing - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Returns ------- @@ -1017,7 +1017,7 @@ def is_monotonic_decreasing(self): Return boolean if values in the object are monotonic_decreasing - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Returns ------- diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6dba41a746e19..f4aeaf9184d09 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -348,7 +348,7 @@ def astype(self, dtype, copy=True): If copy is set to False and dtype is categorical, the original object is returned. - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 """ if is_categorical_dtype(dtype): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cc5c45158bf4f..7b271df4085cc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3642,7 +3642,7 @@ def asof(self, where, subset=None): The last row without any NaN is taken (or the last row without NaN considering only the subset of columns in the case of a DataFrame) - .. versionadded:: 0.18.2 For DataFrame + .. versionadded:: 0.19.0 For DataFrame If there is no good value, NaN is returned. diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 96472698ba9d9..ad27010714f63 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -378,7 +378,7 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): def _deepcopy_if_needed(self, orig, copy=False): """ - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Make a copy of self if data coincides (in memory) with orig. Subclasses should override this if self._base is not an ndarray. @@ -494,7 +494,7 @@ def repeat(self, n, *args, **kwargs): def where(self, cond, other=None): """ - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Return an Index of same shape as self and whose corresponding entries are from self where cond is True and otherwise are from @@ -813,7 +813,7 @@ def _to_embed(self, keep_tz=False): satisfied, the original data is used to create a new Index or the original Index is returned. - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 """ diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 3b7c660f5faa1..84b8926f4177f 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -313,7 +313,7 @@ def _can_reindex(self, indexer): def where(self, cond, other=None): """ - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Return an Index of same shape as self and whose corresponding entries are from self where cond is True and otherwise are from diff --git a/pandas/io/html.py b/pandas/io/html.py index 48caaa39dd711..609642e248eda 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -837,7 +837,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None, Character to recognize as decimal point (e.g. use ',' for European data). - .. versionadded:: 0.18.2 + .. versionadded:: 0.19.0 Returns ------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index cbe04349b5105..d4ca717ddbc4e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -276,7 +276,7 @@ def read_hdf(path_or_buf, key=None, **kwargs): path_or_buf : path (string), buffer, or path object (pathlib.Path or py._path.local.LocalPath) to read from - .. 
versionadded:: 0.18.2 support for pathlib, py.path.
+        .. versionadded:: 0.19.0 support for pathlib, py.path.
 
     key : group identifier in the store. Can be omitted if the HDF file
         contains a single pandas object.
 
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 4b7162398738e..d65dfc3254465 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -182,7 +182,7 @@ def merge_ordered(left, right, on=None,
     * outer: use union of keys from both frames (SQL: full outer join)
     * inner: use intersection of keys from both frames (SQL: inner join)
 
-    .. versionadded 0.18.2
+    .. versionadded:: 0.19.0
 
     Examples
     --------
@@ -263,7 +263,7 @@ def merge_asof(left, right, on=None,
     Optionally perform group-wise merge. This searches for the nearest match
     on the 'on' key within the same group according to 'by'.
 
-    .. versionadded 0.18.2
+    .. versionadded:: 0.19.0
 
     Parameters
     ----------
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 42631d442a990..2e3d1ace9734c 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -747,7 +747,7 @@ def repeat(self, repeats, *args, **kwargs):
 
     def where(self, cond, other=None):
         """
-        .. versionadded:: 0.18.2
+        .. versionadded:: 0.19.0
 
         Return an Index of same shape as self and whose corresponding
         entries are from self where cond is True and otherwise are from
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index 77500081be62c..83cb768b37aaa 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -1857,7 +1857,7 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'):
           - 'coerce' will return NaT if the timestamp can not be converted
             into the specified timezone
 
-          .. versionadded:: 0.18.2
+          .. versionadded:: 0.19.0
 
         infer_dst : boolean, default False (DEPRECATED)
             Attempt to infer fall dst-transition hours based on order
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index f4b75ddd72126..d0b1fd746d0d5 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -1258,7 +1258,7 @@ class SemiMonthEnd(SemiMonthOffset):
     Two DateOffset's per month repeating on the last day of the month and
     day_of_month.
 
-    .. versionadded:: 0.18.2
+    .. versionadded:: 0.19.0
 
     Parameters
     ----------
@@ -1317,7 +1317,7 @@ class SemiMonthBegin(SemiMonthOffset):
     Two DateOffset's per month repeating on the first day of the month and
     day_of_month.
 
-    .. versionadded:: 0.18.2
+    .. versionadded:: 0.19.0
 
     Parameters
     ----------
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index 8837881af0b6c..df6554fe1d5de 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -246,7 +246,7 @@ class Timestamp(_Timestamp):
     :func:`datetime.datetime` Parameters
     ------------------------------------
 
-    .. versionadded:: 0.18.2
+    .. versionadded:: 0.19.0
 
     year : int
     month : int
@@ -539,7 +539,7 @@ class Timestamp(_Timestamp):
           - 'coerce' will return NaT if the timestamp can not be converted
             into the specified timezone
 
-          .. versionadded:: 0.18.2
+          .. versionadded:: 0.19.0
 
         Returns
         -------
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
index 53db9ddf79a5c..44338f26eb2e8 100644
--- a/pandas/types/concat.py
+++ b/pandas/types/concat.py
@@ -206,7 +206,7 @@ def union_categoricals(to_union):
     Combine list-like of Categoricals, unioning categories. All
     must have the same dtype, and none can be ordered.
 
-    .. versionadded 0.18.2
+    .. versionadded:: 0.19.0
 
     Parameters
     ----------
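The ``union_categoricals`` function touched by the hunk above can be exercised as in this sketch (the import path matches the file being patched; behaviour as of 0.19)::

    import pandas as pd
    from pandas.types.concat import union_categoricals

    a = pd.Categorical(['b', 'c'])
    b = pd.Categorical(['a', 'b'])
    union_categoricals([a, b])
    # [b, c, a, b]
    # Categories (3, object): [b, c, a]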
From ba82b511c76d87421c8900348efebe4577548ec6 Mon Sep 17 00:00:00 2001
From: Haleemur Ali
Date: Fri, 8 Jul 2016 17:16:15 +0200
Subject: [PATCH 05/44] BUG: Datetime64Formatter not respecting ``formatter``

- [x] closes #10690
- [x] tests added / passed
- [x] passes ``git diff upstream/master | flake8 --diff``
- [x] whatsnew entry

The `Datetime64Formatter` class did not accept a `formatter` argument, so custom formatters passed in through `df.to_string` or `df.to_html` were silently ignored.

Author: Haleemur Ali

This patch had conflicts when merged, resolved by Committer: Joris Van den Bossche

Closes #13567 from haleemur/fix/dt64_outputformat and squashes the following commits:

8d84283 [Haleemur Ali] fix bug in Datetime64Formatter, which affected custom date formatted output for df.to_string, df.to_html methods
---
 doc/source/whatsnew/v0.19.0.txt     |   1 +
 pandas/formats/format.py            |   4 +
 pandas/tests/formats/test_format.py | 128 ++++++++++++++++++++++++++++
 3 files changed, 133 insertions(+)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 70d54ea0d364d..657de7ec26efc 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -522,6 +522,7 @@ Bug Fixes
 - Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
 - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
 - Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
+- Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` silently ignored a custom datetime formatter passed through the ``formatters`` keyword (:issue:`10690`)
 
 - Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`)
 
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
index a8e184ce94c89..0c6a15db4ccfe 100644
--- a/pandas/formats/format.py
+++ b/pandas/formats/format.py
@@ -2239,9 +2239,13 @@ def _format_strings(self):
         """ we by definition DO NOT have a TZ """
 
         values = self.values
+
         if not isinstance(values, DatetimeIndex):
             values = DatetimeIndex(values)
 
+        if self.formatter is not None and callable(self.formatter):
+            return [self.formatter(x) for x in values]
+
         fmt_values = format_array_from_datetime(
             values.asi8.ravel(),
             format=_get_format_datetime64_from_values(values,
diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py
index e67fe2cddde77..c5e9c258b293a 100644
--- a/pandas/tests/formats/test_format.py
+++ b/pandas/tests/formats/test_format.py
@@ -456,6 +456,28 @@ def test_to_string_with_formatters(self):
                                      '2  0x3  [ 3.0]  -False-'))
         self.assertEqual(result, result2)
 
+    def test_to_string_with_datetime64_monthformatter(self):
+        months = [datetime(2016, 1, 1), datetime(2016, 2, 2)]
+        x = DataFrame({'months': months})
+
+        def format_func(x):
+            return x.strftime('%Y-%m')
+        result = x.to_string(formatters={'months': format_func})
+        expected = 'months\n0 2016-01\n1 2016-02'
+        self.assertEqual(result.strip(), expected)
+
+    def test_to_string_with_datetime64_hourformatter(self):
+
+        x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'],
+                                             format='%H:%M:%S.%f')})
+
+        def format_func(x):
+            return x.strftime('%H:%M')
+
+        result = x.to_string(formatters={'hod': format_func})
+        expected = 'hod\n0 10:10\n1 12:12'
+        self.assertEqual(result.strip(), expected)
+
    def
 test_to_string_with_formatters_unicode(self):
         df = DataFrame({u('c/\u03c3'): [1, 2, 3]})
         result = df.to_string(formatters={u('c/\u03c3'): lambda x: '%s' % x})
@@ -1233,6 +1255,63 @@ def test_to_html_index_formatter(self):
 
         self.assertEqual(result, expected)
 
+    def test_to_html_datetime64_monthformatter(self):
+        months = [datetime(2016, 1, 1), datetime(2016, 2, 2)]
+        x = DataFrame({'months': months})
+
+        def format_func(x):
+            return x.strftime('%Y-%m')
+        result = x.to_html(formatters={'months': format_func})
+        expected = """\
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>months</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <th>0</th>
+      <td>2016-01</td>
+    </tr>
+    <tr>
+      <th>1</th>
+      <td>2016-02</td>
+    </tr>
+  </tbody>
+</table>
""" + self.assertEqual(result, expected) + + def test_to_html_datetime64_hourformatter(self): + + x = DataFrame({'hod': pd.to_datetime(['10:10:10.100', '12:12:12.120'], + format='%H:%M:%S.%f')}) + + def format_func(x): + return x.strftime('%H:%M') + result = x.to_html(formatters={'hod': format_func}) + expected = """\ + + + + + + + + + + + + + + + + + +
hod
010:10
112:12
""" + self.assertEqual(result, expected) + def test_to_html_regression_GH6098(self): df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), @@ -2775,6 +2854,33 @@ def test_to_latex_format(self): self.assertEqual(withindex_result, withindex_expected) + def test_to_latex_with_formatters(self): + df = DataFrame({'int': [1, 2, 3], + 'float': [1.0, 2.0, 3.0], + 'object': [(1, 2), True, False], + 'datetime64': [datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)]}) + + formatters = {'int': lambda x: '0x%x' % x, + 'float': lambda x: '[% 4.1f]' % x, + 'object': lambda x: '-%s-' % str(x), + 'datetime64': lambda x: x.strftime('%Y-%m'), + '__index__': lambda x: 'index: %s' % x} + result = df.to_latex(formatters=dict(formatters)) + + expected = r"""\begin{tabular}{llrrl} +\toprule +{} & datetime64 & float & int & object \\ +\midrule +index: 0 & 2016-01 & [ 1.0] & 0x1 & -(1, 2)- \\ +index: 1 & 2016-02 & [ 2.0] & 0x2 & -True- \\ +index: 2 & 2016-03 & [ 3.0] & 0x3 & -False- \\ +\bottomrule +\end{tabular} +""" + self.assertEqual(result, expected) + def test_to_latex_multiindex(self): df = DataFrame({('x', 'y'): ['a']}) result = df.to_latex() @@ -4161,6 +4267,28 @@ def test_dates_display(self): self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000000004") + def test_datetime64formatter_yearmonth(self): + x = Series([datetime(2016, 1, 1), datetime(2016, 2, 2)]) + + def format_func(x): + return x.strftime('%Y-%m') + + formatter = fmt.Datetime64Formatter(x, formatter=format_func) + result = formatter.get_result() + self.assertEqual(result, ['2016-01', '2016-02']) + + def test_datetime64formatter_hoursecond(self): + + x = Series(pd.to_datetime(['10:10:10.100', '12:12:12.120'], + format='%H:%M:%S.%f')) + + def format_func(x): + return x.strftime('%H:%M') + + formatter = fmt.Datetime64Formatter(x, formatter=format_func) + result = formatter.get_result() + self.assertEqual(result, ['10:10', '12:12']) + class TestNaTFormatting(tm.TestCase): From f95576b883d919cdde30fdbaa6065cf9f5a6c1f4 Mon Sep 17 00:00:00 2001 From: Yuichiro Kaneko Date: Sun, 10 Jul 2016 02:01:59 +0900 Subject: [PATCH 06/44] BUG: Fix TimeDelta to Timedelta (#13600) --- pandas/tseries/tests/test_timedeltas.py | 4 ++-- pandas/tslib.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index e515ba624d203..1586d0385732f 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -137,12 +137,12 @@ def test_construction(self): self.assertRaises(ValueError, lambda: Timedelta('3.1415')) # invalid construction - tm.assertRaisesRegexp(ValueError, "cannot construct a TimeDelta", + tm.assertRaisesRegexp(ValueError, "cannot construct a Timedelta", lambda: Timedelta()) tm.assertRaisesRegexp(ValueError, "unit abbreviation w/o a number", lambda: Timedelta('foo')) tm.assertRaisesRegexp(ValueError, - "cannot construct a TimeDelta from the passed " + "cannot construct a Timedelta from the passed " "arguments, allowed keywords are ", lambda: Timedelta(day=10)) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index df6554fe1d5de..61c0f9c5a093b 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -2615,7 +2615,7 @@ class Timedelta(_Timedelta): if value is None: if not len(kwargs): - raise ValueError("cannot construct a TimeDelta without a value/unit or descriptive keywords 
(days,seconds....)") + raise ValueError("cannot construct a Timedelta without a value/unit or descriptive keywords (days,seconds....)") def _to_py_int_float(v): if is_integer_object(v): @@ -2630,7 +2630,7 @@ class Timedelta(_Timedelta): nano = kwargs.pop('nanoseconds',0) value = convert_to_timedelta64(timedelta(**kwargs),'ns',False) + nano except TypeError as e: - raise ValueError("cannot construct a TimeDelta from the passed arguments, allowed keywords are " + raise ValueError("cannot construct a Timedelta from the passed arguments, allowed keywords are " "[weeks, days, hours, minutes, seconds, milliseconds, microseconds, nanoseconds]") if isinstance(value, Timedelta): From 5701c69369264f3aa6f571384602ceec1133dabc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 7 Jul 2016 13:36:49 -0700 Subject: [PATCH 07/44] COMPAT: 32-bit compat fixes mainly in testing closes #13566 closes #13584 --- pandas/core/internals.py | 2 +- pandas/tests/indexes/test_datetimelike.py | 7 +-- pandas/tests/indexes/test_multi.py | 8 ++-- pandas/tests/series/test_analytics.py | 24 +++++----- pandas/tests/test_algos.py | 6 ++- pandas/tests/test_categorical.py | 23 +++++----- pandas/tests/test_groupby.py | 16 +++---- pandas/tools/merge.py | 3 +- pandas/tools/tests/test_merge.py | 12 ++--- pandas/tools/tests/test_tile.py | 5 ++- pandas/tseries/tests/test_base.py | 54 +++++++++++++---------- pandas/tseries/tests/test_timedeltas.py | 6 ++- pandas/tslib.pyx | 20 ++++----- 13 files changed, 104 insertions(+), 82 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c931adc9a31df..1ea567f15cb7f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3085,7 +3085,7 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False, # compute the orderings of our original data if len(self.blocks) > 1: - indexer = np.empty(len(self.axes[0]), dtype='int64') + indexer = np.empty(len(self.axes[0]), dtype=np.intp) i = 0 for b in self.blocks: for j in b.mgr_locs: diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 4a664ed3542d7..9eba481a66685 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -534,9 +534,9 @@ def test_get_loc(self): # time indexing idx = pd.date_range('2000-01-01', periods=24, freq='H') tm.assert_numpy_array_equal(idx.get_loc(time(12)), - np.array([12], dtype=np.int64)) + np.array([12]), check_dtype=False) tm.assert_numpy_array_equal(idx.get_loc(time(12, 30)), - np.array([], dtype=np.int64)) + np.array([]), check_dtype=False) with tm.assertRaises(NotImplementedError): idx.get_loc(time(12, 30), method='pad') @@ -587,7 +587,8 @@ def test_time_loc(self): # GH8667 ts = pd.Series(np.random.randn(n), index=idx) i = np.arange(start, n, step) - tm.assert_numpy_array_equal(ts.index.get_loc(key), i) + tm.assert_numpy_array_equal(ts.index.get_loc(key), i, + check_dtype=False) tm.assert_series_equal(ts[key], ts.iloc[i]) left, right = ts.copy(), ts.copy() diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index fb5576bed90b4..e6a8aafc32be4 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1750,12 +1750,12 @@ def test_reindex_level(self): exp_index2 = self.index.join(idx, level='second', how='left') self.assertTrue(target.equals(exp_index)) - exp_indexer = np.array([0, 2, 4], dtype=np.int64) - tm.assert_numpy_array_equal(indexer, exp_indexer) + exp_indexer = np.array([0, 2, 4]) + 
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False) self.assertTrue(target2.equals(exp_index2)) - exp_indexer2 = np.array([0, -1, 0, -1, 0, -1], dtype=np.int64) - tm.assert_numpy_array_equal(indexer2, exp_indexer2) + exp_indexer2 = np.array([0, -1, 0, -1, 0, -1]) + tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False) assertRaisesRegexp(TypeError, "Fill method not supported", self.index.reindex, self.index, method='pad', diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 433f0f4bc67f5..0dbff0a028619 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -262,7 +262,7 @@ def test_kurt(self): self.assertTrue((df.kurt() == 0).all()) def test_argsort(self): - self._check_accum_op('argsort') + self._check_accum_op('argsort', check_dtype=False) argsorted = self.ts.argsort() self.assertTrue(issubclass(argsorted.dtype.type, np.integer)) @@ -289,8 +289,10 @@ def test_argsort_stable(self): mexpected = np.argsort(s.values, kind='mergesort') qexpected = np.argsort(s.values, kind='quicksort') - self.assert_series_equal(mindexer, Series(mexpected)) - self.assert_series_equal(qindexer, Series(qexpected)) + self.assert_series_equal(mindexer, Series(mexpected), + check_dtype=False) + self.assert_series_equal(qindexer, Series(qexpected), + check_dtype=False) self.assertFalse(np.array_equal(qindexer, mindexer)) def test_cumsum(self): @@ -487,10 +489,11 @@ def testit(): except ImportError: pass - def _check_accum_op(self, name): + def _check_accum_op(self, name, check_dtype=True): func = getattr(np, name) self.assert_numpy_array_equal(func(self.ts).values, - func(np.array(self.ts))) + func(np.array(self.ts)), + check_dtype=check_dtype) # with missing values ts = self.ts.copy() @@ -499,7 +502,8 @@ def _check_accum_op(self, name): result = func(ts)[1::2] expected = func(np.array(ts.valid())) - self.assert_numpy_array_equal(result.values, expected) + self.assert_numpy_array_equal(result.values, expected, + check_dtype=False) def test_compress(self): cond = [True, False, True, False, False] @@ -1360,13 +1364,13 @@ def test_searchsorted_numeric_dtypes_scalar(self): self.assertEqual(r, e) r = s.searchsorted([30]) - e = np.array([2], dtype=np.int64) + e = np.array([2], dtype=np.intp) tm.assert_numpy_array_equal(r, e) def test_searchsorted_numeric_dtypes_vector(self): s = Series([1, 2, 90, 1000, 3e9]) r = s.searchsorted([91, 2e6]) - e = np.array([3, 4], dtype=np.int64) + e = np.array([3, 4], dtype=np.intp) tm.assert_numpy_array_equal(r, e) def test_search_sorted_datetime64_scalar(self): @@ -1380,14 +1384,14 @@ def test_search_sorted_datetime64_list(self): s = Series(pd.date_range('20120101', periods=10, freq='2D')) v = [pd.Timestamp('20120102'), pd.Timestamp('20120104')] r = s.searchsorted(v) - e = np.array([1, 2], dtype=np.int64) + e = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(r, e) def test_searchsorted_sorter(self): # GH8490 s = Series([3, 1, 2]) r = s.searchsorted([0, 3], sorter=np.argsort(s)) - e = np.array([0, 2], dtype=np.int64) + e = np.array([0, 2], dtype=np.intp) tm.assert_numpy_array_equal(r, e) def test_is_unique(self): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8af93ad0ecb2e..cb90110c953c1 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -702,12 +702,14 @@ def test_unique_label_indices(): left = unique_label_indices(a) right = np.unique(a, return_index=True)[1] - tm.assert_numpy_array_equal(left, right) + 
tm.assert_numpy_array_equal(left, right, + check_dtype=False) a[np.random.choice(len(a), 10)] = -1 left = unique_label_indices(a) right = np.unique(a, return_index=True)[1][1:] - tm.assert_numpy_array_equal(left, right) + tm.assert_numpy_array_equal(left, right, + check_dtype=False) def test_rank(): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index cff5bbe14f1eb..90876a4541da6 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -515,17 +515,20 @@ def f(): def test_argsort(self): c = Categorical([5, 3, 1, 4, 2], ordered=True) - expected = np.array([2, 4, 1, 3, 0], dtype=np.int64) - tm.assert_numpy_array_equal(c.argsort(ascending=True), expected) + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(c.argsort(ascending=True), expected, + check_dtype=False) expected = expected[::-1] - tm.assert_numpy_array_equal(c.argsort(ascending=False), expected) + tm.assert_numpy_array_equal(c.argsort(ascending=False), expected, + check_dtype=False) def test_numpy_argsort(self): c = Categorical([5, 3, 1, 4, 2], ordered=True) - expected = np.array([2, 4, 1, 3, 0], dtype=np.int64) - tm.assert_numpy_array_equal(np.argsort(c), expected) + expected = np.array([2, 4, 1, 3, 0]) + tm.assert_numpy_array_equal(np.argsort(c), expected, + check_dtype=False) msg = "the 'kind' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.argsort, @@ -1505,7 +1508,7 @@ def test_searchsorted(self): # Single item array res = c1.searchsorted(['bread']) chk = s1.searchsorted(['bread']) - exp = np.array([1], dtype=np.int64) + exp = np.array([1], dtype=np.intp) self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) @@ -1514,21 +1517,21 @@ def test_searchsorted(self): # np.array.searchsorted() res = c1.searchsorted('bread') chk = s1.searchsorted('bread') - exp = np.array([1], dtype=np.int64) + exp = np.array([1], dtype=np.intp) self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) # Searching for a value that is not present in the Categorical res = c1.searchsorted(['bread', 'eggs']) chk = s1.searchsorted(['bread', 'eggs']) - exp = np.array([1, 4], dtype=np.int64) + exp = np.array([1, 4], dtype=np.intp) self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) # Searching for a value that is not present, to the right res = c1.searchsorted(['bread', 'eggs'], side='right') chk = s1.searchsorted(['bread', 'eggs'], side='right') - exp = np.array([3, 4], dtype=np.int64) # eggs before milk + exp = np.array([3, 4], dtype=np.intp) # eggs before milk self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) @@ -1538,7 +1541,7 @@ def test_searchsorted(self): chk = s2.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4]) # eggs after donuts, after switching milk and donuts - exp = np.array([3, 5], dtype=np.int64) + exp = np.array([3, 5], dtype=np.intp) self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index d6d601f03d561..efcba758e3b38 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -5934,49 +5934,49 @@ def test_nargsort(self): result = _nargsort(items, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), 
check_dtype=False) # mergesort, ascending=True, na_position='first' result = _nargsort(items, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = _nargsort(items, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = _nargsort(items, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='last' result = _nargsort(items2, kind='mergesort', ascending=True, na_position='last') exp = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=True, na_position='first' result = _nargsort(items2, kind='mergesort', ascending=True, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='last' result = _nargsort(items2, kind='mergesort', ascending=False, na_position='last') exp = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) # mergesort, ascending=False, na_position='first' result = _nargsort(items2, kind='mergesort', ascending=False, na_position='first') exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) - tm.assert_numpy_array_equal(result, np.array(exp, dtype=np.int64)) + tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) def test_datetime_count(self): df = DataFrame({'a': [1, 2, 3] * 2, diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index d65dfc3254465..075dff9cf6c38 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -436,7 +436,8 @@ def _merger(x, y): # if we DO have duplicates, then # we cannot guarantee order - sorter = np.concatenate([groupby.indices[g] for g, _ in groupby]) + sorter = com._ensure_platform_int( + np.concatenate([groupby.indices[g] for g, _ in groupby])) if len(result) != len(sorter): if check_duplicates: raise AssertionError("invalid reverse grouping") diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 2505309768997..c8d1bae78dad3 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -91,8 +91,8 @@ def test_cython_left_outer_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls) - self.assert_numpy_array_equal(rs, exp_rs) + self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def 
test_cython_right_outer_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) @@ -117,8 +117,8 @@ def test_cython_right_outer_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls) - self.assert_numpy_array_equal(rs, exp_rs) + self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_cython_inner_join(self): left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) @@ -141,8 +141,8 @@ def test_cython_inner_join(self): exp_rs = exp_rs.take(exp_ri) exp_rs[exp_ri == -1] = -1 - self.assert_numpy_array_equal(ls, exp_ls) - self.assert_numpy_array_equal(rs, exp_rs) + self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) + self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) def test_left_outer_join(self): joined_key2 = merge(self.df, self.df2, on='key2') diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index bb5429b5e8836..16731620a1dcd 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -19,8 +19,9 @@ class TestCut(tm.TestCase): def test_simple(self): data = np.ones(5) result = cut(data, 4, labels=False) - desired = np.array([1, 1, 1, 1, 1], dtype=np.int64) - tm.assert_numpy_array_equal(result, desired) + desired = np.array([1, 1, 1, 1, 1]) + tm.assert_numpy_array_equal(result, desired, + check_dtype=False) def test_bins(self): data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1]) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 7077a23d5abcb..7eadbfb031222 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -505,7 +505,8 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, idx) self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2], dtype=np.int64)) + np.array([0, 1, 2]), + check_dtype=False) self.assertEqual(ordered.freq, idx.freq) ordered, indexer = idx.sort_values(return_indexer=True, @@ -513,7 +514,8 @@ def test_order(self): expected = idx[::-1] self.assert_index_equal(ordered, expected) self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0], dtype=np.int64)) + np.array([2, 1, 0]), + check_dtype=False) self.assertEqual(ordered.freq, expected.freq) self.assertEqual(ordered.freq.n, -1) @@ -550,16 +552,16 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, expected) - exp = np.array([0, 4, 3, 1, 2], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertIsNone(ordered.freq) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) self.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertIsNone(ordered.freq) def test_getitem(self): @@ -1271,7 +1273,8 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, idx) self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2], dtype=np.int64)) + np.array([0, 1, 2]), + check_dtype=False) self.assertEqual(ordered.freq, idx.freq) ordered, indexer = idx.sort_values(return_indexer=True, @@ -1309,16 +1312,16 @@ def test_order(self): ordered, indexer 
= idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, expected) - exp = np.array([0, 4, 3, 1, 2], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertIsNone(ordered.freq) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) self.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertIsNone(ordered.freq) def test_getitem(self): @@ -2074,14 +2077,16 @@ def _check_freq(index, expected_index): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, idx) self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2], dtype=np.int64)) + np.array([0, 1, 2]), + check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) self.assert_index_equal(ordered, idx[::-1]) self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0], dtype=np.int64)) + np.array([2, 1, 0]), + check_dtype=False) _check_freq(ordered, idx[::-1]) pidx = PeriodIndex(['2011', '2013', '2015', '2012', @@ -2103,16 +2108,17 @@ def _check_freq(index, expected_index): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, expected) - exp = np.array([0, 4, 3, 1, 2], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) _check_freq(ordered, idx) ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) self.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, + check_dtype=False) _check_freq(ordered, idx) pidx = PeriodIndex(['2011', '2013', 'NaT', '2011'], name='pidx', @@ -2148,7 +2154,8 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, idx) self.assert_numpy_array_equal(indexer, - np.array([0, 1, 2], dtype=np.int64)) + np.array([0, 1, 2]), + check_dtype=False) self.assertEqual(ordered.freq, idx.freq) self.assertEqual(ordered.freq, freq) @@ -2157,7 +2164,8 @@ def test_order(self): expected = idx[::-1] self.assert_index_equal(ordered, expected) self.assert_numpy_array_equal(indexer, - np.array([2, 1, 0], dtype=np.int64)) + np.array([2, 1, 0]), + check_dtype=False) self.assertEqual(ordered.freq, expected.freq) self.assertEqual(ordered.freq, freq) @@ -2191,16 +2199,16 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True) self.assert_index_equal(ordered, expected) - exp = np.array([0, 4, 3, 1, 2], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([0, 4, 3, 1, 2]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertEqual(ordered.freq, 'D') ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) self.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0], dtype=np.int64) - self.assert_numpy_array_equal(indexer, exp) + exp = np.array([2, 1, 3, 4, 0]) + self.assert_numpy_array_equal(indexer, exp, check_dtype=False) self.assertEqual(ordered.freq, 'D') def test_getitem(self): diff --git 
a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 1586d0385732f..c3bd62849bf82 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -1547,12 +1547,14 @@ def test_sort_values(self): ordered, dexer = idx.sort_values(return_indexer=True) self.assertTrue(ordered.is_monotonic) self.assert_numpy_array_equal(dexer, - np.array([1, 2, 0], dtype=np.int64)) + np.array([1, 2, 0]), + check_dtype=False) ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) self.assertTrue(ordered[::-1].is_monotonic) self.assert_numpy_array_equal(dexer, - np.array([0, 2, 1], dtype=np.int64)) + np.array([0, 2, 1]), + check_dtype=False) def test_insert(self): diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 61c0f9c5a093b..0db4282808a26 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3754,11 +3754,11 @@ except: def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): cdef: - ndarray[int64_t] utc_dates, tt, result, trans, deltas, posn + ndarray[int64_t] utc_dates, tt, result, trans, deltas Py_ssize_t i, j, pos, n = len(vals) - int64_t v, offset + ndarray[Py_ssize_t] posn + int64_t v, offset, delta pandas_datetimestruct dts - Py_ssize_t trans_len if not have_pytz: import pytz @@ -3790,7 +3790,6 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): if not len(tt): return vals - trans_len = len(trans) posn = trans.searchsorted(tt, side='right') j = 0 for i in range(n): @@ -3826,18 +3825,19 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): # Convert UTC to other timezone trans, deltas, typ = _get_dst_info(tz2) - trans_len = len(trans) - - # if all NaT, return all NaT - if (utc_dates==NPY_NAT).all(): - return utc_dates # use first non-NaT element # if all-NaT, return all-NaT if (result==NPY_NAT).all(): return result - posn = trans.searchsorted(utc_dates[utc_dates!=NPY_NAT], side='right') + # if all NaT, return all NaT + tt = utc_dates[utc_dates!=NPY_NAT] + if not len(tt): + return utc_dates + + posn = trans.searchsorted(tt, side='right') + j = 0 for i in range(n): v = utc_dates[i] From 3c202b1cbcc73c4006c967c8abe1b8d9089c5be4 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Sat, 9 Jul 2016 22:49:58 +0100 Subject: [PATCH 08/44] Added more exhaustive tests for __contains__. 
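The membership behaviour these tests exercise can be sketched as follows (the values echo the fixtures used in the diff below)::

    import pandas as pd

    # non-monotonic DatetimeIndex; 2013-01-03 lands at position 0
    idx = pd.date_range('20130101', periods=5)[[2, 0, 3, 4, 1]]

    '2013-01-03' in idx              # True, even as the first element
    pd.Timestamp('20130106') in idx  # False, lies outside the index
    pd.NaT in idx                    # False until NaT is inserted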
--- pandas/tests/indexes/test_datetimelike.py | 105 +++++++++++++++++++--- 1 file changed, 94 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index e9b5403d12afc..804a0f4ed5cc1 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time +from datetime import timedelta, time, date, datetime import numpy as np @@ -19,6 +19,9 @@ class DatetimeLike(Base): + def create_nonmonotonic_index(self): + return self.create_index()[[2, 0, 3, 4, 1]] + def test_shift_identity(self): idx = self.create_index() @@ -64,6 +67,9 @@ def setUp(self): def create_index(self): return date_range('20130101', periods=5) + def create_elem_outside_index(self): + return pd.Timestamp('20130106') + def test_shift(self): # test shift for datetimeIndex and non datetimeIndex @@ -722,16 +728,36 @@ def test_fillna_datetime64(self): self.assert_index_equal(idx.fillna('x'), exp) def test_contains(self): - #GH13572 - dates = ['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02'] - monotonic = pd.to_datetime(sorted(dates)) - non_monotonic = pd.to_datetime(['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02']) - for idx in [non_monotonic, monotonic]: - self.assertNotIn('2015-01-06', idx) - self.assertNotIn(pd.Timestamp('2015-01-06'), idx) - for dt in reversed(dates): - self.assertIn(dt, idx) - self.assertIn(pd.Timestamp(dt), idx) + # GH13572 + monotonic = self.create_index() + ascending_nat_first = monotonic.insert( + 0, pd.NaT) # Not monotonic after inserting NaT + ascending_nat_last = monotonic.insert(5, pd.NaT) + non_monotonic = self.create_nonmonotonic_index() + non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT) + non_monotonic_nat_last = non_monotonic.insert(5, pd.NaT) + idx_with_nat = [ascending_nat_first, ascending_nat_last, + non_monotonic_nat_first, non_monotonic_nat_last] + idx_no_nat = [monotonic, non_monotonic] + for idx in idx_no_nat + idx_with_nat: + elem = self.create_elem_outside_index() + elem_str = str(elem) + elem_date_str = str(elem.date()) + for e in [elem, elem_str, elem_date_str, elem.date(), elem.to_datetime()]: + self.assertNotIn(e, idx) + for elem in monotonic: + elem_str = str(elem) + elem_date_str = str(elem.date()) + for e in [elem, elem_str, elem_date_str, elem.date(), elem.to_datetime()]: + self.assertIn(e, idx) + nat_elems = [pd.NaT, None, float('nan'), np.nan] + for idx in idx_no_nat: + for nn in nat_elems: + self.assertNotIn(nn, idx) + for idx in idx_with_nat: + for nn in nat_elems: + self.assertIn(nn, idx) + class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex @@ -744,6 +770,9 @@ def setUp(self): def create_index(self): return period_range('20130101', periods=5, freq='D') + def create_elem_outside_index(self): + return pd.Period('20130106') + def test_astype(self): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') @@ -928,6 +957,35 @@ def test_no_millisecond_field(self): with self.assertRaises(AttributeError): DatetimeIndex([]).millisecond + def test_contains(self): + # GH13572 + monotonic = self.create_index() + ascending_nat_first = monotonic.insert( + 0, pd.NaT) # Not monotonic after inserting NaT + ascending_nat_last = monotonic.insert(5, pd.NaT) + non_monotonic = self.create_nonmonotonic_index() + non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT) + non_monotonic_nat_last = 
non_monotonic.insert(5, pd.NaT)
+        idx_with_nat = [ascending_nat_first, ascending_nat_last,
+                        non_monotonic_nat_first, non_monotonic_nat_last]
+        idx_no_nat = [monotonic, non_monotonic]
+        for idx in idx_no_nat + idx_with_nat:
+            elem = self.create_elem_outside_index()
+            elem_str = str(elem)
+            for e in [elem, elem_str]:
+                self.assertNotIn(e, idx)
+            for elem in monotonic:
+                elem_str = str(elem)
+                for e in [elem, elem_str]:
+                    self.assertIn(e, idx)
+        nat_elems = [pd.Period('NaT', freq='D')]
+        for idx in idx_no_nat:
+            for nn in nat_elems:
+                self.assertNotIn(nn, idx)
+        for idx in idx_with_nat:
+            for nn in nat_elems:
+                self.assertIn(nn, idx)
+
 class TestTimedeltaIndex(DatetimeLike, tm.TestCase):
     _holder = TimedeltaIndex
@@ -940,6 +998,9 @@ def setUp(self):
     def create_index(self):
         return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
 
+    def create_elem_outside_index(self):
+        return pd.Timedelta(days=5, hours=1)
+
     def test_shift(self):
         # test shift for TimedeltaIndex
         # err8083
@@ -1128,3 +1189,25 @@ def test_fillna_timedelta(self):
         exp = pd.Index(
             [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
         self.assert_index_equal(idx.fillna('x'), exp)
+
+    def test_contains(self):
+        # GH13572
+        monotonic = self.create_index()
+        ascending_nat_first = monotonic.insert(
+            0, pd.NaT)  # Not monotonic after inserting NaT
+        ascending_nat_last = monotonic.insert(5, pd.NaT)
+        non_monotonic = self.create_nonmonotonic_index()
+        non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT)
+        non_monotonic_nat_last = non_monotonic.insert(5, pd.NaT)
+        idx_with_nat = [ascending_nat_first, ascending_nat_last,
+                        non_monotonic_nat_first, non_monotonic_nat_last]
+        idx_no_nat = [monotonic, non_monotonic]
+        for idx in idx_no_nat + idx_with_nat:
+            elem = self.create_elem_outside_index()
+            elem_str = str(elem)
+            for e in [elem, elem_str]:
+                self.assertNotIn(e, idx)
+            for elem in monotonic:
+                elem_str = str(elem)
+                for e in [elem, elem_str]:
+                    self.assertIn(e, idx)

From 713eaa6837127f619619bca8a5a32ed02b145754 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sun, 10 Jul 2016 17:01:51 -0400
Subject: [PATCH 09/44] BUG: DatetimeIndex - Period shows unintelligible error

closes #13078

Author: sinhrks

Closes #13581 from sinhrks/dti_period_error and squashes the following commits:

c957541 [sinhrks] BUG: DatetimeIndex - Period shows unintelligible error
---
 doc/source/whatsnew/v0.19.0.txt   |  2 +-
 pandas/tseries/tests/test_base.py | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 657de7ec26efc..6a1d450cf083f 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -473,7 +473,7 @@ Bug Fixes
 - Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`)
 - Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`)
 
-
+- Bug in ``DatetimeIndex`` and ``Period`` subtraction raises ``ValueError`` or ``AttributeError`` rather than ``TypeError`` (:issue:`13078`)
 - Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`)
 - Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`)
 - Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`)
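A sketch of the behaviour change recorded in the whatsnew entry above, mirroring the tests added below::

    import pandas as pd

    idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'])
    p = pd.Period('2011-01-01', freq='D')

    idx - p  # now raises TypeError instead of ValueError/AttributeError
    p - idx  # TypeError as well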
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
index 7eadbfb031222..360944e355b4d 100644
--- a/pandas/tseries/tests/test_base.py
+++ b/pandas/tseries/tests/test_base.py
@@ -443,6 +443,20 @@ def test_sub_isub(self):
             rng -= 1
             tm.assert_index_equal(rng, expected)
 
+    def test_sub_period(self):
+        # GH 13078
+        # not supported, check TypeError
+        p = pd.Period('2011-01-01', freq='D')
+
+        for freq in [None, 'D']:
+            idx = pd.DatetimeIndex(['2011-01-01', '2011-01-02'], freq=freq)
+
+            with tm.assertRaises(TypeError):
+                idx - p
+
+            with tm.assertRaises(TypeError):
+                p - idx
+
     def test_value_counts_unique(self):
         # GH 7735
         for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']:
@@ -1159,6 +1173,20 @@ def test_dti_tdi_numeric_ops(self):
         expected = DatetimeIndex(['20121231', pd.NaT, '20130101'])
         tm.assert_index_equal(result, expected)
 
+    def test_sub_period(self):
+        # GH 13078
+        # not supported, check TypeError
+        p = pd.Period('2011-01-01', freq='D')
+
+        for freq in [None, 'H']:
+            idx = pd.TimedeltaIndex(['1 hours', '2 hours'], freq=freq)
+
+            with tm.assertRaises(TypeError):
+                idx - p
+
+            with tm.assertRaises(TypeError):
+                p - idx
+
     def test_addition_ops(self):
 
         # with datetimes/timedelta and tdi/dti

From 675a6e35cc78063f68a14338ae69c099588e23d1 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sun, 10 Jul 2016 17:06:14 -0400
Subject: [PATCH 10/44] ENH: add downcast to pd.to_numeric

Title is self-explanatory.

Closes #13352.

Author: gfyoung

Closes #13425 from gfyoung/to-numeric-enhance and squashes the following commits:

4758dcc [gfyoung] ENH: add 'downcast' to pd.to_numeric
---
 asv_bench/benchmarks/inference.py |  21 +++++-
 doc/source/basics.rst             | 102 ++++++++++++++++++++++-------
 doc/source/whatsnew/v0.19.0.txt   |   7 ++
 pandas/tools/tests/test_util.py   |  77 ++++++++++++++++++++++
 pandas/tools/util.py              | 103 ++++++++++++++++++++++++++----
 5 files changed, 273 insertions(+), 37 deletions(-)

diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index 3fceed087facb..6809c351beade 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -135,4 +135,23 @@ def setup(self):
         self.df_timedelta64 = DataFrame(dict(A=(self.df_datetime64['A'] - self.df_datetime64['B']), B=self.df_datetime64['B']))
 
     def time_dtype_infer_uint32(self):
-        (self.df_uint32['A'] + self.df_uint32['B'])
\ No newline at end of file
+        (self.df_uint32['A'] + self.df_uint32['B'])
+
+
+class to_numeric(object):
+    N = 500000
+
+    param_names = ['data', 'downcast']
+    params = [
+        [(['1'] * (N // 2)) + ([2] * (N // 2)),
+         (['-1'] * (N // 2)) + ([2] * (N // 2)),
+         np.repeat(np.array(['1970-01-01', '1970-01-02'],
+                            dtype='datetime64[D]'), N),
+         (['1.1'] * (N // 2)) + ([2] * (N // 2)),
+         ([1] * (N // 2)) + ([2] * (N // 2)),
+         np.repeat(np.int32(1), N)],
+        [None, 'integer', 'signed', 'unsigned', 'float'],
+    ]
+
+    def time_to_numeric(self, data, downcast):
+        pd.to_numeric(data, downcast=downcast)
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 8145e9536a82a..63a7c8fded2db 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1754,39 +1754,93 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`
 object conversion
 ~~~~~~~~~~~~~~~~~
 
-:meth:`~DataFrame.convert_objects` is a method to try to force conversion of types from the ``object`` dtype to other types.
-To force conversion of specific types that are *number like*, e.g. could be a string that represents a number,
-pass ``convert_numeric=True``. This will force strings and numbers alike to be numbers if possible, otherwise
-they will be set to ``np.nan``.
+pandas offers various functions to try to force conversion of types from the ``object`` dtype to other types.
+The following functions are available for one-dimensional object arrays or scalars:
+
+- :meth:`~pandas.to_numeric` (conversion to numeric dtypes)
+
+  .. ipython:: python
+
+     m = ['1.1', 2, 3]
+     pd.to_numeric(m)
+
+- :meth:`~pandas.to_datetime` (conversion to datetime objects)
+
+  .. ipython:: python
+
+     import datetime
+     m = ['2016-07-09', datetime.datetime(2016, 3, 2)]
+     pd.to_datetime(m)
+
+- :meth:`~pandas.to_timedelta` (conversion to timedelta objects)
+
+  .. ipython:: python
+
+     m = ['5us', pd.Timedelta('1day')]
+     pd.to_timedelta(m)
+
+To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas should deal with elements
+that cannot be converted to the desired dtype or object. By default, ``errors='raise'``, meaning that any errors encountered
+will be raised during the conversion process. However, if ``errors='coerce'``, these errors will be ignored and pandas
+will convert problematic elements to ``pd.NaT`` (for datetime and timedelta) or ``np.nan`` (for numeric). This might be
+useful if you are reading in data which is mostly of the desired dtype (e.g. numeric, datetime), but occasionally has
+non-conforming elements intermixed that you want to represent as missing:
+
+.. ipython:: python
+
+   import datetime
+   m = ['apple', datetime.datetime(2016, 3, 2)]
+   pd.to_datetime(m, errors='coerce')
+
+   m = ['apple', 2, 3]
+   pd.to_numeric(m, errors='coerce')
+
+   m = ['apple', pd.Timedelta('1day')]
+   pd.to_timedelta(m, errors='coerce')
+
+The ``errors`` parameter has a third option of ``errors='ignore'``, which will simply return the passed-in data if it
+encounters any errors with the conversion to a desired data type:
+
+.. ipython:: python
+
+   import datetime
+   m = ['apple', datetime.datetime(2016, 3, 2)]
+   pd.to_datetime(m, errors='ignore')
+
+   m = ['apple', 2, 3]
+   pd.to_numeric(m, errors='ignore')
+
+   m = ['apple', pd.Timedelta('1day')]
+   pd.to_timedelta(m, errors='ignore')
+
+In addition to object conversion, :meth:`~pandas.to_numeric` provides another argument ``downcast``, which gives the
+option of downcasting the newly (or already) numeric data to a smaller dtype, which can conserve memory:
+
+.. ipython:: python
+
+   m = ['1', 2, 3]
+   pd.to_numeric(m, downcast='integer')   # smallest signed int dtype
+   pd.to_numeric(m, downcast='signed')    # same as 'integer'
+   pd.to_numeric(m, downcast='unsigned')  # smallest unsigned int dtype
+   pd.to_numeric(m, downcast='float')     # smallest float dtype
+
+As these methods apply only to one-dimensional arrays, lists, or scalars, they cannot be used directly on multi-dimensional objects such
+as DataFrames.
However, with :meth:`~pandas.DataFrame.apply`, we can "apply" the function over each column efficiently:
+
+.. ipython:: python
+
+   import datetime
+   df = pd.DataFrame([['2016-07-09', datetime.datetime(2016, 3, 2)]] * 2, dtype='O')
+   df
+   df.apply(pd.to_datetime)
+
+   df = pd.DataFrame([['1.1', 2, 3]] * 2, dtype='O')
+   df
+   df.apply(pd.to_numeric)
+
+   df = pd.DataFrame([['5us', pd.Timedelta('1day')]] * 2, dtype='O')
+   df
+   df.apply(pd.to_timedelta)
 
 gotchas
 ~~~~~~~
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 6a1d450cf083f..40ae38f12fccb 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -186,6 +186,13 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behaviour remains to raise a ``NonExistentTimeError`` (:issue:`13057`)
+- ``pd.to_numeric()`` now accepts a ``downcast`` parameter, which will downcast the data if possible to the smallest specified numerical dtype (:issue:`13352`)
+
+  .. ipython:: python
+
+     s = ['1', 2, 3]
+     pd.to_numeric(s, downcast='unsigned')
+     pd.to_numeric(s, downcast='integer')
 
 - ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see :ref:`documentation here <text.extractall>` (:issue:`10008`, :issue:`13156`)
 - ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`)
diff --git a/pandas/tools/tests/test_util.py b/pandas/tools/tests/test_util.py
index c592b33bdab9a..5b738086a1ad4 100644
--- a/pandas/tools/tests/test_util.py
+++ b/pandas/tools/tests/test_util.py
@@ -291,6 +291,83 @@ def test_non_hashable(self):
         with self.assertRaisesRegexp(TypeError, "Invalid object type"):
             pd.to_numeric(s)
 
+    def test_downcast(self):
+        # see gh-13352
+        mixed_data = ['1', 2, 3]
+        int_data = [1, 2, 3]
+        date_data = np.array(['1970-01-02', '1970-01-03',
+                              '1970-01-04'], dtype='datetime64[D]')
+
+        invalid_downcast = 'unsigned-integer'
+        msg = 'invalid downcasting method provided'
+
+        smallest_int_dtype = np.dtype(np.typecodes['Integer'][0])
+        smallest_uint_dtype = np.dtype(np.typecodes['UnsignedInteger'][0])
+
+        # support below np.float32 is rare and far between
+        float_32_char = np.dtype(np.float32).char
+        smallest_float_dtype = float_32_char
+
+        for data in (mixed_data, int_data, date_data):
+            with self.assertRaisesRegexp(ValueError, msg):
+                pd.to_numeric(data, downcast=invalid_downcast)
+
+            expected = np.array([1, 2, 3], dtype=np.int64)
+
+            res = pd.to_numeric(data)
+            tm.assert_numpy_array_equal(res, expected)
+
+            res = pd.to_numeric(data, downcast=None)
+            tm.assert_numpy_array_equal(res, expected)
+
+            expected = np.array([1, 2, 3], dtype=smallest_int_dtype)
+
+            for signed_downcast in ('integer', 'signed'):
+                res = pd.to_numeric(data, downcast=signed_downcast)
+                tm.assert_numpy_array_equal(res, expected)
+
+            expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
+            res = pd.to_numeric(data, downcast='unsigned')
+            tm.assert_numpy_array_equal(res, expected)
+
+            expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
+            res = pd.to_numeric(data, downcast='float')
+            tm.assert_numpy_array_equal(res, expected)
+
+        # if we can't successfully cast the given
+        # data to a
+        # with the downcast parameter
+        data = ['foo', 2, 3]
+        expected = np.array(data, dtype=object)
+        res = pd.to_numeric(data, errors='ignore',
+                            downcast='unsigned')
+        tm.assert_numpy_array_equal(res, expected)
+
+        # cannot cast to an unsigned integer because
+        # we have a negative number
+        data = ['-1', 2, 3]
+        expected = np.array([-1, 2, 3], dtype=np.int64)
+        res = pd.to_numeric(data, downcast='unsigned')
+        tm.assert_numpy_array_equal(res, expected)
+
+        # cannot cast to an integer (signed or unsigned)
+        # because we have a float number
+        data = ['1.1', 2, 3]
+        expected = np.array([1.1, 2, 3], dtype=np.float64)
+
+        for downcast in ('integer', 'signed', 'unsigned'):
+            res = pd.to_numeric(data, downcast=downcast)
+            tm.assert_numpy_array_equal(res, expected)
+
+        # the smallest integer dtype need not be np.(u)int8
+        data = ['256', 257, 258]
+
+        for downcast, expected_dtype in zip(
+                ['integer', 'signed', 'unsigned'],
+                [np.int16, np.int16, np.uint16]):
+            expected = np.array([256, 257, 258], dtype=expected_dtype)
+            res = pd.to_numeric(data, downcast=downcast)
+            tm.assert_numpy_array_equal(res, expected)
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tools/util.py b/pandas/tools/util.py
index 61d2c0adce2fe..d70904e1bf286 100644
--- a/pandas/tools/util.py
+++ b/pandas/tools/util.py
@@ -50,7 +50,7 @@ def compose(*funcs):
     return reduce(_compose2, funcs)
 
 
-def to_numeric(arg, errors='raise'):
+def to_numeric(arg, errors='raise', downcast=None):
     """
     Convert argument to a numeric type.
 
@@ -61,6 +61,27 @@ def to_numeric(arg, errors='raise'):
         - If 'raise', then invalid parsing will raise an exception
         - If 'coerce', then invalid parsing will be set as NaN
         - If 'ignore', then invalid parsing will return the input
+    downcast : {'integer', 'signed', 'unsigned', 'float'}, default None
+        If not None, and if the data has been successfully cast to a
+        numerical dtype (or if the data was numeric to begin with),
+        downcast that resulting data to the smallest numerical dtype
+        possible according to the following rules:
+
+        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
+        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
+        - 'float': smallest float dtype (min.: np.float32)
+
+        As this behaviour is separate from the core conversion to
+        numeric values, any errors raised during the downcasting
+        will be surfaced regardless of the value of the 'errors' input.
+
+        In addition, downcasting will only occur if the size
+        of the resulting data's dtype is strictly larger than
+        the dtype it is to be cast to, so if none of the dtypes
+        checked satisfy that specification, no downcasting will be
+        performed on the data.
+
+        .. versionadded:: 0.19.0
 
     Returns
     -------
@@ -74,10 +95,37 @@ def to_numeric(arg, errors='raise'):
     >>> import pandas as pd
     >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float64
+    >>> pd.to_numeric(s, downcast='float')
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float32
+    >>> pd.to_numeric(s, downcast='signed')
+    0    1
+    1    2
+    2   -3
+    dtype: int8
     >>> s = pd.Series(['apple', '1.0', '2', -3])
     >>> pd.to_numeric(s, errors='ignore')
+    0    apple
+    1      1.0
+    2        2
+    3       -3
+    dtype: object
     >>> pd.to_numeric(s, errors='coerce')
+    0    NaN
+    1    1.0
+    2    2.0
+    3   -3.0
+    dtype: float64
     """
+    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
+        raise ValueError('invalid downcasting method provided')
+
     is_series = False
     is_index = False
     is_scalar = False
@@ -102,20 +150,51 @@ def to_numeric(arg, errors='raise'):
     else:
         values = arg
 
-    if com.is_numeric_dtype(values):
-        pass
-    elif com.is_datetime_or_timedelta_dtype(values):
-        values = values.astype(np.int64)
-    else:
-        values = com._ensure_object(values)
-        coerce_numeric = False if errors in ('ignore', 'raise') else True
+    try:
+        if com.is_numeric_dtype(values):
+            pass
+        elif com.is_datetime_or_timedelta_dtype(values):
+            values = values.astype(np.int64)
+        else:
+            values = com._ensure_object(values)
+            coerce_numeric = False if errors in ('ignore', 'raise') else True
 
-    try:
-        values = lib.maybe_convert_numeric(values, set(),
-                                           coerce_numeric=coerce_numeric)
-    except:
-        if errors == 'raise':
-            raise
+            values = lib.maybe_convert_numeric(values, set(),
+                                               coerce_numeric=coerce_numeric)
+
+    except Exception:
+        if errors == 'raise':
+            raise
+
+    # attempt downcast only if the data has been successfully converted
+    # to a numerical dtype and if a downcast method has been specified
+    if downcast is not None and com.is_numeric_dtype(values):
+        typecodes = None
+
+        if downcast in ('integer', 'signed'):
+            typecodes = np.typecodes['Integer']
+        elif downcast == 'unsigned' and np.min(values) >= 0:
+            typecodes = np.typecodes['UnsignedInteger']
+        elif downcast == 'float':
+            typecodes = np.typecodes['Float']
+
+            # pandas support goes only to np.float32,
+            # as float dtypes smaller than that are
+            # extremely rare and not well supported
+            float_32_char = np.dtype(np.float32).char
+            float_32_ind = typecodes.index(float_32_char)
+            typecodes = typecodes[float_32_ind:]
+
+        if typecodes is not None:
+            # from smallest to largest
+            for dtype in typecodes:
+                if np.dtype(dtype).itemsize < values.dtype.itemsize:
+                    values = com._possibly_downcast_to_dtype(
+                        values, dtype)
+
+                    # successful conversion
+                    if values.dtype == dtype:
+                        break
 
     if is_series:
         return pd.Series(values, index=arg.index, name=arg.name)

From 1edc1df161f3274218fcd19c23663ea63386f105 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sun, 10 Jul 2016 17:16:00 -0400
Subject: [PATCH 11/44] CLN: remove radd workaround in ops.py

Remove workaround added in #353.
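With the shim gone, the reflected method is generated directly from
``lambda x, y: y + x``. A minimal illustration of the equivalence this
relies on (illustrative only, not library code):

    import operator

    # reflected add: self ends up on the right-hand side of '+'
    radd = lambda x, y: y + x

    assert radd(2, 3) == operator.add(3, 2)   # 5
    assert radd('foo', 'bar') == 'barfoo'     # '+' on str is not commutative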
Author: sinhrks Closes #13606 from sinhrks/ops_radd_cln and squashes the following commits: d873aad [sinhrks] CLN: remove radd workaround --- pandas/core/ops.py | 36 +++---------- pandas/sparse/series.py | 3 +- pandas/tests/series/test_operators.py | 75 +++++++++++++++++++++++++-- 3 files changed, 80 insertions(+), 34 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index f27a83f50e115..34ab3ae6863b5 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -34,7 +34,7 @@ # methods -def _create_methods(arith_method, radd_func, comp_method, bool_method, +def _create_methods(arith_method, comp_method, bool_method, use_numexpr, special=False, default_axis='columns'): # creates actual methods based upon arithmetic, comp and bool method # constructors. @@ -55,14 +55,14 @@ def names(x): return "__%s__" % x else: names = lambda x: x - radd_func = radd_func or operator.add + # Inframe, all special methods have default_axis=None, flex methods have # default_axis set to the default (columns) # yapf: disable new_methods = dict( add=arith_method(operator.add, names('add'), op('+'), default_axis=default_axis), - radd=arith_method(radd_func, names('radd'), op('+'), + radd=arith_method(lambda x, y: y + x, names('radd'), op('+'), default_axis=default_axis), sub=arith_method(operator.sub, names('sub'), op('-'), default_axis=default_axis), @@ -149,7 +149,7 @@ def add_methods(cls, new_methods, force, select, exclude): # ---------------------------------------------------------------------- # Arithmetic -def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, +def add_special_arithmetic_methods(cls, arith_method=None, comp_method=None, bool_method=None, use_numexpr=True, force=False, select=None, exclude=None): @@ -162,8 +162,6 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, arith_method : function (optional) factory for special arithmetic methods, with op string: f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) - radd_func : function (optional) - Possible replacement for ``operator.add`` for compatibility comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) use_numexpr : bool, default True @@ -176,12 +174,11 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None, exclude : iterable of strings (optional) if passed, will not set functions with names in exclude """ - radd_func = radd_func or operator.add # in frame, special methods have default_axis = None, comp methods use # 'columns' - new_methods = _create_methods(arith_method, radd_func, comp_method, + new_methods = _create_methods(arith_method, comp_method, bool_method, use_numexpr, default_axis=None, special=True) @@ -218,7 +215,7 @@ def f(self, other): exclude=exclude) -def add_flex_arithmetic_methods(cls, flex_arith_method, radd_func=None, +def add_flex_arithmetic_methods(cls, flex_arith_method, flex_comp_method=None, flex_bool_method=None, use_numexpr=True, force=False, select=None, exclude=None): @@ -231,9 +228,6 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, radd_func=None, flex_arith_method : function factory for special arithmetic methods, with op string: f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) - radd_func : function (optional) - Possible replacement for ``lambda x, y: operator.add(y, x)`` for - compatibility flex_comp_method : function, optional, factory for rich comparison - signature: f(op, name, str_rep) use_numexpr : bool, default True @@ -246,9 
+240,8 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, radd_func=None, exclude : iterable of strings (optional) if passed, will not set functions with names in exclude """ - radd_func = radd_func or (lambda x, y: operator.add(y, x)) # in frame, default axis is 'columns', doesn't matter for series and panel - new_methods = _create_methods(flex_arith_method, radd_func, + new_methods = _create_methods(flex_arith_method, flex_comp_method, flex_bool_method, use_numexpr, default_axis='columns', special=False) @@ -858,17 +851,6 @@ def wrapper(self, other): return wrapper -def _radd_compat(left, right): - radd = lambda x, y: y + x - # GH #353, NumPy 1.5.1 workaround - try: - output = radd(left, right) - except TypeError: - raise - - return output - - _op_descriptions = {'add': {'op': '+', 'desc': 'Addition', 'reversed': False, @@ -963,11 +945,9 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): series_flex_funcs = dict(flex_arith_method=_flex_method_SERIES, - radd_func=_radd_compat, flex_comp_method=_comp_method_SERIES) series_special_funcs = dict(arith_method=_arith_method_SERIES, - radd_func=_radd_compat, comp_method=_comp_method_SERIES, bool_method=_bool_method_SERIES) @@ -1209,11 +1189,9 @@ def f(self, other): frame_flex_funcs = dict(flex_arith_method=_arith_method_FRAME, - radd_func=_radd_compat, flex_comp_method=_flex_comp_method_FRAME) frame_special_funcs = dict(arith_method=_arith_method_FRAME, - radd_func=_radd_compat, comp_method=_comp_method_FRAME, bool_method=_arith_method_FRAME) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 519068b97a010..5c7762c56ec6d 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -7,7 +7,6 @@ import numpy as np import warnings -import operator from pandas.compat.numpy import function as nv from pandas.core.common import isnull, _values_from_object, _maybe_match_name @@ -803,7 +802,7 @@ def from_coo(cls, A, dense_index=False): # overwrite basic arithmetic to use SparseSeries version # force methods to overwrite previous definitions. 
ops.add_special_arithmetic_methods(SparseSeries, _arith_method, - radd_func=operator.add, comp_method=None, + comp_method=None, bool_method=None, use_numexpr=False, force=True) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 1e23c87fdb4ca..6ab382beb7973 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -1259,8 +1259,6 @@ def _check_op(arr, op): _check_op(arr, operator.floordiv) def test_series_frame_radd_bug(self): - import operator - # GH 353 vals = Series(tm.rands_array(5, 10)) result = 'foo_' + vals @@ -1273,7 +1271,78 @@ def test_series_frame_radd_bug(self): tm.assert_frame_equal(result, expected) # really raise this time - self.assertRaises(TypeError, operator.add, datetime.now(), self.ts) + with tm.assertRaises(TypeError): + datetime.now() + self.ts + + with tm.assertRaises(TypeError): + self.ts + datetime.now() + + def test_series_radd_more(self): + data = [[1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), + pd.NaT], + ['x', 'y', 1]] + + for d in data: + for dtype in [None, object]: + s = Series(d, dtype=dtype) + with tm.assertRaises(TypeError): + 'foo_' + s + + for dtype in [None, object]: + res = 1 + pd.Series([1, 2, 3], dtype=dtype) + exp = pd.Series([2, 3, 4], dtype=dtype) + tm.assert_series_equal(res, exp) + res = pd.Series([1, 2, 3], dtype=dtype) + 1 + tm.assert_series_equal(res, exp) + + res = np.nan + pd.Series([1, 2, 3], dtype=dtype) + exp = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) + tm.assert_series_equal(res, exp) + res = pd.Series([1, 2, 3], dtype=dtype) + np.nan + tm.assert_series_equal(res, exp) + + s = pd.Series([pd.Timedelta('1 days'), pd.Timedelta('2 days'), + pd.Timedelta('3 days')], dtype=dtype) + exp = pd.Series([pd.Timedelta('4 days'), pd.Timedelta('5 days'), + pd.Timedelta('6 days')]) + tm.assert_series_equal(pd.Timedelta('3 days') + s, exp) + tm.assert_series_equal(s + pd.Timedelta('3 days'), exp) + + s = pd.Series(['x', np.nan, 'x']) + tm.assert_series_equal('a' + s, pd.Series(['ax', np.nan, 'ax'])) + tm.assert_series_equal(s + 'a', pd.Series(['xa', np.nan, 'xa'])) + + def test_frame_radd_more(self): + data = [[1, 2, 3], + [1.1, 2.2, 3.3], + [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), + pd.NaT], + ['x', 'y', 1]] + + for d in data: + for dtype in [None, object]: + s = DataFrame(d, dtype=dtype) + with tm.assertRaises(TypeError): + 'foo_' + s + + for dtype in [None, object]: + res = 1 + pd.DataFrame([1, 2, 3], dtype=dtype) + exp = pd.DataFrame([2, 3, 4], dtype=dtype) + tm.assert_frame_equal(res, exp) + res = pd.DataFrame([1, 2, 3], dtype=dtype) + 1 + tm.assert_frame_equal(res, exp) + + res = np.nan + pd.DataFrame([1, 2, 3], dtype=dtype) + exp = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype) + tm.assert_frame_equal(res, exp) + res = pd.DataFrame([1, 2, 3], dtype=dtype) + np.nan + tm.assert_frame_equal(res, exp) + + df = pd.DataFrame(['x', np.nan, 'x']) + tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax'])) + tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa'])) def test_operators_frame(self): # rpow does not work with DataFrame From 2a96ab7bd9614be79f349975908b42c676a244ab Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 10 Jul 2016 17:21:11 -0400 Subject: [PATCH 12/44] DEPR: rename Timestamp.offset to .freq closes #12160 Author: sinhrks Closes #13593 from sinhrks/depr_timestamp_offset and squashes the following commits: c7749d5 [sinhrks] DEPR: rename Timestamp.offset to 
.freq

---
 doc/source/whatsnew/v0.19.0.txt                    | 32 +++---
 pandas/io/packers.py                               | 11 +-
 .../0.18.1_x86_64_darwin_2.7.12.msgpack            | Bin 0 -> 119258 bytes
 .../0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack      | Bin 0 -> 119206 bytes
 .../0.18.1/0.18.1_x86_64_darwin_2.7.12.pickle      | Bin 127220 -> 127584 bytes
 .../0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle       | Bin 0 -> 125826 bytes
 .../io/tests/generate_legacy_storage_files.py      | 12 ++-
 pandas/io/tests/test_packers.py                    | 28 +++--
 pandas/io/tests/test_pickle.py                     |  6 ++
 pandas/lib.pxd                                     |  1 +
 pandas/src/inference.pyx                           |  5 +-
 pandas/src/period.pyx                              |  7 +-
 pandas/tests/indexing/test_indexing.py             |  2 +-
 pandas/tests/series/test_constructors.py           |  4 +-
 pandas/tests/test_multilevel.py                    |  2 +-
 pandas/tseries/index.py                            |  7 +-
 pandas/tseries/tests/test_base.py                  | 35 ++++---
 pandas/tseries/tests/test_timeseries.py            | 60 +++++------
 pandas/tseries/tests/test_tslib.py                 | 19 +++-
 pandas/tslib.pyx                                   | 98 +++++++++---------
 20 files changed, 187 insertions(+), 142 deletions(-)
 create mode 100644 pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack
 create mode 100644 pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack
 create mode 100644 pandas/io/tests/data/legacy_pickle/0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 40ae38f12fccb..a6c3c0c5d7f79 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -194,7 +194,7 @@ Other enhancements
      pd.to_numeric(s, downcast='unsigned')
      pd.to_numeric(s, downcast='integer')
 
-- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see :ref:`documentation here <text.extractall>` (:issue:`10008`, :issue:`13156`)
+- ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here <text.extractall>` (:issue:`10008`, :issue:`13156`)
 - ``.to_hdf/read_hdf()`` now accept path objects (e.g. ``pathlib.Path``, ``py.path.local``) for the file path (:issue:`11773`)
 
 .. ipython:: python
@@ -202,7 +202,7 @@ Other enhancements
 
    idx = pd.Index(["a1a2", "b1", "c1"])
    idx.str.extractall("[ab](?P<digit>\d)")
 
-- ``Timestamp`` s can now accept positional and keyword parameters like :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`)
+- ``Timestamp`` can now accept positional and keyword parameters similar to :func:`datetime.datetime` (:issue:`10758`, :issue:`11630`)
 
 .. ipython:: python
@@ -227,8 +227,7 @@ Other enhancements
 - Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`)
 - The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`)
 - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`)
-- A ``union_categorical`` function has been added for combining categoricals, see :ref:`Unioning Categoricals<categorical.union>` (:issue:`13361`)
-- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
+- A top-level function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals<categorical.union>` (:issue:`13361`)
 - ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`)
 
 .. _whatsnew_0190.api:
@@ -238,9 +237,16 @@ API changes
 
 
 - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
+- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
 - An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`)
 - Calls to ``.sample()`` will respect the random seed set via ``numpy.random.seed(n)`` (:issue:`13161`)
 - ``Styler.apply`` is now more strict about the outputs your function must return. For ``axis=0`` or ``axis=1``, the output shape must be identical. For ``axis=None``, the output must be a DataFrame with identical columns and index labels. (:issue:`13222`)
+- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`)
+- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`)
+- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
+- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
+- ``__setitem__`` will no longer apply a callable rhs as a function; it is stored instead. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
+
 
 .. _whatsnew_0190.api.tolist:
 
@@ -361,7 +367,7 @@ We are able to preserve the join keys
    pd.merge(df1, df2, how='outer').dtypes
 
 Of course if you have missing values that are introduced, then the
-resulting dtype will be upcast (unchanged from previous).
+resulting dtype will be upcast, which is unchanged from previous versions.
 
 .. ipython:: python
 
@@ -419,17 +425,6 @@ Furthermore:
 - Passing duplicated ``percentiles`` will now raise a ``ValueError``.
 - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`)
 
-.. _whatsnew_0190.api.other:
-
-Other API changes
-^^^^^^^^^^^^^^^^^
-
-- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`)
-- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`)
-- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
-- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
-- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
-
 .. _whatsnew_0190.deprecations:
 
 Deprecations
@@ -439,6 +434,7 @@ Deprecations
 - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`)
 - ``as_recarray`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13373`)
 - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`)
+- ``Timestamp.offset`` property (and named arg in the constructor) has been deprecated in favor of ``freq`` (:issue:`12160`)
 
 ..
_whatsnew_0190.performance: @@ -503,7 +499,7 @@ Bug Fixes - Bug in ``pd.read_csv()`` in which the ``nrows`` argument was not properly validated for both engines (:issue:`10476`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which infinities of mixed-case forms were not being interpreted properly (:issue:`13274`) - Bug in ``pd.read_csv()`` with ``engine='python'`` in which trailing ``NaN`` values were not being parsed (:issue:`13320`) -- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a tempfile.TemporaryFile on Windows with Python 3 (:issue:`13398`) +- Bug in ``pd.read_csv()`` with ``engine='python'`` when reading from a ``tempfile.TemporaryFile`` on Windows with Python 3 (:issue:`13398`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` kwarg from accepting single-byte unicode strings (:issue:`13219`) - Bug in ``pd.read_csv()`` that prevents ``usecols`` from being an empty set (:issue:`13402`) - Bug in ``pd.read_csv()`` with ``engine=='c'`` in which null ``quotechar`` was not accepted even though ``quoting`` was specified as ``None`` (:issue:`13411`) @@ -516,7 +512,7 @@ Bug Fixes - Bug in ``pd.to_datetime()`` when passing invalid datatypes (e.g. bool); will now respect the ``errors`` keyword (:issue:`13176`) -- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, `int16`` dtypes (:issue:`13451`) +- Bug in ``pd.to_datetime()`` which overflowed on ``int8``, and ``int16`` dtypes (:issue:`13451`) - Bug in extension dtype creation where the created types were not is/identical (:issue:`13285`) - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 23aa133125213..ff06a5f212f8b 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -481,12 +481,12 @@ def encode(obj): tz = obj.tzinfo if tz is not None: tz = u(tz.zone) - offset = obj.offset - if offset is not None: - offset = u(offset.freqstr) + freq = obj.freq + if freq is not None: + freq = u(freq.freqstr) return {u'typ': u'timestamp', u'value': obj.value, - u'offset': offset, + u'freq': freq, u'tz': tz} if isinstance(obj, NaTType): return {u'typ': u'nat'} @@ -556,7 +556,8 @@ def decode(obj): if typ is None: return obj elif typ == u'timestamp': - return Timestamp(obj[u'value'], tz=obj[u'tz'], offset=obj[u'offset']) + freq = obj[u'freq'] if 'freq' in obj else obj[u'offset'] + return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq) elif typ == u'nat': return NaT elif typ == u'period': diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_2.7.12.msgpack new file mode 100644 index 0000000000000000000000000000000000000000..978c2c5045314bbac06fd0965346f15cfb9db120 GIT binary patch literal 119258 zcmeEqWq965x}{3Y%<#owW@ct)2FH*%i6OSb%uEh*97C9ynVFfHnQ86V>7LuSd+y9! 
[base85-encoded binary data elided]

literal 0
HcmV?d00001

diff --git a/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack b/pandas/io/tests/data/legacy_msgpack/0.18.1/0.18.1_x86_64_darwin_3.5.2.msgpack
new file mode 100644
index 0000000000000000000000000000000000000000..ea8efdc86dd2d45a151ae49e78790a694a115d4f
GIT binary patch
literal 119206
[base85-encoded binary data elided]
zN~fgL(i!QjbWS=iU63wHm!!+m73r#UO}Z}KkZwx1q}$RR>8^B7x-UJD9!ig-$I=t& zsq{>GF1?UmO0T5X(i`cm^iFy&eULs%pQO*y7wN0?P5LeY8Ie&LlW`f!giOkmOv{YS z%ACy0f-K6CEX#_l%9^aphHT1~Y|D=9%AV}Y0dg`qxtu~yDW{TC%W34aaymJ^oI%bg zXOc6^S>&v8HaWYTL(VDZl5@*>;l55L#1ygWgkC{L0n%Twg3 z@-%t6JVTx-&yr`$bL6>ltQ;rD%k$*<@&b9GyhvUwFOiqZ%jD(q3VEfxN?t9mk=M%W zPk3HhXa zN+%ixrhH4jE#Hyv%J<~^@&oyy{78N*Karox z&*bOw3;Ct|N`5WBk>ASiDq(UjQ!YHi5 zDZC;mq9Q4>qA04ODY{}PreZ0!;wY}-DZUb*BvXivMV{1oJuYww~|N6tK?JiD+QE-N+G4NQbZ}L6jO>TC6tm%DW$YhMk%Y5 zQ_3q9l!{6vrLs~*sj5^{sw*{=no2FDwo*r_tJG8KD-D!}N+YGQ(nM*hG*g-@fl3Rd zr4pnBE3K5)N*g6aX{)qTLX|KjT!~O3mG(-M60O829h8nrC#AE}Md_+^Q@SfXl%7g2 zrMJ>Y>8tcp`YQvJfyy9durfp$sti+xDH63l$pvbWwtU$nXAMqaZ0>0PnoYQP!=kSl*P&tWvQ}ES+1;5Rw}EM)yf)Wt+Gy8 zuWV2@Dw~we$`)m-vQ62p>`-gPAaFA z)5;m;ta45{uUt?rDwmYY$`$3Ra!t9e+)!>Rx0Kt;9p$caPr0u=P#!9el*h^w<*D*a zd9J)rUMjDY*UB5^t@2KJuY6EGDxZ|k$`|FU@=f`!02NVD6;p8)s)S0aluE0N%Bq~o ztAZ-3k}9i;s;Zi*tA=W-mTIex>Z+dVs{v{$szv}!svy_!MIsAf_# zt69{nYBn{ynnTU0=2CO3dDOgWJ~h8uKrN^iQVXj^)S_xJwYXYBEvc4LORHtnvT8ZC zyjnr6s8&)dt5wvhYBjaGT0^a=)>3P$b=10QJ+;2tKy9ctQX8vH)TU}PwYeIowoqHD zL29ttN^PySQA5(LLI4&Qb(&})UoO~b-X%3ov2PyC#zG`sp>R! zx;jIhsm@Yot8>)3YOETk#;fzx`RW37p}I(2tS(WPs>{^n>I!wGx=LNGu2I*j>(uq? z26dylN!_e&QManw)a~jHb*H*Z-L39X_p1BU{ptbrpn6C>tR7L1s>jsh>IwCvdP+U5 zo>9-L=hXA+1@)qONxiIIQLn1k)a&XE^`?4Dy{+C+@2dCI`|1Prq54RDtUghns?XHt z>I?Oy`bvGRzER(*@6`9|2lb=+N&T#TQNOC+)bA?L5DnEZ4cDMXXrxAIw8m(x#%a7J zXrd-*vZiRNrfIrnXr^Xqw&rNA=4rkbpe56iYbmspS}HBImPSjfrPI=D8MKUACM~m; zMa!yX)3R$hw47QlEw`3O%d6$n@@oaOf?6T1uvSDXsuk0UYbCUjS}CoxRz@qUmD9>= z6|{<4C9SenMXRb+)2eGVw3=Eit+rN2tE<)1>T3<1aL~E$V{hHE3Vk=iJ2v^GW?tBupfYZJ7I+9YkVHbtANP1B}p zGqjo7EN!+nN1Lm~YH?b;Hcy+cEzlNfi?qes5^brrOk1w4&{k@zwAI=gZLPLWTd!@< zHfo!+&Ds`itF}$suI|y6YZ(?Ona`q z&|YeXc6FjLzzu&g+6M z>XI(&imvLKuIq+w>XvTnj_&H7?&|@1GCjGTLQkou(o^ec^t5_9J-wbm&!}h8GwWIO zta>&*yPiYOspryj>v{CNdOkhBUO+FX7t#ysMf9S2F}=86LNBS8(o5@Q^s;(6y}VvQ zuc%kjE9+JCs(LlOx?V%Csn^nL>viOp$2 z-b!z+x6woNwt71~R1ed`^$0yuZ?8w`(Rz&DLGP${(mU&2^sah0y}RB+@2U6Fd+UAl zzIs2szdk@8s1MQy>qGRR`Y?UCK0+Ug)9N`UZWY zzDeJ#Z_&5v+w|@F4t=M-OW&>U(f8{6^!@q){h)qGKdc|okLt(tgV+H`UU->eo4QqU(v7X*YxZ94gIEmOTVq((eLW_^!xe){h|Iyf2=>zpX$%_=lTo% zrT$8Pt-sOV>hJXT`Um}^{z?C=f6>3{-}LV~Fc1SZFatNBK^UY#8MMI|tic()AsC_| z8M2`ms-YRWVHl=i8MfgVuHhNJ5nv=Uk{c>v*lz4Fb{e~k-Nqhcud&bAZyYcV8i$O-#u4MFam+YwoG?xrr;O9a8RM*R z&Ny#eFfJOGjLXIqNn~cetoXMMlDVmZg zn~JHLnyH(HX_}U4n~v$4p6QzbW->FmnZitIrZQ8TY0R`{Iy1eQ!OUo8GBcZ5%&cZM zGrO6?%xUH_bDMe0ykIkUW3!K`Rj zGAo-^%&KNJv$|QstZCLVYnye+-!_5dY(rj-=nbBs9*}?2+b}~DgUCgd#H?zCh!|ZAHGJBhS%)Vwn zv%fjO9B2+Q2b)98q2@4ixH-ZcX^t{Sn`6we<~Vb_Il-K0PBJH(Q_QL6G;_K+!<=c( zGH07}%(-T)8E3|u^UV3?0&}6c$XskLF_)Ul%;n|^bEUb;Ty3r~*P83h_2ve1qq)i4 zY;G~Pn%m6n<_>eGxy#&b?lJe8`^^330rQ}F$UJNwF^`(Z%;V+>^Q3voJZ+va&zk4V z^X3KfqIt=@Y+f<1n%B(h<_+_vdCRZYE`qUTQ#hjRxPWxRmZAp)wAkb4XlP%Bdf91#A<3avzl9hRtu}86=Vflt*q8o z8!NS}efx?4T0o>nibx7Ek$YxT4G zTLY|t)*x%JHN+Zf4YP(@Bdn3uC~LGe#u{slv&LH!tclhnYqB-Pnrcn6rdu& z7Hg}u&Dw75uy$IztlicgYp=D>+HW1O4qAt-!`2b&sCCRbZk@1BTBoej)*0)pbX&AM*gux?tntlQQd>#lXrx^F$O9$JsA$JP_;srAfyZoRNxTCc3v z)*I`s_0D>4eXu@SpRCW;7wfC_&H8Qu8?jLvvvC{RgiYF%P1}sk+MLbXf-TyTE!&E% z+M2D~hHcuGZQG9R+Mey(0d_Jwxt+pJX{WMN+iC2yb~-z~ox#p%XR(!9vTNIQ?7DV6yT0APZfG~M8{19nrgk&CxgBV?uv^+ecCg*bZf&=* zL+rM8J3G`4v%~EOJJN1%N7>PKjNQTRXm_$Z+gKq9%+xVN84lUvGzE7ygk95Xiu^y+f(eR_B4CCJ;R=9&$4IR zbL_cxtQ}{^+w<)C_5ypMy~ti{FR_=}%k1U$3VWr!%3f`+vDez`?Dh5rd!xO{-fVBN zx7yq6?e-3Pr@hPGZSS%7+WYMN_5u5#eaJp+AF+?x$L!-G)%rhUu4ZQrr)+V|}H_5=H&{m6c7Ke32A{2X!z9cc4Q!q(eEh!#J$NIlLn{q9Zx7qd2Og zIl5ywreis_<2bJ4IldF%By*BGDV&r}Dkrs*#!2g>bJ9B*oQzH;C$p2q$?9ZtvO77P 
zoK7w$x0A=o>*RCtI|ZDAP9dkTQ^YCi6myC@C7hB@DW|kk#wqKRbILmvoQh5*r?OMU zsp?d7syj8DnocdJwo}Kc>(q1VI}MzMP9vwW)5K}&G;^9efldpjr4!@?JFT47P8%o0 zY3sCeLY*)t+=*}^o%T+Y6Yaz}9h{C%C#SR1#p&vFbGkb{oSsfEr?=C`>Fe}!`a1)h zfzBXjurtIN>I`#+J0qNt&M0TJGsYR~jC0026P$_8BxkZS#hL0%bEZ2poSDunXSOrP znd`(laZbE5&zbKma27g?oW;%(XQ{KyS?;WGRywPk)y^7ct+UQq?`&{3I-8u$&K75@ zv(4G=>~MBEyPVz59%rw!&)M%Ba1J_$oWsr$=csecIqsZrPCBQY)6N;^taHve?_6*$ zI+vWw&K2jXbIrN#+;DC>x18I~9p|od&$;hBa2`63oX5@+=c)6|dG5S$UOKOw*UlT~ zt@F-#?|g7RI-i`+&KKva^UeA002gsl7jtnJx`a!*luNse%etJ)yMimak}JE4tGb%2 zyM}AJmTS9?>$;xny8&)8H@Ta_P3fj`Q@d&0v~D^#y_>GpDayM5fgZa=rbJHQ?2 z4sr*(L)@Y6Fn72+!X4?3a!0#k+_COBcf32no#;+-C%aSJsqQp)x;w+2>CSRzyK~&R zZmb*U#=G;}`R)RDp}WXk>@IPay35?)iG326v;o$=&R3akskL z-0kiTcc;6{-R>hECy2sq(?g{s#d&)iSo^j8*=iKw|1^1$R z$-V4eaj&}9-0SWQ_ojQxz3tv{@4ENg`|bnxq5H^v>^^ayy3gF_?hE&&`^tUozH#5W z@7(w92lu1<$^Gnpalg9X-0v>%5D)b*5BH!)c%(;pw8wa?$9cRbc%mnHvZr{er+K<( zc&2B0w&!@R=Xt&t;3e~tdnvq>UMerOm&Qx$rSsBz8N7^MCNHy>#mnkt^Rjz6yqsPx zFSnP+%j@Oy@_Plmf?gr7uvf$@>J{^fdnLS*UMa7%SH>&rmGjDb6}*aGC9kqq#jEO7 z^QwC_yqaDuueMjmtLxSC>U#~mhF&ADvDd_F>NWG4dx2gHuca5{1$(W$)?OPg#B1xd z^FqBaFWigpBE9xrlo##AcpbcsUMH`!*Tw7Vb@RG=J-nV?FR!=P$Ls6$^ZI)Oyn)^z zZ?HGS8|n@7hI=Etk=`h8v^T~Z>y7iqdlS5g-Xw3bH^rOkP4lLEGrXDJEN`|q$D8ZL zdU0O7H_w~zE$|k4i@e3&5^t%u%v_h^Tkmb~HhP=9&E6JotGCVD z?(Oh)db_;c-X3qSx6j+}9qbult5? z`j&6|j_>-O@B0CMGC#SW!cXa^@>Bb1{Iq^LKfRy9&**3JGy7TmtbR5>yPw0)>F4ru z`+5Alem+0HU%)Tu7xD}HMf{?EF~7K9!Y}EU@=N<={IY&Izr0_;ujp6uEBjUas(v-U zx?jVu>DTgW`*r-fem%dw-@tF^H}V_%P5h>QGrzeX=(q4&`ayoM-^y?8xA8;#wthQ5 z)DQE+{RltOZ|_I>(SD5I!SCpI@;m!o{H}gCzq{YV@9FpQd;5L-zJ5Qyzdyhq=nwJ- z`$PPp{xE;IKf)jBkMc+RWBjrHIDfo9!Jp_)@+bRK{Hgvlf4V=zpXtx?XZv&fxqhr4 z=g0f={Q3R@f1$s~U+gdOm-@^6<^BqPrN7Ew?XU6I`s@7l{sw=ezscY1Z}GSK+x+eR z4u7Y=%irzq@%Q@s{Qdp`|Db=!KkOgzkNU^_ zU-7T{*Zk}L4gaQp%fId4@$dTg{QLd`|DpfLf9yZ;pZd@I=l%=-rT@x*?Z5Hg`tSVr z{s;e~|H=RCfAPQi-~8`B2tWeR04x9xfB{4R89)Wl0ZhREW9=V;Mu`@!O?T~O+sNtp zE!(zj+qP}nwr$(CZQHDU|MQ->5!I@Qs@ZvBHZr>-vol7NAZidTh#tfUVg|8-*g>2i zZV)esA0!A828n{iL6RV8kSquS5y(IVIxv9^T;PL{AbF4?NExIGQU_^*v_ZNceUKr@ z7-R}E2U&uwLAD@!kR!+$fLrN(W_v zvO&3^d{7~%7*q->2UUWqLA9WIP$Q@r)Cy_`b%MG^a^?heS*G0zo36GAQ%`73I+#5 zf}z2%V0bVh7#WNTMh9bpvB9`td@v!H7)%N#2UCKn!L(p{Fe8{5%nD`)bAq|SykLH? 
zAXpeI3Kj=Tf~CQ-V0o}2SQ)GeRtIZ>wZXbzeXt?e7;FkQ2U~)z!M0#~up`(R>cFAUGHt3JwQHf}_E);COH%I2oJ@P6ua#v%$IGd~hMS7+eZ22Umit!L{Ie za3i=G+zM_7cY?dYz2JWEAb1!&3LXbff~Uc=;Cb*Ocp1D3UI%Z2x52yMeefap7<>vo z2Va7(!MEUh@FVyc{0e>te}cckzaW(Oj|eToh_E7@2rnXth$51RETV|0BASRUVu+X` zmWVClh`1u2h%XX|gd&khERu+%BAEz;5K<_ig%MUb;YEl@E>eh;B9%xj(ulMook%Y- zh>RkW$SksmtRkDpE^>&RBA3W5@`$`5pU5u?h=QV!C@hMIqN126E=q`!qLe5t%80U} zoG33Uh>D_;s4S|8s-l{xE^3IHqL!#F>WI3co~SPxh=!t(Xe^qDrlOf>E?S6|qLpYZ z+K9HIooFvQh>oI@=q$R3uA-ahE_#TbqL=6``iQ=wpXe_Jh=F2|7%YZ}p<2p7m@KA?(@VwG4e)`+!Yomek6h>c>C*ete)tzw(lE_R5WVwc!0_K3Y=pV%)Bh=bygI4q8c zqvDu2E>4J(;*>Zo&WN+(oH#Eoh>PNqxGb)StKyotE^dgM;+D8A?ufhMp13a_h=<~l zcr2cXr{bA-E?$V2;+1$U-iWv2op>)kh>zlv_${)oTgp9m%Y zBSXtDGOP?I!^;RVqKqUX%P2Ccj3%SY7&4}eC1cAtGOmm#!DOeIsxG%~GBC)3LeGNa5SGs`S8tIQ^|%N#PN%q4TnJTkA$ zC-ch!vY;#^3(F$1s4OOn%M!ArEG0|JGP0~JC(FwUvZAacE6Xafs;nlf%Nnw#tR-v9 zI*ovZL%IJIgMztL!Gb%O0|) z>?M24KC-XuC;Q6*a-bX}2g@OHs2nDT%Mo&<93@A~F>Fca;e-ZkIJj^sr;&dDyRyn!m5ZWs*0)Ns)Q=3N~zMS zj4G?jsq(6Vs;DZd%BqU0s;a5#s)nkmYN^_)j;gEbsrsscYN#5i#;S>Gs+y_hs)cH) zTB+8mjcTjfsrIUa>Zm%Y&Z>**s=BG}s)y>Sda2&3kLs)Xss3t!8mI=T!D@&as)niI zYJ?i8Myb(ij2f%Psqt!pny4nJ$!dz4s-~&wYKEGrW~td~j+(3HsrhPwTBsJO#cGLK zs+OtcYK2;V!I}PN~!Cj5@2%sq^ZBx~MLx%j$}{s;;T)>V~?hZmHYqj=HPvsr%}I zdZ-?$$LfiCs-CIm>VWBKNeyQK;kNT_r zsZjbqI!|HH4ypEtF>PR}Wj-sRLXga!%p=0V;I<}6ZO?xRPNI|Q zWIE77ORcolMqBN)*C9H&PN7rkR64azqtohiI=#-IGwMt_v(BQk>TEi@&Y^SaTspVT zqx0%~I=?QU3+h6;ur8vD>SDUME}={6Qo6J*qs!`Yy1cHSE9y$RvaX`5>T0^WuAyt{ zTDrEbqwDH=y1s6p8|p^7v2LQ9>SnsRZlPQ1R=Txrquc6sy1nk8JL*olv+kn1>TbHb z?xB0?Ub?sLqxS21g9-&9-QF^oS=nqo}p*zS$ejfqvz^*dcIzu7wScNv0kE=>ScPlUZGd&ReH5vqu1(ndcEGDH|kA# zv)-b&>TPSOx2KA}(QQ~I<%qtEJd`nTCMCzM*gGTl%)Xqwnf_`o4akAL>W?v3{bT>Sy}7exYCLSNgSnqu=Ux z`n~?3Kk85Vv;LyL>TmkH{-J;BU;4NHqyOrEI+Xd332nlduqK=dZz7n8CX$J4qL`>A znu%^=n3yJ(iEZMTxF(*7ZxWbOl9;3>nF)+A(kP>iG1fTaO^8WuQkaw`l}T;V zn6xIHNpCWkj3$%GY_gcFCY#A_a+sVZm&tANn7k&R$!`jnf~JruY>JqorkE*iN|=(S zlqqe>n6jpvDQ_y6il&mOY^s>5rkbg4YM7d)mZ@#(n7XE(sc#yXhNh8eY?_#+rkQDO zT9}rmm1%9-n6{>!X>U51j;538Y`U1Prkm+*dYGQ3m+5W#n7*c;>2C&@fo6~yY=)Sj zW|$dnMwpRilo@Ttn6YM@8E+<-iDr_SY^Ip0W}2C9W|)~~mYHqln7L-2nQs=Dg=Udi zY?hd%W|>)TR+yD$m04}pn6+k|S#LI&jb@YCY_^!KW}De=c9@-Jm)ULhn7wA7*>4V* zgXWMqY>t?t=9oEdPMDMClsRqAn6u`bId3kQi{_HKY_6EA=9;-~ZkU_qmbq>2n7ihl zxo;krhvt!aY@V2>=9zhJUYM8Wm3eL6n78Jgd2c?LkLHv4Y`&PU=9~F$ewd%;m-%h} zn7`(q31$CdL)$PmtPN+w+XyzIjbtO+C^o8%W~18}Hl~ebW7{}3u8n8o+XObDO=J_> zBsQr{W&)Qslp>1Rv+a|WDZDyO>7Ph5rWn0@ewykYv+uIJdqwQon+b*`N z?Pj~%9=50LWqaE`wy*7H``ZC_pdDlf+aY$S9cG8y5q6{>cCX!M_uB*Zpgm*{+ava%zJ4E`p2bBDu&e zii_%^x#%v2i|Jyy*e;HX>*BfiE`dwv61l`KiA(B|xxfi0opRb4XPtB2g}CG{g-hvD zxzsL=OY73P^e%(T=rXy?E{n_RvbpRohs)`5x!f*~%j@#F{H}m2=nA>Qu81q@in-#h zge&PvxzetTE9=U+@~(ob=qkC&u8OPbs=4Z}hO6mnx!SIdtLy5y`mTX%=o-1ku8C{v znz`n#g=^_rxz?_YYwOy%_O65L=sLO1u8ZsHy1DMIhwJHjx!$gi>+AZt{%(L9=mxpL zZipM|hPmNxgd6EbxzTQn8|%io@os{f=q9(hP&x*x!dlJ zyX)?``|g2z=pMPp?umQqp1J4lg?s5-O`|JL>Q2swYv=8IM`fxtHkKiNvNItTU;-mU#KDv+LWBOP=wvXfE`glIRPv8^! 
zL_V=k;*-u`WzHi_g`bNI7Z{nN!X1=*^;amDvzO`@T+xm9Cz3<>V`cA&H z@8Y}qZoa$k;d}aCzPIn=`}%&qzaQWS`ayoMAL57lVSczD;Ya#WezYIs$NF)8yr1AF z`bmDWpW>(bX@0t&;b;0;ezu?E=lXemzF*)M`bB=RU*ebgWq!F|;aB=qezjlY*ZOsS zz2D$B`b~bb-{QCWZGOAo;dlC7ez)J__xgQ)zdzs)`a}M(KjM%2WB#~5;ZOQg{tc|Hn8#b_8EV_;1D?`+8bIk7Pg#>IFT9~0pJZrLQ_o*0v0QcQ*c z3Y4f&qd|)fJ%-@_?#ra$o)S}GYD|M^F&(DI|5h9RpOXY6LAtw#wj=zr{Q#*firOy z&c-=77w6%8T!0I45iZ6hxD=P+a$JEcaTTt{HMkbn;d@fE(tH~1Fc;d}gmAMq1@#xM94zu|ZMfj{vV z{>DG}7ysdZYiIw@`45K1Fc=oYVR(#y5it@*#wZvSqhWN6fiW=_#>O}p7vo`kOn?b7 z5hlhYm=u#?fC42d)M(J6LysYt98+LQOogd24W`9(m>x4=M$CknF$-qJY?vK$U{1`1 zxiJss#eA3_3t&MkgoUvP7R6#%97|wHEQO`943@=mSRN~2MXZFCu?kkjYFHg>U`?!r zwXqJ?#d=sD8(>3hgpIKYHpOPx99v*ZY=y0{4YtL0*d9AzN9=^1u?u#^ZrB}rU{CCY zy|EAW#eUcy2jD;)goAMi4#irsL98cg$JcXz644%bvcpfj{MZAQU@d{qWYj_=R;7z=R zxA6|%#d~-kAK*iLgpctFKE-GF9ADr|e1)&^4Zg*9_#QvtNBo4J@e6*%Z}=U5;7|O8 zzwr3IVV;qc&@i0Cnz=W6x6JrugipelQ zff5yJG-%PG#}G`8DKI6b!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5= zupkz~!dL{0Vlga^C9oux!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*SeNC+@=CxCi&*KHQH7 z@E{(-!*~Rb;xRmqC-5Ym!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+( z@F70J$M^)F;xl}XFYqP4!q@l)-{L!bk00VrVV`vP6VKE$r#|Rh^BVlBWf>ALVM#mT!6Jud)jDvA89>&K6m=F_TVoZWbF&PFZ zP@+PO1}!@D7=p<$1*XJQm>SbyT1i(0EQZCg1eU~7SQ^Vw}aN>~}IU{$P!)v*TF#9CMz>tJ21hxM@m zHpE8Q7@J^IY=+IT1-8Ui*c#hlTWp8zu>*F*PS_c{U{~yh-LVJu#9r7N`(R(}hy8H? z4#Yt?7>D3c9EQVj1dhZ}I2y;`SR9AraRN@nNjMp&;8dK3({TpQ#925S=ipqNhx2g( zF2qH+7?_uyXKhx_pW z9>ha<7?0plJch^d1fIlGcpA^(Sv-g5@d94NOL!Tt;8nba*YO74#9Me9@8Dg$hxhRT zKEy}(7@y!%e1^~Q1-`^r_!{5fTYQJ_@dJLuPxu+X;8*;H-|+|j#9#Ou|KMNzhyNX% z`=9*B&=>~8VmJ(s5ilY~!pIl}qhd6SjxjJM#=_Vb2jgNqjE@O0Atu7am;{qzG7M0l zM1>j+T6E|!1e0S5Oo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!TxiB~8!MvCc^J4)l zh=s5)7Qv!e42xq4EQzJCG?u}#SPsi$1+0jburgM`s#p!HV-2i{wXinU!Ma!v>th3K zh>fr@Ho>OY44Y#MY>BO~HMYUF*bduc2keNQurqeSuGkH`V-M_!y|6d-!M@lJ`{Mu{ zh=Xu24#A-~42R*ZsI1b0-1e}PIa57H8sW=U%;|!dMvv4-f!MQjO=i>rg zh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3yKpz|!M(T-_u~OP zh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;utx9~RJ!Mk`5@8bh} zh>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86azwkHy!N2$q|2sGP zKlzWLF${*qa2OsVU_^|BkueHJ#b_8EV_-~-g|RUX#>IFT9}{3gOoWLs2`0s47@$Cj z3N;$E=+I*bCdU+*5>sJnOoM4L9j3<&m=QB!X3T_y7RM4;5=&ueEQ4jS9G1rlSP?5>Wvqf#u^Lv#8dwu+VQs8~b+I1S#|GFC z8)0K?f=#g*Hpdp&5?f(wY=dpF9k#~~*bzHnXY7Jqu^V>B9@rCmVQ=h%eX$?*#{oDH z2jO5GfxDhwuX54~XaT{*O9k>&B;cnc6dvPD`#{+l} z58+`vf=BTf9>)`S5>Mf2JcDQP9G=Guco8q*WxRq{@fu#o8+a3M;cdKwckv$H#|QWj zAK_zsf=}@oKF1gM5?|qKe1mWC9lpm8_z^$hXZ(U+@f&`}ANUi0;cxtdfAJswcWnND z@*hKE7z~TyFg!-Uh!_bYV-$>v(J(s3z?c{dV`ChQi}5f%CcuQ42oqxxOp3`cK!Fk! zYBXrkp~nzRjwvuDroz;i2Ge3XOph5bBWA+Pm<6+9Hq4GWFem21+?WURVm{1|1+X9% z!opYti()Y>jwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n4X`0L z!p7JHn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V? 
z!ofHMhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW z!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s z!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A z!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y(-(f&9nN z7zV>)I1G;wFd|06$QT8qVl<47F)${^!q^xG<6=CFj|ng#Cc?y+1e0Pi3{ap%g&GZ7 zbm%bzlVb`@iK#F(roptB4%1@>%!rvVGiJf8m<_XI4$O(UFgNDGyqFL3V*xCPg|ILd z!J=3Ui(?5aiKVbKmcg=E4$ET&tcaDcGFHK=SPiRV4XlZ^ur}7gx>yhEV*_l6jj%B` z!KT;@n_~-XiLJ0Tw!ya84%=e~?1-JPGj_qQ*bTd55A2D(us8O>zSs}@;{Y6pgK#ho z!J#+|hvNtwiKB2dj=`}w4#(pJoQRWfGETv%I1Q)c44jFxa5m1txi}B!;{sfWi*PY6 z!KJtim*WatiK}omuEDjq4%g!b+=!cSGj74HxDB`C4%~^ma5wJ3y|@qe;{iN~hwv~S z!J~K#kK+kEiKp;1p24$t4$tESyoi_ZGG4)}cnz=P4ZMlB@HXDTyLb=p;{$w%kMJ=* z!Ke5PpW_RBiLdZAzQMQn4&UPk{D`0MGk(FZ_zl0~5B!P0@HhU!zxWSBMI`?*G={;j z7!Jc@1dNE0FfvBLs2B~SV+@Rmu`o8q!MGR?<6{C$h>0*UCc&hb3QK3eI79DyF z!Q_|%Q(`JijcG6~ro;4@0W)GI%#2wuD`vy&m;-ZSF3gR2FfZoA{8#`BVj(PyMX)Fq z!{S&1OJXT3jb*Sbmc#N`0V`r9tc+E#DptelSOaTfEv$`ourAia`q%&)Vk2yfO|U68 z!{*omTVgA0jcu?kw!`+=0Xt$R?2KKoD|W-~*aLfFFYJwdurKz*{x|>!;vgK1LvSb# z!{ImrN8%_Pjbm^uj>GXd0Vm=loQzX&Do(@cI0I+mES!yVa4ycn`M3ZV;v!s(OK>SJ z!{xXFSK=yMjcaf%uEX`X0XO0%+>BdrD{jN>xC3|MF5HcKa4+t|{dfQm;vqbYNAM^f z!{c}YPvR*&jc4#Ip2PEa0Wabuyo^`yDqh3ucmr?ZExe6)@GjoN`}hDK;v;;FPw*)| z!{_({U*ao#jc@QRzQgzU0YBm={ET1lD}KZ8_yd39FZ_*v@Gt(uP?5-g42@whEQZ7I z7y%<r%PphbrsLohj} zz?7H@Q)3!Ti|H^uX26V?2{U6B%!=7CJLbTgm;O(V-YNh#jrS* zz>-)BOJf-us$}xhS&%jV-swO&9FJP zz?RqwTVoq+i|w#IcEFC<2|HsK?26s6JNCey*b94OAMA_$us;sKfj9^U;}9H*!*Do` zz>zo#N8=bAi{o%SPQZyc2`A$eoQl(MI?lkEI16Xv9Gr{ua6T@;g}4Y8;}Tqo%Wyfa zz?HZPSK}I7i|cSbZorMW2{+>w+=|<9JMO@pxC?jV9^8xja6cZvgLnuJ;}JZH$M86w zz>|0iPvaRpi|6n>UcifZ2`}Rnyo%TGI^MvWcnfdi9lVS8@IF4khxiB|;}d*}&+s|E zz?b+6U*j8mi|_C~e!!3T2|wc({EFZ3JO03*_zQpIAN-5|FjQpnA46jp42$6~JVwBX z7zra|6pV_|FgnJ-m>3IVV;qc&@i0Cnz=W6x6JrugipelQff5yJG-%PG#}G`8DKI6b z!qk`s(_%VIj~Or{X2Q&v1+!u{%#JxQC+5Q3mKFp5=upkz~!dL{0Vlga^C9oux z!qQj<%VIe!j}@>YR>I0y1*>8;td2FXCf35*SO@E3J*SeNC+@=CxCi&*KHQH7@E{(-!*~Rb;xRmqC-5Ym z!qa#L&*C{ej~DPFUc$?G1+U^YypA{UCf>r^cn9y|J-m+(@F70J$M^)F;xl}XFYqP4 z!q@l)-{L!bk00nchFeb*r*cb=nVmyqG2{0ih!o-*alVUOqP@qJG8Vy=>=rIJ7V+u@(sW3IB z!L*nT(_;qAh?y`mX2GnO4YOko%!#=$H|D{-m=E(~0W64xurL7)R4Xa}ftckU-HrBzqSP$!C18j(murW5lrq~RdV+(AFt*|w= z!M4~A+hYgph@G%AcEPUL4ZC9x?1{awH}=84*bn>T033*ea4-(Rp*ReO;|Lsyqi{5i z!Lc|F$KwQ?h?8(KPQj@-4X5J_oQbn=HqODhI1lIJ0$hlTa4{~yrML{2;|g4ft8g{0 z!L_&!*W(7kM z!LxV{&*KHWh?np(Ucsw)4X@)3yotB)Hr~Ozcn|O61AK^&@G(BYr}zw?;|qL=ukba# z!MFGh-{S}Th@bE?e!;K!4Zq_L{E5HtH~zuD_zy!xCI2xrhQY8H4#Q&vjEIpiGDg9u z7!9Li42+4fFgC`)xEK%PV**Twi7+uH!K9cB0~9Dxp+SI818ZU}tc`WBF4n{P*Z>=1BW#RKuqigf=GX#TVk>NoZLlr2 z!}iz#J7Op7j9suRcEj%21AAgG?2Ub}FZRR!H~D z!}YiUH{vGTj9YLkZo}=k19##s+>Lv1FYd$rcmNOLAv}yn@F*U`<9Gs3;we0hXYeeZ z!}E9nFXAP8n18?Fjyp4D8F5biY_y8Z`BYccc@F_mS=lB9&;wyZOZ}2U? z!}s_BKjJ6+j9>68e#7th1ApQ#{EdI`FaE<&(a3)cjbSh>hQsg}0V850jEqq*Dn`TT z7z1NsER2nDFfPW!_?Q3_Vj@h8NiZoU!vFJs)Gh-IairFwb=D?ho3v**0%!~OjKNi4(SO^Pa5iE+uusD{$l2{5$V;L-q<*+*1(!r3u|K?tc&%qJ~qIH*a#bA6KsmjusOECme>kgV;gLX?XW#| zz>e4nJ7X8@irug~_Q0Ol3wvW9?2G-dKMufwI0y&h5FCoba5#>@kvIxR;}{%^<8VAq zz==2sC*u^Hiqmj9&cK;C3uogToQv~tJ}$t8xCj^H5?qSQa5=8PmADF5;~HFx>u^18 zz>T;GH{%xEira8I?!cY63wPrl+>85gKOVq?cnA;U5j={=@Hn2plXwbG;~6}Q=kPpU zz>9bZFXI)wir4Tu-oTr93vc5cyo>knK0d&Q_y`~46MTx#@HxJ~m-q@_;~RX7@9;f- zz>oL|KjRntir?@%{=lF33xDGu{EPoERCMwmLt_{Wi{UUlM!<*|2_s_^jEd1PI>x}5 z7z<-#9E^+cFg_;0gqR2uV-ie?$uK~H5*2DRXwjj^5KN9KFeRqK)R+d-VmeHZ889Pe z!pxWjvtl;PjyW(V=EB^V2lHY+%#Q`IAQr;HSOkk=F)WTHuq2kk(pUz|VmU026|f>! 
z!pc|$t70{*jy13**23CY2kT-ztd9+_AvVIs*aVwmGi;76uqC#_*4PHyVmoY)9k3&I z!p_(QyJ9!&jyZzFARfZQcm$8)F+7eZ@FbqX(|88Y;yFBz7w{rp z!pnFCui`bljyLco-oo2>2k+uNypIp?AwI&#_ynKgGklIO@Fl*&*Z2nC;yZkgAMhi7 z!q4~xzv4Iijz91x{=(n*2mj(f3>Aa?$IuuC!(uoLj}b5;M#9J#1*2j#jE*rdCdR_p z7zg8GJdBSCFd-(w#Fzw=VloU+phSfl4O(>QF$9xi3QUQqFg2#Zw3rUlV+PEKnJ_bE z!K|1Kvttg-iMcR0=E1y}5A$OIEQp1$Fc!h0SPY9}2`q`Fur!vzvRDqwV+E{;m9R2a z!Kzpdt78qUiM6mc*1@`159?zCY>17pF*d=b*bJLv3v7w4ur;>9w%88aV+ZVrov<@@ z!LHa1yJHXRiM_Bl_QAf`5BuW)9EgK(Fb=_?I1Gp52pox{a5Rp=u{aLL;{=?DlW;Ol z!KpY6r{fHqiL-Dv&cV4j59i|oT!@QsF)qQSxD1!$3S5b+a5b*MwYUz~;|AP_n{YF3 z!L7Irx8n}niMwz&?!mpd5BK8%Jcx(zFdo69cnpu@2|S6X@HC#mvv>~A;|08km+&%P z!K-);uj388iMQ}J-od+g5AWjxe29F!wSOQC8DJ+d;uq>9t@>l^YVkNAMRj?{n z!|GTAYho>|jdidt*2DVP02^W>Y>Z8?DK^9A*aBN(D{PHzur0R3_SgYCVkhj3U9c;5 z!|vDvdtxu_jeW2$_QU=-00-hA9E?M7C=SEnI08rFC>)Jra4e3)@i+k|;v}4mQ*bIy z!|6B!XW}fJjdO4=&cpe*02ksST#QR_DK5k1xB^$=DqM|ga4oLG^|%2y;wIdTTW~9G z!|k{Ocj7MGjeBq}?!*0f01x6JJd8*1C?3P(cmhx2DLjp5@GPFg^LPO-;w8L{SMVxc z!|QkhZ{jVyjd$=a-oyL&03YHbe2h=_xJ%n;wSu!U+^n_ z!|(V5f8sCvjeqbj{=-nQ$bSrtVK6L)!|)gZBVr_sj8QNuM#JbB17l(=jE!+HF2=+7 zm;e)EB20`)FexU(00l}^sL`NBhaN*PIi|prm85)v!9& zz?xVKYhxX(i}kQRHo%712peM)Y>LgWIkv!-*a}-?8*Gd1uswFbj@Su1V;Ag--LO0M zz@FF(dt)E$i~X=a4#0sp2nXX39E!tmIF7)PI0{GO7#xe^a6C@Hi8u)-;}o2V({MV@ zz?nD;XX6~4i}P?kF2IGj2p8iLT#CzZIj+E!xC&R}8eEI(a6N9ojkpOn;}+bC+i*MX zz@4}YcjF%1i~Ddt9>9Zm2oK{CJc`HgIG(_hcnVMB89a;U@H}3?i+Bky;}yJ$*YG;t zz?*mrZ{r=ji}&z8KEQ|g2p{7Ue2UNTIljP`_zGX+8+?oJ@I8LOkN62c;}`sj-|##B zz@PXFf8!tgi~lfGZ1NvNV;BsJ;V?W#z=#+LBV!bdiqSAS#=w{u3u9v(jEnIwJ|@6~ zm2nS(V@o>OpYlqC8omEmta2uj}5RPHp0f(1e;q9kCAPxW*aq8TJ8X{~up@TD&e#RJVmIuLJ+LSC z!rs^i`(i)rj{|TZ4#L4W1c%}<9F8M!B#y$-I0nb!I2?}?a3W5^$v6e4;xwF&GjJx( z!r3?n=i)q^j|*@iF2cpQ1efA6T#hSnC9cBNxCYnaI$Vz%a3gNQ&A0`(;x^olJ8&oN z!rizB_u@X>j|cD|9>T+T1drk|JdP*uB%Z?4cm~hnIXsUS@FHHq%XkH^;x)XEH}EFj z!rOQU@8UhYj}P!6KElWN1fSwFe2y>hCBDMf_y*tNJA98H@FRZ0&-ewu;y3(`Kkz61 z!r%A@|KdLk6^H!C&=>~8VmJ(s5ilY~!pIl}qhd6SjxjJM#=_Vb2jgNqjE@O0Atu7a zm;{qzG7M0lM1>j+T6E|!1e0S5Oo^#5HKxI|m=4op2F!?=Ff(Sste6e6V-C!TxiB~8 z!MvCc^J4)lh=s5)7Qv!e42xq4EQzJCG?u}#SPsi$1+0jburgM`s#p!HV-2i{wXinU z!Ma!v>th3Kh>fr@Ho>OY44Y#MY>BO~HMYUF*bduc2keNQurqeSuGkH`V-M_!y|6d- z!M@lJ`{Mu{h=Xu24#A-~42R*ZsI1b0-1e}PIa57H8sW=U%;|!dMvv4-f z!MQjO=i>rgh>LJBF2SX^442~yT#2i2HLk(6xDMCj2Hc37a5HYft+)-h;||=3yKpz| z!M(T-_u~OPh==en9>Jq{43FapJc+09G@ik;cn;6w1-yut@G@S(t9T8s;|;utx9~RJ z!Mk`5@8bh}h>!3wKEbE>44>l*e2K5{HNL^O_zvIW2mFYi@H2kFulNnW;}86azwkHy z!N2$qL&YWkF*Jt3uow=*V+4$dkuWkw!KfGwqhkz=iLo#?#=*E4594D3Oo)jvF($#J zm<$6HC{dwCgBBfn48i1>0#jltOpR$UEvCctm;p0lCd`akFe_%m?3e>{VlK>$c`z^L z!~9qP3t}NGj76|07Q^CL0!v~kERAKbESAIaSOF_yC9I59uqsx=>R1D7VlAwVb+9hh z!}{0&8)74Dj7_j9HpAxF0$XA$Y>jQOEw;n<*a16YC+v(}uq$@M?$`r+VlV8CeXuX~ z!~Qq`2jUa4Js2={N&t;w+qvb8s%s z!}+)X7vdsZj7xASF2m)x0$1WHT#ajREw01$xB)lfCftl$a4T-Z?YIMX;x62cdvGuA z!~J*w58@#_j7RV&9>e2!0#D*8JdJ1YES|&jcmXfsCA^GR@G4%z>v#ii;w`+5cknLW z!~6IEAL1i?j8E_>KEvnu0$<`Qe2s7LExyC|_yIrSC;W_G@GE}9@Aw0M;xGJ-fABB< z!%*?ae+-RbFf4|{@E8FjVkC@=Q7|e-!{`_TV`40fjd3t8#>4oS025*&OpHk|DJH`J z1xi$?(V#_#9z!rWrofb#3R7bmOpEC-J!Zg+mVx%J$As3*ac zz=gO77vmCKipy|0uE3SJ3RmMAT#M^)J#N5_xCuAo7Tk*4a69h6owy5k;~w0L`*1%V zz=L=Q591L$ipTIcp1_lM3Qyx1Jd5Y>JYK+ycnL4#6}*bq@H*bWn|KRv;~l(<_wYVG zz=!wv(J(s3z?c{dV`ChQi}5f%CcuQ42oqxxOp3`cK!Fk! zYBXrkp~nzRjwvuDroz;i2Ge3XOph5bBWA+Pm<6+9Hq4GWFem21+?WURVm{1|1+X9% z!opYti()Y>jwP@pmcr6l2FqeOERPkiB38o6SOu$MHLQ*`uqM{R+E@qcVm+*n4X`0L z!p7JHn_@F;jxDeyw!+rf2HRpgY>yqVBX+{h*af>{H|&l*uqXDy-q;8GVn6JU18^V? 
z!ofHMhvG0Cjw5g+j>6G62FKz!9FG%lB2L1|I0dKTG@Onza3;>e**FL1;yj#>3veMW z!o|1*m*O&9jw^5_uEN#02G`;`T#p-YBW}XYxCOW3Hr$Roa3}7<-M9z$;y&Du2k;;s z!ozq3kK!>rjwkRWp2E|32G8O-JdYRfB3{DFcm=QGHN1{D@Fw2E+js}>;yt{N5AY#A z!pHaopW-uojxX>fzQWh|2H)a4e2*XSBYwiq_yxb>H~fx2@F)Jl-}ndr;y()I1G;wFyjAJyIqG}RlSX(o#`o7W_>nb zLpI_we3p&*9GkEyoAG%zXA8DuE4F4Ewq-lEX9sp84j-r^_xl%Mf)e!(yK6~E>;{FdMGd;Y*5`4fNUFZ`9i@pt~gKlvAf zOyWNy#!SG3OvJ=Y!lX>bCiU*L;;iG%nuU*TZB${~D>LphAY zIf5fOilaG(V>yoFIe`;7iIX{nQ#p;(`8sFt4bJ2&zRB5~!?~Qt`CPz-T*Sp(!lhis zi@>WETG!F=hfLWFjVJ5+-FbCT9w!WGbd+ z8m47B-o^CHz`L1|nV6Ybn3dU>ojI73xtN=In3wnPUgqO{yr215fDf=B3$ZYZ@Ie-3 zF&1YDKE#qN#nLRphxrH}Ls!@8`; zr&*s3*pQ9*44-9VKF20(%4U3?&Dnx2*@~^%hHcr7?b(4H*@>OmgC?8{4mScHV zU`1A9WmaKTR%3P6;NyIPPqHR!@hR439oA(%KF#`Uz=mwZXZS1|^EozQQ#RxCY|a*J z$yRL5Hf+mwY|jqt$WH9cF6_!~?9LwS$zJTuKJ3eW?9Txl$QSq`U*aIX%vU&=uW|@q z<4_LcaE{##2C@oCm)12$wMKEr3(n9s2Zo3a_7XLGh-OSWQbwqaYgV|#XBM|NUoc41d`V|Vso zPxfMO_F-T4V}B0dK)%2i`4R{5Wxm3}e3e7^8i#TihjRo+aui2%499XD$8!QFauO$V z3a4@!r}K5r;2WIDS$vbTIfrvOkMp^J3%Q7kxr9r(jLW%#E4hlRxrS@Gj_bLB8@Y*Z zaWl7YE8pfje3#p}ojbUbySSTs_#XFiANTVB5AqNX^9Yaf7?1M=Px2Js=V_kdS$@C| zd5-6Kffsp+mwAO(d5s_OV_xSC-sCNQ!cX}bKj#|%3>_e5`2gyS&F4uh7a=*KFY^f zmgQKU6XFY^@+ z=BpgS*Ep2JIGiImlA}19V>p)MIGz(Yk&`%?Q#h5=IGwL^2H)UJ&f=S#%{iRQd7RG$ zT*yUS%q3jPWn9h`T**~j%{5%hbzIL4+{jIQi<`NHTlqHM;k(?%?cBkg+{NA8!}qwC z`?#M6c#wy9m`8Y&$9SA4c#^01K2P%u&+-F)$a6f;3%tlnyv!@S%4__HAM-kI@Fs8Z z6Mo9i_&LAem;8!f^BaE4@Ay4`;E()?Kl2y<%HQ}q|KOkei$QkrpAlmwU_vHhVkTiy zCS!7@U`nQ9YNlaYrsG{q&kVeq8JUThnT1)IjoF!lIhl*OnTL6K5AS6@-pBiyp9S~; z3$hRkvj`t#Q5Iuymf%Ay$xVs*pjW-nr+yY?bx0j*pZ#snO)eG z-PoNy*pt23n|;`q{n(!aIFK*!MZUyAe3`FsFkj^mzQ&;(#^D^nksQU*9K*33$MKxN ziJZjAoWiM`#_4>WGx!E)au(m@Y|i0a&f|P8;6g6qVlLrQF5_~p;7YFIYOdj0uH$-c z;6`rZTinbo+{(B44&UWAZs!i}!9=^xD+{gVqz=J%*!#u*HJjUZZ!IM12_j#IU zc$OdVL!RS#Uf@Mu;$>dpRbJyq{Fv8ygEx7LpYT(D#?Sc$zvNf^n&0qSe#h_m1ApXC z{F%S-SN_J|`3L{xUkq}H|BM(j0TVJ26Eg{uG8vOI1yeE=Q!@?IG9B+?dS>9=%*ag4 z%q+~xY|PFa%*kBL%{e(1&g{aj?8ffw!Jh2J-t5D^?8p8bz=3>$FY+Z0 z;>&!6gZV0l@HGzQFb?Mkj^rqg<`|CUIF9E8PUIv`<`holG*0L1oWVCZle73HXLAnc zavtY%0T*%+7jp@hav7I%1y^zvS91;5avj%m12=LL-{NL&;a0xQcla*1aXWW#CwFl- z_wYULWzs z8@$O|{DhzKGk(r5_$9yM*ZhXx@;iRdANV7G;?Mkrzw$T!&Oi7k|6-6+{Aa|N37C+H zn3zeJl*yQ!DVUO}n3`#rmg#sG(=!9_W=3XWW@celW@C2dU{2;@ZsuWL-otyDkN5F@ z=4Syuz=ABq!YslES(L?CoF(`WOR^M8vkV{RBYc#Pu`J86JS(swE3q=GuqvyuI&1K8 zKEWqhlePF1YqJjPvL2sieKuf2HsUjUmW}xwo3JUH@p(393$|n{wq_f)WjnTK2XtLmw1_1c$L@q5kKa2-r!B%;wSu+pYd~k!7uq0zvegmmf!Jv z{=gsk6MyC}{FT4)cmBaY`4@v+;y)wCOu&Rp#KcU(q)f)-Ou>{)#nep0v`ojln4TGU zH#0I5GcyabG8?lq2XitPb2AU~@*dvHe7ukMGd~OP0TyH-7G@DX$f7L9;w-_3Sdyh! znq~MfAK{~XjAdDl$3qHvJs!* zvuw=g*n~~ljL)+au{Zm$FZ;1S2XG)? 
z;EQ~TgZMIE;b6YXA$*NPIgG4~#Kl~~rCi44T)~xG#noKHwOq&b+`x_8#J9MaTey{P^Bum+ZQRZs z+{s;jjFSzw;0N$-fxn4*uCf zMvR$&37LqAnS@E1jLDgTDVd6?nTBbZj(0IVGw^O^WF}^27G`BOW@irOWG?1r9_Hme zyqEcSAMa;=7T^Oc$U-d4B7BfVS&YS5f)BAIOR+S|@L@i}NBJ1bvK-5^0xPl-E3*o# zvKp(i1|R1We3CU;i%+pO>##2C@oCm)12$wMKEr3(n9s2Zo3a_7XLGh-OSWQbwqaYg zV|#XBM|NUoc41d`V|VsoPxfMO_F-T4V}B0dK)%2i`4R{5Wxm3}e3e7^8i#TihjRo+ zaui2%499XD$8!QFauO$V3a4@!r}K5r;2WIDS$vbTIfrvOkMp^J3%Q7kxr9r(jLW%# zE4hlRxrS@Gj_bLB8@Y*ZaWl7YE8pfje3#p}ojbUbySSTs_#XFiANTVB5AqNX^9Yaf z7?1M=Px2Js=V_kdS$@C|d5-6Kffsp+mwAO(d5s_OV_xSC-sCNQ!cX}bKj#|%3>_e z5`2gyS&F4uh7a=*KFY^fmgQKU6XFY^@+=BpgS*Ep2JIGiImlA}19V>p)MIGz(Yk&`%?Q#h5=IGwL^ z2H)UJ&f=S#%{iRQd7RG$T*yUS%q3jPWn9h`T**~j%{5%hbzIL4+{jIQi<`NHTlqHM z;k(?%?cBkg+{NA8!}qwC`?#M6c#wy9m`8Y&$9SA4c#^01K2P%u&+-F)$a6f;3%tln zyv!@S%4__HAM-kI@Fs8Z6Mo9i_&LAem;8!f^BaE4@Ay4`;E()?Kl2y<%HQ}q|KOke zi$PxTpAlmwU_vHhVkTiyCS!7@U`nQ9YNlaYrsG{q&kVeq8JUThnT1)IjoF!lIhl*O znTL6K5AS6@-pBiyp9S~;3$hRkvj`t#Q5Iuymf%Ay$xVs*pjW- znr+yY?bx0j*pZ#snO)eG-PoNy*pt23n|;`q{n(!aIFK*!MZUyAe3`FsFkj^mzQ&;( z#^D^nksQU*9K*33$MKxNiJZjAoWiM`#_4>WGx!E)au(m@Y|i0a&f|P8;6g6qVlLrQ zF5_~p;7YFIYOdj0uH$-c;6`rZTinbo+{(B44&UWAZs!i}!9=^xD+{gVqz=J%* z!#u*HJjUZZ!IM12_j#IUc$OdVL!RS#Uf@Mu;$>dpRbJyq{Fv8ygEx7LpYT(D#?Sc$ zzvNf^n&0qSe#h_m1ApXC{F%S-SN_J|`3L{xUkvUM{~0l60w!c4CT0>QWilpb3Z`T# zre+$ZWjfx)^vuA!nUR^8nOT^X*_fRfCKpgU*t<1#FzOB2lG`9;cFbqVI0m89LZ4}%`qIyaU9PHoXAO>%qg78 zX`Ig2IfHL-CTH13bt>Jj^3J%40mv6FkXNe4nRzhG+Q!Kjb-{ z=LKHmC0^zgUgb4@#E*HMH+Yk`_z6GdXZ)OB@JoKhulWtX<#+s^Kk!HX#Gm;Kf8}rd zoqzC8{>9*4@t+Z6CSXD)Vqzv?QYK?^reI2@Vrr&gTBhS&OwSCwn;DsjnVE%InT^?* zgE^UtxtWJ~c@OVpKHkUsnV$vt01L7Z3$q9xWKkAlahBjiEXh(V%`$wLkML1G#a4-X`2?S2P1fR5tj#*C%X)m8_1S<8*@(~ZSvKZ#Y{I5&#^>3b zE!dK+*qUwFmhIS{9oUhb*qL3}mEG8#J=l}I*qeRWm;KnE12~W`@I}7FL4297a4=ux z5WdEt9LC`s!I2!r(Hz6E9LMpTz=@p1$(+KeoW|*Voiq3bXL1(bkJTYksy`2&CCPyCs`@K^rE-}wjszu(iIFqyZCTDXF=W-tBa{(7}5f^g_mvR}Ga|Ks&6<2c&*K!@#a|1VW z6W`)yZsAtG&3E`Nw{bgna3^H1znnVZoOkc`!2mo4_nx=Q}doZmsh$|rhUuioffw2+NyQWp$l8x-naEX zg_gAJ+PPbg);)XfSkR?a`_AFtM_t^dRr4M_n)hGWvt9FUttTX2(5iW_=KB%{K^Vme z5+?e$U6%Ry_J1GqpATDDqf4vS{q7upQP&n7TDR==Z^i#U_QLjm7f{Q;k2`H{^SLeN zww&8)ZtJ;i=C=L6wd(f1y^5Cn@0xX|)W3b||FK<1;#>c;Y=7e8x36RvL_r)R2oeT~ zg2X|RAZd^+NFJmJQUJtb zv7l^FE+`*V2r34Zg33XaplVPps2sqk_@Fm|$!$ zE*Kw72qp%Tg2};@U}`Wem>#?y%n05HW(Ko@H-p*1oM3J+FPI-J2o?s5g2lm-U}>-{ zSRSkhRtBqr)xnxzZLls_A8ZIV2AhJng3ZB}U~BMp@J{e=1SgJB6LYE@9WOTi8A95%vswg}uW*Vc)P{*gqT)4h&xiUkqOg2Zb+(uY`lcSHmIU zYvIsvSU5Z!5snN;g`>kU;n;9oI6j;ZP7Ei7lfx`<5uOZBh3|)_!!zO8 z@PqKf@LYI4ybxXtFNK%GE8*4fTKG};adhhKzWhF^tWhu?(X zhTnzXhd+crhChWrhrfishQEcshkt~BhJW22{9zPDag-oR7$u4lM@gcjQL-p`lp;zQ zrHWEVX`-}Iy6CPbeUu@(JIWYkiZVx8qO4K2D0`G6${FQ~az}ZhywN?;y-~jCzUcla ze^elPASxIYiV8KXNldPjYtzEQuZe>5N( z7`+g^7`+q?ie8Rhi3UfnMnj_4qM^~SXm~Ut8X1j>Mn_|!vC+6_d^91N7)^>MM^mDy z(X?oK^m;TSdLx<{&5GWPW=C_PxzW66ezYK37%hqxM@yom(XwcHv?5v=t%_DhYofK$ zx@dj0A=(&iir$JgM_Zz;(c95G(Yw*MXnV9H+8OPNc1L@n_oBVgzG#1RAUYTwiVjCd zqNCBV=y-G@IvJgc-j7a4XQH#w2hoSox#)a!A-Wh{iY`Z2qN~xh=%eW4=z4S`x*6Sy zK8Ze!K8rq&zKFhzzKXt%zKOn#zKgz(eu#dIeu{pMeu;jKev5vO{)qmJ{<=N&hjA3g zae_EuoG4BlCyA5B$>QX3ia2GQDo!1ziPOgE;=AJXafbNrIAfeC&Kzfnv&Pxt>~W4b zXPhg}9p{Pj#`nbc#`)s=;``(Lae?@OxL{l;E*uw$AB>B}#p2>|iTI(oWLzpP9hZq8 zjvt91jUS84#^vJjafP^ITq&*`SBa~})#B=Ljrj5SiTKI5W?U{6hR<{8Bt9emQ<69vr_K4~bukhsML=;qi!gWIQS! z9gm5}#^d7g@q~C{JSm0-)8gsz>+y{Ejd*4}D}FPc9nXp9#`EI&@q&0^yeM8A zFNv4N%i`tnig;zbDqbD0iPy&K;`Q-{cw@XNek+y~FX8dpA|G%zLmeuIet7M6Pw@~?e#Za=uzwPob*D3pRmcO&! 
z`)93i=V|1fmqim91~X_=09F+DTzZf0aAW@Z*N*|OSz28xq>UXimSPXYq^f=xq%zGiEnW;w{R=p<~w|s+qj)O zxRblMn|t^k_i`Wi^8gR>5D)VRkMbCg^8`=w6yN7*p5a-3zz=zj=Xrq_d5M>Kg;#lv zAMs;e=MCQEEq=mJ`58aw7yOc6@oRp=Z}}a+=MVgmKk;Y&!e99tf9D_klYjB{j|Bg0 zAtT02z=TZ1#7x4ZOvdC)!IVtJ)J(&)Ovk&Jo*8&IGcpr1GYhja8?!S9b21lmGY|9f z9^T7*ypQ)YKMU{y7Gxn7W)VKfqAbSZEWw9ZlBHOhW%w{3;iG(vWm%5pS%DQLMGrO=WyRkcauqS)5H~X+J`>{Ujnnx$XYdWqG z!IfOa)m+21T*vj?z>VC*x44;GxRr179lpzL+|C``$z9ydJ$#RQxsUsKfCqVqhk1lY zd5p(-f+u;3@AEXz@GL*zhdjsgyugdR#LK+GtGvdK_%W~Z25<5fKjEkRjGyxhe#x)+ zHNWAv{EpxA2mZ*P_%nauul$X_^AG;Xzj*r=!JYWeh%pl|Armn%lQ1chF*#E(B~vjq z(=aX5@h+xk2Hwq#%*4#h!mP~3?99QO%*EWy!@RtQ_c9;vC?8{4mScHVU`1A9WmaKTR%3P6;NyIPPqHR!@hR439oA(% zKF#`Uz=mwZXZS1|^EozQQ#RxCY|a*J$yRL5Hf+mwY|jqt$WH9cF6_!~?9LwS$zJTu zKJ3eW?9Txl$QSq`U*aIX%vU&=uW|@q<4_LcaE{6aN`8W&$Q;A|_@MCS@`vX9}idDyC){re!+b#q`X;yP1)hn3-9amD!k` zIhd2Vn45W+m-p~q=Hq?5pZQsU53nE$u`rA9K^A2(7H0`Q#F8w<(k#P=`3N87V=T*Z zEYAw8$V#ltDy+(Ctj-#IoKNsc)?_U{#oDaHx~#{iS)UEqkd62ZpJih{$0lsbW_+H_ z*@7+Eimlm(ZP||P*?}F|iJjSnUD=J@*@HdVi@n*0ec6xwIe-KC0$=1y9K@IT3J3F5 z4&iGY%3&PN5gf@;9L+Ht%W)jf37p7DoXjbl%4wX=*Exf4a3*K*P0r>V&gDGL=K?O| zA};0hxSdBhe15Cm-K6*}s`t-Zcd`i=x9QZi`G2`hZ?8KUiud1%&j0)I>vEo!zhe@n zDSsym@&9wq;*_}+=2o0rX>R4Y74Bp*&TG}{U)6cl?F7fxz1nwfU9v>uEHJ?m zirje>1poQ<|4>aAc5dIVb*q0>*MFH{`tK_{<$v8+3&J6PpP8k9UyIvkq;t20eVTXb z-TGMCAu|piy?vK;QP(zYdbaMhV_vTTJLb3P+Vzm068ulKp;fPDy$1ZNHf;S5U8G>H z`t^!bX@2`^_2}~7beCMsFOIGq;_Udvu-uvF@_) zpV9ZP8t^|jE`J{MKXkh1b#DLvb5;M-6tb*l?@qni|GSi;f7vyccmBJSf%*3+>^X6M z*DkH+_v+jAp9yTr#4w&1CYTt-6QkRY3C%6Ndj6}%{I>(O_Wv4}|9fEmr*AR0lb!Eu z37LqAnS@Ds=jsfed_rJ~c0MZ4HDxe0?g??7{rpM!$Dfzqo+`1m{Vii$ zKrM+TQkpSdt3GVh=t*NH)=U^*ll$k__8zv@m|z(b16rqz+8$cxpb{(!7HwE)Owzh; z)Vf)!5)Ue7w60}!YK=eK;x`TmsQ#(R#>mLUi5068#^iZtofs@qi{~s_Y#f+cXdDF5 zgSASn+Lr5$%`JwWU-M(VDVA|aoAsvVtyha_rfC(f*5f96r}5Wnq=mX@WfqyBIhVAzbeG}f(XiusM% zwXMcs3ynEySv(<{BTYuoUA92^TGDE+sa7Ui!pWFuj3teEmQfo}OBE; z(u;cZ8fhF3a~D{~!hl*HPMXow>K0>>+KK4Nm^;T}wKQBGNkt4b{dCEQ$|E3hWFTZL zN&j9lwB{(wI2sEiQxTCej)8?Gu(-uo8dRv4XfZ;;&RU;ZbNxc&*ht6-r{C)_WRS5e zSk$U2QK%aUAflD6S})btFcG0ufJM`ThV=HSvoi}mQ}#(8(sR~6MjdA9q4ms))svWy z#Tv&tVlfg+ztXd}Z8SLK*haOYyr`(ihyPQw-17D7qfkTB`f1R-F`z+Z4a#acHT8&P ztWe8psz;5j9%VFHMl+H$vI@?OYfIF!Xj7tYMbb!wj26o<0%}F`auEiWG(=rE95ox8 zQg&lJ9QGrYb%W92M+7xctGt&cg$+12yh2yI9w zlzr28^;$d7I2G>Ph}h&pdYTi`=*C_FoyYGovBy^e^52FgK-5|u7vBa3Z%KGmJR-^VGYG$V#h*S}G|i-^QJ~z^2)r+M(`Tg6vxP#>d4!&4p=l92EA@huda+RIC6Cn0EHyp1)GJcx)k2}y zJVLLt(2ShW=9p!?K_&NQK(pi8RaA3t1uWxjx9AWA_+Sf)?oQJ(-jOh}fZ=xm-wXIX z3HyS@48IR}6W|XdTpTQ6_(Q;tm@(Q>wN+{*J*;FIex!B^*T))WN6e;FctvBp-q@;^ z@(yl%tac|7Q#_mSC)tpi(WL5YZZtl%jL*~(5nDdm_&mL!Y-qPHEaS_7TGkYa#+s7G zwvh2vdPCVz`|Iq(=^OG(g?u=T!d^;tej5lDtI7$rwY770;~)P7Q7oMBisJJX7eo0T zT5gu_oLRml7DfJ-ipJE^IJTjru{~sbA6Qa~q-baq$&_L5SVD=FjMXJz=FS)tjIAUE ze+U^rIvMs;VFZ7sHL&$ihW!#)LUk352;*0$x5oI5dku+%v5R|#xytxGM(L#OplA9Y zGBy50jPp0Ozi4JuG|l|CoH-GX8UOGsspAsHZaK?N%pzc1Q4GhHgh(YseM|@i0G%0x z51pi1To*}+VstH0f?u3B8sn)kqeUqO?wKb2Ubg`=%K!{pE6SO*f?3PLb&2{|QdDB( z-MECG$U$XZmwjcVM$1Tv4tMn*GOc!)S`lVlV)ddUI5(;t!q6hNJVECw(TQg3r}ekQ z9_dK;?n>u%t;T(=hH}xw8C^iat5Bip4-+)>fZjPlQRGMOM03opteziUkc{~yO5>rS72&A)_7)F5ScvdR+Juw`3 zBqT-g>5YvKy!#U`Pg+e(?Mylnc*GJl#MoqsQOp+C zS(`*ijKA>noqS-!3FFXOVmy8WI$5r>;e03) z69^57iD;nU7nYbrfVM3p4&aVwKM<1%kp?1gD11dx^0y3eU;&FV6ye35p=}R|gLuM! 
zwB0f7hmbg!N1VV@D9$B^07!4D7}Vt-D!Zu|$@HGa)agXsRS}r5o2WA|is{)Jorxj5 zd??9yBQgtsqj0)QMU5?H69=1l7?H@#Io$D@8O%xjo0)S9Sg<*|R?g%3Tq|pN#8%E% z($VhSN)88-Gjjn((#(Zy<|3lPOqpep4rB6z0f$&Jm=@F)X{3WI08acylB~a7Y}B2D+j$!UVLF#LIvq4iRGX#vw}l zGI0=x;r1?B6?MeN6V?+$I}^r$!{P>FgvHC5Ev~aBuvo*`5{-;{E!K(O)M?oQi`CNl zRM<|f5i3A*jc-EV63zH+TOV;kLn47js@g(8JE|JMVY?tkddCq-W~{)AxHf&x<(uTo z_uP4lMT&|%wNRMET1oHwVkNqkIF2RqZyCfYfa&-BeM`k^>h1DRw#6Fsu-5Ttq*5oK z*Pc=qFh7=xSdgNTHZnYrcTBd_*pHkH(SJ=Ah`a%3w=x6t?YxT+c^9l5aIB9(Lht2H14C# zx9j}`=e+oUl6m|?JjQSTQt35h6KF8*K@#zL{-K-xXBE-_&X8LO+x?H?G2Y@mY{uPdtadC7#D` zp(7Tp6)zAR5-*~WF?)%Cb~5i};IRJ{V)WYoD)Hxh_FDSJE+g#MiEG!aLvWaNWKL!q8d_s&~UwlgZ@I`ua*Rl3zL}hb7 zC%Bzyz5ot$za&POyN%i6IWsQs$_rtRVvGk*Kp1<<5j<#%+Xs(&zF?$*>*^-th%=U>DKcmB<6 zah+|1JO9Dh61y2?cg~P+^n$Kt;ZHKFz@H|rt?9;1C}6RK55Krn$2(XUFOqeMCQ-q!m*gXKlLc0e7Ls$oqD8jlINnnp8AL2qs3`T2-A&kl@;aeRsl<213 z)t5QxO{Hqe-WcjmnD$|jeMv+<8498lAWBFKqef~woVD#Tf)I8XM`^#zZcKgP{y=45 zs#ziz7&kLV0!LtKnC#s!MrG8ulo4~v5Nc${XwdPFM_g@#HI`s1tpBgB#<8Ar$LDw{ zWD{suSXdJQ>M~f9sGAKF6g_|iI6o)zjMWDclFQG7FyuH*%igw$gMmqJO<|eL$aj7h|rpO{u`Ls2Q7)*T>8q*T9vvgz` z(+`t$@}@b1Z{+YY+}92M9Ke~FkE4kR{vb`rAIG4z#8O6O zqDyy#h;HhO(8%oz*3NgWJ(h*SB-F08%ZP#(HZ&EExlyTO9Z_=T*0YWyVnB6z%Y(TM z=;xWc+!YCyuIH=oVHx9HWn>497}@?tIk)aIN>{Fs_BR2A{msM;`{Tp}`$;#lKY`W~ zEsT2YH;8WPw0PG3PhI;33nfXYUHel+2?-O8YyV2A<8egE*}qEaxEd%-!8OSmqSO7s z@!av=ADrNb;vVSV?hmX27GIGM&q=-|Ig#hvq3z^b5}QZtypw!IneGD(9oaL>K47Ly264tZKKYV$MBY!L=Hjsq(M)Xtwp1X&QGz^K; zSkSwN{f7`LJ&HPxz}0!5NyX{D%-n6tz|41Bl#29azAjy1Cf?~y}&xvq?7S2_oM zrN=%As=R)9O3p%md{I1o=3?> zqSSPM0W47aMdFm!zr)Zw`pvNEew0B=N)1&b@B_U=iMybt)BOQQ1y^K+3MkUR;PO22LZ2XB-n2C zd`J|$s!=12+A7oFV?y9W`9(>5Lb=(H_O=nYl)n4!xM{&DGw7w8>SaPbRocnWa=^(rE0y4wH|b43ww+2+%1#b;?Sel2NA| z)G765(Nzxp3o3fs3m&M^pVix^0&PmZhHpYcHx)(6%tikQfFeHD5q3#5p zgxxzk!9MLwu&;N58Ti)c&frPkVc8jmw==^CX9njnvtMT5*GiSWKTxD^HHjj9N3vjC zr`S{a*3cN=^ceQ0?`UE$b&3|!cTAS zFku4usFivj#oc^2 zA59o+cGB}08uB6kuvDyhs#uD#PCvN|wh%$|0;?}TgyF|yNhZ&cVM1s#DR(@XnZI3$ zD$6o=ZdaDu5$0!4M!B1>QJpI+Jz0+-YmnRfV}PVZ4eiJ;&zIK<D7G>AzXdHx zQ-(x}8fl@)-Fy$NBrF%R<7mh($_8$lSOqW_v(*^!Teu$c4dbA#0g@N9<7pO}G@JaF zQ}XYb(Y7lGWIFCtRQm*y;ka4ETu=cgx)aE_(Pk~R((`pW2?UhHS~TSRlZg+`C!5Ln c>(JsOA*0gw^46lYxCadC6VK-Q9dMLW>7t+nkfRIItCYwgWf6rKlDpjPE!XxXK}6TsK+lF4bye)W zcg5btPpqgYs3;b!DE6*=|CzU&Y?1?iX5X8(Gyj=a_jZ0>^vfqj_r~()tA){ssEtWEy_8 zaA@VQk(I-YqfBG2Pt6alLR3}SB(*Ty&|JMDYRn55N1MicpIY2lCxW0wjRn=gU^rgi z5UXzr21}z2_0^G5W1-*UFYqgP_ae1$WvC$zJCjvyW;_&+#=>fTFm_^#u{Z_XG30t! z^)}WUOEiM$SkpMpr;XLdY2!5lVkv-FmYmYLU*~{n9Pd*L8$#hoL(~WcjOED_I`>P2 zQo)9Oe#Kv~Mms?D)(?XltH?_7sy<-oK49I;G*C?N8GSw($nzCh@t%;iy-(-zsS|Vymv{}TOMNLE)24`3x!_f?@v2@V2 zW9*>g$$N^rS>YYZpakSs;$6V*1fK@XR{1jGJ<%O+8+fdC^KCpd0CAE}tJAcIhImi5 z<2AEmIR(1bUoR@XWXK5_@~PQkr*X&$4*B$mCgVH6nT3qAPs-={Z9{V?X0&oKonsp3x{K*N6w~?e14H40%%O0Q96{@V*M%AQ zuD210Tg!JS1FW5&EBOTKMbb(#(~2~5F`U1IoWGQuKcAepOyjal=P!rz>vNp1i3MY; zjVsvs4W@CW+xe^D{MGc@glSyU)_bpoU)N~`TA^uN@6(E_TTYCvE)BMn8aMdVQgz{w z86nIZqRF^%&b5#2-HVwd@2I(pjGJN$jGMI*trUap7Sp)Zr?qd@I%wq>OCG-mL+CcG zQ>)h5RFx*b5)Y%Ot&W?OGOuyF)n~IZE@9k(*WM}NxT`IA@Hna0CzEH(m*M>y2J#-oU}$`0T$(|9~5fF~~9tn~02Pg+Y~ zQK~D9r{M3?899E&`rs8MIM8@jg7=)u;Pa;OLXN=~rNK>XFk!re0RKk;zDxl=LIG|z zjaQriOL-!C6#+hz6`*!72ltw3yzUO}4Fvb51awPVfWCzQ-cAKDhUWQq>?+07H_w>` zwzf6(O~TX{Fy8fPL#x&&x>_H-rj#X&_mKK+rm>v@RonAQQ{PlKx49{%8SnesA^FA! 
z8X8f%2K=k=$`3XBzep?$hjvJZKWfY2j{(q50_YP0=uHCXQ`7j&257v!UVILO-pN8p z&8vyG7`qszFHGZ0H%wmvrmvIdw(EV+H>UAz_O!CwMr13({Vw@kyKdH=HAM6AFN5le~KV{qe*=6&Wl+A?kE8_c&;`^QA`mH&fkl z9!T=4POr-jGp{-n6Mcw3dJY0T2rW|_j9*v2MPKR%L_ZQ^{|;e+bf9^1pC=9l9f9^w zwyzkF@Sy{f1Mo1#K+;ePgVoJ7k*FAiUIr3}5gSP4twrijjgC86Eqsc5Yo-u(ukpoC*c^RG8;xRTwvjl5W~^4JFCSAbaW~?RGJ;e zNc7^EJH#k7WW9*By<4lWadmEj#?Cq#>3dYimB*go@@z;Xv8rw z4K%rB71K#NAZ8GS^LQi?Ty<7%127YGgg=V}E1a_fn@zr6SkX(&2E*CiiaBHixhq+t z#aq=`)^*q@CKKiRC(Y7|WK&U(A;M4MS#7OT=32Bb(u-xQ0PDpOSM zPf(7IAq7>Z_+*aM(%Sllh9Dmv#1fEaCjOu~3S!-skb7Y}ZO4)(c7E8EJgD0VI}S8c zEXA)+=R9?`^AxLZOeJd>DdK$56&VhYXh0lKB-%TPAX>?Piw6!D%h7^F2oF<)@yiQj zVRLP5G!hflAYhw>4AC}8)Szokd`0QjBSMnMR5^n_*ItiBEy&4crGN68lG6r>I`jam zhKE+6XJoqG)^97*ef2i7tlJ9SHJrrc9i`ngu_BY0o!JJkWo8@El94x&3U8)#JM?By zOwq#h_N{tW$Oh;fa=9Q04q4|ghnAg@5K$0O5HUQcAmSvWRq2d03SuStrZ|DAPC-yT ztRi_;Clq8o=-D+;LNySJ+Es8O$#WIN)nbh4T7l!5SX~CPDNZ70YqDFrLk5eJ(MCE= zW}JdwyF%L86(SWAq%l6ZtX;oJ;#8?|8p#I4>GY73;Thb_INymgi3SYv*Jv~su2HPX zJC%y$-gX@)iM8NKjL%|C+ORqppACvBTA7}U@j0Lq#{D{CZ&U!M_xWBr|uQR z`Wzt|mpo-$1QrS_yQq_yK0q3A-MmTZo7|Tp7uyVIde&lAbFh6f~coU z+jhvZwMqd>UaNYvGocP|6P3$s=`<=ZpY6!yIYOn56IZnD#0FcVkTWxsY$bIYp#KWxGmGd?T&?MX$yCxEZmuELEM#T;BLpj^t6F{ zK#Q2-UOZ@mx{ow4L1COHcvo>h5lry_)8!NeOn~Hrq@n6GuHvS6D8*O&Ugke6`Bd}Z zZ%pyMez`4qMDnQw!GAQx&-3TA{$o4|sXCQMJVF;|+G4eYt_bBE^*HEi`(RC^HWY7& z1y|HJ)rcpER=~G);z??EN%39F=~LjOmeY7Rsx2|a(qRss zHle?SN;Mj(Zf;7{h?hX13nuYDl9Z+PGB-1ebu-c6j6LnXLS210e)SRbQ-~klgc|Wx z;#G8b|EbgKaLFcK!&BOR9j(;rz{Nn1iz9}|hxOfL0-G?76L0g&qF0u7N_|2Gr-vgm5UOKm9f4OTLZtvQ* zfq`9H*9R`G=wEfHJ{a9ry^_@F!clBTD;0q4|NCeB72fz)SlKXU`Uha+rj@)IcDVQu zJ;$3JXq)0AQqT3~VP)!SOkI#kz%XU zHl<6V1XOGrOGz5Dav2F>Rwi`$mH~a-GBDLWCwCxuRVOgywh>j=p7M%MGC{r+7UdAp z?YwqmGo9GXSlyM@Ug^AcCN6FsScnh773{b*cB@iWK&b3*?b)VuAAbNOWZt^6Z0?c0 z8|X+|cV@dUCwf@h-&F?9u_uO};G3csN#&-pH<6J__vt++mj|+kYM-<$`FRjq%$ya| zDLfclU8b%tHB(80$NflCX6+C*U3n-`(skS)-R!m92b$CwATvEMQ)iIO^kJZ4{T)ow z$aEzMA=896WqJtpO)-?olxk;r9YzAG-UAOTpTkppR2nazBP5@|l&dEWh&`Odj|9~e zqwsqeMzlNg>BJunE)qYQ9x{$GGL9pNLVn2gU@WJ<$2d^6831cMHQd>qVD;awM9L-- zoeOA^v@xkLe@?Jje~gV+Nk4IFhNd^5t%0Ch=8$0v=A~YpqVW zvsh_1EA1oNIm8SIzZ_}o=26nkxx`6_Tg7e;ng^=34&fe64R^Tn9eIDjTDqm+P4%JH z?)Q};WeZ4{jW3kO7dgTbOI2d97*t?z3`qlnB_sq4{L}{q$Kq*<YjX_wtSq&c$HcRP(>${aMY zves-XV@jcEqJ_Fz9dFBGnuy@LBWYx9cLZq8b|*^hfQYf8yADl4rG zJCq6SPGDg!g;f&c6CGjc#%i7(%1#1RdV4a9CCp5*Q_#(xc@m&uN;#EOQ9h@U2FfQ$ zeU#7X)HlT$Om?pxXOe)blh>G1)};8(l(QC;c@S7h0=y%~oU+R)u(Lol#o4U2j{<8Y zWk1 z5?|FXvaa5#bf0<^dJZmEv&J>7u@79XC1yZe$1?2V_0q)~h=Mrmy1tReUD-{bA`)zO zZ)P#?Enchj1*KQAXLZ;9sl(~5kODThu~EL%yPccG8oqQ^S$%dY2Xv8tD3H2Qxf23( zJ7?v8stga^g`T{xx!YF6H`KgoGZu+^(4X6}1ARTE?!D;77Kr;uzN9K^4|PB3(g?`B z2z&r!e$`WWRhI|RgRc*ftjy8F5`c}iw!L3`L=K8aL6u^Uu~_2qOtB}>%^nm_f`&oy z6saPwPm>1nN|;k#pP@bud`xx^iswi`)d>s?isw^&XHdKV3JnShAvGxec9Q=hsHWJ& zTKh=;OT@$pk7YRdFU#0B69vdQhdQdy{pc!fj(8O;0DzJ48mr}wjMqU2qHi$UJv81V z3aqB*B>te^N!daZQy**QcBQK|aF~eHmS0Z1ebiQPr#8;U*UpqRkdzf!PLwS8mswl7Bq=E9HtWtS> zM}1R#&t!Lb?I8hGr>vm7en|11^7;{!1O&E7fuB+W&Na!;pn(6DKybqm5G+els_PyAK0>D_p)kk#d>gIiDFRjpuf*a-?Ms&(pJUhS@(UV%=G5xS>tvoMxAe@8mdmYLtXbu z2{?7#Tk`WH|G*UADeFFxPxtQ7KPbib`V0MD+&&&G`IK{~tZ}P>KRt{zh`xDcRLK2U zX_vM9Tcvy5A?VeY#IP(0=Z<4I=%~OEBt&=R;UcxF@mRWY%-w)-!RQLQIUEsVu~9Pvd4L(9?;AuBL77lV0V9s! 
zp-e2MzL^lmFx@re1YPhV7Zeso%&{rHGvbbu{5iyTHt(&vov5XtnqnC%?ITeEV&WSy zJ?QTkarz3fd}f5piIz@Oh`J0>DqB-U7{qjGNcwYN+Qf_y}1bgQ#57$=$Ss~V4(h2;6bQ2kQ`8_U}53#ghj`Bn6hD!E!$KNhzMvM92MuCHA^#R z+Jye05kbKngQI-{1EPcDJd}S#a0mY?&O5trh~PmJpPi*337?9H4R71Y$NQyldtspg zQBeV%>`(be1-Fchh>nShZ5d+^_n_X6aUR@8*xO}voG;KXt{fN`Hz2NdTozx@7wPv` zH#8`wQ`=x?AXKBU-P0G0?*)6nu5o^TepSEOm-psdDkd&)kb?$TCe8zvwf~i~|CNsm zw%>b@2#4f;vHk6Dc$5S@W1@qjLTx;~_{Mo8FQPWNgJVL&gA(8zeCI{HRa9`h zIFI(jT#>C>MF+>&M5fEeMu$cOM@N^p>5#-ZFe1(u^dN0I1VuXnKt@DHg$IO%b`Fjk zBftyeE z&-vDdbdam71K;ZxTEDYlr3K+@9XPfqgub`ookL^mI52;+>@(ks&a@%9<4GTE^C!ei z60`()PmmQlUf#X{vM6wV)oxIGm$4r z%k*lP6r$ML{VU_(5FoQ{B}1~?ekVySL;k^l9RGGe&VM_=C;5P2+d5*oee&e@$>Vi- zSfJyxqT_4>Vx#6E!dplA+iz?`O9aM-+78FIWF9m+EVQNLEgVM??eXg#^Sp zG+&?-t(DqQa zql-5a3BKLKCVA*Kws)~t=3RcA2l4mzCLOOC@8)9zqdi!>Bl02&1=#_Clwf?k{xqS{ zgVpi6t0ZR#*Qodh`$}$<=vXobBqO0wP*FKE1XB6K$D8P$W7gxUJ)x z(CCg%xezJM?7k=eyvH-x|`xmh@5&)>3BkMUZ zh(=~RaMk4TFC7T^gBoJoXAIaO1pP#K*$%j;5;~tv{Ces{^7wf0S;qqAq&YJ;D+tiB-Yp-7Y>ST8|S9% zxA9?E2<7JDf7sE}A5_R8k)v~2Lb8YC2+0}Z6O!GDN&c3swu!U}l>GX=ZAl`~Z#Dn# zR`EA9tL%`)>=5GLu6dA z2MY^sA8g0*jzmdG0cfIz+wrw+CR*APR*8rym_ONszw!r;=eE^1^dE#(jVY z4$1BA%>Ix;FIGAhVUqv8YH-`dE3F1msf$0S(0ZuS>#db)eq&}O%n)?WGq zyZgky`ZLjO_9hVh{{`;3rzR}%@96pO3GZHIPn|&VztP+cc2BS7c6J{AthrcQTXiCX z{-C}8%$WSnetErsCn>W?_?ic+8UNYQc0yOwL)D749dK||1v_p?${Sg)SGHY!^~i{r zkT_o^q?9ihQo385`>TzXFB2V7c2I1fZ$~E%bc42r{%~^~jw74@8wdJ2k)}`no~Ce1 zJFNT990HuoMSAq)%5TpZ+&a=Wa{*y?rd=-CuOy0L{w*VeoKWp2PvTgjcvXfWz^m~d z+COndXTvt5{}oEvGE*R=kYm$}*s=*HFJ>f)pj|kLjSjX8 zJz)Vcq3si@6@%>9IH8^qQqhjM6ABkzF%7BWLE3h*7w3We+uAao#Y5GrldD329fC$h z#8sWX@!K~${DiAjuWMUE+pqk>n>gk;tu1^`h?FMaauMB*71JyaKN{@7Z(jpfJJ4^$ zJLs$pr=<1NijN_a3Do~c6`Ccor1U>vNqA^STO|GmEcr>MlcKvy3BJ@0jy+6*%EZ`d zQJ`H*s%zJhY&#O}(0s6+(hoa&cw%4C&(CW#a@gyyR61ew|JGc@B+@AS?`Z_twX@$6 zC{YsKmF$TUg78E;=xe_km!*H`32j3D#5~9N#D}s; z`B@_K5_n*Th~Hm-g>?Um1a?;<6*m7Z6=FT8Z3TkEoJ81N$X=F5OzueI+dODZZ=Xcx zX;&6lTfLl3fq&u*y?O0-Iqg3*u7Bp!f&(vq(J3o-UR$o#r_I1o+Jv_#YExqruog{;tL`P;^a;9&)Pi_ zYa#zcVw`LXyw_p67`%bj{HkRLAiSHBP@7leEg+CkK z#3H_zz{!i|e<15`$JmATq++R4T2H8!CX~-ZUb+1okw|PvRyudrbsB>0npk1y;rG&H zyP(7!?_ar~gxzk^-emn@pZoum8~UZ;u>DJa+q(=b*YP_x0zZn=kK&W4)<)_0asRn@ zNs1YMYYzSkD?ilXnN!g0_B-YwwxrsofU+}Vf4ltHGBPaA-O29!Zw!8-)cv^K^rtdP za<}g`Z8eGipvO+=16qc7&rKi^L80OHW<|6+z1{u?ssGDfX0toGogC*oTa?>=G02_5 zndB$SVvkKI6F{K1G?|de|CSrSM)ub>`7iZaoydr}f5QmIPUz#!a!hACNBXy{NFv3L zZu8GUSW+dxyRn#!suGD@?x@qCbdu(tz(aX#du#e{!Vl zS5{rHtMXo}e&x^d=x%S@hySS*{!r|;y+&x1?NyvDIQs}uP+YQ__iuGDNy^<`h0ksa zL%hPfOE`GNZ&Unsbdo*w7TP^%Oh{C4a9n)B-79bIl8Lhf|L?2Y5CD0rk-^ET+g@q; z$u!wPkK>#E<^JH$CeAz0?^0Qtbl%Gr1Mg*H?!6pg{{M&&2Rq#Ol@Paa8l2E1kMr=W z&?Mfwx%1oD{c9mkDz|BXTYuUQ+PHT+A~CUEkXRf^wDl~`>B9!>u+La|SG z70o;Bnga*zO6Q~6@O<5MT^*SDuJ0i={?yk`*xA{bEx7S1AYxa|^VlaMY>+n|?{`>u+K@}knFsQ zdr|TR$QN_!C3ZRem3xKL;HbxN=iv|P5$aI=-$okt`OWXoJ0uUe{(t58t#ZU$b+2-` zSKEwnuW@c`o!dI+w%)!ur2M~6=#yfU@6Lq%t;9q2p8PM(JCVF0xx{nH8W787d>f0s=DtD4Y{ zf&H&DkH4xxbsd^GDPT?K;rCX+!#XLTeL~$o{A0lr9C{_HO8B3}!oB{t&cR*P(H5uB_j_&m zUH$AwPX72ik+VP6(Gq_<9QW?H|2E41%LV@+j(?{x>l}hO3bX3Zl!=W$wjaOM>V&67 z+@0)YJ6o=KK2MwdTKt#$m);FpJGD)iB+cPxUst~t{o+$ycWry>s>?UT@0nMq6QAok zj@|=DI7cr!*zF$9-{1b>9~;p&v}Kzx=ZLIdKc@>8{~r+Sa>92DjNcRXAc2wgVUxtR z7ON^`8Sq-Pj{y+gQz*#~s3e{RaQ|T&HX${q%0!0-6JgB{YmoINza`pYCzE&49C!Au-``t0GU;XERvT6JrfdT;tfbH{VsC}p%w|7uU zfC6ZM0a$izye%A3Lqtr3P=s40n!5Lfb>8HAR~|o$P8ow zvI5zF>_846C*T9*0&)X+fV@CHAU{w5C8UzAPfixB7jJsEzk~#0-}K!AQos3bO1U6oq*0j7oaQ9 z4d@Q^0D1zwfZjkKpfAu5=no741_FbC!N3r}0}KU*0mFe2z(`;eFdB#h#sFi1alm+B z0x%Jn1WX2|08@c!z;s{+FcX*s%m(HFbAfrld|&~v5Lg5(29^Lzfn~sQU<0D#dx3qxe&7Ib5I6)J295wnfn&gN 
z-~@0II0c*r&H!hDbHI7v0&o$y1Y8EL09S!)z;)mTa1*!%+y?FdcY%Arec%D`5O@SU z2A%*f*i<$0w{tKD1!>9f*PoU255p7=mJxKDZx}=YA_9$7EA}G2Qz>f!AxLgFbkL! z%m!u$bAUNPA21h~8_Wad1@nRV!2)1Gun<@nECLng5PZA2O$W8Ap}Ap48kD-A|VQ* zAqHY04&osJ5+MnaAq7$)4bmY4G9e3cK`Ee=P%0=jlm}-`Jntz0jMBU2r3K}fr>)KpyE&os3cShDh-u^%0lI!@=yh+ zB2)>g3{`=uLcUNns5;~a)qrY3wV>Ki9jGo;52_C}fEq%LpvF)Ws43J8Y7Y5BEua7> z5NZhpLBUWfs5KMIQX( zdO$s)UQlnS57Zax2la;rKm(yc&|qi?rVHRw8Y1G)*_f^I{1pu5mL=sxrSdI&v&9z#!{ zr_eL#IrIX03B7_|LvNtB&^zco^a1(^eS$tiU!bqhH{0}sFa*Oe0;4bn<1hh}Fa^^v z1G6v(^RNJmumsDn0;{kF>#zZvum!u|6mUv76`UGQ1E+=4!Rg@)a7H*2oEgpnXN9xD z+2I^;PS^*|1?PtIzV7L|B8V-R&VK>|c4uiwt2sjdM3%7%#;Al7oj)mL99pH{|C%7}*1?~!WgS*2$ z;GS?VxHsGf?hE&W`@;j^f$$)BFgyhIz(e6-@NjqpJQ5xSkA~ymG4NP;96TPL08fM` z!IR-B@KksjJRP0^&xB{ev*9`LTzDQlA6@`2gcre!;U(}=cp1DLUIDL!SHY{{HSk(^ z9lRdi0B?jh!JFYN@K$&mydB;F?}T^3yWu_XUU(n8A3gvdgb%@o;Un--_!xW~J^`PE zPr;|*Gw@mX9DE+W0AGYJ!I$AH@KyL4d>y_4--K_$x8XbRUHBe+AASHogdf3=;V1A@ z_!;~hegVIPU%{{8H}G5d9sC~t0Dpu(!JpwT@K^Yo?f5|iLSO_zPy|D8gg{7yLTH3R zScF4(L_kDDLS#fiR768`#6V2MLR?4+BqfpxNsXjI(jw`Q^hgFIBa#WpjATKwBH57a zNDd??;)CQuawB<=yhuJIKT-fGh!jE!BSny+NHL^1QUWQ7ltM})WstH+Iix&N0jY>o zLMkIwkgA9;QVpq&_#rirnn*3AHc|(vi_}BvBMp#-NF$^%(gbOWG((yr{zwZX00~4| zB0)$n(h6yfgdm}a8)<`tA>l{_5{a}$+96R$G!lcvBJGh5NJpd-(i!Q3bVa%$-H{$h zPox*p8|j1eMfxHAkpaj+WDqhK8G?9_p~x^~I5GkmiHt%Kvp8Fkk!ZcKwcuRkk`l?y0pt;c8XdW~#nh(v77C;N4h0wxi5ws{;3@wh9Kue;f(9&obv@BW)^c0 zqZ`nT=q7YCx&_^eZbP@DJJ6lzE_64#2i=SAL-(Tx(1Yk9^e}n^J&GPfkE18hljte* zGHTnj9i@rnOqaVva@ zjK>5_#3W3{6imf5OvenOB(rNB~Rsj$>o8Z0fA4oi<^z%pW)u*_H%EGw1`%Z}y1 za$-JME-W{e2g{4)!}4PVu!2}2tT0vtD~c7vien|Pl2|FMG*$*HiK9SS747 zRt2ky`C`?u>X;u^1FMPE!fIo6u)0`1tUlHNYlt<%8e>hcrdTtqIp&YGzyh#9tR)tN z1!JwS)>sG@in*~iSQr+LMPQLwTdW-xg+*gASS;2a>wtB{I$@o$E?8Hr8`d4`f%U|C zVZE_FSYNCk)*l;y4a5dvgRvo)2OEkF!-iucu#wm(Y%~^!jlsrZf z!KPxIV#~1Q*a~bVwhCK~t-;n}>#+6M z25cj?3EPZq!M0-CuD4!LDN0u?U>#yN%t!?qc__``82QA@&G+j6K1gV$ZPW z*bD3>_6mEAy}{mM@38mS2kayE3HywF!MGadcnQ2DUJ5Ubm%+>8oFcs0B_?uXaFYvQ%=+IStjE?y6>k2k;@;*IdecoV!S-VAS!`{OO}06Y+Hi3j1q zcq_a$9)gGBZoCa1hKJ)3cqHByZ-+`tdFWKjUBUulP3{AV2~lU;-gf0wZvOAV`8DXo4YFf+KiB zAVfkUWI`cSLL+p-AWXs{Tto^YC6S6qO{5{x66uKaLIDic+R zs)R35ji^re5jBXKL@lB=QHQ8Y)FbK>4Ty$BBcd_UglI}MBbpQbL<=H-2qan(K}0ao zifB!Q5TS&dXhVb%;Y0)xNwg)}5m7`m5ktfh?THRVN1_wandm}vCAtyai5^5xq8HJd z=tJ}+`Vsw!0mMLJ5HXk-LU@Rw#4utwF@hLLj3P!8al{y6EHRE4PfQ>t5|fC@#1vvG zF^!l`%phhGvxwQm9AYjpkC;y^AQlpfh{ePbVkxnVSWc`URuZd-)x;WNEwPSRPi!DI z5}Sz4#1>*Jv5nYH>>zd$yNKPy9%3)CkJwKfAPy3Th{MDY;wW*9I8K}(P7L`amxNSq`{lB7tQWJs3eNS+i( zk(5Z8R7jQ7NS!oDle9<|nSxA7rXo|5X~?u>Ix;<(fy_u|A~Ta&$gE^GGCP@r%t`u? zxyamP9x^YPkIYXNAPbU($iie1vM5=MEKZgnOOmC?(qtL3ELn~$PgWo+l9kBHWEHY1 z=}T54tCN0Y4YDR#i>yu7A?uR$$ogahvLV@sY)m#Go084Q=A=K_f(#%7$(Cdg8BDe! 
zTazJVDCs8KkYQvv89_#pZOL|I6d6s%kg;TYvIE(X>_m1ZyO3SUZe(||2icSCMfN89 zkbTL1WPfr1IglJg4km|?9&#u-j2upmAV-p;$kAjRIffidjw8pD6Ud3=Byuu2g`7%G zBd3!y$eH9UayB`KoJ-Cl=aUP_h2$b~F}Z|XN-iUplPk!TlP}1Z zBLNDeAPS}s3Z*a#rwEFqD2k>SilsP;rvyr* zBub_fN~JVPrwq!ZEXqZtpi)w)sMJ&%DlL_cN>62=GE$kS%v2UCE0vAPPUWC-Qa)5J zDmRsf%1h;=@>2za@@Fja&qN)@AuQzfX9R4J-7RfZ}{m7~g26{w0-C8{!2g{n&V zQq`#Hlpj@ts!7$NYEyNnx>P-?KGlF~NHwAwQ%$I*R5PkMnoLchrc%?W z>C_BrCN+ziP0gX^QuC_ids#rq1ICCsP)taY9qCY z+DvVswo==u?bHrxC$)>(P3@ufQv0a=)B)-sb%;7l9ifg=$Ef4f3F;(uiaJf5q0Un0 zsPohX>LPWCx=dZ6u2R>i>(mYECUuLtP2HjHQunC))C1}v^@w^*J)xdb!r3+g5H zih51Gq25yOsQ1(d>Lc}u`b>SHzEa;PfCg!ZhG~RGX^h5cf+lH-rfG&|X^!S;ffi|r zmT84nX^qxtgEnc4cF`&5lyoXOHJye|OQ)mL(;4WDbS648orTUyXQQ*zIq00U51ot7 zP3NKW()sB8bOE{`U5GAB7om&N#pvR63A!X*iY`r;q07?c=<;+0x*}bPu1r^0Weix)0r# z?nn2h2haoQLG)mH2<@SV(!=QC^ay$+J&GPp$I)ZxvGh24JUxM)NKc|C(^KfF^fY=p zJ%gS}&!T73bLhGBJbFI8fL=&1q8HOk=%w^BdO5v6r9P1|}nuiOI}lVX`vWnCwgrCMV;=LRJ|;g?fGNlnVhS@wn4(NErZ`iADan*#N;74cvP?OqJX3+G$W&q~GgX+Xj4xA- zsm}N@HJF-AEv7b8hpEfdW9l;vn1)OvrZLlmY05NXnlt`P3nqXGWLh#oOfb`mY0ZQ% zp^Tep!-O&6Oav3jv}M{cQA{)w!^AS}nGQ@xrW4bd>B4knx-s3E9!yWB7t@>R!}MkP zG5whV%s^%kGng5|c$lHgFlIP2f*Hw-Vn#D@%ot`YGmaV0OkgH5lbFfO6lN+jjhW8O zU}iG2nAyx6W-c?2na?a>7BY*N#mo|BDYJ}O&a7ZoGOL)?%o=7bvyNHMY+yDro0!ea z7G^86joHrZV0JRQnBB}CW-qgk+0Ptc4l;+B!^{!pD07TC&YWOQGN+i+%o*k^bB;OB zTwpFTmzc}U73L~)jk(U;U~V$EnA^-9<}P!Oxz9Xc9x{)Z$IKJvDf5hZ&b(k=GOw7| z%p2w{^NxAXd|*B@pP0|g7v?MTjR9DYg;e$@;Ll*xYO$ zHZPlx&CeEK3$lgS!fX+?C|isz&X!$3IO`fLNXA=`*;%r;@0vd!4$tUud=4PXP=mTV9k%(h}%vmtCK z>t@@qVQe@X!A7!e*>-Fc8_mYBv21&`1KW}9#CB%8uwB`1Y>_qCyM$fJE@PLoE7+CnDt0xyhF#09W7o4A*p2Kab~C$$ z-O6rbx3fFgo$M}lH@k=3%kE?Mvj^CN>>>6rdxSm89%GNQC)kthDfTpbhCR!kW6!e} z*o*8X_A+~gy~?8Iu`-FYUK4YJ=FW8stEA}<} zhJDMvW8bqM*pKWd_A~p1{mOo00S@FK4(1RJ2;a+$cyTox`XmyOHL<=}F1K3py?H`)2p7t^ zxi(xF7tTd+kz8A@9T&w#b1_^j*PiRZb>up6ow+VtSFRh^o$JB%N* z8^8_Z262PAA)JRB$_?X&b0fHs+$e4|7srj^#&YAh@!SM%A~%Vf%uV5@a?`l!+zf6e zH;bFi&Ee*9^SJrk0&XF%zfd$a^EN0lpw#h%d|+;fwOc_~LvCz9e6YFU^=cIDdja$)Dm+^Jn<8{5k$Se}TWq zU*a$GSNN;^HU2t(gTKk&;&1bJ_`Cc){yzVJf5<=LAM;Q6r~EViIsbxx$-m-X^KbaK z{5$?V|AGI=f8sy$U-+;5Hy#i`0TN&V5l{gWaDfm=ff8te5m_QG9r{E*x5^@W9guFsN zA-_;SC@2&X3JXPqqCzpDxKKhUDU=dQ3uT0|LOG$lP(i3DR1zu+RfMX7uTV{>F8B#G zgqlJvp|(&*s4LVH>I)5ohC(BuvCu?lDl`+C3;se2AwUQeS_(lzu+U0qErbZ6f?H@K zgbCq7gb*pT71{|=LbMPg#0u?&4njwvlh9e{B6JnH3EhPrLQkQW&|Byu^cDIE{e=O- zKw*$DSQsLBgrUMPVYo0t7%7YrMhkJm7-6h1P8cst5GD$fgvr7bVX81qm@do^W(u=} z*}@!Qt}suSFDwui3X6or!V+PruuNDktPoZTtAy3U8ey%lPFOE&5H<>%gw4VhVXLrB z*e>i4b_%6jy_iAFC}t8fi&@01Vm2|mm_y7d`iQy2++rRvub5BF zFBT9BiiO0&ViB>ZSWGM~mJmydrNq)=8L_NbPAo505G#t6#L8k7v8w1RRuij>eqs%= zrdUg?E!Gk1iuJ_$Vgs?E*hp+FHW8bO&BW%Szt}`qQ|u-77W;^O#eQOcaez2b z93&1Fhln0=s5nd}L|iH^6PJrC#FgSIakaQcTq~{<*NYp(jp8P8v$#dvDsB_E zi#x=f;x2KwxJTS8?i2Tm2gHNoA@Q(yL_8`U6OW50#FOGF@w9kGJS(0P&x;qti{d5m zvUo+jDqa(>i#NoZ;w|yEct^Y|-V^VO55$M!Bk{5LM0_ef6Q7GO#FyeL@wNCyd@H^a z--{o_kK!lsv-m~)Dt;3I36vlSmJkV*FbS6kiIgabmKceZIEj}8Nt7f>mJ~^qG)b2X z$&@U~C8dy3N~xsOQW`0(luk-7WsovTnWW587AdQgP0B9ika9{sQZ6aClt;=d<&*MD z1*C#fA*rxbL@Fv3lZs0vq>@r8skBr^Dl3(f%1afbic%%1vQ$N?D)~y)r0SBNR70vM z)skvUb)>pdJ*mFbKx!y8k{U}*q^43csk!7YwU7d&K&hn^Bn3;Yq}Ec16e_u;Hd2@r zE=5R@Qd_B=6eUGVF;cA5Ug{uqlsZYBr7lueshiYY>LK-%dP%*dK2l$)pVVI(APtlT zNrR;!l1CaU4U>jTBczeiC~345CykNDO5>#Q(gbOuG)bB)O_8Qb)1>Lr3~8n`OPVds zk>*PCr1{bUX`!@8S}ZM*mP*T{<7aB-IxHQLj!MU*7n#UdMrJWo=VT8=h6%5rSwXAExnQ6O7Eoi z(g*3I^hx?GeUZLO-y}c=Wk`l)L`G#y#$`e#WlE-HMrLJB=4C+^Wl5H0MOI}^)@4IB zWlMI+Ddd!LDmk^BMoufIlhex?CA1LZ;T zV0nn_k%!8|=v=XDlD(#gHN=K!W(pl-EbXB@3-IX3nPoy2?Wsovh z8KQWUp~^62xH3W+sfw$E-IIl%gPny zs&Y-auG~;=Dz}u|${ppda! 
zJ}RG-&&n6&tMW|&R8WOfSVdG+#Z+7+R8pl>T4hvLt7+7Pb&S=6j*HZ{ANL(QrBsJYbKY92MOnorHI7ElYS zh19}o5w)mVOf9aKP)n+%)Y57hwX9lBEw5HkE2@>$%4!w0s_Ls&Q>&|fY7MofT1&01 z)=}%K_0;-m1GS;rNNub(QJbpG)aI(c+CmLb1J#ylkQ%JEQd_GbYN+Z~+o)k`xEi5G zs%_PFYLptS#;CDsd$ohwQSGF5R=cQO)oyBcwTIeM?WOis`>1`@erkVpfI3heqz+bx zs2+8wI!qm|j!;LcqtwxAoH|AwtBzC0s}t0T>LhitIz^qTPE)6=Gt`;tEOoXzN1dzA zQ|GG-)P?FIb+NicU8*iqm#Zt(mFg;WwYo-KtFBYms~gmf>Lzuwx<%cpZd13bJJg-( zE_JuMN8PLLQ}?R})Pw3F^{{$GJ*pm4kELvBEdPTje zUQ@5DH`JTzE%ml~N4=}wQ}3$})Q9RL^|AUyeX2fFpQ|s_m+C9^wfaVVtG-j;s~^;l z>L>NH`bGV!ep3Mr)F2Jk5DnEZ4c7>b)F_SC7>(69jn@QC)Fe&T6iwAMP1g*~)GW=V zrO;ApskGEu8ZE7sPD`(4&@yV7w9Hx-EvuGI%dX|na%w(WE-kl~N6V|_)ADNtw1Qe8 zt*}-^E2YATcL#wIP(rRmU zw7Ob7t-jVkYp6BS8f#6ordl(tx#q96&;qnTt)&*E1#7Le)>?=bs=2i`T9_8DMQD*) zTdkcIrA2EoTCCPy>!5YiI%%D?E?QTuo7P?Hq4m^yX}z^RT3@Z7)?XW-4b%o{gS8== zM;odQ(}rs!w2|5)q(WYwCwCUOmZKgI$o2|{!=4$h_ z`Pu?)p|(g{tS!-&YRk0c+6rx@wn|&At$LUS25qCZN!zS#(Y9*awC&msZKt+N z+pX=<_GpAqCx{sbq&#mXt^XmEZ{CWYspk7EX ztQXOX>c#ZpdI`OxUP>>mm(k1W<@EA;1-+tPNw2I|(W~mddNsYe?x)w#YwET1+Ik(m zu3k^CuQ$*e>W%cqdK0~=-b`<<`|BLA;(Rz#?tGCxX=pFS=dS|_h-c|3Wch`I9J@sCCZ@rJ+SMR6y*9YhW^+EbzeTeSS zhw8)h;ra-Dq&`X?t;gwO^s)LleY`$FpQumLC+k!6srod1x;{gnsn619>vQzE`aFHU zzCd57FVYw5OZ27sGJUzeLSLz`(pT$i^tJjreZ9Ux->7fWH|tyUt@<{7yS_u;sqfNv z>wEOQ`aXTXen3B{AJPx&NA#omG5xrHLO-dW(ogGW^t1Xo{k(obzo=i*FY8zItNJzl zx_(2yso&CX>v#0K`aS)={y=}IKhhuTPxPnyGyS>#LVu~h(qHRu^tbvu{k{G{|EPb` zKkHxgulhF~FhBz`U;{Bw12b@gFi3+kXoE3WgEM$TFhoN#WJ57jLo;;4FigWTTt*5b zrIE@=ZKN^M8tIJmMg}9Jk;%wxWHGWD*^KN)4kM@GW8^Y&8+nYpMm{6IQNSo@6fz1M zMU0|GF{8Lq!YFB!GD;g|jIu^Kqr6eUsAyC&DjQXds)nyo&8Tkp88wWWMlGYZQOBrj z)HCWE4UC3HBcrj=#As?XGnyOzMhhdr2sBz6K}N9A%4ltb7@>ySXk&yK;YNfJX|y%k z8Bs>G5o5#}?TrpbN28O`+2~?)HM$wyjUGl%qnFX!=wtLX`WgL=0meXMkTKX8Vt9<9 z#xP^JF~S&Wj50yG#uj6%vCY_S>@ap3yNun& z9%HYu&)9DqFb*1rjKjteAgOxz?)(xgnOx-k0)3i*NnZitI zrZQ8TY0R`{Iy1eQ!OUo8GBcZ5%&cZMGrO6?%xU_Vxy;;V9y70*&&+QYFbkT6%)(|7 zv#43jEN+%COPZz3(q1$RqtDAmi4YQ_M%dBnIG3%Q3 z%=%^nv!U6@Y-~0$o0`qc=BB^d!VEA2&6Z}68Em#PTbm(fsOdJ_m||}N}yO>?gZf1A0huPEYW%f4vn0?KDW`A>lInW$r4mO9F9&@NU z%p7ixFh`oB%+Y3?ImR4ojx)!b6U>R`By+Mk#hhwRGpCy~%$epabGA9hoNLZA=bH=6 zh2|o2vAM)tYA!REn=8zf<|=cwxyD>;t~1x08_bR7CUdj7#oTIcGq;;N%$?>gbGNz2 z+-vSL_nQaIgXSUguzAEhY92F>nGq0OB z%$w#d^R{`%yldVw@0$>P0f>t4`uvNq= zY8A7JTP3WLRw=8rRmLi7m9xrQ6|9O@C9ASk#j0xgTGg!TmY-F_s%h1-YFl-zx>h}_ zzSY2LXf?7LTTQH{Rx_))Iv<6v&ts$1j8fp!* zhFc@7k=7_{v=wKKvBp~CtntDCNurZvl&ZOyUfTJx;=)&gsx zwa8j*EwPqb%dF+r3Tvgc%35u$vDRAato7CgYooQv+H7sHwp!b)?bZ%!r?t!4ZSAr4 zTKla1)&c9Fb;vqw9kGsD$E@Sl3G1YF$~tYGvCdlOtn=0d>!NkZx@=vsu3Fcu>(&kH zrgh7@ZQZf%TKBB`)&uLI^~ic`J+Yoz&#dRx3+tuz%6e_RvEEwmtoPOj>!bC_`fPo% zzFOZbzy-P>7wke@s0(x9F2Y5+C>QNwT&#<8@h-t7x+ItEQe3J_bLlR_Wx6bv%ay{F z(v`}U+LgwY)|Jkc-j%_X(Ur-S*_FkW)s@Ya-Ic?Y)8*sJ<;v~KEV~jP%8RLx!#zbS1G1-`6Of{w%(~TL%Ok96~;|6UIs7lyTZPW1Kb48Rv})#zo_jaoM$cw#&?o*B=L7sgBDmGRnmW4txq8Sjk`#z*6m@!9xd zd^Nrq-;E!}Pve*I+xTPrH6oZ1%}8csGm072jAlkRW0*0`AT!tuF+zzsncd7`<}`Dexy?LgUNfJW-z;DjGz*!9%_3$|vzS@jEMb;3OPQt3 zGGvGAS>3E*)--FGwaq$aU9+B9-)vwuG#i(V0JV+nVropW>>SD+1>16_B4B$z0E#mU$dVXZuU0^m;=p0 z=3sM(In*3x4mU@bBh69fXmgA?)*NS!Hz$}A%}M5DbBa0DoMuipXP7h1S>|kWjyczy zXU;blm}XYMx-m

[GIT binary patch payload elided: base85-encoded contents of the binary legacy-storage test fixture added by this patch; not human-readable]
z@xGZ!O*{0)cA>;!Nj6W4X?-}Ul@co?E!rn0KLSqr)Bg~(nj$Ig^+wQYV&SCRCKgFv z6ir%8j(heUAX@dt8E-_={tqXL{?~~{B>iY&@_ld3E!t}!XF=|%2H(Mb?DTkOUr((yYy{8v-&VE=@QZLf?sj97K5ddq^8c{RLuV#jOsbfKOU-Xo{&xwNlWvNcYu)I=2|pz+O1g9OZJqE} z;@sqw-pSF`aZ;rvmQGqEl$e~ljmAm1J~`t*&Gm0h*|$yN_~cGHBk90@S}JjN((1n# z{$EMOf$NOpGlDCN~8FYKS)DA@;^7g?HYfh=E7`8~-V zONX9HcI*9Fi(E@G)6V4slWpGQ!9CZq+-O|k@BjbmfB&zvXyT!yF7yAD=K0@TDyNgL lkmO5cd-BSExm41&>C^k)FP5w>mOh=jw*U7lrf=Wk{}(x7+h70y literal 0 HcmV?d00001 diff --git a/pandas/io/tests/generate_legacy_storage_files.py b/pandas/io/tests/generate_legacy_storage_files.py index 25fd86d899c08..d0365cb2c30b3 100644 --- a/pandas/io/tests/generate_legacy_storage_files.py +++ b/pandas/io/tests/generate_legacy_storage_files.py @@ -5,7 +5,7 @@ SparseSeries, SparseDataFrame, Index, MultiIndex, bdate_range, to_msgpack, date_range, period_range, - Timestamp, Categorical, Period) + Timestamp, NaT, Categorical, Period) from pandas.compat import u import os import sys @@ -140,6 +140,13 @@ def create_data(): int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) + timestamp = dict(normal=Timestamp('2011-01-01'), + nat=NaT, + tz=Timestamp('2011-01-01', tz='US/Eastern'), + freq=Timestamp('2011-01-01', freq='D'), + both=Timestamp('2011-01-01', tz='Asia/Tokyo', + freq='M')) + return dict(series=series, frame=frame, panel=panel, @@ -149,7 +156,8 @@ def create_data(): sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), - cat=cat) + cat=cat, + timestamp=timestamp) def create_pickle_data(): diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index ad7d6c3c9f94f..0a491a69af8e2 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -8,7 +8,7 @@ from distutils.version import LooseVersion from pandas import compat -from pandas.compat import u +from pandas.compat import u, PY3 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, date_range, period_range, Index, Categorical) from pandas.core.common import PerformanceWarning @@ -58,6 +58,19 @@ def check_arbitrary(a, b): assert_series_equal(a, b) elif isinstance(a, Index): assert_index_equal(a, b) + elif isinstance(a, Categorical): + # Temp, + # Categorical.categories is changed from str to bytes in PY3 + # maybe the same as GH 13591 + if PY3 and b.categories.inferred_type == 'string': + pass + else: + tm.assert_categorical_equal(a, b) + elif a is NaT: + assert b is NaT + elif isinstance(a, Timestamp): + assert a == b + assert a.freq == b.freq else: assert(a == b) @@ -815,8 +828,8 @@ def check_min_structure(self, data): for typ, v in self.minimum_structure.items(): assert typ in data, '"{0}" not found in unpacked data'.format(typ) for kind in v: - assert kind in data[ - typ], '"{0}" not found in data["{1}"]'.format(kind, typ) + msg = '"{0}" not found in data["{1}"]'.format(kind, typ) + assert kind in data[typ], msg def compare(self, vf, version): # GH12277 encoding default used to be latin-1, now utf-8 @@ -839,8 +852,8 @@ def compare(self, vf, version): # use a specific comparator # if available - comparator = getattr( - self, "compare_{typ}_{dt}".format(typ=typ, dt=dt), None) + comp_method = "compare_{typ}_{dt}".format(typ=typ, dt=dt) + comparator = getattr(self, comp_method, None) if comparator is not None: comparator(result, expected, typ, version) else: @@ -872,9 +885,8 @@ def read_msgpacks(self, version): n = 0 for f in os.listdir(pth): # GH12142 0.17 files packed in P2 can't be read in P3 - if (compat.PY3 and - 
version.startswith('0.17.') and - f.split('.')[-4][-1] == '2'): + if (compat.PY3 and version.startswith('0.17.') and + f.split('.')[-4][-1] == '2'): continue vf = os.path.join(pth, f) try: diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index e337ad4dcfed2..55c14fee9e3ed 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -46,6 +46,12 @@ def compare_element(self, result, expected, typ, version=None): if typ.startswith('sp_'): comparator = getattr(tm, "assert_%s_equal" % typ) comparator(result, expected, exact_indices=False) + elif typ == 'timestamp': + if expected is pd.NaT: + assert result is pd.NaT + else: + tm.assert_equal(result, expected) + tm.assert_equal(result.freq, expected.freq) else: comparator = getattr(tm, "assert_%s_equal" % typ, tm.assert_almost_equal) diff --git a/pandas/lib.pxd b/pandas/lib.pxd index 36c91faa00036..554b0248e97ea 100644 --- a/pandas/lib.pxd +++ b/pandas/lib.pxd @@ -1,3 +1,4 @@ # prototypes for sharing cdef bint is_null_datetimelike(v) +cpdef bint is_period(val) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 262e036ff44f1..234ac7ea2c60c 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -33,7 +33,7 @@ def is_bool(object obj): def is_complex(object obj): return util.is_complex_object(obj) -def is_period(object val): +cpdef bint is_period(object val): """ Return a boolean if this is a Period object """ return util.is_period_object(val) @@ -538,9 +538,6 @@ def is_time_array(ndarray[object] values): return False return True -def is_period(object o): - from pandas import Period - return isinstance(o,Period) def is_period_array(ndarray[object] values): cdef Py_ssize_t i, n = len(values) diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index aca0d0dbc107b..af2e295ae0cfc 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -24,7 +24,7 @@ cimport cython from datetime cimport * cimport util cimport lib -from lib cimport is_null_datetimelike +from lib cimport is_null_datetimelike, is_period import lib from pandas import tslib from tslib import Timedelta, Timestamp, iNaT, NaT @@ -484,8 +484,11 @@ def extract_freq(ndarray[object] values): for i in range(n): p = values[i] + try: - return p.freq + # now Timestamp / NaT has freq attr + if is_period(p): + return p.freq except AttributeError: pass diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b86b248ead290..a6246790f83cb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -965,7 +965,7 @@ def test_indexing_with_datetime_tz(self): # indexing - fast_xs df = DataFrame({'a': date_range('2014-01-01', periods=10, tz='UTC')}) result = df.iloc[5] - expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', offset='D') + expected = Timestamp('2014-01-06 00:00:00+0000', tz='UTC', freq='D') self.assertEqual(result, expected) result = df.loc[5] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a80a3af56b18f..c632704b7c5eb 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -426,10 +426,10 @@ def test_constructor_with_datetime_tz(self): # indexing result = s.iloc[0] self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', offset='D')) + tz='US/Eastern', freq='D')) result = s[0] self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', - tz='US/Eastern', offset='D')) + 
tz='US/Eastern', freq='D')) result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index c4ccef13f2844..1b1db90ea713d 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2365,7 +2365,7 @@ def test_reset_index_datetime(self): 'a': np.arange(6, dtype='int64')}, columns=['level_0', 'level_1', 'a']) expected['level_1'] = expected['level_1'].apply( - lambda d: pd.Timestamp(d, offset='D', tz=tz)) + lambda d: pd.Timestamp(d, freq='D', tz=tz)) assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 83cb768b37aaa..9b36bc5907066 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -558,7 +558,7 @@ def _generate(cls, start, end, periods, name, offset, @property def _box_func(self): - return lambda x: Timestamp(x, offset=self.offset, tz=self.tz) + return lambda x: Timestamp(x, freq=self.offset, tz=self.tz) def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ @@ -1199,8 +1199,9 @@ def __iter__(self): for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, l) - converted = tslib.ints_to_pydatetime( - data[start_i:end_i], tz=self.tz, offset=self.offset, box=True) + converted = tslib.ints_to_pydatetime(data[start_i:end_i], + tz=self.tz, freq=self.freq, + box=True) for v in converted: yield v diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 360944e355b4d..17b6dd12a5c02 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -124,10 +124,11 @@ def test_minmax(self): def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') - self.assertEqual(np.min(dr), Timestamp( - '2016-01-15 00:00:00', offset='D')) - self.assertEqual(np.max(dr), Timestamp( - '2016-01-20 00:00:00', offset='D')) + + self.assertEqual(np.min(dr), + Timestamp('2016-01-15 00:00:00', freq='D')) + self.assertEqual(np.max(dr), + Timestamp('2016-01-20 00:00:00', freq='D')) errmsg = "the 'out' parameter is not supported" tm.assertRaisesRegexp(ValueError, errmsg, np.min, dr, out=0) @@ -148,11 +149,11 @@ def test_round(self): elt = rng[1] expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 01:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 02:00:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 01:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 02:00:00', tz=tz, freq='30T'), ]) expected_elt = expected_rng[1] @@ -175,10 +176,10 @@ def test_repeat(self): freq='30Min', tz=tz) expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), ]) tm.assert_index_equal(rng.repeat(reps), 
expected_rng) @@ -192,10 +193,10 @@ def test_numpy_repeat(self): freq='30Min', tz=tz) expected_rng = DatetimeIndex([ - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:00:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), - Timestamp('2016-01-01 00:30:00', tz=tz, offset='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:00:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), + Timestamp('2016-01-01 00:30:00', tz=tz, freq='30T'), ]) tm.assert_index_equal(np.repeat(rng, reps), expected_rng) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index b0caa1f6a77cb..e594d31e57296 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3884,36 +3884,36 @@ def test_datetimeindex_accessors(self): self.assertEqual(dti.is_month_start[0], 1) tests = [ - (Timestamp('2013-06-01', offset='M').is_month_start, 1), - (Timestamp('2013-06-01', offset='BM').is_month_start, 0), - (Timestamp('2013-06-03', offset='M').is_month_start, 0), - (Timestamp('2013-06-03', offset='BM').is_month_start, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_month_end, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_quarter_end, 1), - (Timestamp('2013-02-28', offset='Q-FEB').is_year_end, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_month_start, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_quarter_start, 1), - (Timestamp('2013-03-01', offset='Q-FEB').is_year_start, 1), - (Timestamp('2013-03-31', offset='QS-FEB').is_month_end, 1), - (Timestamp('2013-03-31', offset='QS-FEB').is_quarter_end, 0), - (Timestamp('2013-03-31', offset='QS-FEB').is_year_end, 0), - (Timestamp('2013-02-01', offset='QS-FEB').is_month_start, 1), - (Timestamp('2013-02-01', offset='QS-FEB').is_quarter_start, 1), - (Timestamp('2013-02-01', offset='QS-FEB').is_year_start, 1), - (Timestamp('2013-06-30', offset='BQ').is_month_end, 0), - (Timestamp('2013-06-30', offset='BQ').is_quarter_end, 0), - (Timestamp('2013-06-30', offset='BQ').is_year_end, 0), - (Timestamp('2013-06-28', offset='BQ').is_month_end, 1), - (Timestamp('2013-06-28', offset='BQ').is_quarter_end, 1), - (Timestamp('2013-06-28', offset='BQ').is_year_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_month_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_quarter_end, 0), - (Timestamp('2013-06-30', offset='BQS-APR').is_year_end, 0), - (Timestamp('2013-06-28', offset='BQS-APR').is_month_end, 1), - (Timestamp('2013-06-28', offset='BQS-APR').is_quarter_end, 1), - (Timestamp('2013-03-29', offset='BQS-APR').is_year_end, 1), - (Timestamp('2013-11-01', offset='AS-NOV').is_year_start, 1), - (Timestamp('2013-10-31', offset='AS-NOV').is_year_end, 1), + (Timestamp('2013-06-01', freq='M').is_month_start, 1), + (Timestamp('2013-06-01', freq='BM').is_month_start, 0), + (Timestamp('2013-06-03', freq='M').is_month_start, 0), + (Timestamp('2013-06-03', freq='BM').is_month_start, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1), + (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1), + (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1), + (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0), + 
(Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0), + (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1), + (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1), + (Timestamp('2013-06-30', freq='BQ').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQ').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1), + (Timestamp('2013-06-28', freq='BQ').is_year_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0), + (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0), + (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1), + (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1), + (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1), + (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1), + (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1), (Timestamp('2012-02-01').days_in_month, 29), (Timestamp('2013-02-01').days_in_month, 28)] diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index c6436163b9edb..ce88edcf4249b 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -255,6 +255,21 @@ def test_constructor_keyword(self): hour=1, minute=2, second=3, microsecond=999999)), repr(Timestamp('2015-11-12 01:02:03.999999'))) + def test_constructor_offset_depr(self): + # GH 12160 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp('2011-01-01', offset='D') + self.assertEqual(ts.freq, 'D') + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + self.assertEqual(ts.offset, 'D') + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp('2011-01-01', offset='D', freq='D') + def test_conversion(self): # GH 9255 ts = Timestamp('2000-01-01') @@ -312,13 +327,13 @@ def test_repr(self): self.assertNotIn(freq_repr, repr(date_tz)) self.assertEqual(date_tz, eval(repr(date_tz))) - date_freq = Timestamp(date, offset=freq) + date_freq = Timestamp(date, freq=freq) self.assertIn(date, repr(date_freq)) self.assertNotIn(tz_repr, repr(date_freq)) self.assertIn(freq_repr, repr(date_freq)) self.assertEqual(date_freq, eval(repr(date_freq))) - date_tz_freq = Timestamp(date, tz=tz, offset=freq) + date_tz_freq = Timestamp(date, tz=tz, freq=freq) self.assertIn(date, repr(date_tz_freq)) self.assertIn(tz_repr, repr(date_tz_freq)) self.assertIn(freq_repr, repr(date_tz_freq)) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 0db4282808a26..e45523be738df 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -63,6 +63,7 @@ from pandas.compat import parse_date, string_types, iteritems, StringIO, callabl import operator import collections +import warnings # initialize numpy import_array() @@ -86,23 +87,24 @@ try: except NameError: # py3 basestring = str -cdef inline object create_timestamp_from_ts(int64_t value, pandas_datetimestruct dts, object tz, object offset): +cdef inline object create_timestamp_from_ts(int64_t value, pandas_datetimestruct dts, + object tz, object freq): cdef _Timestamp ts_base ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - ts_base.value = value - ts_base.offset = offset + ts_base.freq = freq ts_base.nanosecond = dts.ps / 1000 return ts_base 
-cdef inline object create_datetime_from_ts(int64_t value, pandas_datetimestruct dts, object tz, object offset): +cdef inline object create_datetime_from_ts(int64_t value, pandas_datetimestruct dts, + object tz, object freq): return datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) -def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): +def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): # convert an i8 repr to an ndarray of datetimes or Timestamp (if box == True) cdef: @@ -113,9 +115,9 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): ndarray[object] result = np.empty(n, dtype=object) object (*func_create)(int64_t, pandas_datetimestruct, object, object) - if box and util.is_string_object(offset): + if box and util.is_string_object(freq): from pandas.tseries.frequencies import to_offset - offset = to_offset(offset) + freq = to_offset(freq) if box: func_create = create_timestamp_from_ts @@ -130,7 +132,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, tz, offset) + result[i] = func_create(value, dts, tz, freq) elif _is_tzlocal(tz) or _is_fixed_offset(tz): for i in range(n): value = arr[i] @@ -138,7 +140,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - dt = create_datetime_from_ts(value, dts, tz, offset) + dt = create_datetime_from_ts(value, dts, tz, freq) dt = dt + tz.utcoffset(dt) if box: dt = Timestamp(dt) @@ -163,7 +165,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): new_tz = tz pandas_datetime_to_datetimestruct(value + deltas[pos], PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, new_tz, offset) + result[i] = func_create(value, dts, new_tz, freq) else: for i in range(n): @@ -172,7 +174,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, offset=None, box=False): result[i] = NaT else: pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts) - result[i] = func_create(value, dts, None, offset) + result[i] = func_create(value, dts, None, freq) return result @@ -259,10 +261,10 @@ class Timestamp(_Timestamp): """ @classmethod - def fromordinal(cls, ordinal, offset=None, tz=None): + def fromordinal(cls, ordinal, freq=None, tz=None, offset=None): """ passed an ordinal, translate and convert to a ts note: by definition there cannot be any tz info on the ordinal itself """ - return cls(datetime.fromordinal(ordinal),offset=offset,tz=tz) + return cls(datetime.fromordinal(ordinal), freq=freq, tz=tz, offset=offset) @classmethod def now(cls, tz=None): @@ -309,11 +311,12 @@ class Timestamp(_Timestamp): def combine(cls, date, time): return cls(datetime.combine(date, time)) - def __new__(cls, - object ts_input=_no_input, object offset=None, tz=None, unit=None, - year=None, month=None, day=None, - hour=None, minute=None, second=None, microsecond=None, - tzinfo=None): + def __new__(cls, object ts_input=_no_input, + object freq=None, tz=None, unit=None, + year=None, month=None, day=None, + hour=None, minute=None, second=None, microsecond=None, + tzinfo=None, + object offset=None): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. 
# @@ -338,15 +341,24 @@ class Timestamp(_Timestamp): cdef _TSObject ts cdef _Timestamp ts_base + if offset is not None: + # deprecate offset kwd in 0.19.0, GH13593 + if freq is not None: + msg = "Can only specify freq or offset, not both" + raise TypeError(msg) + warnings.warn("offset is deprecated. Use freq instead", + FutureWarning) + freq = offset + if ts_input is _no_input: # User passed keyword arguments. return Timestamp(datetime(year, month, day, hour or 0, minute or 0, second or 0, microsecond or 0, tzinfo), tz=tzinfo) - elif is_integer_object(offset): + elif is_integer_object(freq): # User passed positional arguments: # Timestamp(year, month, day[, hour[, minute[, second[, microsecond[, tzinfo]]]]]) - return Timestamp(datetime(ts_input, offset, tz, unit or 0, + return Timestamp(datetime(ts_input, freq, tz, unit or 0, year or 0, month or 0, day or 0, hour), tz=hour) ts = convert_to_tsobject(ts_input, tz, unit, 0, 0) @@ -354,9 +366,9 @@ class Timestamp(_Timestamp): if ts.value == NPY_NAT: return NaT - if util.is_string_object(offset): + if util.is_string_object(freq): from pandas.tseries.frequencies import to_offset - offset = to_offset(offset) + freq = to_offset(freq) # make datetime happy ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month, @@ -365,7 +377,7 @@ class Timestamp(_Timestamp): # fill out rest of data ts_base.value = ts.value - ts_base.offset = offset + ts_base.freq = freq ts_base.nanosecond = ts.dts.ps / 1000 return ts_base @@ -433,16 +445,18 @@ class Timestamp(_Timestamp): return self.tzinfo @property - def freq(self): - return self.offset + def offset(self): + warnings.warn(".offset is deprecated. Use .freq instead", + FutureWarning) + return self.freq def __setstate__(self, state): self.value = state[0] - self.offset = state[1] + self.freq = state[1] self.tzinfo = state[2] def __reduce__(self): - object_state = self.value, self.offset, self.tzinfo + object_state = self.value, self.freq, self.tzinfo return (Timestamp, object_state) def to_period(self, freq=None): @@ -491,7 +505,7 @@ class Timestamp(_Timestamp): @property def freqstr(self): - return getattr(self.offset, 'freqstr', self.offset) + return getattr(self.freq, 'freqstr', self.freq) @property def is_month_start(self): @@ -602,7 +616,7 @@ class Timestamp(_Timestamp): def replace(self, **kwds): return Timestamp(datetime.replace(self, **kwds), - offset=self.offset) + freq=self.freq) def to_pydatetime(self, warn=True): """ @@ -911,16 +925,6 @@ cdef inline bint _is_multiple(int64_t us, int64_t mult): return us % mult == 0 -def apply_offset(ndarray[object] values, object offset): - cdef: - Py_ssize_t i, n = len(values) - ndarray[int64_t] new_values - object boxed - - result = np.empty(n, dtype='M8[ns]') - new_values = result.view('i8') - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs @@ -955,7 +959,7 @@ cdef str _NDIM_STRING = "ndim" cdef class _Timestamp(datetime): cdef readonly: int64_t value, nanosecond - object offset # frequency reference + object freq # frequency reference def __hash__(_Timestamp self): if self.nanosecond: @@ -1029,9 +1033,9 @@ cdef class _Timestamp(datetime): pass tz = ", tz='{0}'".format(zone) if zone is not None else "" - offset = ", offset='{0}'".format(self.offset.freqstr) if self.offset is not None else "" + freq = ", freq='{0}'".format(self.freq.freqstr) if self.freq is not None else "" - return "Timestamp('{stamp}'{tz}{offset})".format(stamp=stamp, tz=tz, offset=offset) + return 
"Timestamp('{stamp}'{tz}{freq})".format(stamp=stamp, tz=tz, freq=freq) cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, int op) except -1: @@ -1083,17 +1087,17 @@ cdef class _Timestamp(datetime): if is_timedelta64_object(other): other_int = other.astype('timedelta64[ns]').view('i8') - return Timestamp(self.value + other_int, tz=self.tzinfo, offset=self.offset) + return Timestamp(self.value + other_int, tz=self.tzinfo, freq=self.freq) elif is_integer_object(other): - if self.offset is None: + if self.freq is None: raise ValueError("Cannot add integral value to Timestamp " - "without offset.") - return Timestamp((self.offset * other).apply(self), offset=self.offset) + "without freq.") + return Timestamp((self.freq * other).apply(self), freq=self.freq) elif isinstance(other, timedelta) or hasattr(other, 'delta'): nanos = _delta_to_nanoseconds(other) - result = Timestamp(self.value + nanos, tz=self.tzinfo, offset=self.offset) + result = Timestamp(self.value + nanos, tz=self.tzinfo, freq=self.freq) if getattr(other, 'normalize', False): result = Timestamp(normalize_date(result)) return result From c989570319464fe3b7227e69db9f27601ab7a66d Mon Sep 17 00:00:00 2001 From: gfyoung Date: Fri, 10 Jun 2016 03:29:17 +0100 Subject: [PATCH 13/44] CLN: Remove the engine parameter in CSVFormatter and to_csv closes #13419 xref #11274 --- doc/source/whatsnew/v0.19.0.txt | 9 ++ pandas/core/frame.py | 1 - pandas/formats/format.py | 133 ++-------------------------- pandas/tests/formats/test_format.py | 6 -- pandas/tests/frame/test_to_csv.py | 109 +++++++++-------------- 5 files changed, 56 insertions(+), 202 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index a6c3c0c5d7f79..3e05003389b54 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -436,6 +436,15 @@ Deprecations - top-level ``pd.ordered_merge()`` has been renamed to ``pd.merge_ordered()`` and the original name will be removed in a future version (:issue:`13358`) - ``Timestamp.offset`` property (and named arg in the constructor), has been deprecated in favor of ``freq`` (:issue:`12160`) + +.. _whatsnew_0190.prior_deprecations: + +Removal of prior version deprecations/changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`) + + .. 
_whatsnew_0190.performance: Performance Improvements diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e804271d8afa9..356abc67b168a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1342,7 +1342,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=columns, header=header, index=index, index_label=index_label, mode=mode, chunksize=chunksize, quotechar=quotechar, - engine=kwds.get("engine"), tupleize_cols=tupleize_cols, date_format=date_format, doublequote=doublequote, diff --git a/pandas/formats/format.py b/pandas/formats/format.py index 0c6a15db4ccfe..cc46ed57aeff0 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -30,7 +30,6 @@ import itertools import csv -import warnings common_docstring = """ Parameters @@ -1326,15 +1325,10 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, mode='w', nanRep=None, encoding=None, compression=None, quoting=None, line_terminator='\n', - chunksize=None, engine=None, tupleize_cols=False, - quotechar='"', date_format=None, doublequote=True, - escapechar=None, decimal='.'): - - if engine is not None: - warnings.warn("'engine' keyword is deprecated and will be " - "removed in a future version", FutureWarning, - stacklevel=3) - self.engine = engine # remove for 0.18 + chunksize=None, tupleize_cols=False, quotechar='"', + date_format=None, doublequote=True, escapechar=None, + decimal='.'): + self.obj = obj if path_or_buf is None: @@ -1369,11 +1363,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.date_format = date_format - # GH3457 - if not self.obj.columns.is_unique and engine == 'python': - raise NotImplementedError("columns.is_unique == False not " - "supported with engine='python'") - self.tupleize_cols = tupleize_cols self.has_mi_columns = (isinstance(obj.columns, MultiIndex) and not self.tupleize_cols) @@ -1430,108 +1419,6 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', if not index: self.nlevels = 0 - # original python implem. 
of df.to_csv - # invoked by df.to_csv(engine=python) - def _helper_csv(self, writer, na_rep=None, cols=None, header=True, - index=True, index_label=None, float_format=None, - date_format=None): - if cols is None: - cols = self.columns - - has_aliases = isinstance(header, (tuple, list, np.ndarray, Index)) - if has_aliases or header: - if index: - # should write something for index label - if index_label is not False: - if index_label is None: - if isinstance(self.obj.index, MultiIndex): - index_label = [] - for i, name in enumerate(self.obj.index.names): - if name is None: - name = '' - index_label.append(name) - else: - index_label = self.obj.index.name - if index_label is None: - index_label = [''] - else: - index_label = [index_label] - elif not isinstance(index_label, - (list, tuple, np.ndarray, Index)): - # given a string for a DF with Index - index_label = [index_label] - - encoded_labels = list(index_label) - else: - encoded_labels = [] - - if has_aliases: - if len(header) != len(cols): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(cols), len(header)))) - else: - write_cols = header - else: - write_cols = cols - encoded_cols = list(write_cols) - - writer.writerow(encoded_labels + encoded_cols) - else: - encoded_cols = list(cols) - writer.writerow(encoded_cols) - - if date_format is None: - date_formatter = lambda x: Timestamp(x)._repr_base - else: - - def strftime_with_nulls(x): - x = Timestamp(x) - if notnull(x): - return x.strftime(date_format) - - date_formatter = lambda x: strftime_with_nulls(x) - - data_index = self.obj.index - - if isinstance(self.obj.index, PeriodIndex): - data_index = self.obj.index.to_timestamp() - - if isinstance(data_index, DatetimeIndex) and date_format is not None: - data_index = Index([date_formatter(x) for x in data_index]) - - values = self.obj.copy() - values.index = data_index - values.columns = values.columns.to_native_types( - na_rep=na_rep, float_format=float_format, date_format=date_format, - quoting=self.quoting) - values = values[cols] - - series = {} - for k, v in compat.iteritems(values._series): - series[k] = v._values - - nlevels = getattr(data_index, 'nlevels', 1) - for j, idx in enumerate(data_index): - row_fields = [] - if index: - if nlevels == 1: - row_fields = [idx] - else: # handle MultiIndex - row_fields = list(idx) - for i, col in enumerate(cols): - val = series[col][j] - if lib.checknull(val): - val = na_rep - - if float_format is not None and com.is_float(val): - val = float_format % val - elif isinstance(val, (np.datetime64, Timestamp)): - val = date_formatter(val) - - row_fields.append(val) - - writer.writerow(row_fields) - def save(self): # create the writer & save if hasattr(self.path_or_buf, 'write'): @@ -1555,17 +1442,7 @@ def save(self): else: self.writer = csv.writer(f, **writer_kwargs) - if self.engine == 'python': - # to be removed in 0.13 - self._helper_csv(self.writer, na_rep=self.na_rep, - float_format=self.float_format, - cols=self.cols, header=self.header, - index=self.index, - index_label=self.index_label, - date_format=self.date_format) - - else: - self._save() + self._save() finally: if close: diff --git a/pandas/tests/formats/test_format.py b/pandas/tests/formats/test_format.py index c5e9c258b293a..7a282e7eb14ad 100644 --- a/pandas/tests/formats/test_format.py +++ b/pandas/tests/formats/test_format.py @@ -3329,12 +3329,6 @@ def test_to_csv_date_format(self): self.assertEqual(df_sec_grouped.mean().to_csv(date_format='%Y-%m-%d'), expected_ymd_sec) - # deprecation GH11274 - def 
test_to_csv_engine_kw_deprecation(self): - with tm.assert_produces_warning(FutureWarning): - df = DataFrame({'col1': [1], 'col2': ['a'], 'col3': [10.1]}) - df.to_csv(engine='python') - def test_period(self): # GH 12615 df = pd.DataFrame({'A': pd.period_range('2013-01', diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index c23702ef46ad2..55c7ebb183ce5 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -10,7 +10,7 @@ from pandas.compat import (lmap, range, lrange, StringIO, u) from pandas.parser import CParserError from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp, - date_range, read_csv, compat) + date_range, read_csv, compat, to_datetime) import pandas as pd from pandas.util.testing import (assert_almost_equal, @@ -139,7 +139,7 @@ def test_to_csv_from_csv5(self): self.tzframe.to_csv(path) result = pd.read_csv(path, index_col=0, parse_dates=['A']) - converter = lambda c: pd.to_datetime(result[c]).dt.tz_localize( + converter = lambda c: to_datetime(result[c]).dt.tz_localize( 'UTC').dt.tz_convert(self.tzframe[c].dt.tz) result['B'] = converter('B') result['C'] = converter('C') @@ -162,15 +162,6 @@ def test_to_csv_cols_reordering(self): assert_frame_equal(df[cols], rs_c, check_names=False) - def test_to_csv_legacy_raises_on_dupe_cols(self): - df = mkdf(10, 3) - df.columns = ['a', 'a', 'b'] - with ensure_clean() as path: - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): - self.assertRaises(NotImplementedError, - df.to_csv, path, engine='python') - def test_to_csv_new_dupe_cols(self): import pandas as pd @@ -712,7 +703,6 @@ def test_to_csv_dups_cols(self): cols.extend([0, 1, 2]) df.columns = cols - from pandas import to_datetime with ensure_clean() as filename: df.to_csv(filename) result = read_csv(filename, index_col=0) @@ -993,72 +983,57 @@ def test_to_csv_compression_value_error(self): filename, compression="zip") def test_to_csv_date_format(self): - from pandas import to_datetime with ensure_clean('__tmp_to_csv_date_format__') as path: - for engine in [None, 'python']: - w = FutureWarning if engine == 'python' else None - - dt_index = self.tsframe.index - datetime_frame = DataFrame( - {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index) - - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame.to_csv( - path, date_format='%Y%m%d', engine=engine) - - # Check that the data was put in the specified format - test = read_csv(path, index_col=0) - - datetime_frame_int = datetime_frame.applymap( - lambda x: int(x.strftime('%Y%m%d'))) - datetime_frame_int.index = datetime_frame_int.index.map( - lambda x: int(x.strftime('%Y%m%d'))) + dt_index = self.tsframe.index + datetime_frame = DataFrame( + {'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index) + datetime_frame.to_csv(path, date_format='%Y%m%d') - assert_frame_equal(test, datetime_frame_int) + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame.to_csv( - path, date_format='%Y-%m-%d', engine=engine) + datetime_frame_int = datetime_frame.applymap( + lambda x: int(x.strftime('%Y%m%d'))) + datetime_frame_int.index = datetime_frame_int.index.map( + lambda x: int(x.strftime('%Y%m%d'))) - # Check that the data was put in the specified format - test = read_csv(path, index_col=0) - datetime_frame_str = datetime_frame.applymap( - lambda x: x.strftime('%Y-%m-%d')) - datetime_frame_str.index = 
datetime_frame_str.index.map( - lambda x: x.strftime('%Y-%m-%d')) + assert_frame_equal(test, datetime_frame_int) - assert_frame_equal(test, datetime_frame_str) + datetime_frame.to_csv(path, date_format='%Y-%m-%d') - # Check that columns get converted - datetime_frame_columns = datetime_frame.T + # Check that the data was put in the specified format + test = read_csv(path, index_col=0) + datetime_frame_str = datetime_frame.applymap( + lambda x: x.strftime('%Y-%m-%d')) + datetime_frame_str.index = datetime_frame_str.index.map( + lambda x: x.strftime('%Y-%m-%d')) - with tm.assert_produces_warning(w, check_stacklevel=False): - datetime_frame_columns.to_csv( - path, date_format='%Y%m%d', engine=engine) + assert_frame_equal(test, datetime_frame_str) - test = read_csv(path, index_col=0) + # Check that columns get converted + datetime_frame_columns = datetime_frame.T + datetime_frame_columns.to_csv(path, date_format='%Y%m%d') - datetime_frame_columns = datetime_frame_columns.applymap( - lambda x: int(x.strftime('%Y%m%d'))) - # Columns don't get converted to ints by read_csv - datetime_frame_columns.columns = ( - datetime_frame_columns.columns - .map(lambda x: x.strftime('%Y%m%d'))) + test = read_csv(path, index_col=0) - assert_frame_equal(test, datetime_frame_columns) + datetime_frame_columns = datetime_frame_columns.applymap( + lambda x: int(x.strftime('%Y%m%d'))) + # Columns don't get converted to ints by read_csv + datetime_frame_columns.columns = ( + datetime_frame_columns.columns + .map(lambda x: x.strftime('%Y%m%d'))) - # test NaTs - nat_index = to_datetime( - ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000']) - nat_frame = DataFrame({'A': nat_index}, index=nat_index) + assert_frame_equal(test, datetime_frame_columns) - with tm.assert_produces_warning(w, check_stacklevel=False): - nat_frame.to_csv( - path, date_format='%Y-%m-%d', engine=engine) + # test NaTs + nat_index = to_datetime( + ['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000']) + nat_frame = DataFrame({'A': nat_index}, index=nat_index) + nat_frame.to_csv(path, date_format='%Y-%m-%d') - test = read_csv(path, parse_dates=[0, 1], index_col=0) + test = read_csv(path, parse_dates=[0, 1], index_col=0) - assert_frame_equal(test, nat_frame) + assert_frame_equal(test, nat_frame) def test_to_csv_with_dst_transitions(self): @@ -1077,7 +1052,7 @@ def test_to_csv_with_dst_transitions(self): # we have to reconvert the index as we # don't parse the tz's result = read_csv(path, index_col=0) - result.index = pd.to_datetime(result.index).tz_localize( + result.index = to_datetime(result.index).tz_localize( 'UTC').tz_convert('Europe/London') assert_frame_equal(result, df) @@ -1089,9 +1064,9 @@ def test_to_csv_with_dst_transitions(self): with ensure_clean('csv_date_format_with_dst') as path: df.to_csv(path, index=True) result = read_csv(path, index_col=0) - result.index = pd.to_datetime(result.index).tz_localize( + result.index = to_datetime(result.index).tz_localize( 'UTC').tz_convert('Europe/Paris') - result['idx'] = pd.to_datetime(result['idx']).astype( + result['idx'] = to_datetime(result['idx']).astype( 'datetime64[ns, Europe/Paris]') assert_frame_equal(result, df) From c2cc68d6eb233bc74c6bd032650704030c4b9a9d Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sun, 10 Jul 2016 18:02:26 -0400 Subject: [PATCH 14/44] BUG: Block/DTI doesnt handle tzlocal properly Author: sinhrks Closes #13583 from sinhrks/tzlocal and squashes the following commits: 93f59a3 [sinhrks] BUG: DTI doesnt handle tzlocal properly --- doc/source/whatsnew/v0.19.0.txt | 2 ++ 
pandas/tools/tests/test_merge.py | 12 ++++++++ pandas/tseries/tests/test_timezones.py | 40 ++++++++++++++++++++++++++ pandas/tslib.pyx | 7 +++-- 4 files changed, 58 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 3e05003389b54..70c466ed51681 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -492,6 +492,8 @@ Bug Fixes - Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`) - Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) - Bug in ``.tz_convert`` on a tz-aware ``DateTimeIndex`` that relied on index being sorted for correct results (:issue:`13306`) +- Bug in ``.tz_localize`` with ``dateutil.tz.tzlocal`` may return incorrect result (:issue:`13583`) +- Bug in ``DatetimeTZDtype`` dtype with ``dateutil.tz.tzlocal`` cannot be regarded as valid dtype (:issue:`13583`) - Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`) - Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index c8d1bae78dad3..6c448de741e0c 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1263,6 +1263,18 @@ def test_concat_tz_series_with_datetimelike(self): result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) tm.assert_series_equal(result, pd.Series(x + y, dtype='object')) + def test_concat_tz_series_tzlocal(self): + # GH 13583 + tm._skip_if_no_dateutil() + import dateutil + x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()), + pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())] + y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()), + pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())] + result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) + tm.assert_series_equal(result, pd.Series(x + y)) + self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]') + def test_concat_period_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D')) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index d68ff793c9b6a..71a041d5139a2 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -1061,6 +1061,46 @@ def test_tslib_tz_convert_dst(self): self.assert_numpy_array_equal(idx.hour, np.array([4, 4], dtype=np.int32)) + def test_tzlocal(self): + # GH 13583 + ts = Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()) + self.assertEqual(ts.tz, dateutil.tz.tzlocal()) + self.assertTrue("tz='tzlocal()')" in repr(ts)) + + tz = tslib.maybe_get_tz('tzlocal()') + self.assertEqual(tz, dateutil.tz.tzlocal()) + + # get offset using normal datetime for test + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = offset.total_seconds() * 1000000000 + self.assertEqual(ts.value + offset, Timestamp('2011-01-01').value) + + def test_tz_localize_tzlocal(self): + # GH 13583 + offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1)) + offset = int(offset.total_seconds() * 1000000000) + + dti = 
date_range(start='2001-01-01', end='2001-03-01') + dti2 = dti.tz_localize(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_localize(None) + tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8) + + def test_tz_convert_tzlocal(self): + # GH 13583 + # tz_convert doesn't affect to internal + dti = date_range(start='2001-01-01', end='2001-03-01', tz='UTC') + dti2 = dti.tz_convert(dateutil.tz.tzlocal()) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + + dti = date_range(start='2001-01-01', end='2001-03-01', + tz=dateutil.tz.tzlocal()) + dti2 = dti.tz_convert(None) + tm.assert_numpy_array_equal(dti2.asi8, dti.asi8) + class TestTimeZoneCacheKey(tm.TestCase): def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index e45523be738df..62f8b10e3eea2 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1595,7 +1595,9 @@ cpdef inline object maybe_get_tz(object tz): Otherwise, just return tz. """ if isinstance(tz, string_types): - if tz.startswith('dateutil/'): + if tz == 'tzlocal()': + tz = _dateutil_tzlocal() + elif tz.startswith('dateutil/'): zone = tz[9:] tz = _dateutil_gettz(zone) # On Python 3 on Windows, the filename is not always set correctly. @@ -3771,7 +3773,6 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return np.array([], dtype=np.int64) # Convert to UTC - if _get_zone(tz1) != 'UTC': utc_dates = np.empty(n, dtype=np.int64) if _is_tzlocal(tz1): @@ -3825,7 +3826,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): dts.min, dts.sec, dts.us, tz2) delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 result[i] = v + delta - return result + return result # Convert UTC to other timezone trans, deltas, typ = _get_dst_info(tz2) From 2e8c993d68e6edb5afaa54b0742fac8f01a04abb Mon Sep 17 00:00:00 2001 From: Sinhrks Date: Mon, 11 Jul 2016 10:37:08 +0900 Subject: [PATCH 15/44] BUG: Series contains NaT with object dtype comparison incorrect (#13592) closes #9005 --- doc/source/whatsnew/v0.19.0.txt | 2 + pandas/core/ops.py | 35 +++++---- pandas/indexes/base.py | 20 +++-- pandas/lib.pyx | 12 +-- pandas/tests/series/test_operators.py | 101 ++++++++++++++++++++++---- pandas/tseries/base.py | 2 +- pandas/tseries/tdi.py | 2 +- pandas/tseries/tests/test_base.py | 78 ++++++++++++++++++++ 8 files changed, 208 insertions(+), 44 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 70c466ed51681..706ec903daaa2 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -527,6 +527,8 @@ Bug Fixes - Bug in extension dtype creation where the created types were not is/identical (:issue:`13285`) - Bug in ``NaT`` - ``Period`` raises ``AttributeError`` (:issue:`13071`) +- Bug in ``Series`` comparison may output incorrect result if rhs contains ``NaT`` (:issue:`9005`) +- Bug in ``Series`` and ``Index`` comparison may output incorrect result if it contains ``NaT`` with ``object`` dtype (:issue:`13592`) - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`) - Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`) - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 34ab3ae6863b5..0af7b6d80ce0e 100644 --- 
a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -27,7 +27,8 @@ is_integer_dtype, is_categorical_dtype, is_object_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_datetime64tz_dtype, - is_bool_dtype, PerformanceWarning, ABCSeries) + is_bool_dtype, PerformanceWarning, + ABCSeries, ABCIndex) # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -664,6 +665,22 @@ def wrapper(left, right, name=name, na_op=na_op): return wrapper +def _comp_method_OBJECT_ARRAY(op, x, y): + if isinstance(y, list): + y = lib.list_to_object_array(y) + if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): + if not is_object_dtype(y.dtype): + y = y.astype(np.object_) + + if isinstance(y, (ABCSeries, ABCIndex)): + y = y.values + + result = lib.vec_compare(x, y, op) + else: + result = lib.scalar_compare(x, y, op) + return result + + def _comp_method_SERIES(op, name, str_rep, masker=False): """ Wrapper function for Series arithmetic operations, to avoid @@ -680,16 +697,7 @@ def na_op(x, y): return op(y, x) if is_object_dtype(x.dtype): - if isinstance(y, list): - y = lib.list_to_object_array(y) - - if isinstance(y, (np.ndarray, ABCSeries)): - if not is_object_dtype(y.dtype): - result = lib.vec_compare(x, y.astype(np.object_), op) - else: - result = lib.vec_compare(x, y, op) - else: - result = lib.scalar_compare(x, y, op) + result = _comp_method_OBJECT_ARRAY(op, x, y) else: # we want to compare like types @@ -713,12 +721,11 @@ def na_op(x, y): (not isscalar(y) and needs_i8_conversion(y))): if isscalar(y): + mask = isnull(x) y = _index.convert_scalar(x, _values_from_object(y)) else: + mask = isnull(x) | isnull(y) y = y.view('i8') - - mask = isnull(x) - x = x.view('i8') try: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index ad27010714f63..e697dc63c2cdb 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -31,6 +31,7 @@ is_list_like, is_bool_dtype, is_integer_dtype, is_float_dtype, needs_i8_conversion) +from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin from pandas.core.config import get_option @@ -3182,8 +3183,11 @@ def _evaluate_compare(self, other): if needs_i8_conversion(self) and needs_i8_conversion(other): return self._evaluate_compare(other, op) - func = getattr(self.values, op) - result = func(np.asarray(other)) + if is_object_dtype(self) and self.nlevels == 1: + # don't pass MultiIndex + result = _comp_method_OBJECT_ARRAY(op, self.values, other) + else: + result = op(self.values, np.asarray(other)) # technically we could support bool dtyped Index # for now just return the indexing array directly @@ -3196,12 +3200,12 @@ def _evaluate_compare(self, other): return _evaluate_compare - cls.__eq__ = _make_compare('__eq__') - cls.__ne__ = _make_compare('__ne__') - cls.__lt__ = _make_compare('__lt__') - cls.__gt__ = _make_compare('__gt__') - cls.__le__ = _make_compare('__le__') - cls.__ge__ = _make_compare('__ge__') + cls.__eq__ = _make_compare(operator.eq) + cls.__ne__ = _make_compare(operator.ne) + cls.__lt__ = _make_compare(operator.lt) + cls.__gt__ = _make_compare(operator.gt) + cls.__le__ = _make_compare(operator.le) + cls.__ge__ = _make_compare(operator.ge) @classmethod def _add_numericlike_set_methods_disabled(cls): diff --git a/pandas/lib.pyx b/pandas/lib.pyx index a9c7f93097f1b..7cbb502315b64 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -768,12 +768,12 @@ def scalar_compare(ndarray[object] values, object val, object 
op): raise ValueError('Unrecognized operator') result = np.empty(n, dtype=bool).view(np.uint8) - isnull_val = _checknull(val) + isnull_val = checknull(val) if flag == cpython.Py_NE: for i in range(n): x = values[i] - if _checknull(x): + if checknull(x): result[i] = True elif isnull_val: result[i] = True @@ -785,7 +785,7 @@ def scalar_compare(ndarray[object] values, object val, object op): elif flag == cpython.Py_EQ: for i in range(n): x = values[i] - if _checknull(x): + if checknull(x): result[i] = False elif isnull_val: result[i] = False @@ -798,7 +798,7 @@ def scalar_compare(ndarray[object] values, object val, object op): else: for i in range(n): x = values[i] - if _checknull(x): + if checknull(x): result[i] = False elif isnull_val: result[i] = False @@ -864,7 +864,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): x = left[i] y = right[i] - if _checknull(x) or _checknull(y): + if checknull(x) or checknull(y): result[i] = True else: result[i] = cpython.PyObject_RichCompareBool(x, y, flag) @@ -873,7 +873,7 @@ def vec_compare(ndarray[object] left, ndarray[object] right, object op): x = left[i] y = right[i] - if _checknull(x) or _checknull(y): + if checknull(x) or checknull(y): result[i] = False else: result[i] = cpython.PyObject_RichCompareBool(x, y, flag) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 6ab382beb7973..9c401e9ce6da8 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -980,24 +980,97 @@ def test_comparison_invalid(self): self.assertRaises(TypeError, lambda: x <= y) def test_more_na_comparisons(self): - left = Series(['a', np.nan, 'c']) - right = Series(['a', np.nan, 'd']) + for dtype in [None, object]: + left = Series(['a', np.nan, 'c'], dtype=dtype) + right = Series(['a', np.nan, 'd'], dtype=dtype) - result = left == right - expected = Series([True, False, False]) - assert_series_equal(result, expected) + result = left == right + expected = Series([True, False, False]) + assert_series_equal(result, expected) - result = left != right - expected = Series([False, True, True]) - assert_series_equal(result, expected) + result = left != right + expected = Series([False, True, True]) + assert_series_equal(result, expected) - result = left == np.nan - expected = Series([False, False, False]) - assert_series_equal(result, expected) + result = left == np.nan + expected = Series([False, False, False]) + assert_series_equal(result, expected) - result = left != np.nan - expected = Series([True, True, True]) - assert_series_equal(result, expected) + result = left != np.nan + expected = Series([True, True, True]) + assert_series_equal(result, expected) + + def test_nat_comparisons(self): + data = [([pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')], + [pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]), + + ([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')], + [pd.NaT, pd.NaT, pd.Timedelta('3 days')]), + + ([pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')], + [pd.NaT, pd.NaT, pd.Period('2011-03', freq='M')])] + + # add lhs / rhs switched data + data = data + [(r, l) for l, r in data] + + for l, r in data: + for dtype in [None, object]: + left = Series(l, dtype=dtype) + + # Series, Index + for right in [Series(r, dtype=dtype), Index(r, dtype=dtype)]: + expected = Series([False, False, True]) + assert_series_equal(left == right, expected) + + expected = Series([True, True, False]) + assert_series_equal(left != right, expected) 
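+
+                    # NaT behaves like NaN in comparisons: any pair
+                    # involving NaT is False for <, >, <= and >=, so
+                    # only the equal third elements satisfy >= and <=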
+ + expected = Series([False, False, False]) + assert_series_equal(left < right, expected) + + expected = Series([False, False, False]) + assert_series_equal(left > right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left >= right, expected) + + expected = Series([False, False, True]) + assert_series_equal(left <= right, expected) + + def test_nat_comparisons_scalar(self): + data = [[pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')], + + [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')], + + [pd.Period('2011-01', freq='M'), pd.NaT, + pd.Period('2011-03', freq='M')]] + + for l in data: + for dtype in [None, object]: + left = Series(l, dtype=dtype) + + expected = Series([False, False, False]) + assert_series_equal(left == pd.NaT, expected) + assert_series_equal(pd.NaT == left, expected) + + expected = Series([True, True, True]) + assert_series_equal(left != pd.NaT, expected) + assert_series_equal(pd.NaT != left, expected) + + expected = Series([False, False, False]) + assert_series_equal(left < pd.NaT, expected) + assert_series_equal(pd.NaT > left, expected) + assert_series_equal(left <= pd.NaT, expected) + assert_series_equal(pd.NaT >= left, expected) + + assert_series_equal(left > pd.NaT, expected) + assert_series_equal(pd.NaT < left, expected) + assert_series_equal(left >= pd.NaT, expected) + assert_series_equal(pd.NaT <= left, expected) def test_comparison_different_length(self): a = Series(['a', 'b', 'c']) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 2e3d1ace9734c..4bafac873ea09 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -142,7 +142,7 @@ def _evaluate_compare(self, other, op): other = type(self)(other) # compare - result = getattr(self.asi8, op)(other.asi8) + result = op(self.asi8, other.asi8) # technically we could support bool dtyped Index # for now just return the indexing array directly diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 84f357481a28e..af4c46e2d16fa 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -36,7 +36,7 @@ def _td_index_cmp(opname, nat_result=False): def wrapper(self, other): func = getattr(super(TimedeltaIndex, self), opname) - if _is_convertible_to_td(other): + if _is_convertible_to_td(other) or other is tslib.NaT: other = _to_m8(other) result = func(other) if com.isnull(other): diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 17b6dd12a5c02..68cea17ba3fc9 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -458,6 +458,32 @@ def test_sub_period(self): with tm.assertRaises(TypeError): p - idx + def test_comp_nat(self): + left = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, + pd.Timestamp('2011-01-03')]) + right = pd.DatetimeIndex([pd.NaT, pd.NaT, pd.Timestamp('2011-01-03')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, 
expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + def test_value_counts_unique(self): # GH 7735 for tz in [None, 'UTC', 'Asia/Tokyo', 'US/Eastern']: @@ -1238,6 +1264,32 @@ def test_addition_ops(self): expected = Timestamp('20130102') self.assertEqual(result, expected) + def test_comp_nat(self): + left = pd.TimedeltaIndex([pd.Timedelta('1 days'), pd.NaT, + pd.Timedelta('3 days')]) + right = pd.TimedeltaIndex([pd.NaT, pd.NaT, pd.Timedelta('3 days')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + def test_value_counts_unique(self): # GH 7735 @@ -2039,6 +2091,32 @@ def test_sub_isub(self): rng -= 1 tm.assert_index_equal(rng, expected) + def test_comp_nat(self): + left = pd.PeriodIndex([pd.Period('2011-01-01'), pd.NaT, + pd.Period('2011-01-03')]) + right = pd.PeriodIndex([pd.NaT, pd.NaT, pd.Period('2011-01-03')]) + + for l, r in [(left, right), (left.asobject, right.asobject)]: + result = l == r + expected = np.array([False, False, True]) + tm.assert_numpy_array_equal(result, expected) + + result = l != r + expected = np.array([True, True, False]) + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l == pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT == r, expected) + + expected = np.array([True, True, True]) + tm.assert_numpy_array_equal(l != pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT != l, expected) + + expected = np.array([False, False, False]) + tm.assert_numpy_array_equal(l < pd.NaT, expected) + tm.assert_numpy_array_equal(pd.NaT > l, expected) + def test_value_counts_unique(self): # GH 7735 idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10) From 5605f99105b6a97fb950fbebbf83d2cac0b23ef1 Mon Sep 17 00:00:00 2001 From: Sinhrks Date: Mon, 11 Jul 2016 16:31:10 +0900 Subject: [PATCH 16/44] CLN/TST: Add tests for nan/nat mixed input (#13477) closes #13467 --- doc/source/whatsnew/v0.19.0.txt | 3 +- pandas/indexes/base.py | 3 +- pandas/src/inference.pyx | 72 ++++++-- pandas/src/util.pxd | 2 +- pandas/tests/indexes/test_base.py | 43 +++++ pandas/tests/series/test_constructors.py | 21 ++- pandas/tests/test_infer_and_convert.py | 209 +++++++++++++++++++++++ pandas/tslib.pyx | 10 +- 8 files changed, 334 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 706ec903daaa2..046690e28dba5 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -484,8 +484,9 @@ Bug Fixes - Bug in ``Series.str.extractall()`` with ``str`` index raises ``ValueError`` (:issue:`13156`) - Bug in ``Series.str.extractall()`` with single group and quantifier (:issue:`13382`) - - Bug in ``DatetimeIndex`` and ``Period`` subtraction raises ``ValueError`` or ``AttributeError`` rather than ``TypeError`` (:issue:`13078`) +- Bug in ``Index`` and 
``Series`` created with ``NaN`` and ``NaT`` mixed data may not have ``datetime64`` dtype (:issue:`13324`) +- Bug in ``Index`` and ``Series`` may ignore ``np.datetime64('nat')`` and ``np.timedelta64('nat')`` when inferring dtype (:issue:`13324`) - Bug in ``PeriodIndex`` and ``Period`` subtraction raises ``AttributeError`` (:issue:`13071`) - Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) - Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`)
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e697dc63c2cdb..0bb80be013275 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -243,8 +243,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # don't support boolean explicitly ATM pass elif inferred != 'string': - if (inferred.startswith('datetime') or - tslib.is_timestamp_array(subarr)): + if inferred.startswith('datetime'): if (lib.is_datetime_with_singletz_array(subarr) or 'tz' in kwargs):
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 234ac7ea2c60c..9f96037c97c62 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -103,6 +103,7 @@ def infer_dtype(object _values): Py_ssize_t i, n object val ndarray values + bint seen_pdnat = False, seen_val = False if isinstance(_values, np.ndarray): values = _values @@ -141,17 +142,34 @@ def infer_dtype(object _values): values = values.ravel() # try to use a valid value - for i in range(n): - val = util.get_value_1d(values, i) - if not is_null_datetimelike(val): - break + for i from 0 <= i < n: + val = util.get_value_1d(values, i) - if util.is_datetime64_object(val) or val is NaT: + # do not use is_null_datetimelike to keep + # np.datetime64('nat') and np.timedelta64('nat') + if util._checknull(val): + pass + elif val is NaT: + seen_pdnat = True + else: + seen_val = True + break + + # if all values are nan/NaT + if seen_val is False and seen_pdnat is True: + return 'datetime' + # float/object nan is handled in later logic + + if util.is_datetime64_object(val): if is_datetime64_array(values): return 'datetime64' elif is_timedelta_or_timedelta64_array(values): return 'timedelta' + elif is_timedelta(val): + if is_timedelta_or_timedelta64_array(values): + return 'timedelta' + elif util.is_integer_object(val): # a timedelta will show true here as well if is_timedelta(val): @@ -200,17 +218,15 @@ def infer_dtype(object _values): if is_bytes_array(values): return 'bytes' - elif is_timedelta(val): - if is_timedelta_or_timedelta64_array(values): - return 'timedelta' - elif is_period(val): if is_period_array(values): return 'period' for i in range(n): val = util.get_value_1d(values, i) - if util.is_integer_object(val): + if (util.is_integer_object(val) and + not util.is_timedelta64_object(val) and + not util.is_datetime64_object(val)): return 'mixed-integer' return 'mixed' @@ -237,20 +253,46 @@ def is_possible_datetimelike_array(object arr): return False return seen_datetime or seen_timedelta + cdef inline bint is_null_datetimelike(v): # determine if we have a null for a timedelta/datetime (or integer versions) if util._checknull(v): return True + elif v is NaT: + return True elif util.is_timedelta64_object(v): return v.view('int64') == iNaT elif util.is_datetime64_object(v): return v.view('int64') == iNaT elif util.is_integer_object(v): return v == iNaT + return False + + + cdef inline bint is_null_datetime64(v): + # determine if we have a null for a datetime (or integer
versions), + # excluding np.timedelta64('nat') + if util._checknull(v): + return True + elif v is NaT: + return True + elif util.is_datetime64_object(v): + return v.view('int64') == iNaT + return False + + +cdef inline bint is_null_timedelta64(v): + # determine if we have a null for a timedelta (or integer versions), + # excluding np.datetime64('nat') + if util._checknull(v): + return True elif v is NaT: return True + elif util.is_timedelta64_object(v): + return v.view('int64') == iNaT return False + cdef inline bint is_datetime(object o): return PyDateTime_Check(o) @@ -420,7 +462,7 @@ def is_datetime_array(ndarray[object] values): # return False for all nulls for i in range(n): v = values[i] - if is_null_datetimelike(v): + if is_null_datetime64(v): # we are a regular null if util._checknull(v): null_count += 1 @@ -437,7 +479,7 @@ def is_datetime64_array(ndarray values): # return False for all nulls for i in range(n): v = values[i] - if is_null_datetimelike(v): + if is_null_datetime64(v): # we are a regular null if util._checknull(v): null_count += 1 @@ -481,7 +523,7 @@ def is_timedelta_array(ndarray values): return False for i in range(n): v = values[i] - if is_null_datetimelike(v): + if is_null_timedelta64(v): # we are a regular null if util._checknull(v): null_count += 1 @@ -496,7 +538,7 @@ def is_timedelta64_array(ndarray values): return False for i in range(n): v = values[i] - if is_null_datetimelike(v): + if is_null_timedelta64(v): # we are a regular null if util._checknull(v): null_count += 1 @@ -512,7 +554,7 @@ def is_timedelta_or_timedelta64_array(ndarray values): return False for i in range(n): v = values[i] - if is_null_datetimelike(v): + if is_null_timedelta64(v): # we are a regular null if util._checknull(v): null_count += 1
diff --git a/pandas/src/util.pxd b/pandas/src/util.pxd index 96a23a91cc7c2..fcb5583a0a6e7 100644 --- a/pandas/src/util.pxd +++ b/pandas/src/util.pxd @@ -98,4 +98,4 @@ cdef inline bint _checknan(object val): return not cnp.PyArray_Check(val) and val != val cdef inline bint is_period_object(object val): - return getattr(val,'_typ','_typ') == 'period' + return getattr(val, '_typ', '_typ') == 'period'
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d535eaa238567..67869901b068e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -203,6 +203,49 @@ def __array__(self, dtype=None): result = pd.Index(ArrayLike(array)) self.assert_index_equal(result, expected) + def test_index_ctor_infer_nan_nat(self): + # GH 13467 + exp = pd.Float64Index([np.nan, np.nan]) + self.assertEqual(exp.dtype, np.float64) + tm.assert_index_equal(Index([np.nan, np.nan]), exp) + tm.assert_index_equal(Index(np.array([np.nan, np.nan])), exp) + + exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) + self.assertEqual(exp.dtype, 'datetime64[ns]') + tm.assert_index_equal(Index([pd.NaT, pd.NaT]), exp) + tm.assert_index_equal(Index(np.array([pd.NaT, pd.NaT])), exp) + + exp = pd.DatetimeIndex([pd.NaT, pd.NaT]) + self.assertEqual(exp.dtype, 'datetime64[ns]') + + for data in [[pd.NaT, np.nan], [np.nan, pd.NaT], + [np.nan, np.datetime64('nat')], + [np.datetime64('nat'), np.nan]]: + tm.assert_index_equal(Index(data), exp) + tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) + + exp = pd.TimedeltaIndex([pd.NaT, pd.NaT]) + self.assertEqual(exp.dtype, 'timedelta64[ns]') + + for data in [[np.nan, np.timedelta64('nat')], + [np.timedelta64('nat'), np.nan], + [pd.NaT, np.timedelta64('nat')], + [np.timedelta64('nat'), pd.NaT]]: + +
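+                # any NaN-like paired with a timedelta64 NaT, in either
+                # order, should still infer a TimedeltaIndex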
tm.assert_index_equal(Index(data), exp) + tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) + + # mixed np.datetime64/timedelta64 nat results in object + data = [np.datetime64('nat'), np.timedelta64('nat')] + exp = pd.Index(data, dtype=object) + tm.assert_index_equal(Index(data), exp) + tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) + + data = [np.timedelta64('nat'), np.datetime64('nat')] + exp = pd.Index(data, dtype=object) + tm.assert_index_equal(Index(data), exp) + tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) + def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) rs = Index(xp) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c632704b7c5eb..2a7e8a957977f 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -252,6 +252,24 @@ def test_constructor_pass_none(self): expected = Series(index=Index([None])) assert_series_equal(s, expected) + def test_constructor_pass_nan_nat(self): + # GH 13467 + exp = Series([np.nan, np.nan], dtype=np.float64) + self.assertEqual(exp.dtype, np.float64) + tm.assert_series_equal(Series([np.nan, np.nan]), exp) + tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) + + exp = Series([pd.NaT, pd.NaT]) + self.assertEqual(exp.dtype, 'datetime64[ns]') + tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) + tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) + + tm.assert_series_equal(Series([pd.NaT, np.nan]), exp) + tm.assert_series_equal(Series(np.array([pd.NaT, np.nan])), exp) + + tm.assert_series_equal(Series([np.nan, pd.NaT]), exp) + tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) + def test_constructor_cast(self): self.assertRaises(ValueError, Series, ['a', 'b', 'c'], dtype=float) @@ -688,8 +706,9 @@ def test_constructor_dtype_timedelta64(self): td = Series([np.timedelta64(300000000), pd.NaT]) self.assertEqual(td.dtype, 'timedelta64[ns]') + # because iNaT is int, not coerced to timedelta td = Series([np.timedelta64(300000000), tslib.iNaT]) - self.assertEqual(td.dtype, 'timedelta64[ns]') + self.assertEqual(td.dtype, 'object') td = Series([np.timedelta64(300000000), np.nan]) self.assertEqual(td.dtype, 'timedelta64[ns]') diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py index a6941369b35be..5f016322f101f 100644 --- a/pandas/tests/test_infer_and_convert.py +++ b/pandas/tests/test_infer_and_convert.py @@ -180,6 +180,207 @@ def test_datetime(self): index = Index(dates) self.assertEqual(index.inferred_type, 'datetime64') + def test_infer_dtype_datetime(self): + + arr = np.array([pd.Timestamp('2011-01-01'), + pd.Timestamp('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.datetime64('2011-01-01'), + np.datetime64('2011-01-01')], dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timestamp('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1)]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, pd.Timestamp('2011-01-02'), n]) + 
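+            # a trailing NaT-like value should not change the inferred dtype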
self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + # different type of nat + arr = np.array([np.timedelta64('nat'), + np.datetime64('2011-01-02')], dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.datetime64('2011-01-02'), + np.timedelta64('nat')], dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + # mixed datetime + arr = np.array([datetime(2011, 1, 1), + pd.Timestamp('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + # should be datetime? + arr = np.array([np.datetime64('2011-01-01'), + pd.Timestamp('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([pd.Timestamp('2011-01-02'), + np.datetime64('2011-01-01')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed-integer') + + arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
+ def test_infer_dtype_timedelta(self): + + arr = np.array([pd.Timedelta('1 days'), + pd.Timedelta('2 days')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([np.timedelta64(1, 'D'), + np.timedelta64(2, 'D')], dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([timedelta(1), timedelta(2)]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timedelta('1 days')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, np.timedelta64(1, 'D')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, timedelta(1)]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, pd.Timedelta('1 days'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, np.timedelta64(1, 'D'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([n, timedelta(1), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + # different type of nat + arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], + dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], + dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
+ def test_infer_dtype_all_nan_nat_like(self): + arr = np.array([np.nan, np.nan]) + self.assertEqual(pd.lib.infer_dtype(arr), 'floating') + + # a mix of nan and None results in 'mixed' + arr = np.array([np.nan, np.nan, None]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([None, np.nan, np.nan]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + # pd.NaT + arr = np.array([pd.NaT]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([pd.NaT, np.nan]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.nan, pd.NaT]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.nan, pd.NaT, np.nan]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + arr = np.array([None, pd.NaT,
None]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') + + # np.datetime64(nat) + arr = np.array([np.datetime64('nat')]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.datetime64('nat'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') + + arr = np.array([np.timedelta64('nat')], dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + for n in [np.nan, pd.NaT, None]: + arr = np.array([n, np.timedelta64('nat'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) + self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') + + # datetime / timedelta mixed + arr = np.array([pd.NaT, np.datetime64('nat'), + np.timedelta64('nat'), np.nan]) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], + dtype=object) + self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) + self.assertTrue(pd.lib.is_datetime_array(arr)) + self.assertTrue(pd.lib.is_datetime64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_array(arr)) + self.assertFalse(pd.lib.is_timedelta64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) + self.assertFalse(pd.lib.is_datetime_array(arr)) + self.assertFalse(pd.lib.is_datetime64_array(arr)) + self.assertTrue(pd.lib.is_timedelta_array(arr)) + self.assertTrue(pd.lib.is_timedelta64_array(arr)) + self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), + np.timedelta64('nat')]) + self.assertFalse(pd.lib.is_datetime_array(arr)) + self.assertFalse(pd.lib.is_datetime64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_array(arr)) + self.assertFalse(pd.lib.is_timedelta64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT]) + self.assertTrue(pd.lib.is_datetime_array(arr)) + self.assertTrue(pd.lib.is_datetime64_array(arr)) + self.assertTrue(pd.lib.is_timedelta_array(arr)) + self.assertTrue(pd.lib.is_timedelta64_array(arr)) + self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, np.nan], dtype=object) + self.assertFalse(pd.lib.is_datetime_array(arr)) + self.assertFalse(pd.lib.is_datetime64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_array(arr)) + self.assertFalse(pd.lib.is_timedelta64_array(arr)) + self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) + def test_date(self): dates = [date(2012, 1, x) for x in range(1, 20)] @@ -244,6 +445,13 @@ def test_categorical(self): result = lib.infer_dtype(Series(arr)) self.assertEqual(result, 'categorical') + def test_is_period(self): + self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) + self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) + self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) + self.assertFalse(lib.is_period(1)) + self.assertFalse(lib.is_period(np.nan)) + class TestConvert(tm.TestCase): @@ -437,6 +645,7 @@ def test_convert_downcast_int64(self): result = lib.downcast_int64(arr, na_values) self.assert_numpy_array_equal(result, expected) + if __name__ == '__main__': import nose diff 
--git a/pandas/tslib.pyx b/pandas/tslib.pyx index 62f8b10e3eea2..fe4de11864522 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -843,15 +843,6 @@ cdef _tz_format(object obj, object zone): except: return ', tz=%s' % zone -def is_timestamp_array(ndarray[object] values): - cdef int i, n = len(values) - if n == 0: - return False - for i in range(n): - if not is_timestamp(values[i]): - return False - return True - cpdef object get_value_box(ndarray arr, object loc): cdef: @@ -957,6 +948,7 @@ cdef str _NDIM_STRING = "ndim" # (see Timestamp class above). This will serve as a C extension type that # shadows the python class, where we do any heavy lifting. cdef class _Timestamp(datetime): + cdef readonly: int64_t value, nanosecond object freq # frequency reference From 2f7fdd07eb0925016a28cf9ff324e351eac0c4df Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 11 Jul 2016 17:02:24 +0200 Subject: [PATCH 17/44] BUG: groupby apply on selected columns yielding scalar (GH13568) (#13585) closes #13568 --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/core/groupby.py | 5 ++++- pandas/tests/test_groupby.py | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 046690e28dba5..4cc16aac15f8b 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -491,6 +491,7 @@ Bug Fixes - Bug in ``PeriodIndex`` construction returning a ``float64`` index in some circumstances (:issue:`13067`) - Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) - Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`) +- Bug in ``groupby(..).apply(..)`` when the passed function returns scalar values per group (:issue:`13468`). 
- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) - Bug in ``.tz_convert`` on a tz-aware ``DateTimeIndex`` that relied on index being sorted for correct results (:issue:`13306`) - Bug in ``.tz_localize`` with ``dateutil.tz.tzlocal`` may return an incorrect result (:issue:`13583`)
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 8d33c27481d93..077acc1e81444 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3403,11 +3403,14 @@ def first_non_None_value(values): return self._reindex_output(result) + # values are not series or array-like but scalars else: # only coerce dates if we find at least 1 datetime coerce = True if any([isinstance(x, Timestamp) for x in values]) else False - return (Series(values, index=key_index, name=self.name) + # self.name not passed through to Series as the result + # should not take the name of original selection of columns + return (Series(values, index=key_index) ._convert(datetime=True, coerce=coerce))
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index efcba758e3b38..a52f22fe2032a 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2584,6 +2584,16 @@ def test_apply_series_yield_constant(self): result = self.df.groupby(['A', 'B'])['C'].apply(len) self.assertEqual(result.index.names[:2], ('A', 'B')) + def test_apply_frame_yield_constant(self): + # GH13568 + result = self.df.groupby(['A', 'B']).apply(len) + self.assertTrue(isinstance(result, Series)) + self.assertIsNone(result.name) + + result = self.df.groupby(['A', 'B'])[['C', 'D']].apply(len) + self.assertTrue(isinstance(result, Series)) + self.assertIsNone(result.name) + def test_apply_frame_to_series(self): grouped = self.df.groupby(['A', 'B']) result = grouped.apply(len)
From 65849d3c9feea395d6e6a124f7a3b11ecdb943cb Mon Sep 17 00:00:00 2001 From: Jeffrey Gerard Date: Mon, 11 Jul 2016 20:21:06 +0200 Subject: [PATCH 18/44] TST: Clean up tests of DataFrame.sort_{index,values} (#13496) * TST: Clean up tests of DataFrame.sort_{index,values} * Factor out Series sorting tests to own file. * Delegate deprecated sort() and order() to their own tests. Before this commit, the `Series.sort_values()` tests relied on deprecated `Series.sort()` and `Series.order()` as the source of truth. However they both merely called `Series.sort_values()` under the hood. This commit consolidates the core test logic against `.sort_values()` directly, while `.sort()` and `.order()` merely check for equivalence with `.sort_values()`. Also removes some no-op assertions that had rotted from the old days of `sort()`/`order()`. * Remove 'by' docstring from Series.sort_values * Document defaults for optional sorting args * Move more sort_values, sort_index tests to be together. * Add test for Series.sort_index(sort_remaining=True) * Improve `sort_values` tests when multiple `by`s Duplicate values in the test DataFrame are necessary to fully test this feature.
* PEP8 cleanup * Annotate tests with GH issue * Fix indentation - docstring string replacement --- pandas/core/frame.py | 8 +- pandas/core/generic.py | 32 +++--- pandas/core/series.py | 3 +- pandas/tests/frame/test_sorting.py | 116 ++++++++++---------- pandas/tests/series/test_analytics.py | 136 ------------------------ pandas/tests/series/test_sorting.py | 146 ++++++++++++++++++++++++++ 6 files changed, 226 insertions(+), 215 deletions(-) create mode 100644 pandas/tests/series/test_sorting.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 356abc67b168a..b4509c999a5da 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -68,8 +68,12 @@ # --------------------------------------------------------------------- # Docstring templates -_shared_doc_kwargs = dict(axes='index, columns', klass='DataFrame', - axes_single_arg="{0, 1, 'index', 'columns'}") +_shared_doc_kwargs = dict( + axes='index, columns', klass='DataFrame', + axes_single_arg="{0, 1, 'index', 'columns'}", + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""") _numeric_only_doc = """numeric_only : boolean, default None Include only float, int, boolean data. If None, will attempt to use diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7b271df4085cc..1aadc50b76f95 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -37,10 +37,13 @@ # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = dict() -_shared_doc_kwargs = dict(axes='keywords for axes', klass='NDFrame', - axes_single_arg='int or labels for object', - args_transpose='axes to permute (int or label for' - ' object)') +_shared_doc_kwargs = dict( + axes='keywords for axes', klass='NDFrame', + axes_single_arg='int or labels for object', + args_transpose='axes to permute (int or label for object)', + optional_by=""" + by : str or list of str + Name or list of names which refer to the axis items.""") def is_dictlike(x): @@ -1961,21 +1964,20 @@ def add_suffix(self, suffix): .. versionadded:: 0.17.0 Parameters - ---------- - by : string name or list of names which refer to the axis items - axis : %(axes)s to direct sorting - ascending : bool or list of bool + ----------%(optional_by)s + axis : %(axes)s to direct sorting, default 0 + ascending : bool or list of bool, default True Sort ascending vs. descending. Specify list for multiple sort orders. If this is a list of bools, must match the length of the by. - inplace : bool + inplace : bool, default False if True, perform operation in-place - kind : {`quicksort`, `mergesort`, `heapsort`} + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' Choice of sorting algorithm. See also ndarray.np.sort for more information. `mergesort` is the only stable algorithm. For DataFrames, this option is only applied when sorting on a single column or label. - na_position : {'first', 'last'} + na_position : {'first', 'last'}, default 'last' `first` puts NaNs at the beginning, `last` puts NaNs at the end Returns @@ -1997,16 +1999,16 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if not None, sort on values in specified index level(s) ascending : boolean, default True Sort ascending vs. descending - inplace : bool + inplace : bool, default False if True, perform operation in-place - kind : {`quicksort`, `mergesort`, `heapsort`} + kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort' Choice of sorting algorithm. 
See also ndarray.np.sort for more information. `mergesort` is the only stable algorithm. For DataFrames, this option is only applied when sorting on a single column or label. - na_position : {'first', 'last'} + na_position : {'first', 'last'}, default 'last' `first` puts NaNs at the beginning, `last` puts NaNs at the end - sort_remaining : bool + sort_remaining : bool, default True if true and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level diff --git a/pandas/core/series.py b/pandas/core/series.py index e2726bef0bd03..8015670212181 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,7 +62,8 @@ axes='index', klass='Series', axes_single_arg="{0, 'index'}", inplace="""inplace : boolean, default False If True, performs operation inplace and returns None.""", - duplicated='Series') + duplicated='Series', + optional_by='') def _coerce_method(converter): diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index ff2159f8b6f40..4d57216c8f870 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -21,75 +21,68 @@ class TestDataFrameSorting(tm.TestCase, TestData): _multiprocess_can_split_ = True - def test_sort_values(self): - # API for 9816 + def test_sort_index(self): + # GH13496 - # sort_index frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - frame.sort(columns='A') - with tm.assert_produces_warning(FutureWarning): - frame.sort() - + # axis=0 : sort rows by index labels unordered = frame.ix[[3, 2, 4, 1]] - expected = unordered.sort_index() - result = unordered.sort_index(axis=0) + expected = frame assert_frame_equal(result, expected) - unordered = frame.ix[:, [2, 1, 3, 0]] - expected = unordered.sort_index(axis=1) + result = unordered.sort_index(ascending=False) + expected = frame[::-1] + assert_frame_equal(result, expected) + # axis=1 : sort columns by column names + unordered = frame.ix[:, [2, 1, 3, 0]] result = unordered.sort_index(axis=1) - assert_frame_equal(result, expected) + assert_frame_equal(result, frame) + + result = unordered.sort_index(axis=1, ascending=False) + expected = frame.ix[:, ::-1] assert_frame_equal(result, expected) - # sortlevel - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + def test_sort_index_multiindex(self): + # GH13496 + + # sort rows by specified level of multi-index + mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(level='A', sort_remaining=False) expected = df.sortlevel('A', sort_remaining=False) assert_frame_equal(result, expected) + # sort columns by specified level of multi-index df = df.T result = df.sort_index(level='A', axis=1, sort_remaining=False) expected = df.sortlevel('A', axis=1, sort_remaining=False) assert_frame_equal(result, expected) - # MI sort, but no by + # MI sort, but no level: sort_level has no effect mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(sort_remaining=False) expected = df.sort_index() assert_frame_equal(result, expected) - def test_sort_index(self): + def test_sort(self): frame = DataFrame(np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=['A', 'B', 'C', 'D']) - # axis=0 - unordered = frame.ix[[3, 2, 4, 1]] - sorted_df = unordered.sort_index(axis=0) - expected = frame 
- assert_frame_equal(sorted_df, expected) - - sorted_df = unordered.sort_index(ascending=False) - expected = frame[::-1] - assert_frame_equal(sorted_df, expected) - - # axis=1 - unordered = frame.ix[:, ['D', 'B', 'C', 'A']] - sorted_df = unordered.sort_index(axis=1) - expected = frame - assert_frame_equal(sorted_df, expected) + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + frame.sort(columns='A') + with tm.assert_produces_warning(FutureWarning): + frame.sort() - sorted_df = unordered.sort_index(axis=1, ascending=False) - expected = frame.ix[:, ::-1] - assert_frame_equal(sorted_df, expected) + def test_sort_values(self): + frame = DataFrame([[1, 1, 2], [3, 1, 0], [4, 5, 6]], + index=[1, 2, 3], columns=list('ABC')) # by column sorted_df = frame.sort_values(by='A') @@ -109,16 +102,17 @@ def test_sort_index(self): sorted_df = frame.sort_values(by=['A'], ascending=[False]) assert_frame_equal(sorted_df, expected) - # check for now - sorted_df = frame.sort_values(by='A') - assert_frame_equal(sorted_df, expected[::-1]) - expected = frame.sort_values(by='A') + # multiple bys + sorted_df = frame.sort_values(by=['B', 'C']) + expected = frame.loc[[2, 1, 3]] assert_frame_equal(sorted_df, expected) - expected = frame.sort_values(by=['A', 'B'], ascending=False) - sorted_df = frame.sort_values(by=['A', 'B']) + sorted_df = frame.sort_values(by=['B', 'C'], ascending=False) assert_frame_equal(sorted_df, expected[::-1]) + sorted_df = frame.sort_values(by=['B', 'A'], ascending=[True, False]) + assert_frame_equal(sorted_df, expected) + self.assertRaises(ValueError, lambda: frame.sort_values( by=['A', 'B'], axis=2, inplace=True)) @@ -130,6 +124,25 @@ def test_sort_index(self): with assertRaisesRegexp(ValueError, msg): frame.sort_values(by=['A', 'B'], axis=0, ascending=[True] * 5) + def test_sort_values_inplace(self): + frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4], + columns=['A', 'B', 'C', 'D']) + + sorted_df = frame.copy() + sorted_df.sort_values(by='A', inplace=True) + expected = frame.sort_values(by='A') + assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by='A', ascending=False, inplace=True) + expected = frame.sort_values(by='A', ascending=False) + assert_frame_equal(sorted_df, expected) + + sorted_df = frame.copy() + sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True) + expected = frame.sort_values(by=['A', 'B'], ascending=False) + assert_frame_equal(sorted_df, expected) + def test_sort_index_categorical_index(self): df = (DataFrame({'A': np.arange(6, dtype='int64'), @@ -361,25 +374,6 @@ def test_sort_index_different_sortorder(self): result = idf['C'].sort_index(ascending=[1, 0]) assert_series_equal(result, expected['C']) - def test_sort_inplace(self): - frame = DataFrame(np.random.randn(4, 4), index=[1, 2, 3, 4], - columns=['A', 'B', 'C', 'D']) - - sorted_df = frame.copy() - sorted_df.sort_values(by='A', inplace=True) - expected = frame.sort_values(by='A') - assert_frame_equal(sorted_df, expected) - - sorted_df = frame.copy() - sorted_df.sort_values(by='A', ascending=False, inplace=True) - expected = frame.sort_values(by='A', ascending=False) - assert_frame_equal(sorted_df, expected) - - sorted_df = frame.copy() - sorted_df.sort_values(by=['A', 'B'], ascending=False, inplace=True) - expected = frame.sort_values(by=['A', 'B'], ascending=False) - assert_frame_equal(sorted_df, expected) - def test_sort_index_duplicates(self): # with 9816, these are all translated to .sort_values diff --git 
a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 0dbff0a028619..d9e2d8096c8d7 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -5,7 +5,6 @@ from distutils.version import LooseVersion import nose -import random from numpy import nan import numpy as np @@ -1418,141 +1417,6 @@ def test_is_monotonic(self): self.assertFalse(s.is_monotonic) self.assertTrue(s.is_monotonic_decreasing) - def test_sort_values(self): - - ts = self.ts.copy() - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - ts.sort() - - self.assert_series_equal(ts, self.ts.sort_values()) - self.assert_index_equal(ts.index, self.ts.sort_values().index) - - ts.sort_values(ascending=False, inplace=True) - self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) - self.assert_index_equal(ts.index, - self.ts.sort_values(ascending=False).index) - - # GH 5856/5853 - # Series.sort_values operating on a view - df = DataFrame(np.random.randn(10, 4)) - s = df.iloc[:, 0] - - def f(): - s.sort_values(inplace=True) - - self.assertRaises(ValueError, f) - - # test order/sort inplace - # GH6859 - ts1 = self.ts.copy() - ts1.sort_values(ascending=False, inplace=True) - ts2 = self.ts.copy() - ts2.sort_values(ascending=False, inplace=True) - assert_series_equal(ts1, ts2) - - ts1 = self.ts.copy() - ts1 = ts1.sort_values(ascending=False, inplace=False) - ts2 = self.ts.copy() - ts2 = ts.sort_values(ascending=False) - assert_series_equal(ts1, ts2) - - def test_sort_index(self): - rindex = list(self.ts.index) - random.shuffle(rindex) - - random_order = self.ts.reindex(rindex) - sorted_series = random_order.sort_index() - assert_series_equal(sorted_series, self.ts) - - # descending - sorted_series = random_order.sort_index(ascending=False) - assert_series_equal(sorted_series, - self.ts.reindex(self.ts.index[::-1])) - - def test_sort_index_inplace(self): - - # For #11402 - rindex = list(self.ts.index) - random.shuffle(rindex) - - # descending - random_order = self.ts.reindex(rindex) - result = random_order.sort_index(ascending=False, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) - - # ascending - random_order = self.ts.reindex(rindex) - result = random_order.sort_index(ascending=True, inplace=True) - self.assertIs(result, None, - msg='sort_index() inplace should return None') - assert_series_equal(random_order, self.ts) - - def test_sort_API(self): - - # API for 9816 - - # sortlevel - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - s = Series([1, 2], mi) - backwards = s.iloc[[1, 0]] - - res = s.sort_index(level='A') - assert_series_equal(backwards, res) - - # sort_index - rindex = list(self.ts.index) - random.shuffle(rindex) - - random_order = self.ts.reindex(rindex) - sorted_series = random_order.sort_index(level=0) - assert_series_equal(sorted_series, self.ts) - - # compat on axis - sorted_series = random_order.sort_index(axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) - - sorted_series = random_order.sort_index(level=0, axis=0) - assert_series_equal(sorted_series, self.ts) - - self.assertRaises(ValueError, - lambda: random_order.sort_index(level=0, axis=1)) - - def test_order(self): - - # 9816 deprecated - with tm.assert_produces_warning(FutureWarning): - self.ts.order() - - ts = self.ts.copy() - ts[:5] = np.NaN - 
vals = ts.values - - result = ts.sort_values() - self.assertTrue(np.isnan(result[-5:]).all()) - self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) - - result = ts.sort_values(na_position='first') - self.assertTrue(np.isnan(result[:5]).all()) - self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) - - # something object-type - ser = Series(['A', 'B'], [1, 2]) - # no failure - ser.sort_values() - - # ascending=False - ordered = ts.sort_values(ascending=False) - expected = np.sort(ts.valid().values)[::-1] - assert_almost_equal(expected, ordered.valid().values) - ordered = ts.sort_values(ascending=False, na_position='first') - assert_almost_equal(expected, ordered.valid().values) - def test_nsmallest_nlargest(self): # float, int, datetime64 (use i8), timedelts64 (same), # object that are numbers, object that are strings diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py new file mode 100644 index 0000000000000..826201adbdb50 --- /dev/null +++ b/pandas/tests/series/test_sorting.py @@ -0,0 +1,146 @@ +# coding=utf-8 + +import numpy as np +import random + +from pandas import (DataFrame, Series, MultiIndex) + +from pandas.util.testing import (assert_series_equal, assert_almost_equal) +import pandas.util.testing as tm + +from .common import TestData + + +class TestSeriesSorting(TestData, tm.TestCase): + + _multiprocess_can_split_ = True + + def test_sort(self): + + ts = self.ts.copy() + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + ts.sort() # sorts inplace + self.assert_series_equal(ts, self.ts.sort_values()) + + def test_order(self): + + # 9816 deprecated + with tm.assert_produces_warning(FutureWarning): + result = self.ts.order() + self.assert_series_equal(result, self.ts.sort_values()) + + def test_sort_values(self): + + # check indexes are reordered corresponding with the values + ser = Series([3, 2, 4, 1], ['A', 'B', 'C', 'D']) + expected = Series([1, 2, 3, 4], ['D', 'B', 'A', 'C']) + result = ser.sort_values() + self.assert_series_equal(expected, result) + + ts = self.ts.copy() + ts[:5] = np.NaN + vals = ts.values + + result = ts.sort_values() + self.assertTrue(np.isnan(result[-5:]).all()) + self.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:])) + + # na_position + result = ts.sort_values(na_position='first') + self.assertTrue(np.isnan(result[:5]).all()) + self.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:])) + + # something object-type + ser = Series(['A', 'B'], [1, 2]) + # no failure + ser.sort_values() + + # ascending=False + ordered = ts.sort_values(ascending=False) + expected = np.sort(ts.valid().values)[::-1] + assert_almost_equal(expected, ordered.valid().values) + ordered = ts.sort_values(ascending=False, na_position='first') + assert_almost_equal(expected, ordered.valid().values) + + # inplace=True + ts = self.ts.copy() + ts.sort_values(ascending=False, inplace=True) + self.assert_series_equal(ts, self.ts.sort_values(ascending=False)) + self.assert_index_equal(ts.index, + self.ts.sort_values(ascending=False).index) + + # GH 5856/5853 + # Series.sort_values operating on a view + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0] + + def f(): + s.sort_values(inplace=True) + + self.assertRaises(ValueError, f) + + def test_sort_index(self): + rindex = list(self.ts.index) + random.shuffle(rindex) + + random_order = self.ts.reindex(rindex) + sorted_series = random_order.sort_index() + assert_series_equal(sorted_series, self.ts) + + # descending + sorted_series = 
random_order.sort_index(ascending=False) + assert_series_equal(sorted_series, + self.ts.reindex(self.ts.index[::-1])) + + # compat on level + sorted_series = random_order.sort_index(level=0) + assert_series_equal(sorted_series, self.ts) + + # compat on axis + sorted_series = random_order.sort_index(axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) + + sorted_series = random_order.sort_index(level=0, axis=0) + assert_series_equal(sorted_series, self.ts) + + self.assertRaises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) + + def test_sort_index_inplace(self): + + # For #11402 + rindex = list(self.ts.index) + random.shuffle(rindex) + + # descending + random_order = self.ts.reindex(rindex) + result = random_order.sort_index(ascending=False, inplace=True) + self.assertIs(result, None, + msg='sort_index() inplace should return None') + assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) + + # ascending + random_order = self.ts.reindex(rindex) + result = random_order.sort_index(ascending=True, inplace=True) + self.assertIs(result, None, + msg='sort_index() inplace should return None') + assert_series_equal(random_order, self.ts) + + def test_sort_index_multiindex(self): + + mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) + s = Series([1, 2], mi) + backwards = s.iloc[[1, 0]] + + # implicit sort_remaining=True + res = s.sort_index(level='A') + assert_series_equal(backwards, res) + + # GH13496 + # rows share same level='A': sort has no effect without remaining lvls + res = s.sort_index(level='A', sort_remaining=False) + assert_series_equal(s, res) From 8dbc0f49ccd8617c1ac5c2daf38b55db4335efa1 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Jul 2016 09:07:41 +0200 Subject: [PATCH 19/44] DOC: asfreq clarify original NaNs are not filled (GH9963) (#13617) --- pandas/core/generic.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1aadc50b76f95..b4bcae47cbbdf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3916,16 +3916,20 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, def asfreq(self, freq, method=None, how=None, normalize=False): """ - Convert all TimeSeries inside to specified frequency using DateOffset - objects. Optionally provide fill method to pad/backfill missing values. + Convert TimeSeries to specified frequency. + + Optionally provide filling method to pad/backfill missing values. 
Parameters ---------- freq : DateOffset object, or string - method : {'backfill', 'bfill', 'pad', 'ffill', None} - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill method + method : {'backfill'/'bfill', 'pad'/'ffill'}, default None + Method to use for filling holes in reindexed Series (note this + does not fill NaNs that already were present): + + * 'pad' / 'ffill': propagate last valid observation forward to next + valid + * 'backfill' / 'bfill': use NEXT valid observation to fill how : {'start', 'end'}, default end For PeriodIndex only, see PeriodIndex.asfreq normalize : bool, default False From 93b7d1319731304f388717f2651f3a278749c517 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Tue, 12 Jul 2016 06:51:03 -0400 Subject: [PATCH 20/44] BUG: Invalid Timedelta op may raise ValueError Author: sinhrks Closes #13624 from sinhrks/timedelta_comp and squashes the following commits: 856df95 [sinhrks] BUG: Invalid Timedelta op may raise ValueError --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/tseries/tdi.py | 10 +++++--- pandas/tseries/tests/test_timedeltas.py | 32 +++++++++++++++++++++++++ pandas/tseries/timedeltas.py | 4 ++-- pandas/tslib.pyx | 9 ++++++- 5 files changed, 50 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 4cc16aac15f8b..8661d87a617ba 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -543,7 +543,7 @@ Bug Fixes - Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` silently ignore custom datetime formatter passed through the ``formatters`` key word (:issue:`10690`) - Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) - +- Bug in invalid ``Timedelta`` arithmetic and comparison may raise ``ValueError`` rather than ``TypeError`` (:issue:`13624`) - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) - Bug in ``groupby`` with ``as_index=False`` returns all NaN's when grouping on multiple columns including a categorical one (:issue:`13204`) diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index af4c46e2d16fa..dbc0078b67ae7 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -35,16 +35,20 @@ def _td_index_cmp(opname, nat_result=False): """ def wrapper(self, other): + msg = "cannot compare a TimedeltaIndex with type {0}" func = getattr(super(TimedeltaIndex, self), opname) if _is_convertible_to_td(other) or other is tslib.NaT: - other = _to_m8(other) + try: + other = _to_m8(other) + except ValueError: + # failed to parse as timedelta + raise TypeError(msg.format(type(other))) result = func(other) if com.isnull(other): result.fill(nat_result) else: if not com.is_list_like(other): - raise TypeError("cannot compare a TimedeltaIndex with type " - "{0}".format(type(other))) + raise TypeError(msg.format(type(other))) other = TimedeltaIndex(other).values result = func(other) diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index c3bd62849bf82..4f985998d5e20 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -472,6 +472,21 @@ class Other: self.assertTrue(td.__mul__(other) is NotImplemented) self.assertTrue(td.__floordiv__(td) is NotImplemented) + def test_ops_error_str(self): + # GH 13624 + td = Timedelta('1 day') + + for l, r in [(td, 
'a'), ('a', td)]: + + with tm.assertRaises(TypeError): + l + r + + with tm.assertRaises(TypeError): + l > r + + self.assertFalse(l == r) + self.assertTrue(l != r) + def test_fields(self): def check(value): # that we are int/long like @@ -1432,6 +1447,23 @@ def test_comparisons_nat(self): expected = np.array([True, True, True, True, True, False]) self.assert_numpy_array_equal(result, expected) + def test_ops_error_str(self): + # GH 13624 + tdi = TimedeltaIndex(['1 day', '2 days']) + + for l, r in [(tdi, 'a'), ('a', tdi)]: + with tm.assertRaises(TypeError): + l + r + + with tm.assertRaises(TypeError): + l > r + + with tm.assertRaises(TypeError): + l == r + + with tm.assertRaises(TypeError): + l != r + def test_map(self): rng = timedelta_range('1 day', periods=10) diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 7ff5d7adcaa35..5a28218500858 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -74,8 +74,8 @@ def _convert_listlike(arg, box, unit, name=None): value = arg.astype('timedelta64[{0}]'.format( unit)).astype('timedelta64[ns]', copy=False) else: - value = tslib.array_to_timedelta64( - _ensure_object(arg), unit=unit, errors=errors) + value = tslib.array_to_timedelta64(_ensure_object(arg), + unit=unit, errors=errors) value = value.astype('timedelta64[ns]', copy=False) if box: diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index fe4de11864522..650b4c7979d8d 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -2912,10 +2912,17 @@ class Timedelta(_Timedelta): if not self._validate_ops_compat(other): return NotImplemented - other = Timedelta(other) if other is NaT: return NaT + + try: + other = Timedelta(other) + except ValueError: + # failed to parse as timedelta + return NotImplemented + return Timedelta(op(self.value, other.value), unit='ns') + f.__name__ = name return f From dbd53306e4a1c091cf41426d1297648b042c771c Mon Sep 17 00:00:00 2001 From: sinhrks Date: Tue, 12 Jul 2016 06:52:54 -0400 Subject: [PATCH 21/44] CLN: Cleanup ops.py Author: sinhrks Closes #13605 from sinhrks/ops_cln2 and squashes the following commits: 729997b [sinhrks] CLN: Cleanup ops.py --- pandas/core/ops.py | 365 +++++++++++++------------- pandas/tests/frame/test_operators.py | 66 ++++- pandas/tests/series/test_operators.py | 74 +++--- 3 files changed, 284 insertions(+), 221 deletions(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 0af7b6d80ce0e..3aaca1eea486e 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -20,7 +20,6 @@ from pandas.compat import bind_method import pandas.core.missing as missing import pandas.algos as _algos -import pandas.core.algorithms as algos from pandas.core.common import (is_list_like, notnull, isnull, _values_from_object, _maybe_match_name, needs_i8_conversion, is_datetimelike_v_numeric, @@ -258,30 +257,87 @@ def add_flex_arithmetic_methods(cls, flex_arith_method, exclude=exclude) -class _TimeOp(object): +class _Op(object): + """ - Wrapper around Series datetime/time/timedelta arithmetic operations. - Generally, you should use classmethod ``maybe_convert_for_time_op`` as an - entry point. + Wrapper around Series arithmetic operations. + Generally, you should use classmethod ``_Op.get_op`` as an entry point. + + This validates and coerces lhs and rhs depending on its dtype and + based on op. See _TimeOp also. 
+
+    Parameters
+    ----------
+    left : Series
+        lhs of op
+    right : object
+        rhs of op
+    name : str
+        name of op
+    na_op : callable
+        a function which wraps op
     """
-    fill_value = iNaT
+
+    fill_value = np.nan
     wrap_results = staticmethod(lambda x: x)
     dtype = None
 
     def __init__(self, left, right, name, na_op):
+        self.left = left
+        self.right = right
+
+        self.name = name
+        self.na_op = na_op
+
+        self.lvalues = left
+        self.rvalues = right
+
+    @classmethod
+    def get_op(cls, left, right, name, na_op):
+        """
+        Get op dispatcher, returns _Op or _TimeOp.
+
+        If ``left`` and ``right`` are appropriate for datetime arithmetic with
+        operation ``name``, processes them and returns a ``_TimeOp`` object
+        that stores all the required values. Otherwise, it returns a ``_Op``,
+        indicating that the operation is performed via the normal numpy path.
+        """
+        is_timedelta_lhs = is_timedelta64_dtype(left)
+        is_datetime_lhs = (is_datetime64_dtype(left) or
+                           is_datetime64tz_dtype(left))
 
-        # need to make sure that we are aligning the data
         if isinstance(left, ABCSeries) and isinstance(right, ABCSeries):
-            left, right = left.align(right, copy=False)
+            # avoid repeated alignment
+            if not left.index.equals(right.index):
+                left, right = left.align(right, copy=False)
+
+                index, lidx, ridx = left.index.join(right.index, how='outer',
+                                                    return_indexers=True)
+                # if the DatetimeIndexes have different tz, convert to UTC
+                left.index = index
+                right.index = index
+
+        if not (is_datetime_lhs or is_timedelta_lhs):
+            return _Op(left, right, name, na_op)
+        else:
+            return _TimeOp(left, right, name, na_op)
+
+
+class _TimeOp(_Op):
+    """
+    Wrapper around Series datetime/time/timedelta arithmetic operations.
+    Generally, you should use classmethod ``_Op.get_op`` as an entry point.
+    """
+    fill_value = iNaT
+
+    def __init__(self, left, right, name, na_op):
+        super(_TimeOp, self).__init__(left, right, name, na_op)
 
         lvalues = self._convert_to_array(left, name=name)
         rvalues = self._convert_to_array(right, name=name, other=lvalues)
 
-        self.name = name
-        self.na_op = na_op
-
         # left
-        self.left = left
         self.is_offset_lhs = self._is_offset(left)
         self.is_timedelta_lhs = is_timedelta64_dtype(lvalues)
         self.is_datetime64_lhs = is_datetime64_dtype(lvalues)
@@ -292,7 +348,6 @@ def __init__(self, left, right, name, na_op):
         self.is_floating_lhs = left.dtype.kind == 'f'
 
         # right
-        self.right = right
         self.is_offset_rhs = self._is_offset(right)
         self.is_datetime64_rhs = is_datetime64_dtype(rvalues)
         self.is_datetime64tz_rhs = is_datetime64tz_dtype(rvalues)
@@ -543,26 +598,6 @@ def _is_offset(self, arr_or_obj):
         else:
             return False
 
-    @classmethod
-    def maybe_convert_for_time_op(cls, left, right, name, na_op):
-        """
-        if ``left`` and ``right`` are appropriate for datetime arithmetic with
-        operation ``name``, processes them and returns a ``_TimeOp`` object
-        that stores all the required values. Otherwise, it will generate
-        either a ``NotImplementedError`` or ``None``, indicating that the
-        operation is unsupported for datetimes (e.g., an unsupported r_op) or
-        that the data is not the right type for time ops.
- """ - # decide if we can do it - is_timedelta_lhs = is_timedelta64_dtype(left) - is_datetime_lhs = (is_datetime64_dtype(left) or - is_datetime64tz_dtype(left)) - - if not (is_datetime_lhs or is_timedelta_lhs): - return None - - return cls(left, right, name, na_op) - def _arith_method_SERIES(op, name, str_rep, fill_zeros=None, default_axis=None, **eval_kwargs): @@ -615,53 +650,28 @@ def wrapper(left, right, name=name, na_op=na_op): if isinstance(right, pd.DataFrame): return NotImplemented - time_converted = _TimeOp.maybe_convert_for_time_op(left, right, name, - na_op) + converted = _Op.get_op(left, right, name, na_op) - if time_converted is None: - lvalues, rvalues = left, right - dtype = None - wrap_results = lambda x: x - elif time_converted is NotImplemented: - return NotImplemented - else: - left, right = time_converted.left, time_converted.right - lvalues, rvalues = time_converted.lvalues, time_converted.rvalues - dtype = time_converted.dtype - wrap_results = time_converted.wrap_results - na_op = time_converted.na_op + left, right = converted.left, converted.right + lvalues, rvalues = converted.lvalues, converted.rvalues + dtype = converted.dtype + wrap_results = converted.wrap_results + na_op = converted.na_op if isinstance(rvalues, ABCSeries): - rindex = getattr(rvalues, 'index', rvalues) name = _maybe_match_name(left, rvalues) lvalues = getattr(lvalues, 'values', lvalues) rvalues = getattr(rvalues, 'values', rvalues) - if left.index.equals(rindex): - index = left.index - else: - index, lidx, ridx = left.index.join(rindex, how='outer', - return_indexers=True) - - if lidx is not None: - lvalues = algos.take_1d(lvalues, lidx) - - if ridx is not None: - rvalues = algos.take_1d(rvalues, ridx) - - result = wrap_results(safe_na_op(lvalues, rvalues)) - return left._constructor(result, index=index, - name=name, dtype=dtype) + # _Op aligns left and right else: - # scalars + name = left.name if (hasattr(lvalues, 'values') and not isinstance(lvalues, pd.DatetimeIndex)): lvalues = lvalues.values - result = wrap_results(safe_na_op(lvalues, rvalues)) - return left._constructor(result, - index=left.index, name=left.name, - dtype=dtype) - + result = wrap_results(safe_na_op(lvalues, rvalues)) + return left._constructor(result, index=left.index, + name=name, dtype=dtype) return wrapper @@ -895,6 +905,32 @@ def wrapper(self, other): _op_descriptions[reverse_op]['reverse'] = k +_flex_doc_SERIES = """ +%s of series and other, element-wise (binary operator `%s`). + +Equivalent to ``%s``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other: Series or scalar value +fill_value : None or float value, default None (NaN) + Fill missing (NaN) values with this value. If both Series are + missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Returns +------- +result : Series + +See also +-------- +Series.%s +""" + + def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs): op_name = name.replace('__', '') @@ -904,30 +940,8 @@ def _flex_method_SERIES(op, name, str_rep, default_axis=None, fill_zeros=None, else: equiv = 'series ' + op_desc['op'] + ' other' - doc = """ - %s of series and other, element-wise (binary operator `%s`). - - Equivalent to ``%s``, but with support to substitute a fill_value for - missing data in one of the inputs. 
- - Parameters - ---------- - other: Series or scalar value - fill_value : None or float value, default None (NaN) - Fill missing (NaN) values with this value. If both Series are - missing, the result will be missing - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Returns - ------- - result : Series - - See also - -------- - Series.%s - """ % (op_desc['desc'], op_name, equiv, op_desc['reverse']) + doc = _flex_doc_SERIES % (op_desc['desc'], op_name, equiv, + op_desc['reverse']) @Appender(doc) def flex_wrapper(self, other, level=None, fill_value=None, axis=0): @@ -983,6 +997,75 @@ def flex_wrapper(self, other, level=None, fill_value=None, axis=0): result : DataFrame """ +_flex_doc_FRAME = """ +%s of dataframe and other, element-wise (binary operator `%s`). + +Equivalent to ``%s``, but with support to substitute a fill_value for +missing data in one of the inputs. + +Parameters +---------- +other : Series, DataFrame, or constant +axis : {0, 1, 'index', 'columns'} + For Series input, axis to match Series index on +fill_value : None or float value, default None + Fill missing (NaN) values with this value. If both DataFrame + locations are missing, the result will be missing +level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level + +Notes +----- +Mismatched indices will be unioned together + +Returns +------- +result : DataFrame + +See also +-------- +DataFrame.%s +""" + + +def _align_method_FRAME(left, right, axis): + """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ + + def to_series(right): + msg = 'Unable to coerce to Series, length must be {0}: given {1}' + if axis is not None and left._get_axis_name(axis) == 'index': + if len(left.index) != len(right): + raise ValueError(msg.format(len(left.index), len(right))) + right = left._constructor_sliced(right, index=left.index) + else: + if len(left.columns) != len(right): + raise ValueError(msg.format(len(left.columns), len(right))) + right = left._constructor_sliced(right, index=left.columns) + return right + + if isinstance(right, (list, tuple)): + right = to_series(right) + + elif isinstance(right, np.ndarray) and right.ndim: # skips np scalar + + if right.ndim == 1: + right = to_series(right) + + elif right.ndim == 2: + if left.shape != right.shape: + msg = ("Unable to coerce to DataFrame, " + "shape must be {0}: given {1}") + raise ValueError(msg.format(left.shape, right.shape)) + + right = left._constructor(right, index=left.index, + columns=left.columns) + else: + msg = 'Unable to coerce to Series/DataFrame, dim must be <= 2: {0}' + raise ValueError(msg.format(right.shape, )) + + return right + def _arith_method_FRAME(op, name, str_rep=None, default_axis='columns', fill_zeros=None, **eval_kwargs): @@ -1027,75 +1110,20 @@ def na_op(x, y): else: equiv = 'dataframe ' + op_desc['op'] + ' other' - doc = """ - %s of dataframe and other, element-wise (binary operator `%s`). - - Equivalent to ``%s``, but with support to substitute a fill_value for - missing data in one of the inputs. - - Parameters - ---------- - other : Series, DataFrame, or constant - axis : {0, 1, 'index', 'columns'} - For Series input, axis to match Series index on - fill_value : None or float value, default None - Fill missing (NaN) values with this value. 
If both DataFrame - locations are missing, the result will be missing - level : int or name - Broadcast across a level, matching Index values on the - passed MultiIndex level - - Notes - ----- - Mismatched indices will be unioned together - - Returns - ------- - result : DataFrame - - See also - -------- - DataFrame.%s - """ % (op_desc['desc'], op_name, equiv, op_desc['reverse']) + doc = _flex_doc_FRAME % (op_desc['desc'], op_name, equiv, + op_desc['reverse']) else: doc = _arith_doc_FRAME % name @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): + + other = _align_method_FRAME(self, other, axis) + if isinstance(other, pd.DataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): return self._combine_series(other, na_op, fill_value, axis, level) - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - # TODO: Get all of these to use _constructor_sliced - # casted = self._constructor_sliced(other, index=self.index) - casted = pd.Series(other, index=self.index) - else: - # casted = self._constructor_sliced(other, index=self.columns) - casted = pd.Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, axis, level) - elif isinstance(other, np.ndarray) and other.ndim: # skips np scalar - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - # casted = self._constructor_sliced(other, - # index=self.index) - casted = pd.Series(other, index=self.index) - else: - # casted = self._constructor_sliced(other, - # index=self.columns) - casted = pd.Series(other, index=self.columns) - return self._combine_series(casted, na_op, fill_value, axis, - level) - elif other.ndim == 2: - # casted = self._constructor(other, index=self.index, - # columns=self.columns) - casted = pd.DataFrame(other, index=self.index, - columns=self.columns) - return self._combine_frame(casted, na_op, fill_value, level) - else: - raise ValueError("Incompatible argument shape: %s" % - (other.shape, )) else: if fill_value is not None: self = self.fillna(fill_value) @@ -1135,39 +1163,14 @@ def na_op(x, y): @Appender('Wrapper for flexible comparison methods %s' % name) def f(self, other, axis=default_axis, level=None): + + other = _align_method_FRAME(self, other, axis) + if isinstance(other, pd.DataFrame): # Another DataFrame return self._flex_compare_frame(other, na_op, str_rep, level) elif isinstance(other, ABCSeries): return self._combine_series(other, na_op, None, axis, level) - - elif isinstance(other, (list, tuple)): - if axis is not None and self._get_axis_name(axis) == 'index': - casted = pd.Series(other, index=self.index) - else: - casted = pd.Series(other, index=self.columns) - - return self._combine_series(casted, na_op, None, axis, level) - - elif isinstance(other, np.ndarray): - if other.ndim == 1: - if axis is not None and self._get_axis_name(axis) == 'index': - casted = pd.Series(other, index=self.index) - else: - casted = pd.Series(other, index=self.columns) - - return self._combine_series(casted, na_op, None, axis, level) - - elif other.ndim == 2: - casted = pd.DataFrame(other, index=self.index, - columns=self.columns) - - return self._flex_compare_frame(casted, na_op, str_rep, level) - - else: - raise ValueError("Incompatible argument shape: %s" % - (other.shape, )) - else: return self._combine_const(other, na_op) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 
ee7c296f563f0..e2e0f568e4098 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -417,10 +417,11 @@ def test_arith_flex_frame(self): # ndim >= 3 ndim_5 = np.ones(self.frame.shape + (3, 4, 5)) - with assertRaisesRegexp(ValueError, 'shape'): + msg = "Unable to coerce to Series/DataFrame" + with assertRaisesRegexp(ValueError, msg): f(self.frame, ndim_5) - with assertRaisesRegexp(ValueError, 'shape'): + with assertRaisesRegexp(ValueError, msg): getattr(self.frame, op)(ndim_5) # res_add = self.frame.add(self.frame) @@ -581,8 +582,9 @@ def _check_unaligned_frame(meth, op, df, other): # scalar assert_frame_equal(f(0), o(df, 0)) # NAs + msg = "Unable to coerce to Series/DataFrame" assert_frame_equal(f(np.nan), o(df, np.nan)) - with assertRaisesRegexp(ValueError, 'shape'): + with assertRaisesRegexp(ValueError, msg): f(ndim_5) # Series @@ -662,6 +664,17 @@ def _test_seq(df, idx_ser, col_ser): exp = DataFrame({'col': [False, True, False]}) assert_frame_equal(result, exp) + def test_dti_tz_convert_to_utc(self): + base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + df1 = DataFrame({'A': [1, 2]}, index=idx1) + df2 = DataFrame({'A': [1, 1]}, index=idx2) + exp = DataFrame({'A': [np.nan, 3, np.nan]}, index=base) + assert_frame_equal(df1 + df2, exp) + def test_arith_flex_series(self): df = self.simple @@ -1176,6 +1189,53 @@ def test_inplace_ops_identity(self): assert_frame_equal(df2, expected) self.assertIs(df._data, df2._data) + def test_alignment_non_pandas(self): + index = ['A', 'B', 'C'] + columns = ['X', 'Y', 'Z'] + df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns) + + align = pd.core.ops._align_method_FRAME + + for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3])]: + + tm.assert_series_equal(align(df, val, 'index'), + Series([1, 2, 3], index=df.index)) + tm.assert_series_equal(align(df, val, 'columns'), + Series([1, 2, 3], index=df.columns)) + + # length mismatch + msg = 'Unable to coerce to Series, length must be 3: given 2' + for val in [[1, 2], (1, 2), np.array([1, 2])]: + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'index') + + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'columns') + + val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + tm.assert_frame_equal(align(df, val, 'index'), + DataFrame(val, index=df.index, + columns=df.columns)) + tm.assert_frame_equal(align(df, val, 'columns'), + DataFrame(val, index=df.index, + columns=df.columns)) + + # shape mismatch + msg = 'Unable to coerce to DataFrame, shape must be' + val = np.array([[1, 2, 3], [4, 5, 6]]) + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'index') + + with tm.assertRaisesRegexp(ValueError, msg): + align(df, val, 'columns') + + val = np.zeros((3, 3, 3)) + with tm.assertRaises(ValueError): + align(df, val, 'index') + with tm.assertRaises(ValueError): + align(df, val, 'columns') + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 9c401e9ce6da8..5ebe528ff8cab 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -571,11 +571,11 @@ def run_ops(ops, get_ser, test_ser): td2 / td1 # ## datetime64 ### - dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), Timestamp( - '20120103')]) + dt1 = 
Series([Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')]) dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), Timestamp( - '20120104')]) + dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), + Timestamp('20120104')]) ops = ['__add__', '__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__radd__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] @@ -607,9 +607,10 @@ def run_ops(ops, get_ser, test_ser): ops = ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] - dt1 = Series( - date_range('2000-01-01 09:00:00', periods=5, - tz='US/Eastern'), name='foo') + + tz = 'US/Eastern' + dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, + tz=tz), name='foo') dt2 = dt1.copy() dt2.iloc[2] = np.nan td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H')) @@ -618,58 +619,48 @@ def run_ops(ops, get_ser, test_ser): run_ops(ops, dt1, td1) result = dt1 + td1[0] - expected = ( - dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 + td2[0] - expected = ( - dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas if not _np_version_under1p8: result = td1[0] + dt1 - expected = ( - dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = td2[0] + dt2 - expected = ( - dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] - expected = ( - dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td1[0] - dt1) result = dt2 - td2[0] - expected = ( - dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td2[0] - dt2) result = dt1 + td1 - expected = ( - dt1.dt.tz_localize(None) + td1).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) + td1).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 + td2 - expected = ( - dt2.dt.tz_localize(None) + td2).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) + td2).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1 - expected = ( - dt1.dt.tz_localize(None) - td1).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt1.dt.tz_localize(None) - td1).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt2 - td2 - expected = ( - dt2.dt.tz_localize(None) - td2).dt.tz_localize('US/Eastern') - assert_series_equal(result, expected) + exp = (dt2.dt.tz_localize(None) - td2).dt.tz_localize(tz) + 
assert_series_equal(result, exp) self.assertRaises(TypeError, lambda: td1 - dt1) self.assertRaises(TypeError, lambda: td2 - dt2) @@ -1555,3 +1546,12 @@ def test_datetime64_with_index(self): df['expected'] = df['date'] - df.index.to_series() df['result'] = df['date'] - df.index assert_series_equal(df['result'], df['expected'], check_names=False) + + def test_dti_tz_convert_to_utc(self): + base = pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='UTC') + idx1 = base.tz_convert('Asia/Tokyo')[:2] + idx2 = base.tz_convert('US/Eastern')[1:] + + res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) + assert_series_equal(res, Series([np.nan, 3, np.nan], index=base)) From 7c357d20f6cd0f379790c200e91075a179ebab75 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Tue, 12 Jul 2016 13:19:49 -0400 Subject: [PATCH 22/44] CLN: Removed outtype in DataFrame.to_dict (#13627) Follows up from #8486 in 0.15.0 by removing outtype in DataFrame.to_dict() --- doc/source/whatsnew/v0.19.0.txt | 1 + pandas/core/frame.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 8661d87a617ba..f457b8d4bd1f6 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -443,6 +443,7 @@ Removal of prior version deprecations/changes ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`) +- ``DataFrame.to_dict()`` has dropped the ``outtype`` parameter in favor of ``orient`` (:issue:`13627`, :issue:`8486`) .. _whatsnew_0190.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4509c999a5da..e01fc6dca6be3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -818,7 +818,6 @@ def from_dict(cls, data, orient='columns', dtype=None): return cls(data, index=index, columns=columns, dtype=dtype) - @deprecate_kwarg(old_arg_name='outtype', new_arg_name='orient') def to_dict(self, orient='dict'): """Convert DataFrame to dictionary. 
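
With ``outtype`` removed, ``orient`` is now the only way to pick the output
shape of ``to_dict``. A minimal sketch of the surviving usage (hypothetical
frame; the ``orient`` values shown are the long-standing documented options,
not something this patch adds):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

    # what used to be spelled outtype='list' is now orient='list'
    df.to_dict(orient='dict')     # {'a': {0: 1, 1: 2}, 'b': {0: 3, 1: 4}}
    df.to_dict(orient='list')     # {'a': [1, 2], 'b': [3, 4]}
    df.to_dict(orient='records')  # [{'a': 1, 'b': 3}, {'a': 2, 'b': 4}]

``orient`` also accepts ``'series'``, ``'split'`` and ``'index'``, unchanged
by this commit.
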
From 27d29158780bc7127bd944fc41eed3b74f38870b Mon Sep 17 00:00:00 2001
From: yui-knk
Date: Tue, 12 Jul 2016 22:14:09 -0400
Subject: [PATCH 23/44] CLN: Fix compile time warnings

This commit suppresses the following compile-time warning:

warning: comparison of constant -1 with expression
of type 'PANDAS_DATETIMEUNIT' is always true
[-Wtautological-constant-out-of-range-compare]

Author: yui-knk

Closes #13607 from yui-knk/fix_c_warning and squashes the following commits:

e9eee1d [yui-knk] CLN: Fix compile time warnings
---
 doc/source/whatsnew/v0.19.0.txt           |  1 +
 pandas/src/datetime/np_datetime_strings.c | 28 ++++-------------------
 pandas/src/ujson/python/objToJSON.c       |  2 +-
 3 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index f457b8d4bd1f6..fb09f99f2a7fe 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -536,6 +536,7 @@ Bug Fixes
 - Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
 - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
 - Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
+- Clean some compile time warnings in datetime parsing (:issue:`13607`)
 - Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
 
diff --git a/pandas/src/datetime/np_datetime_strings.c b/pandas/src/datetime/np_datetime_strings.c
index 3a1d37f86cc28..b633d6cde0820 100644
--- a/pandas/src/datetime/np_datetime_strings.c
+++ b/pandas/src/datetime/np_datetime_strings.c
@@ -460,7 +460,7 @@ parse_iso_8601_datetime(char *str, int len,
     }
 
     /* Check the casting rule */
-    if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+    if (!can_cast_datetime64_units(bestunit, unit,
                                                  casting)) {
         PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                      "'%s' using casting rule %s",
@@ -503,7 +503,7 @@ parse_iso_8601_datetime(char *str, int len,
     }
 
     /* Check the casting rule */
-    if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+    if (!can_cast_datetime64_units(bestunit, unit,
                                                  casting)) {
         PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                      "'%s' using casting rule %s",
@@ -975,7 +975,7 @@ parse_iso_8601_datetime(char *str, int len,
     }
 
     /* Check the casting rule */
-    if (unit != -1 && !can_cast_datetime64_units(bestunit, unit,
+    if (!can_cast_datetime64_units(bestunit, unit,
                                                  casting)) {
         PyErr_Format(PyExc_TypeError, "Cannot parse \"%s\" as unit "
                      "'%s' using casting rule %s",
@@ -1005,11 +1005,6 @@ get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base)
 {
     int len = 0;
 
-    /* If no unit is provided, return the maximum length */
-    if (base == -1) {
-        return PANDAS_DATETIME_MAX_ISO8601_STRLEN;
-    }
-
     switch (base) {
         /* Generic units can only be used to represent NaT */
         /*case PANDAS_FR_GENERIC:*/
@@ -1146,28 +1141,13 @@ make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen,
         local = 0;
     }
 
-    /* Automatically detect a good unit */
-    if (base == -1) {
-        base = lossless_unit_from_datetimestruct(dts);
-        /*
-         * If there's a timezone, use at least minutes precision,
-         * and never split up hours and minutes by default
-         */
-        if ((base < PANDAS_FR_m && local) || base == PANDAS_FR_h) {
-            base = PANDAS_FR_m;
-        }
-        /* Don't split up dates by default */
-        else if (base < PANDAS_FR_D) {
-            base = PANDAS_FR_D;
-        }
-    }
     /*
     * Print weeks with the same precision as days.
* * TODO: Could print weeks with YYYY-Www format if the week * epoch is a Monday. */ - else if (base == PANDAS_FR_W) { + if (base == PANDAS_FR_W) { base = PANDAS_FR_D; } diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 925c18cd23d8f..1080e9548ba56 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -450,7 +450,7 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si static void *PandasDateTimeStructToJSON(pandas_datetimestruct *dts, JSONTypeContext *tc, void *outValue, size_t *_outLen) { - int base = ((PyObjectEncoder*) tc->encoder)->datetimeUnit; + PANDAS_DATETIMEUNIT base = ((PyObjectEncoder*) tc->encoder)->datetimeUnit; if (((PyObjectEncoder*) tc->encoder)->datetimeIso) { From 06103dd7735335e51fcd77a36b2e8a714286a059 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 13 Jul 2016 12:31:44 +0200 Subject: [PATCH 24/44] Pin IPython for doc build to 4.x (see #13639) --- ci/requirements-2.7_DOC_BUILD.run | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.7_DOC_BUILD.run b/ci/requirements-2.7_DOC_BUILD.run index b87a41df4191d..a07721c75cf34 100644 --- a/ci/requirements-2.7_DOC_BUILD.run +++ b/ci/requirements-2.7_DOC_BUILD.run @@ -1,4 +1,4 @@ -ipython +ipython=4 ipykernel sphinx nbconvert From 7dd4091458d9117e57d2ad9ce3126855bd00108c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 13 Jul 2016 07:51:59 -0400 Subject: [PATCH 25/44] CLN: reorg type inference & introspection closes #12503 Author: Jeff Reback Closes #13147 from jreback/types and squashes the following commits: 244649a [Jeff Reback] CLN: reorg type inference & introspection --- ci/lint.sh | 2 +- doc/source/whatsnew/v0.19.0.txt | 22 +- pandas/__init__.py | 2 +- pandas/api/__init__.py | 1 + pandas/api/tests/__init__.py | 0 pandas/api/tests/test_api.py | 213 +++ pandas/api/types/__init__.py | 4 + pandas/compat/numpy/function.py | 3 +- pandas/computation/ops.py | 8 +- pandas/computation/pytables.py | 4 +- pandas/computation/tests/test_eval.py | 19 +- pandas/core/algorithms.py | 145 +- pandas/core/api.py | 2 +- pandas/core/base.py | 31 +- pandas/core/categorical.py | 33 +- pandas/core/common.py | 1656 +------------------ pandas/core/config_init.py | 2 +- pandas/core/convert.py | 127 -- pandas/core/frame.py | 113 +- pandas/core/generic.py | 106 +- pandas/core/groupby.py | 94 +- pandas/core/indexing.py | 49 +- pandas/core/internals.py | 129 +- pandas/core/missing.py | 73 +- pandas/core/nanops.py | 27 +- pandas/core/ops.py | 36 +- pandas/core/panel.py | 26 +- pandas/core/reshape.py | 10 +- pandas/core/series.py | 69 +- pandas/core/strings.py | 25 +- pandas/core/window.py | 41 +- pandas/formats/format.py | 43 +- pandas/formats/printing.py | 4 +- pandas/formats/style.py | 7 +- pandas/indexes/base.py | 79 +- pandas/indexes/category.py | 38 +- pandas/indexes/multi.py | 34 +- pandas/indexes/numeric.py | 22 +- pandas/indexes/range.py | 18 +- pandas/io/common.py | 4 +- pandas/io/data.py | 4 +- pandas/io/excel.py | 26 +- pandas/io/html.py | 4 +- pandas/io/packers.py | 8 +- pandas/io/parsers.py | 44 +- pandas/io/pickle.py | 6 +- pandas/io/pytables.py | 37 +- pandas/io/sql.py | 16 +- pandas/io/stata.py | 16 +- pandas/io/tests/test_sql.py | 15 +- pandas/io/tests/test_stata.py | 2 +- pandas/sparse/array.py | 46 +- pandas/sparse/frame.py | 10 +- pandas/sparse/list.py | 4 +- pandas/sparse/panel.py | 6 +- pandas/sparse/series.py | 8 +- pandas/src/testing.pyx | 12 +- pandas/stats/moments.py | 
4 +- pandas/stats/ols.py | 2 +- pandas/tests/frame/test_apply.py | 6 +- pandas/tests/frame/test_constructors.py | 3 +- pandas/tests/frame/test_dtypes.py | 8 +- pandas/tests/frame/test_indexing.py | 16 +- pandas/tests/indexing/test_indexing.py | 17 +- pandas/tests/series/test_constructors.py | 13 +- pandas/tests/series/test_datetime_values.py | 7 +- pandas/tests/series/test_indexing.py | 24 +- pandas/tests/series/test_quantile.py | 6 +- pandas/tests/test_base.py | 8 +- pandas/tests/test_categorical.py | 61 +- pandas/tests/test_common.py | 658 +------- pandas/tests/test_generic.py | 4 +- pandas/tests/test_graphics.py | 6 +- pandas/tests/test_groupby.py | 12 +- pandas/tests/test_infer_and_convert.py | 653 -------- pandas/tests/test_lib.py | 1 + pandas/tests/test_multilevel.py | 5 +- pandas/tests/test_nanops.py | 4 +- pandas/tests/test_panel.py | 5 +- pandas/tests/test_panel4d.py | 4 +- pandas/tests/test_strings.py | 7 +- pandas/tests/types/test_cast.py | 193 +++ pandas/tests/types/test_common.py | 22 + pandas/tests/types/test_dtypes.py | 19 +- pandas/tests/types/test_generic.py | 36 +- pandas/tests/types/test_inference.py | 820 +++++++++ pandas/tests/types/test_io.py | 116 ++ pandas/tests/types/test_missing.py | 243 +++ pandas/tests/types/test_types.py | 40 - pandas/tools/merge.py | 46 +- pandas/tools/pivot.py | 6 +- pandas/tools/plotting.py | 67 +- pandas/tools/tile.py | 14 +- pandas/tools/util.py | 19 +- pandas/tseries/base.py | 44 +- pandas/tseries/common.py | 16 +- pandas/tseries/converter.py | 28 +- pandas/tseries/frequencies.py | 23 +- pandas/tseries/index.py | 43 +- pandas/tseries/offsets.py | 4 +- pandas/tseries/period.py | 62 +- pandas/tseries/tdi.py | 33 +- pandas/tseries/tests/test_bin_groupby.py | 6 +- pandas/tseries/tests/test_period.py | 4 +- pandas/tseries/tests/test_resample.py | 5 +- pandas/tseries/tests/test_timeseries.py | 3 +- pandas/tseries/tests/test_timezones.py | 2 +- pandas/tseries/timedeltas.py | 8 +- pandas/tseries/tools.py | 35 +- pandas/tseries/util.py | 4 +- pandas/types/api.py | 121 +- pandas/types/cast.py | 860 ++++++++++ pandas/types/common.py | 448 +++++ pandas/types/concat.py | 47 +- pandas/types/inference.py | 104 ++ pandas/types/missing.py | 394 +++++ pandas/util/testing.py | 20 +- pandas/util/validators.py | 4 +- 118 files changed, 4944 insertions(+), 4134 deletions(-) create mode 100644 pandas/api/__init__.py create mode 100644 pandas/api/tests/__init__.py create mode 100644 pandas/api/tests/test_api.py create mode 100644 pandas/api/types/__init__.py delete mode 100644 pandas/core/convert.py delete mode 100644 pandas/tests/test_infer_and_convert.py create mode 100644 pandas/tests/types/test_cast.py create mode 100644 pandas/tests/types/test_common.py create mode 100644 pandas/tests/types/test_inference.py create mode 100644 pandas/tests/types/test_io.py create mode 100644 pandas/tests/types/test_missing.py delete mode 100644 pandas/tests/types/test_types.py create mode 100644 pandas/types/cast.py create mode 100644 pandas/types/common.py create mode 100644 pandas/types/inference.py create mode 100644 pandas/types/missing.py diff --git a/ci/lint.sh b/ci/lint.sh index a4c960084040f..9f582f72fcdd7 100755 --- a/ci/lint.sh +++ b/ci/lint.sh @@ -8,7 +8,7 @@ RET=0 if [ "$LINT" ]; then echo "Linting" - for path in 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util' + for path in 'api' 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util' do echo 
"linting -> pandas/$path" flake8 pandas/$path --filename '*.py' diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index fb09f99f2a7fe..bef02a06135de 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -10,6 +10,7 @@ users upgrade to this version. Highlights include: - :func:`merge_asof` for asof-style time-series joining, see :ref:`here ` +- pandas development api, see :ref:`here ` .. contents:: What's new in v0.18.2 :local: @@ -20,6 +21,25 @@ Highlights include: New features ~~~~~~~~~~~~ +.. _whatsnew_0190.dev_api: + +pandas development API +^^^^^^^^^^^^^^^^^^^^^^ + +As part of making pandas APi more uniform and accessible in the future, we have created a standard +sub-package of pandas, ``pandas.api`` to hold public API's. We are starting by exposing type +introspection functions in ``pandas.api.types``. More sub-packages and officially sanctioned API's +will be published in future versions of pandas. + +The following are now part of this API: + +.. ipython:: python + + import pprint + from pandas.api import types + funcs = [ f for f in dir(types) if not f.startswith('_') ] + pprint.pprint(funcs) + .. _whatsnew_0190.enhancements.asof_merge: :func:`merge_asof` for asof-style time-series joining @@ -227,7 +247,7 @@ Other enhancements - Consistent with the Python API, ``pd.read_csv()`` will now interpret ``+inf`` as positive infinity (:issue:`13274`) - The ``DataFrame`` constructor will now respect key ordering if a list of ``OrderedDict`` objects are passed in (:issue:`13304`) - ``pd.read_html()`` has gained support for the ``decimal`` option (:issue:`12907`) -- A top-level function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) +- A function :func:`union_categorical` has been added for combining categoricals, see :ref:`Unioning Categoricals` (:issue:`13361`) - ``Series`` has gained the properties ``.is_monotonic``, ``.is_monotonic_increasing``, ``.is_monotonic_decreasing``, similar to ``Index`` (:issue:`13336`) .. 
_whatsnew_0190.api:
diff --git a/pandas/__init__.py b/pandas/__init__.py
index 350898c9925e7..2d91c97144e3c 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -16,7 +16,7 @@
 if missing_dependencies:
     raise ImportError("Missing required dependencies {0}".format(missing_dependencies))
-
+del hard_dependencies, dependency, missing_dependencies
 
 # numpy compat
 from pandas.compat.numpy import *
diff --git a/pandas/api/__init__.py b/pandas/api/__init__.py
new file mode 100644
index 0000000000000..fcbf42f6dabc4
--- /dev/null
+++ b/pandas/api/__init__.py
@@ -0,0 +1 @@
+""" public toolkit API """
diff --git a/pandas/api/tests/__init__.py b/pandas/api/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
new file mode 100644
index 0000000000000..3f6c97441d659
--- /dev/null
+++ b/pandas/api/tests/test_api.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+
+import pandas as pd
+from pandas.core import common as com
+from pandas import api
+from pandas.api import types
+from pandas.util import testing as tm
+
+_multiprocess_can_split_ = True
+
+
+class Base(object):
+
+    def check(self, namespace, expected, ignored=None):
+        # see which names are in the namespace, minus optional
+        # ignored ones
+        # compare vs the expected
+
+        result = sorted([f for f in dir(namespace) if not f.startswith('_')])
+        if ignored is not None:
+            result = sorted(list(set(result) - set(ignored)))
+
+        expected = sorted(expected)
+        tm.assert_almost_equal(result, expected)
+
+
+class TestPDApi(Base, tm.TestCase):
+
+    # these are optionally imported based on testing
+    # & need to be ignored
+    ignored = ['tests', 'rpy', 'sandbox', 'locale']
+
+    # top-level sub-packages
+    lib = ['api', 'compat', 'computation', 'core',
+           'indexes', 'formats', 'pandas',
+           'test', 'tools', 'tseries',
+           'types', 'util', 'options', 'io']
+
+    # top-level packages that are c-imports, should rename to _*
+    # to avoid naming conflicts
+    lib_to_rename = ['algos', 'hashtable', 'tslib', 'msgpack', 'sparse',
+                     'json', 'lib', 'index', 'parser']
+
+    # these are already deprecated; awaiting removal
+    deprecated_modules = ['ols', 'stats']
+
+    # misc
+    misc = ['IndexSlice', 'NaT']
+
+    # top-level classes
+    classes = ['Categorical', 'CategoricalIndex', 'DataFrame', 'DateOffset',
+               'DatetimeIndex', 'ExcelFile', 'ExcelWriter', 'Float64Index',
+               'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex',
+               'Period', 'PeriodIndex', 'RangeIndex',
+               'Series', 'SparseArray', 'SparseDataFrame',
+               'SparseSeries', 'TimeGrouper', 'Timedelta',
+               'TimedeltaIndex', 'Timestamp']
+
+    # these are already deprecated; awaiting removal
+    deprecated_classes = ['SparsePanel', 'TimeSeries', 'WidePanel',
+                          'SparseTimeSeries']
+
+    # these should be deprecated in the future
+    deprecated_classes_in_future = ['Panel', 'Panel4D',
+                                    'SparseList', 'Term']
+
+    # these should be removed from top-level namespace
+    remove_classes_from_top_level_namespace = ['Expr']
+
+    # external modules exposed in pandas namespace
+    modules = ['np', 'datetime', 'datetools']
+
+    # top-level functions
+    funcs = ['bdate_range', 'concat', 'crosstab', 'cut',
+             'date_range', 'eval',
+             'factorize', 'get_dummies', 'get_store',
+             'infer_freq', 'isnull', 'lreshape',
+             'match', 'melt', 'notnull', 'offsets',
+             'merge', 'merge_ordered', 'merge_asof',
+             'period_range',
+             'pivot', 'pivot_table', 'plot_params', 'qcut',
+             'scatter_matrix',
+             'show_versions', 'timedelta_range', 'unique',
+             'value_counts', 'wide_to_long']
+
+    # top-level option funcs
+    funcs_option = ['reset_option', 'describe_option', 'get_option',
+                    'option_context', 'set_option',
+                    'set_eng_float_format']
+
+    # top-level read_* funcs
+    funcs_read = ['read_clipboard', 'read_csv', 'read_excel', 'read_fwf',
+                  'read_gbq', 'read_hdf', 'read_html', 'read_json',
+                  'read_msgpack', 'read_pickle', 'read_sas', 'read_sql',
+                  'read_sql_query', 'read_sql_table', 'read_stata',
+                  'read_table']
+
+    # top-level to_* funcs
+    funcs_to = ['to_datetime', 'to_msgpack',
+                'to_numeric', 'to_pickle', 'to_timedelta']
+
+    # these should be deprecated in the future
+    deprecated_funcs_in_future = ['pnow', 'groupby', 'info']
+
+    # these are already deprecated; awaiting removal
+    deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar',
+                        'ewmvol', 'expanding_apply', 'expanding_corr',
+                        'expanding_count', 'expanding_cov', 'expanding_kurt',
+                        'expanding_max', 'expanding_mean', 'expanding_median',
+                        'expanding_min', 'expanding_quantile',
+                        'expanding_skew', 'expanding_std', 'expanding_sum',
+                        'expanding_var', 'fama_macbeth', 'rolling_apply',
+                        'rolling_corr', 'rolling_count', 'rolling_cov',
+                        'rolling_kurt', 'rolling_max', 'rolling_mean',
+                        'rolling_median', 'rolling_min', 'rolling_quantile',
+                        'rolling_skew', 'rolling_std', 'rolling_sum',
+                        'rolling_var', 'rolling_window', 'ordered_merge']
+
+    def test_api(self):
+
+        self.check(pd,
+                   self.lib + self.lib_to_rename + self.misc +
+                   self.modules + self.deprecated_modules +
+                   self.classes + self.deprecated_classes +
+                   self.deprecated_classes_in_future +
+                   self.remove_classes_from_top_level_namespace +
+                   self.funcs + self.funcs_option +
+                   self.funcs_read + self.funcs_to +
+                   self.deprecated_funcs +
+                   self.deprecated_funcs_in_future,
+                   self.ignored)
+
+
+class TestApi(Base, tm.TestCase):
+
+    allowed = ['tests', 'types']
+
+    def test_api(self):
+
+        self.check(api, self.allowed)
+
+
+class TestTypes(Base, tm.TestCase):
+
+    allowed = ['is_any_int_dtype', 'is_bool', 'is_bool_dtype',
+               'is_categorical', 'is_categorical_dtype', 'is_complex',
+               'is_complex_dtype', 'is_datetime64_any_dtype',
+               'is_datetime64_dtype', 'is_datetime64_ns_dtype',
+               'is_datetime64tz_dtype', 'is_datetimetz', 'is_dtype_equal',
+               'is_extension_type', 'is_float', 'is_float_dtype',
+               'is_floating_dtype', 'is_int64_dtype', 'is_integer',
+               'is_integer_dtype', 'is_number', 'is_numeric_dtype',
+               'is_object_dtype', 'is_scalar', 'is_sparse',
+               'is_string_dtype', 'is_timedelta64_dtype',
+               'is_timedelta64_ns_dtype',
+               'is_re', 'is_re_compilable',
+               'is_dict_like', 'is_iterator',
+               'is_list_like', 'is_hashable',
+               'is_named_tuple', 'is_sequence',
+               'pandas_dtype']
+
+    def test_types(self):
+
+        self.check(types, self.allowed)
+
+    def check_deprecation(self, fold, fnew):
+        with tm.assert_produces_warning(FutureWarning):
+            try:
+                result = fold('foo')
+                expected = fnew('foo')
+                self.assertEqual(result, expected)
+            except TypeError:
+                self.assertRaises(TypeError,
+                                  lambda: fnew('foo'))
+            except AttributeError:
+                self.assertRaises(AttributeError,
+                                  lambda: fnew('foo'))
+
+    def test_deprecation_core_common(self):
+
+        # test that we are in fact deprecating
+        # the pandas.core.common introspectors
+        for t in self.allowed:
+            self.check_deprecation(getattr(com, t), getattr(types, t))
+
+    def test_deprecation_core_common_moved(self):
+
+        # these are in pandas.types.common
+        l = ['is_datetime_arraylike',
+             'is_datetime_or_timedelta_dtype',
+             'is_datetimelike',
+             'is_datetimelike_v_numeric',
+             'is_datetimelike_v_object',
+             'is_datetimetz',
+             'is_int_or_datetime_dtype',
+
'is_period_arraylike', + 'is_string_like', + 'is_string_like_dtype'] + + from pandas.types import common as c + for t in l: + self.check_deprecation(getattr(com, t), getattr(c, t)) + + def test_removed_from_core_common(self): + + for t in ['is_null_datelike_scalar', + 'ensure_float']: + self.assertRaises(AttributeError, lambda: getattr(com, t)) + +if __name__ == '__main__': + import nose + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/api/types/__init__.py b/pandas/api/types/__init__.py new file mode 100644 index 0000000000000..ee217543f0420 --- /dev/null +++ b/pandas/api/types/__init__.py @@ -0,0 +1,4 @@ +""" public toolkit API """ + +from pandas.types.api import * # noqa +del np # noqa diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index 15bf6d31b7109..adc17c7514832 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -21,7 +21,8 @@ from numpy import ndarray from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) -from pandas.core.common import is_bool, is_integer, UnsupportedFunctionCall +from pandas.core.common import UnsupportedFunctionCall +from pandas.types.common import is_integer, is_bool from pandas.compat import OrderedDict diff --git a/pandas/computation/ops.py b/pandas/computation/ops.py index 7a0743f6b2778..96a04cff9372e 100644 --- a/pandas/computation/ops.py +++ b/pandas/computation/ops.py @@ -7,11 +7,11 @@ import numpy as np +from pandas.types.common import is_list_like, is_scalar import pandas as pd from pandas.compat import PY3, string_types, text_type import pandas.core.common as com from pandas.formats.printing import pprint_thing, pprint_thing_encoded -import pandas.lib as lib from pandas.core.base import StringMixin from pandas.computation.common import _ensure_decoded, _result_type_many from pandas.computation.scope import _DEFAULT_GLOBALS @@ -100,7 +100,7 @@ def update(self, value): @property def isscalar(self): - return lib.isscalar(self._value) + return is_scalar(self._value) @property def type(self): @@ -229,7 +229,7 @@ def _in(x, y): try: return x.isin(y) except AttributeError: - if com.is_list_like(x): + if is_list_like(x): try: return y.isin(x) except AttributeError: @@ -244,7 +244,7 @@ def _not_in(x, y): try: return ~x.isin(y) except AttributeError: - if com.is_list_like(x): + if is_list_like(x): try: return ~y.isin(x) except AttributeError: diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index d6d55d15fec30..e375716b0d606 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -7,6 +7,8 @@ from datetime import datetime, timedelta import numpy as np import pandas as pd + +from pandas.types.common import is_list_like import pandas.core.common as com from pandas.compat import u, string_types, DeepChainMap from pandas.core.base import StringMixin @@ -127,7 +129,7 @@ def pr(left, right): def conform(self, rhs): """ inplace conform rhs """ - if not com.is_list_like(rhs): + if not is_list_like(rhs): rhs = [rhs] if isinstance(rhs, np.ndarray): rhs = rhs.ravel() diff --git a/pandas/computation/tests/test_eval.py b/pandas/computation/tests/test_eval.py index 5019dd392a567..066df0521fef6 100644 --- a/pandas/computation/tests/test_eval.py +++ b/pandas/computation/tests/test_eval.py @@ -13,6 +13,7 @@ from numpy.random import randn, rand, randint import numpy as np +from pandas.types.common import is_list_like, is_scalar import pandas as pd from 
pandas.core import common as com from pandas import DataFrame, Series, Panel, date_range @@ -200,7 +201,7 @@ def check_complex_cmp_op(self, lhs, cmp1, rhs, binop, cmp2): ex = '(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)'.format(cmp1=cmp1, binop=binop, cmp2=cmp2) - scalar_with_in_notin = (lib.isscalar(rhs) and (cmp1 in skip_these or + scalar_with_in_notin = (is_scalar(rhs) and (cmp1 in skip_these or cmp2 in skip_these)) if scalar_with_in_notin: with tm.assertRaises(TypeError): @@ -253,7 +254,7 @@ def check_operands(left, right, cmp_op): def check_simple_cmp_op(self, lhs, cmp1, rhs): ex = 'lhs {0} rhs'.format(cmp1) - if cmp1 in ('in', 'not in') and not com.is_list_like(rhs): + if cmp1 in ('in', 'not in') and not is_list_like(rhs): self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, parser=self.parser, local_dict={'lhs': lhs, 'rhs': rhs}) @@ -331,7 +332,7 @@ def check_pow(self, lhs, arith1, rhs): expected = self.get_expected_pow_result(lhs, rhs) result = pd.eval(ex, engine=self.engine, parser=self.parser) - if (lib.isscalar(lhs) and lib.isscalar(rhs) and + if (is_scalar(lhs) and is_scalar(rhs) and _is_py3_complex_incompat(result, expected)): self.assertRaises(AssertionError, tm.assert_numpy_array_equal, result, expected) @@ -364,16 +365,16 @@ def check_compound_invert_op(self, lhs, cmp1, rhs): skip_these = 'in', 'not in' ex = '~(lhs {0} rhs)'.format(cmp1) - if lib.isscalar(rhs) and cmp1 in skip_these: + if is_scalar(rhs) and cmp1 in skip_these: self.assertRaises(TypeError, pd.eval, ex, engine=self.engine, parser=self.parser, local_dict={'lhs': lhs, 'rhs': rhs}) else: # compound - if lib.isscalar(lhs) and lib.isscalar(rhs): + if is_scalar(lhs) and is_scalar(rhs): lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs)) expected = _eval_single_bin(lhs, cmp1, rhs, self.engine) - if lib.isscalar(expected): + if is_scalar(expected): expected = not expected else: expected = ~expected @@ -643,17 +644,17 @@ def test_identical(self): x = 1 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = 1.5 result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, 1.5) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = False result = pd.eval('x', engine=self.engine, parser=self.parser) self.assertEqual(result, False) - self.assertTrue(lib.isscalar(result)) + self.assertTrue(is_scalar(result)) x = np.array([1]) result = pd.eval('x', engine=self.engine, parser=self.parser) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4b40bce79cbb5..c3ba734353a8d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -7,10 +7,31 @@ import numpy as np from pandas import compat, lib, tslib, _np_version_under1p8 +from pandas.types.cast import _maybe_promote +from pandas.types.generic import ABCPeriodIndex, ABCDatetimeIndex +from pandas.types.common import (is_integer_dtype, + is_int64_dtype, + is_categorical_dtype, + is_extension_type, + is_datetimetz, + is_period_arraylike, + is_datetime_or_timedelta_dtype, + is_float_dtype, + needs_i8_conversion, + is_categorical, + is_datetime64_dtype, + is_timedelta64_dtype, + is_scalar, + _ensure_platform_int, + _ensure_object, + _ensure_float64, + _ensure_int64, + is_list_like) +from pandas.types.missing import isnull + import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable -from pandas.types import api as gt from pandas.compat 
import string_types from pandas.tslib import iNaT @@ -105,12 +126,12 @@ def isin(comps, values): boolean array same length as comps """ - if not com.is_list_like(comps): + if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(comps).__name__)) comps = np.asarray(comps) - if not com.is_list_like(values): + if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" " to isin(), you passed a " "[{0}]".format(type(values).__name__)) @@ -126,15 +147,15 @@ def isin(comps, values): f = lambda x, y: lib.ismember_int64(x, set(y)) # may need i8 conversion for proper membership testing - if com.is_datetime64_dtype(comps): + if is_datetime64_dtype(comps): from pandas.tseries.tools import to_datetime values = to_datetime(values)._values.view('i8') comps = comps.view('i8') - elif com.is_timedelta64_dtype(comps): + elif is_timedelta64_dtype(comps): from pandas.tseries.timedeltas import to_timedelta values = to_timedelta(values)._values.view('i8') comps = comps.view('i8') - elif com.is_int64_dtype(comps): + elif is_int64_dtype(comps): pass else: f = lambda x, y: lib.ismember(x, set(values)) @@ -171,20 +192,20 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): vals = np.asarray(values) # localize to UTC - is_datetimetz = com.is_datetimetz(values) - if is_datetimetz: + is_datetimetz_type = is_datetimetz(values) + if is_datetimetz_type: values = DatetimeIndex(values) vals = values.tz_localize(None) - is_datetime = com.is_datetime64_dtype(vals) - is_timedelta = com.is_timedelta64_dtype(vals) + is_datetime = is_datetime64_dtype(vals) + is_timedelta = is_timedelta64_dtype(vals) (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables) table = hash_klass(size_hint or len(vals)) uniques = vec_klass() labels = table.get_labels(vals, uniques, 0, na_sentinel, True) - labels = com._ensure_platform_int(labels) + labels = _ensure_platform_int(labels) uniques = uniques.to_array() @@ -194,7 +215,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): except: # unorderable in py3 if mixed str/int t = hash_klass(len(uniques)) - t.map_locations(com._ensure_object(uniques)) + t.map_locations(_ensure_object(uniques)) # order ints before strings ordered = np.concatenate([ @@ -202,8 +223,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): dtype=object)) for f in [lambda x: not isinstance(x, string_types), lambda x: isinstance(x, string_types)]]) - sorter = com._ensure_platform_int(t.lookup( - com._ensure_object(ordered))) + sorter = _ensure_platform_int(t.lookup( + _ensure_object(ordered))) reverse_indexer = np.empty(len(sorter), dtype=np.int_) reverse_indexer.put(sorter, np.arange(len(sorter))) @@ -214,7 +235,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None): uniques = uniques.take(sorter) - if is_datetimetz: + if is_datetimetz_type: # reset tz uniques = DatetimeIndex(uniques.astype('M8[ns]')).tz_localize( @@ -267,7 +288,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, raise TypeError("bins argument only works with numeric data.") values = cat.codes - if com.is_extension_type(values) and not com.is_datetimetz(values): + if is_extension_type(values) and not is_datetimetz(values): # handle Categorical and sparse, # datetime tz can be handeled in ndarray path result = Series(values).values.value_counts(dropna=dropna) @@ -298,9 +319,9 @@ def 
value_counts(values, sort=True, ascending=False, normalize=False, def _value_counts_arraylike(values, dropna=True): - is_datetimetz = com.is_datetimetz(values) - is_period = (isinstance(values, gt.ABCPeriodIndex) or - com.is_period_arraylike(values)) + is_datetimetz_type = is_datetimetz(values) + is_period = (isinstance(values, ABCPeriodIndex) or + is_period_arraylike(values)) orig = values @@ -308,7 +329,7 @@ def _value_counts_arraylike(values, dropna=True): values = Series(values).values dtype = values.dtype - if com.is_datetime_or_timedelta_dtype(dtype) or is_period: + if is_datetime_or_timedelta_dtype(dtype) or is_period: from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex @@ -327,8 +348,8 @@ def _value_counts_arraylike(values, dropna=True): keys = keys.astype(dtype) # dtype handling - if is_datetimetz: - if isinstance(orig, gt.ABCDatetimeIndex): + if is_datetimetz_type: + if isinstance(orig, ABCDatetimeIndex): tz = orig.tz else: tz = orig.dt.tz @@ -336,15 +357,15 @@ def _value_counts_arraylike(values, dropna=True): if is_period: keys = PeriodIndex._simple_new(keys, freq=freq) - elif com.is_integer_dtype(dtype): - values = com._ensure_int64(values) + elif is_integer_dtype(dtype): + values = _ensure_int64(values) keys, counts = htable.value_count_scalar64(values, dropna) - elif com.is_float_dtype(dtype): - values = com._ensure_float64(values) + elif is_float_dtype(dtype): + values = _ensure_float64(values) keys, counts = htable.value_count_scalar64(values, dropna) else: - values = com._ensure_object(values) - mask = com.isnull(values) + values = _ensure_object(values) + mask = isnull(values) keys, counts = htable.value_count_object(values, mask) if not dropna and mask.any(): keys = np.insert(keys, 0, np.NaN) @@ -366,8 +387,8 @@ def mode(values): constructor = Series dtype = values.dtype - if com.is_integer_dtype(values): - values = com._ensure_int64(values) + if is_integer_dtype(values): + values = _ensure_int64(values) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) elif issubclass(values.dtype.type, (np.datetime64, np.timedelta64)): @@ -375,11 +396,11 @@ def mode(values): values = values.view(np.int64) result = constructor(sorted(htable.mode_int64(values)), dtype=dtype) - elif com.is_categorical_dtype(values): + elif is_categorical_dtype(values): result = constructor(values.mode()) else: - mask = com.isnull(values) - values = com._ensure_object(values) + mask = isnull(values) + values = _ensure_object(values) res = htable.mode_object(values, mask) try: res = sorted(res) @@ -459,7 +480,7 @@ def quantile(x, q, interpolation_method='fraction'): """ x = np.asarray(x) - mask = com.isnull(x) + mask = isnull(x) x = x[~mask] @@ -486,7 +507,7 @@ def _get_score(at): return score - if lib.isscalar(q): + if is_scalar(q): return _get_score(q) else: q = np.asarray(q, np.float64) @@ -593,18 +614,18 @@ def _hashtable_algo(f, dtype, return_dtype=None): """ f(HashTable, type_caster) -> result """ - if com.is_float_dtype(dtype): - return f(htable.Float64HashTable, com._ensure_float64) - elif com.is_integer_dtype(dtype): - return f(htable.Int64HashTable, com._ensure_int64) - elif com.is_datetime64_dtype(dtype): + if is_float_dtype(dtype): + return f(htable.Float64HashTable, _ensure_float64) + elif is_integer_dtype(dtype): + return f(htable.Int64HashTable, _ensure_int64) + elif is_datetime64_dtype(dtype): return_dtype = return_dtype or 'M8[ns]' - return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) - elif 
com.is_timedelta64_dtype(dtype): + return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) + elif is_timedelta64_dtype(dtype): return_dtype = return_dtype or 'm8[ns]' - return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype) + return f(htable.Int64HashTable, _ensure_int64).view(return_dtype) else: - return f(htable.PyObjectHashTable, com._ensure_object) + return f(htable.PyObjectHashTable, _ensure_object) _hashtables = { 'float64': (htable.Float64HashTable, htable.Float64Vector), @@ -614,20 +635,20 @@ def _hashtable_algo(f, dtype, return_dtype=None): def _get_data_algo(values, func_map): - if com.is_float_dtype(values): + if is_float_dtype(values): f = func_map['float64'] - values = com._ensure_float64(values) + values = _ensure_float64(values) - elif com.needs_i8_conversion(values): + elif needs_i8_conversion(values): f = func_map['int64'] values = values.view('i8') - elif com.is_integer_dtype(values): + elif is_integer_dtype(values): f = func_map['int64'] - values = com._ensure_int64(values) + values = _ensure_int64(values) else: f = func_map['generic'] - values = com._ensure_object(values) + values = _ensure_object(values) return f, values @@ -689,7 +710,7 @@ def _take_nd_generic(arr, indexer, out, axis, fill_value, mask_info): if arr.dtype != out.dtype: arr = arr.astype(out.dtype) if arr.shape[axis] > 0: - arr.take(com._ensure_platform_int(indexer), axis=axis, out=out) + arr.take(_ensure_platform_int(indexer), axis=axis, out=out) if needs_masking: outindexer = [slice(None)] * arr.ndim outindexer[axis] = mask @@ -830,7 +851,7 @@ def _get_take_nd_function(ndim, arr_dtype, out_dtype, axis=0, mask_info=None): return func def func(arr, indexer, out, fill_value=np.nan): - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) _take_nd_generic(arr, indexer, out, axis=axis, fill_value=fill_value, mask_info=mask_info) @@ -854,7 +875,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, out : ndarray or None, default None Optional output array, must be appropriate type to hold input and fill_value together, if indexer has any -1 value entries; call - common._maybe_promote to determine this type for any fill_value + _maybe_promote to determine this type for any fill_value fill_value : any, default np.nan Fill value to replace -1 values with mask_info : tuple of (ndarray, boolean) @@ -868,24 +889,24 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, """ # dispatch to internal type takes - if com.is_categorical(arr): + if is_categorical(arr): return arr.take_nd(indexer, fill_value=fill_value, allow_fill=allow_fill) - elif com.is_datetimetz(arr): + elif is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + dtype, fill_value = _maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -931,7 +952,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, 
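As a rough illustration of why `take_nd` routes the fill value through `_maybe_promote` (per the docstring above): an integer array cannot represent a NaN fill, so the output dtype has to be widened before the take. A NumPy-only sketch, not part of the patch itself (the helper name is illustrative):

    import numpy as np

    def take_with_fill(arr, indexer, fill_value=np.nan):
        # Widen the dtype only when a -1 entry actually needs a fill
        # value the current dtype cannot hold (e.g. NaN into int64),
        # mirroring the _maybe_promote check above.
        indexer = np.asarray(indexer, dtype=np.int64)
        mask = indexer == -1
        dtype = arr.dtype
        if mask.any():
            dtype = np.promote_types(arr.dtype, np.asarray(fill_value).dtype)
        out = arr.astype(dtype).take(indexer)
        out[mask] = fill_value
        return out

    take_with_fill(np.array([10, 20, 30]), [2, -1, 0])
    # -> array([30., nan, 10.])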
func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info) - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) func(arr, indexer, out, fill_value) if flip_order: @@ -957,11 +978,11 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, if row_idx is None: row_idx = np.arange(arr.shape[0], dtype=np.int64) else: - row_idx = com._ensure_int64(row_idx) + row_idx = _ensure_int64(row_idx) if col_idx is None: col_idx = np.arange(arr.shape[1], dtype=np.int64) else: - col_idx = com._ensure_int64(col_idx) + col_idx = _ensure_int64(col_idx) indexer = row_idx, col_idx if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() @@ -969,7 +990,7 @@ def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, else: # check for promotion based on types only (do this first because # it's faster than computing a mask) - dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) + dtype, fill_value = _maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: @@ -1032,7 +1053,7 @@ def diff(arr, n, axis=0): na = np.nan dtype = arr.dtype is_timedelta = False - if com.needs_i8_conversion(arr): + if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') na = tslib.iNaT diff --git a/pandas/core/api.py b/pandas/core/api.py index 0a6992bfebd70..579f21eb4ada8 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,7 +5,7 @@ import numpy as np from pandas.core.algorithms import factorize, match, unique, value_counts -from pandas.core.common import isnull, notnull +from pandas.types.missing import isnull, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.formats.format import set_eng_float_format diff --git a/pandas/core/base.py b/pandas/core/base.py index 13a6b4b7b4ce0..a0dfebdfde356 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,6 +4,12 @@ from pandas import compat from pandas.compat import builtins import numpy as np + +from pandas.types.missing import isnull +from pandas.types.generic import ABCDataFrame, ABCSeries, ABCIndex +from pandas.types.common import (_ensure_object, is_object_dtype, + is_list_like, is_scalar) + from pandas.core import common as com import pandas.core.nanops as nanops import pandas.lib as lib @@ -11,7 +17,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError -from pandas.types import api as gt from pandas.formats.printing import pprint_thing _shared_docs = dict() @@ -121,7 +126,7 @@ def __sizeof__(self): """ if hasattr(self, 'memory_usage'): mem = self.memory_usage(deep=True) - if not lib.isscalar(mem): + if not is_scalar(mem): mem = mem.sum() return int(mem) @@ -293,15 +298,15 @@ def name(self): @property def _selection_list(self): - if not isinstance(self._selection, (list, tuple, gt.ABCSeries, - gt.ABCIndex, np.ndarray)): + if not isinstance(self._selection, (list, tuple, ABCSeries, + ABCIndex, np.ndarray)): return [self._selection] return self._selection @cache_readonly def _selected_obj(self): - if self._selection is None or isinstance(self.obj, gt.ABCSeries): + if self._selection is None or isinstance(self.obj, ABCSeries): return self.obj else: return self.obj[self._selection] @@ -313,7 +318,7 @@ def ndim(self): @cache_readonly def _obj_with_exclusions(self): if 
self._selection is not None and isinstance(self.obj, - gt.ABCDataFrame): + ABCDataFrame): return self.obj.reindex(columns=self._selection_list) if len(self.exclusions) > 0: @@ -325,7 +330,7 @@ def __getitem__(self, key): if self._selection is not None: raise Exception('Column(s) %s already selected' % self._selection) - if isinstance(key, (list, tuple, gt.ABCSeries, gt.ABCIndex, + if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)): if len(self.obj.columns.intersection(key)) != len(key): bad_keys = list(set(key).difference(self.obj.columns)) @@ -553,7 +558,7 @@ def _agg(arg, func): if isinstance(result, list): result = concat(result, keys=keys, axis=1) elif isinstance(list(compat.itervalues(result))[0], - gt.ABCDataFrame): + ABCDataFrame): result = concat([result[k] for k in keys], keys=keys, axis=1) else: from pandas import DataFrame @@ -682,7 +687,7 @@ def _gotitem(self, key, ndim, subset=None): **kwargs) self._reset_cache() if subset.ndim == 2: - if lib.isscalar(key) and key in subset or com.is_list_like(key): + if is_scalar(key) and key in subset or is_list_like(key): self._selection = key return self @@ -903,7 +908,7 @@ def argmin(self, axis=None): @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ - return com.isnull(self).any() + return isnull(self).any() def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): @@ -980,7 +985,7 @@ def nunique(self, dropna=True): """ uniqs = self.unique() n = len(uniqs) - if dropna and com.isnull(uniqs).any(): + if dropna and isnull(uniqs).any(): n -= 1 return n @@ -1053,7 +1058,7 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and com.is_object_dtype(self): + if deep and is_object_dtype(self): v += lib.memory_usage_of_objects(self.values) return v @@ -1195,7 +1200,7 @@ def drop_duplicates(self, keep='first', inplace=False): False: 'first'}) @Appender(_shared_docs['duplicated'] % _indexops_doc_kwargs) def duplicated(self, keep='first'): - keys = com._values_from_object(com._ensure_object(self.values)) + keys = com._values_from_object(_ensure_object(self.values)) duplicated = lib.duplicated(keys, keep=keep) try: return self._constructor(duplicated, diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index f4aeaf9184d09..79d8bfbf57f12 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -7,6 +7,22 @@ from pandas import compat, lib from pandas.compat import u +from pandas.types.generic import ABCSeries, ABCIndexClass, ABCCategoricalIndex +from pandas.types.missing import isnull, notnull +from pandas.types.cast import (_possibly_infer_to_datetimelike, + _coerce_indexer_dtype) +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import (_ensure_int64, + _ensure_object, + _ensure_platform_int, + is_dtype_equal, + is_datetimelike, + is_categorical_dtype, + is_integer_dtype, is_bool, + is_list_like, is_sequence, + is_scalar) +from pandas.core.common import is_null_slice + from pandas.core.algorithms import factorize, take_1d from pandas.core.base import (PandasObject, PandasDelegate, NoNewAttributesMixin, _shared_docs) @@ -16,13 +32,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import ( - ABCSeries, ABCIndexClass, ABCCategoricalIndex, isnull, notnull, - is_dtype_equal, is_categorical_dtype, is_integer_dtype, - _possibly_infer_to_datetimelike, 
is_list_like, - is_sequence, is_null_slice, is_bool, _ensure_object, _ensure_int64, - _coerce_indexer_dtype) -from pandas.types.api import CategoricalDtype from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option @@ -64,7 +73,7 @@ def f(self, other): # With cat[0], for example, being ``np.int64(1)`` by the time it gets # into this function would become ``np.array(1)``. other = lib.item_from_zerodim(other) - if lib.isscalar(other): + if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) return getattr(self._codes, op)(i) @@ -968,7 +977,7 @@ def shift(self, periods): if codes.ndim > 1: raise NotImplementedError("Categorical with ndim > 1.") if np.prod(codes.shape) and (periods != 0): - codes = np.roll(codes, com._ensure_platform_int(periods), axis=0) + codes = np.roll(codes, _ensure_platform_int(periods), axis=0) if periods > 0: codes[:periods] = -1 else: @@ -1148,7 +1157,7 @@ def value_counts(self, dropna=True): counts : Series """ from numpy import bincount - from pandas.core.common import isnull + from pandas.types.missing import isnull from pandas.core.series import Series from pandas.core.index import CategoricalIndex @@ -1182,7 +1191,7 @@ def get_values(self): Index if datetime / periods """ # if we are a datetime and period index, return Index to keep metadata - if com.is_datetimelike(self.categories): + if is_datetimelike(self.categories): return self.categories.take(self._codes, fill_value=np.nan) return np.array(self) @@ -1933,7 +1942,7 @@ def _convert_to_list_like(list_like): if (is_sequence(list_like) or isinstance(list_like, tuple) or isinstance(list_like, types.GeneratorType)): return list(list_like) - elif lib.isscalar(list_like): + elif is_scalar(list_like): return [list_like] else: # is this reached? diff --git a/pandas/core/common.py b/pandas/core/common.py index 28bae362a3411..99dd2e9f5b8a9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,23 +2,66 @@ Misc tools for implementing data structures """ -import re -import collections -import numbers +import sys +import warnings from datetime import datetime, timedelta from functools import partial import numpy as np -import pandas as pd -import pandas.algos as algos import pandas.lib as lib import pandas.tslib as tslib from pandas import compat -from pandas.compat import (long, zip, map, string_types, - iteritems) -from pandas.types import api as gt -from pandas.types.api import * # noqa +from pandas.compat import long, zip, iteritems from pandas.core.config import get_option +from pandas.types.generic import ABCSeries +from pandas.types.common import _NS_DTYPE, is_integer +from pandas.types.inference import _iterable_not_string +from pandas.types.missing import isnull +from pandas.api import types +from pandas.types import common + +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.core.common'] +for t in [t for t in dir(types) if not t.startswith('_')]: + + def outer(t=t): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.core.common.{t} is deprecated. 
" + "import from the public API: " + "pandas.api.types.{t} instead".format(t=t), + FutureWarning, stacklevel=2) + return getattr(types, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) + +# back-compat for non-public functions +# deprecate these functions +for t in ['is_datetime_arraylike', + 'is_datetime_or_timedelta_dtype', + 'is_datetimelike', + 'is_datetimelike_v_numeric', + 'is_datetimelike_v_object', + 'is_datetimetz', + 'is_int_or_datetime_dtype', + 'is_period_arraylike', + 'is_string_like', + 'is_string_like_dtype']: + + def outer(t=t): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.core.common.{t} is deprecated. " + "These are not longer public API functions, " + "but can be imported from " + "pandas.types.common.{t} instead".format(t=t), + FutureWarning, stacklevel=2) + return getattr(common, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) class PandasError(Exception): @@ -58,322 +101,6 @@ def __str__(self): self.class_instance.__class__.__name__) -_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name - for t in ['O', 'int8', 'uint8', 'int16', 'uint16', - 'int32', 'uint32', 'int64', 'uint64']]) - -_NS_DTYPE = np.dtype('M8[ns]') -_TD_DTYPE = np.dtype('m8[ns]') -_INT64_DTYPE = np.dtype(np.int64) -_DATELIKE_DTYPES = set([np.dtype(t) - for t in ['M8[ns]', 'M8[ns]', - 'm8[ns]', 'm8[ns]']]) -_int8_max = np.iinfo(np.int8).max -_int16_max = np.iinfo(np.int16).max -_int32_max = np.iinfo(np.int32).max -_int64_max = np.iinfo(np.int64).max - - -def isnull(obj): - """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) - - Parameters - ---------- - arr : ndarray or object value - Object to check for null-ness - - Returns - ------- - isnulled : array-like of bool or bool - Array or bool indicating whether an object is null or if an array is - given which of the element is null. - - See also - -------- - pandas.notnull: boolean inverse of pandas.isnull - """ - return _isnull(obj) - - -def _isnull_new(obj): - if lib.isscalar(obj): - return lib.checknull(obj) - # hack (for now) because MI registers as ndarray - elif isinstance(obj, pd.MultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): - return _isnull_ndarraylike(obj) - elif isinstance(obj, gt.ABCGeneric): - return obj._constructor(obj._data.isnull(func=isnull)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike(np.asarray(obj)) - else: - return obj is None - - -def _isnull_old(obj): - """Detect missing values. Treat None, NaN, INF, -INF as null. - - Parameters - ---------- - arr: ndarray or object value - - Returns - ------- - boolean ndarray or boolean - """ - if lib.isscalar(obj): - return lib.checknull_old(obj) - # hack (for now) because MI registers as ndarray - elif isinstance(obj, pd.MultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") - elif isinstance(obj, (gt.ABCSeries, np.ndarray, pd.Index)): - return _isnull_ndarraylike_old(obj) - elif isinstance(obj, gt.ABCGeneric): - return obj._constructor(obj._data.isnull(func=_isnull_old)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike_old(np.asarray(obj)) - else: - return obj is None - - -_isnull = _isnull_new - - -def _use_inf_as_null(key): - """Option change callback for null/inf behaviour - Choose which replacement for numpy.isnan / ~numpy.isfinite is used. 
- - Parameters - ---------- - flag: bool - True means treat None, NaN, INF, -INF as null (old way), - False means None and NaN are null, but INF, -INF are not null - (new way). - - Notes - ----- - This approach to setting global module values is discussed and - approved here: - - * http://stackoverflow.com/questions/4859217/ - programmatically-creating-variables-in-python/4859312#4859312 - """ - flag = get_option(key) - if flag: - globals()['_isnull'] = _isnull_old - else: - globals()['_isnull'] = _isnull_new - - -def _isnull_ndarraylike(obj): - - values = getattr(obj, 'values', obj) - dtype = values.dtype - - if is_string_dtype(dtype): - if is_categorical_dtype(values): - from pandas import Categorical - if not isinstance(values, Categorical): - values = values.values - result = values.isnull() - else: - - # Working around NumPy ticket 1542 - shape = values.shape - - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - result = np.empty(shape, dtype=bool) - vec = lib.isnullobj(values.ravel()) - result[...] = vec.reshape(shape) - - elif is_datetimelike(obj): - # this is the NaT pattern - result = values.view('i8') == tslib.iNaT - else: - result = np.isnan(values) - - # box - if isinstance(obj, gt.ABCSeries): - from pandas import Series - result = Series(result, index=obj.index, name=obj.name, copy=False) - - return result - - -def _isnull_ndarraylike_old(obj): - values = getattr(obj, 'values', obj) - dtype = values.dtype - - if is_string_dtype(dtype): - # Working around NumPy ticket 1542 - shape = values.shape - - if is_string_like_dtype(dtype): - result = np.zeros(values.shape, dtype=bool) - else: - result = np.empty(shape, dtype=bool) - vec = lib.isnullobj_old(values.ravel()) - result[:] = vec.reshape(shape) - - elif dtype in _DATELIKE_DTYPES: - # this is the NaT pattern - result = values.view('i8') == tslib.iNaT - else: - result = ~np.isfinite(values) - - # box - if isinstance(obj, gt.ABCSeries): - from pandas import Series - result = Series(result, index=obj.index, name=obj.name, copy=False) - - return result - - -def notnull(obj): - """Replacement for numpy.isfinite / ~numpy.isnan which is suitable for use - on object arrays. - - Parameters - ---------- - arr : ndarray or object value - Object to check for *not*-null-ness - - Returns - ------- - isnulled : array-like of bool or bool - Array or bool indicating whether an object is *not* null or if an array - is given which of the element is *not* null. - - See also - -------- - pandas.isnull : boolean inverse of pandas.notnull - """ - res = isnull(obj) - if lib.isscalar(res): - return not res - return ~res - - -def is_null_datelike_scalar(other): - """ test whether the object is a null datelike, e.g. Nat - but guard against passing a non-scalar """ - if other is pd.NaT or other is None: - return True - elif lib.isscalar(other): - - # a timedelta - if hasattr(other, 'dtype'): - return other.view('i8') == tslib.iNaT - elif is_integer(other) and other == tslib.iNaT: - return True - return isnull(other) - return False - - -def array_equivalent(left, right, strict_nan=False): - """ - True if two arrays, left and right, have equal non-NaN elements, and NaNs - in corresponding locations. False otherwise. It is assumed that left and - right are NumPy arrays of the same dtype. The behavior of this function - (particularly with respect to NaNs) is not defined if the dtypes are - different. 
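Because `NaN != NaN` under IEEE semantics, a plain `==` cannot confirm agreement between float arrays; the float branch of `array_equivalent` below therefore also accepts positions where both sides are NaN. That check, pulled out as a standalone sketch (assumes float input; the function name is illustrative):

    import numpy as np

    def float_arrays_equivalent(left, right):
        left, right = np.asarray(left), np.asarray(right)
        if left.shape != right.shape:
            return False
        # equal values, or NaN on both sides in the same position
        return bool(((left == right) |
                     (np.isnan(left) & np.isnan(right))).all())

    float_arrays_equivalent([1.0, np.nan], [1.0, np.nan])   # True
    float_arrays_equivalent([1.0, np.nan], [np.nan, 1.0])   # False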
- - Parameters - ---------- - left, right : ndarrays - strict_nan : bool, default False - If True, consider NaN and None to be different. - - Returns - ------- - b : bool - Returns True if the arrays are equivalent. - - Examples - -------- - >>> array_equivalent( - ... np.array([1, 2, np.nan]), - ... np.array([1, 2, np.nan])) - True - >>> array_equivalent( - ... np.array([1, np.nan, 2]), - ... np.array([1, 2, np.nan])) - False - """ - - left, right = np.asarray(left), np.asarray(right) - - # shape compat - if left.shape != right.shape: - return False - - # Object arrays can contain None, NaN and NaT. - # string dtypes must be come to this path for NumPy 1.7.1 compat - if is_string_dtype(left) or is_string_dtype(right): - - if not strict_nan: - # pd.isnull considers NaN and None to be equivalent. - return lib.array_equivalent_object(_ensure_object(left.ravel()), - _ensure_object(right.ravel())) - - for left_value, right_value in zip(left, right): - if left_value is tslib.NaT and right_value is not tslib.NaT: - return False - - elif isinstance(left_value, float) and np.isnan(left_value): - if (not isinstance(right_value, float) or - not np.isnan(right_value)): - return False - else: - if left_value != right_value: - return False - return True - - # NaNs can occur in float and complex arrays. - if is_float_dtype(left) or is_complex_dtype(left): - return ((left == right) | (np.isnan(left) & np.isnan(right))).all() - - # numpy will will not allow this type of datetimelike vs integer comparison - elif is_datetimelike_v_numeric(left, right): - return False - - # M8/m8 - elif needs_i8_conversion(left) and needs_i8_conversion(right): - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.view('i8') - right = right.view('i8') - - # NaNs cannot occur otherwise. - try: - return np.array_equal(left, right) - except AttributeError: - # see gh-13388 - # - # NumPy v1.7.1 has a bug in its array_equal - # function that prevents it from correctly - # comparing two arrays with complex dtypes. - # This bug is corrected in v1.8.0, so remove - # this try-except block as soon as we stop - # supporting NumPy versions < 1.8.0 - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.tolist() - right = right.tolist() - - return left == right - - -def _iterable_not_string(x): - return (isinstance(x, collections.Iterable) and - not isinstance(x, compat.string_types)) - - def flatten(l): """Flatten an arbitrarily nested sequence. @@ -398,510 +125,6 @@ def flatten(l): yield el -def _coerce_indexer_dtype(indexer, categories): - """ coerce the indexer input array to the smallest dtype possible """ - l = len(categories) - if l < _int8_max: - return _ensure_int8(indexer) - elif l < _int16_max: - return _ensure_int16(indexer) - elif l < _int32_max: - return _ensure_int32(indexer) - return _ensure_int64(indexer) - - -def _coerce_to_dtypes(result, dtypes): - """ given a dtypes and a result set, coerce the result elements to the - dtypes - """ - if len(result) != len(dtypes): - raise AssertionError("_coerce_to_dtypes requires equal len arrays") - - from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type - - def conv(r, dtype): - try: - if isnull(r): - pass - elif dtype == _NS_DTYPE: - r = lib.Timestamp(r) - elif dtype == _TD_DTYPE: - r = _coerce_scalar_to_timedelta_type(r) - elif dtype == np.bool_: - # messy. non 0/1 integers do not get converted. 
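The relocated `_coerce_indexer_dtype` above keeps categorical codes compact: codes only need to span `[-1, len(categories))`, so the narrowest sufficient signed integer type is chosen. Roughly, as a sketch (illustrative helper name, signed types assumed):

    import numpy as np

    def smallest_codes_dtype(n_categories):
        # Codes span [-1, n_categories), so the narrowest signed
        # integer type whose max exceeds the category count suffices,
        # as _coerce_indexer_dtype does with _int8_max and friends.
        for dtype in (np.int8, np.int16, np.int32):
            if n_categories < np.iinfo(dtype).max:
                return np.dtype(dtype)
        return np.dtype(np.int64)

    smallest_codes_dtype(100)     # int8
    smallest_codes_dtype(40000)   # int32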
- if is_integer(r) and r not in [0, 1]: - return int(r) - r = bool(r) - elif dtype.kind == 'f': - r = float(r) - elif dtype.kind == 'i': - r = int(r) - except: - pass - - return r - - return [conv(r, dtype) for r, dtype in zip(result, dtypes)] - - -def _infer_fill_value(val): - """ - infer the fill value for the nan/NaT from the provided - scalar/ndarray/list-like if we are a NaT, return the correct dtyped - element to provide proper block construction - """ - - if not is_list_like(val): - val = [val] - val = np.array(val, copy=False) - if is_datetimelike(val): - return np.array('NaT', dtype=val.dtype) - elif is_object_dtype(val.dtype): - dtype = lib.infer_dtype(_ensure_object(val)) - if dtype in ['datetime', 'datetime64']: - return np.array('NaT', dtype=_NS_DTYPE) - elif dtype in ['timedelta', 'timedelta64']: - return np.array('NaT', dtype=_TD_DTYPE) - return np.nan - - -def _infer_dtype_from_scalar(val): - """ interpret the dtype from a scalar """ - - dtype = np.object_ - - # a 1-element ndarray - if isinstance(val, np.ndarray): - if val.ndim != 0: - raise ValueError( - "invalid ndarray passed to _infer_dtype_from_scalar") - - dtype = val.dtype - val = val.item() - - elif isinstance(val, compat.string_types): - - # If we create an empty array using a string to infer - # the dtype, NumPy will only allocate one character per entry - # so this is kind of bad. Alternately we could use np.repeat - # instead of np.empty (but then you still don't want things - # coming out as np.str_! - - dtype = np.object_ - - elif isinstance(val, (np.datetime64, - datetime)) and getattr(val, 'tzinfo', None) is None: - val = lib.Timestamp(val).value - dtype = np.dtype('M8[ns]') - - elif isinstance(val, (np.timedelta64, timedelta)): - val = lib.Timedelta(val).value - dtype = np.dtype('m8[ns]') - - elif is_bool(val): - dtype = np.bool_ - - elif is_integer(val): - if isinstance(val, np.integer): - dtype = type(val) - else: - dtype = np.int64 - - elif is_float(val): - if isinstance(val, np.floating): - dtype = type(val) - else: - dtype = np.float64 - - elif is_complex(val): - dtype = np.complex_ - - return dtype, val - - -def _is_na_compat(arr, fill_value=np.nan): - """ - Parameters - ---------- - arr: a numpy array - fill_value: fill value, default to np.nan - - Returns - ------- - True if we can fill using this fill_value - """ - dtype = arr.dtype - if isnull(fill_value): - return not (is_bool_dtype(dtype) or - is_integer_dtype(dtype)) - return True - - -def _maybe_fill(arr, fill_value=np.nan): - """ - if we have a compatiable fill_value and arr dtype, then fill - """ - if _is_na_compat(arr, fill_value): - arr.fill(fill_value) - return arr - - -def _maybe_promote(dtype, fill_value=np.nan): - - # if we passed an array here, determine the fill value by dtype - if isinstance(fill_value, np.ndarray): - if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): - fill_value = tslib.iNaT - else: - - # we need to change to object type as our - # fill_value is of object type - if fill_value.dtype == np.object_: - dtype = np.dtype(np.object_) - fill_value = np.nan - - # returns tuple of (dtype, fill_value) - if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - if isnull(fill_value): - fill_value = tslib.iNaT - else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = lib.Timestamp(fill_value).value - except: - # the proper thing to do here 
would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = tslib.iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = lib.Timedelta(fill_value).value - except: - # as for datetimes, cannot upcast to object - fill_value = tslib.iNaT - else: - fill_value = tslib.iNaT - elif is_datetimetz(dtype): - if isnull(fill_value): - fill_value = tslib.iNaT - elif is_float(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, np.integer): - dtype = np.float64 - elif is_bool(fill_value): - if not issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif is_integer(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, np.integer): - # upcast to prevent overflow - arr = np.asarray(fill_value) - if arr != arr.astype(dtype): - dtype = arr.dtype - elif is_complex(fill_value): - if issubclass(dtype.type, np.bool_): - dtype = np.object_ - elif issubclass(dtype.type, (np.integer, np.floating)): - dtype = np.complex128 - elif fill_value is None: - if is_float_dtype(dtype) or is_complex_dtype(dtype): - fill_value = np.nan - elif is_integer_dtype(dtype): - dtype = np.float64 - fill_value = np.nan - elif is_datetime_or_timedelta_dtype(dtype): - fill_value = tslib.iNaT - else: - dtype = np.object_ - else: - dtype = np.object_ - - # in case we have a string that looked like a number - if is_categorical_dtype(dtype): - pass - elif is_datetimetz(dtype): - pass - elif issubclass(np.dtype(dtype).type, compat.string_types): - dtype = np.object_ - - return dtype, fill_value - - -def _maybe_upcast_putmask(result, mask, other): - """ - A safe version of putmask that potentially upcasts the result - - Parameters - ---------- - result : ndarray - The destination array. This will be mutated in-place if no upcasting is - necessary. - mask : boolean ndarray - other : ndarray or scalar - The source array or value - - Returns - ------- - result : ndarray - changed : boolean - Set to true if the result array was upcasted - """ - - if mask.any(): - # Two conversions for date-like dtypes that can't be done automatically - # in np.place: - # NaN -> NaT - # integer or integer array -> date-like array - if result.dtype in _DATELIKE_DTYPES: - if lib.isscalar(other): - if isnull(other): - other = result.dtype.type('nat') - elif is_integer(other): - other = np.array(other, dtype=result.dtype) - elif is_integer_dtype(other): - other = np.array(other, dtype=result.dtype) - - def changeit(): - - # try to directly set by expanding our array to full - # length of the boolean - try: - om = other[mask] - om_at = om.astype(result.dtype) - if (om == om_at).all(): - new_result = result.values.copy() - new_result[mask] = om_at - result[:] = new_result - return result, False - except: - pass - - # we are forced to change the dtype of the result as the input - # isn't compatible - r, _ = _maybe_upcast(result, fill_value=other, copy=True) - np.place(r, mask, other) - - return r, True - - # we want to decide whether place will work - # if we have nans in the False portion of our mask then we need to - # upcast (possibly), otherwise we DON't want to upcast (e.g. 
if we - # have values, say integers, in the success portion then it's ok to not - # upcast) - new_dtype, _ = _maybe_promote(result.dtype, other) - if new_dtype != result.dtype: - - # we have a scalar or len 0 ndarray - # and its nan and we are changing some values - if (lib.isscalar(other) or - (isinstance(other, np.ndarray) and other.ndim < 1)): - if isnull(other): - return changeit() - - # we have an ndarray and the masking has nans in it - else: - - if isnull(other[mask]).any(): - return changeit() - - try: - np.place(result, mask, other) - except: - return changeit() - - return result, False - - -def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): - """ provide explict type promotion and coercion - - Parameters - ---------- - values : the ndarray that we want to maybe upcast - fill_value : what we want to fill with - dtype : if None, then use the dtype of the values, else coerce to this type - copy : if True always make a copy even if no upcast is required - """ - - if is_extension_type(values): - if copy: - values = values.copy() - else: - if dtype is None: - dtype = values.dtype - new_dtype, fill_value = _maybe_promote(dtype, fill_value) - if new_dtype != values.dtype: - values = values.astype(new_dtype) - elif copy: - values = values.copy() - - return values, fill_value - - -def _possibly_cast_item(obj, item, dtype): - chunk = obj[item] - - if chunk.values.dtype != dtype: - if dtype in (np.object_, np.bool_): - obj[item] = chunk.astype(np.object_) - elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover - raise ValueError("Unexpected dtype encountered: %s" % dtype) - - -def _possibly_downcast_to_dtype(result, dtype): - """ try to cast to the specified dtype (e.g. convert back to bool/int - or could be an astype of float64->float32 - """ - - if lib.isscalar(result): - return result - - def trans(x): - return x - - if isinstance(dtype, compat.string_types): - if dtype == 'infer': - inferred_type = lib.infer_dtype(_ensure_object(result.ravel())) - if inferred_type == 'boolean': - dtype = 'bool' - elif inferred_type == 'integer': - dtype = 'int64' - elif inferred_type == 'datetime64': - dtype = 'datetime64[ns]' - elif inferred_type == 'timedelta64': - dtype = 'timedelta64[ns]' - - # try to upcast here - elif inferred_type == 'floating': - dtype = 'int64' - if issubclass(result.dtype.type, np.number): - - def trans(x): # noqa - return x.round() - else: - dtype = 'object' - - if isinstance(dtype, compat.string_types): - dtype = np.dtype(dtype) - - try: - - # don't allow upcasts here (except if empty) - if dtype.kind == result.dtype.kind: - if (result.dtype.itemsize <= dtype.itemsize and - np.prod(result.shape)): - return result - - if issubclass(dtype.type, np.floating): - return result.astype(dtype) - elif dtype == np.bool_ or issubclass(dtype.type, np.integer): - - # if we don't have any elements, just astype it - if not np.prod(result.shape): - return trans(result).astype(dtype) - - # do a test on the first element, if it fails then we are done - r = result.ravel() - arr = np.array([r[0]]) - - # if we have any nulls, then we are done - if isnull(arr).any() or not np.allclose(arr, - trans(arr).astype(dtype)): - return result - - # a comparable, e.g. 
a Decimal may slip in here - elif not isinstance(r[0], (np.integer, np.floating, np.bool, int, - float, bool)): - return result - - if (issubclass(result.dtype.type, (np.object_, np.number)) and - notnull(result).all()): - new_result = trans(result).astype(dtype) - try: - if np.allclose(new_result, result): - return new_result - except: - - # comparison of an object dtype with a number type could - # hit here - if (new_result == result).all(): - return new_result - - # a datetimelike - elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']: - try: - result = result.astype(dtype) - except: - if dtype.tz: - # convert to datetime and change timezone - result = pd.to_datetime(result).tz_localize(dtype.tz) - - except: - pass - - return result - - -def _maybe_convert_string_to_object(values): - """ - - Convert string-like and string-like array to convert object dtype. - This is to avoid numpy to handle the array as str dtype. - """ - if isinstance(values, string_types): - values = np.array([values], dtype=object) - elif (isinstance(values, np.ndarray) and - issubclass(values.dtype.type, (np.string_, np.unicode_))): - values = values.astype(object) - return values - - -def _maybe_convert_scalar(values): - """ - Convert a python scalar to the appropriate numpy dtype if possible - This avoids numpy directly converting according to platform preferences - """ - if lib.isscalar(values): - dtype, values = _infer_dtype_from_scalar(values) - try: - values = dtype(values) - except TypeError: - pass - return values - - -def _lcd_dtypes(a_dtype, b_dtype): - """ return the lcd dtype to hold these types """ - - if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype): - return _NS_DTYPE - elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype): - return _TD_DTYPE - elif is_complex_dtype(a_dtype): - if is_complex_dtype(b_dtype): - return a_dtype - return np.float64 - elif is_integer_dtype(a_dtype): - if is_integer_dtype(b_dtype): - if a_dtype.itemsize == b_dtype.itemsize: - return a_dtype - return np.int64 - return np.float64 - elif is_float_dtype(a_dtype): - if is_float_dtype(b_dtype): - if a_dtype.itemsize == b_dtype.itemsize: - return a_dtype - else: - return np.float64 - elif is_integer(b_dtype): - return np.float64 - return np.object - - def _consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: @@ -909,66 +132,20 @@ def _consensus_name_attr(objs): return None return name -# ---------------------------------------------------------------------- -# Lots of little utilities - - -def _validate_date_like_dtype(dtype): - try: - typ = np.datetime_data(dtype)[0] - except ValueError as e: - raise TypeError('%s' % e) - if typ != 'generic' and typ != 'ns': - raise ValueError('%r is too specific of a frequency, try passing %r' % - (dtype.name, dtype.type.__name__)) - - -def _invalidate_string_dtypes(dtype_set): - """Change string like dtypes to object for - ``DataFrame.select_dtypes()``. - """ - non_string_dtypes = dtype_set - _string_dtypes - if non_string_dtypes != dtype_set: - raise TypeError("string dtypes are not allowed, use 'object' instead") - - -def _get_dtype_from_object(dtype): - """Get a numpy dtype.type-style object. This handles the datetime64[ns] - and datetime64[ns, TZ] compat - - Notes - ----- - If nothing can be found, returns ``object``. 
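`_possibly_downcast_to_dtype` above only casts a float result back to an integer (or bool) dtype when the round trip loses nothing, with `np.allclose` as the arbiter. The core of that test as a standalone sketch (assumes numeric input with no NaNs; the name is illustrative):

    import numpy as np

    def downcast_if_lossless(result, dtype=np.dtype('int64')):
        result = np.asarray(result)
        candidate = result.astype(dtype)
        # keep the narrower dtype only when nothing was lost in the cast
        if np.allclose(candidate, result):
            return candidate
        return result

    downcast_if_lossless([1.0, 2.0])   # -> array([1, 2]) with int64
    downcast_if_lossless([1.0, 2.5])   # -> float64 array, unchanged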
- """ - # type object from a dtype - if isinstance(dtype, type) and issubclass(dtype, np.generic): - return dtype - elif is_categorical(dtype): - return gt.CategoricalDtype().type - elif is_datetimetz(dtype): - return gt.DatetimeTZDtype(dtype).type - elif isinstance(dtype, np.dtype): # dtype object - try: - _validate_date_like_dtype(dtype) - except TypeError: - # should still pass if we don't have a datelike - pass - return dtype.type - elif isinstance(dtype, compat.string_types): - if dtype == 'datetime' or dtype == 'timedelta': - dtype += '64' - - try: - return _get_dtype_from_object(getattr(np, dtype)) - except (AttributeError, TypeError): - # handles cases like _get_dtype(int) - # i.e., python objects that are valid dtypes (unlike user-defined - # types, in general) - # TypeError handles the float16 typecode of 'e' - # further handle internal types - pass - return _get_dtype_from_object(np.dtype(dtype)) +def _maybe_match_name(a, b): + a_has = hasattr(a, 'name') + b_has = hasattr(b, 'name') + if a_has and b_has: + if a.name == b.name: + return a.name + else: + return None + elif a_has: + return a.name + elif b_has: + return b.name + return None def _get_info_slice(obj, indexer): @@ -1005,225 +182,8 @@ def _maybe_box_datetimelike(value): _values_from_object = lib.values_from_object -def _possibly_castable(arr): - # return False to force a non-fastpath - - # check datetime64[ns]/timedelta64[ns] are valid - # otherwise try to coerce - kind = arr.dtype.kind - if kind == 'M' or kind == 'm': - return arr.dtype in _DATELIKE_DTYPES - - return arr.dtype.name not in _POSSIBLY_CAST_DTYPES - - -def _possibly_convert_platform(values): - """ try to do platform conversion, allow ndarray or list here """ - - if isinstance(values, (list, tuple)): - values = lib.list_to_object_array(values) - if getattr(values, 'dtype', None) == np.object_: - if hasattr(values, '_values'): - values = values._values - values = lib.maybe_convert_objects(values) - - return values - - -def _possibly_cast_to_datetime(value, dtype, errors='raise'): - """ try to cast the array/value to a datetimelike dtype, converting float - nan to iNaT - """ - from pandas.tseries.timedeltas import to_timedelta - from pandas.tseries.tools import to_datetime - - if dtype is not None: - if isinstance(dtype, compat.string_types): - dtype = np.dtype(dtype) - - is_datetime64 = is_datetime64_dtype(dtype) - is_datetime64tz = is_datetime64tz_dtype(dtype) - is_timedelta64 = is_timedelta64_dtype(dtype) - - if is_datetime64 or is_datetime64tz or is_timedelta64: - - # force the dtype if needed - if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name == 'datetime64[ns]': - dtype = _NS_DTYPE - else: - raise TypeError("cannot convert datetimelike to " - "dtype [%s]" % dtype) - elif is_datetime64tz: - - # our NaT doesn't support tz's - # this will coerce to DatetimeIndex with - # a matching dtype below - if lib.isscalar(value) and isnull(value): - value = [value] - - elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name == 'timedelta64[ns]': - dtype = _TD_DTYPE - else: - raise TypeError("cannot convert timedeltalike to " - "dtype [%s]" % dtype) - - if lib.isscalar(value): - if value == tslib.iNaT or isnull(value): - value = tslib.iNaT - else: - value = np.array(value, copy=False) - - # have a scalar array-like (e.g. 
NaT) - if value.ndim == 0: - value = tslib.iNaT - - # we have an array of datetime or timedeltas & nulls - elif np.prod(value.shape) or not is_dtype_equal(value.dtype, - dtype): - try: - if is_datetime64: - value = to_datetime(value, errors=errors)._values - elif is_datetime64tz: - # input has to be UTC at this point, so just - # localize - value = to_datetime( - value, - errors=errors).tz_localize(dtype.tz) - elif is_timedelta64: - value = to_timedelta(value, errors=errors)._values - except (AttributeError, ValueError, TypeError): - pass - - # coerce datetimelike to object - elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype): - if is_object_dtype(dtype): - ints = np.asarray(value).view('i8') - return tslib.ints_to_pydatetime(ints) - - # we have a non-castable dtype that was passed - raise TypeError('Cannot cast datetime64 to %s' % dtype) - - else: - - is_array = isinstance(value, np.ndarray) - - # catch a datetime/timedelta that is not of ns variety - # and no coercion specified - if is_array and value.dtype.kind in ['M', 'm']: - dtype = value.dtype - - if dtype.kind == 'M' and dtype != _NS_DTYPE: - value = value.astype(_NS_DTYPE) - - elif dtype.kind == 'm' and dtype != _TD_DTYPE: - value = to_timedelta(value) - - # only do this if we have an array and the dtype of the array is not - # setup already we are not an integer/object, so don't bother with this - # conversion - elif not (is_array and not (issubclass(value.dtype.type, np.integer) or - value.dtype == np.object_)): - value = _possibly_infer_to_datetimelike(value) - - return value - - -def _possibly_infer_to_datetimelike(value, convert_dates=False): - """ - we might have an array (or single object) that is datetime like, - and no dtype is passed don't change the value unless we find a - datetime/timedelta set - - this is pretty strict in that a datetime/timedelta is REQUIRED - in addition to possible nulls/string likes - - ONLY strings are NOT datetimelike - - Parameters - ---------- - value : np.array / Series / Index / list-like - convert_dates : boolean, default False - if True try really hard to convert dates (such as datetime.date), other - leave inferred dtype 'date' alone - - """ - - if isinstance(value, (gt.ABCDatetimeIndex, gt.ABCPeriodIndex)): - return value - elif isinstance(value, gt.ABCSeries): - if isinstance(value._values, gt.ABCDatetimeIndex): - return value._values - - v = value - if not is_list_like(v): - v = [v] - v = np.array(v, copy=False) - shape = v.shape - if not v.ndim == 1: - v = v.ravel() - - if len(v): - - def _try_datetime(v): - # safe coerce to datetime64 - try: - v = tslib.array_to_datetime(v, errors='raise') - except ValueError: - - # we might have a sequence of the same-datetimes with tz's - # if so coerce to a DatetimeIndex; if they are not the same, - # then these stay as object dtype - try: - from pandas import to_datetime - return to_datetime(v) - except: - pass - - except: - pass - - return v.reshape(shape) - - def _try_timedelta(v): - # safe coerce to timedelta64 - - # will try first with a string & object conversion - from pandas.tseries.timedeltas import to_timedelta - try: - return to_timedelta(v)._values.reshape(shape) - except: - return v - - # do a quick inference for perf - sample = v[:min(3, len(v))] - inferred_type = lib.infer_dtype(sample) - - if (inferred_type in ['datetime', 'datetime64'] or - (convert_dates and inferred_type in ['date'])): - value = _try_datetime(v) - elif inferred_type in ['timedelta', 'timedelta64']: - value = _try_timedelta(v) - - # It's possible 
to have nulls intermixed within the datetime or - # timedelta. These will in general have an inferred_type of 'mixed', - # so have to try both datetime and timedelta. - - # try timedelta first to avoid spurious datetime conversions - # e.g. '00:00:01' is a timedelta but technically is also a datetime - elif inferred_type in ['mixed']: - - if lib.is_possible_datetimelike_array(_ensure_object(v)): - value = _try_timedelta(v) - if lib.infer_dtype(value) in ['mixed']: - value = _try_datetime(v) - - return value - - def is_bool_indexer(key): - if isinstance(key, (gt.ABCSeries, np.ndarray)): + if isinstance(key, (ABCSeries, np.ndarray)): if key.dtype == np.object_: key = np.asarray(_values_from_object(key)) @@ -1250,12 +210,6 @@ def _default_index(n): return RangeIndex(0, n, name=None) -def ensure_float(arr): - if issubclass(arr.dtype.type, (np.integer, np.bool_)): - arr = arr.astype(float) - return arr - - def _mut_exclusive(**kwargs): item1, item2 = kwargs.items() label1, val1 = item1 @@ -1287,6 +241,10 @@ def _all_not_none(*args): return True +def _count_not_none(*args): + return sum(x is not None for x in args) + + def _try_sort(iterable): listed = list(iterable) try: @@ -1295,10 +253,6 @@ def _try_sort(iterable): return listed -def _count_not_none(*args): - return sum(x is not None for x in args) - - def iterpairs(seq): """ Parameters @@ -1451,349 +405,6 @@ def _maybe_make_list(obj): return [obj] return obj -# TYPE TESTING - -is_bool = lib.is_bool - -is_integer = lib.is_integer - -is_float = lib.is_float - -is_complex = lib.is_complex - - -def is_string_like(obj): - return isinstance(obj, (compat.text_type, compat.string_types)) - - -def is_iterator(obj): - # python 3 generators have __next__ instead of next - return hasattr(obj, 'next') or hasattr(obj, '__next__') - - -def is_number(obj): - return isinstance(obj, (numbers.Number, np.number)) - - -def is_period_arraylike(arr): - """ return if we are period arraylike / PeriodIndex """ - if isinstance(arr, pd.PeriodIndex): - return True - elif isinstance(arr, (np.ndarray, gt.ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'period' - return getattr(arr, 'inferred_type', None) == 'period' - - -def is_datetime_arraylike(arr): - """ return if we are datetime arraylike / DatetimeIndex """ - if isinstance(arr, gt.ABCDatetimeIndex): - return True - elif isinstance(arr, (np.ndarray, gt.ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'datetime' - return getattr(arr, 'inferred_type', None) == 'datetime' - - -def is_datetimelike(arr): - return (arr.dtype in _DATELIKE_DTYPES or - isinstance(arr, gt.ABCPeriodIndex) or - is_datetimetz(arr)) - - -def _coerce_to_dtype(dtype): - """ coerce a string / np.dtype to a dtype """ - if is_categorical_dtype(dtype): - dtype = gt.CategoricalDtype() - elif is_datetime64tz_dtype(dtype): - dtype = gt.DatetimeTZDtype(dtype) - else: - dtype = np.dtype(dtype) - return dtype - - -def _get_dtype(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype) - elif isinstance(arr_or_dtype, gt.CategoricalDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): - return arr_or_dtype - elif isinstance(arr_or_dtype, compat.string_types): - if is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtype.construct_from_string(arr_or_dtype) - elif is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtype.construct_from_string(arr_or_dtype) - - if hasattr(arr_or_dtype, 
'dtype'): - arr_or_dtype = arr_or_dtype.dtype - return np.dtype(arr_or_dtype) - - -def _get_dtype_type(arr_or_dtype): - if isinstance(arr_or_dtype, np.dtype): - return arr_or_dtype.type - elif isinstance(arr_or_dtype, type): - return np.dtype(arr_or_dtype).type - elif isinstance(arr_or_dtype, gt.CategoricalDtype): - return gt.CategoricalDtypeType - elif isinstance(arr_or_dtype, gt.DatetimeTZDtype): - return gt.DatetimeTZDtypeType - elif isinstance(arr_or_dtype, compat.string_types): - if is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtypeType - elif is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtypeType - return _get_dtype_type(np.dtype(arr_or_dtype)) - try: - return arr_or_dtype.dtype.type - except AttributeError: - return type(None) - - -def is_dtype_equal(source, target): - """ return a boolean if the dtypes are equal """ - try: - source = _get_dtype(source) - target = _get_dtype(target) - return source == target - except (TypeError, AttributeError): - - # invalid comparison - # object == category will hit this - return False - - -def is_any_int_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.integer) - - -def is_integer_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_int64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.int64) - - -def is_int_or_datetime_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, np.integer) or - issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_datetime64_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except TypeError: - return False - return issubclass(tipo, np.datetime64) - - -def is_datetime64tz_dtype(arr_or_dtype): - return gt.DatetimeTZDtype.is_dtype(arr_or_dtype) - - -def is_datetime64_any_dtype(arr_or_dtype): - return (is_datetime64_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) - - -def is_datetime64_ns_dtype(arr_or_dtype): - try: - tipo = _get_dtype(arr_or_dtype) - except TypeError: - return False - return tipo == _NS_DTYPE - - -def is_timedelta64_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.timedelta64) - - -def is_timedelta64_ns_dtype(arr_or_dtype): - tipo = _get_dtype(arr_or_dtype) - return tipo == _TD_DTYPE - - -def is_datetime_or_timedelta_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, (np.datetime64, np.timedelta64)) - - -def is_numeric_v_string_like(a, b): - """ - numpy doesn't like to compare numeric arrays vs scalar string-likes - - return a boolean result if this is the case for a,b or b,a - - """ - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) - - is_a_numeric_array = is_a_array and is_numeric_dtype(a) - is_b_numeric_array = is_b_array and is_numeric_dtype(b) - is_a_string_array = is_a_array and is_string_like_dtype(a) - is_b_string_array = is_b_array and is_string_like_dtype(b) - - is_a_scalar_string_like = not is_a_array and is_string_like(a) - is_b_scalar_string_like = not is_b_array and is_string_like(b) - - return ((is_a_numeric_array and is_b_scalar_string_like) or - (is_b_numeric_array and is_a_scalar_string_like) or - (is_a_numeric_array and is_b_string_array) or - (is_b_numeric_array and is_a_string_array)) - - -def is_datetimelike_v_numeric(a, b): - # return if we have an i8 convertible and numeric 
comparison - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def is_numeric(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_numeric(b)) or - (is_datetimelike(b) and is_numeric(a))) - - -def is_datetimelike_v_object(a, b): - # return if we have an i8 convertible and object comparsion - if not hasattr(a, 'dtype'): - a = np.asarray(a) - if not hasattr(b, 'dtype'): - b = np.asarray(b) - - def f(x): - return is_object_dtype(x) - - def is_object(x): - return is_integer_dtype(x) or is_float_dtype(x) - - is_datetimelike = needs_i8_conversion - return ((is_datetimelike(a) and is_object(b)) or - (is_datetimelike(b) and is_object(a))) - - -def needs_i8_conversion(arr_or_dtype): - return (is_datetime_or_timedelta_dtype(arr_or_dtype) or - is_datetime64tz_dtype(arr_or_dtype)) - - -def is_numeric_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return (issubclass(tipo, (np.number, np.bool_)) and - not issubclass(tipo, (np.datetime64, np.timedelta64))) - - -def is_string_dtype(arr_or_dtype): - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') - - -def is_string_like_dtype(arr_or_dtype): - # exclude object as its a mixed dtype - dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('S', 'U') - - -def is_float_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.floating) - - -def is_floating_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return isinstance(tipo, np.floating) - - -def is_bool_dtype(arr_or_dtype): - try: - tipo = _get_dtype_type(arr_or_dtype) - except ValueError: - # this isn't even a dtype - return False - return issubclass(tipo, np.bool_) - - -def is_sparse(array): - """ return if we are a sparse array """ - return isinstance(array, (gt.ABCSparseArray, gt.ABCSparseSeries)) - - -def is_datetimetz(array): - """ return if we are a datetime with tz array """ - return ((isinstance(array, gt.ABCDatetimeIndex) and - getattr(array, 'tz', None) is not None) or - is_datetime64tz_dtype(array)) - - -def is_extension_type(value): - """ - if we are a klass that is preserved by the internals - these are internal klasses that we represent (and don't use a np.array) - """ - if is_categorical(value): - return True - elif is_sparse(value): - return True - elif is_datetimetz(value): - return True - return False - - -def is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, gt.ABCCategorical) or is_categorical_dtype(array) - - -def is_categorical_dtype(arr_or_dtype): - return gt.CategoricalDtype.is_dtype(arr_or_dtype) - - -def is_complex_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.complexfloating) - - -def is_object_dtype(arr_or_dtype): - tipo = _get_dtype_type(arr_or_dtype) - return issubclass(tipo, np.object_) - - -def is_re(obj): - return isinstance(obj, re._pattern_type) - - -def is_re_compilable(obj): - try: - re.compile(obj) - except TypeError: - return False - else: - return True - - -def is_list_like(arg): - return (hasattr(arg, '__iter__') and - not isinstance(arg, compat.string_and_binary_types)) - - -def is_dict_like(arg): - return hasattr(arg, '__getitem__') and hasattr(arg, 'keys') - - -def is_named_tuple(arg): - return isinstance(arg, tuple) and hasattr(arg, '_fields') - def is_null_slice(obj): """ we have a null slice """ @@ -1807,47 +418,6 @@ def is_full_slice(obj, l): obj.step 
is None) -def is_hashable(arg): - """Return True if hash(arg) will succeed, False otherwise. - - Some types will pass a test against collections.Hashable but fail when they - are actually hashed with hash(). - - Distinguish between these and other types by trying the call to hash() and - seeing if they raise TypeError. - - Examples - -------- - >>> a = ([],) - >>> isinstance(a, collections.Hashable) - True - >>> is_hashable(a) - False - """ - # unfortunately, we can't use isinstance(arg, collections.Hashable), which - # can be faster than calling hash, because numpy scalars on Python 3 fail - # this test - - # reconsider this decision once this numpy bug is fixed: - # https://github.com/numpy/numpy/issues/5562 - - try: - hash(arg) - except TypeError: - return False - else: - return True - - -def is_sequence(x): - try: - iter(x) - len(x) # it has a length - return not isinstance(x, compat.string_and_binary_types) - except (TypeError, AttributeError): - return False - - def _get_callable_name(obj): # typical case has name if hasattr(obj, '__name__'): @@ -1875,74 +445,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -_string_dtypes = frozenset(map(_get_dtype_from_object, (compat.binary_type, - compat.text_type))) - -_ensure_float64 = algos.ensure_float64 -_ensure_float32 = algos.ensure_float32 -_ensure_int64 = algos.ensure_int64 -_ensure_int32 = algos.ensure_int32 -_ensure_int16 = algos.ensure_int16 -_ensure_int8 = algos.ensure_int8 -_ensure_platform_int = algos.ensure_platform_int -_ensure_object = algos.ensure_object - - -def _astype_nansafe(arr, dtype, copy=True): - """ return a view if copy is False, but - need to be very careful as the result shape could change! """ - if not isinstance(dtype, np.dtype): - dtype = _coerce_to_dtype(dtype) - - if issubclass(dtype.type, compat.text_type): - # in Py3 that's str, in Py2 that's unicode - return lib.astype_unicode(arr.ravel()).reshape(arr.shape) - elif issubclass(dtype.type, compat.string_types): - return lib.astype_str(arr.ravel()).reshape(arr.shape) - elif is_datetime64_dtype(arr): - if dtype == object: - return tslib.ints_to_pydatetime(arr.view(np.int64)) - elif dtype == np.int64: - return arr.view(dtype) - elif dtype != _NS_DTYPE: - raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % - (arr.dtype, dtype)) - return arr.astype(_NS_DTYPE) - elif is_timedelta64_dtype(arr): - if dtype == np.int64: - return arr.view(dtype) - elif dtype == object: - return tslib.ints_to_pytimedelta(arr.view(np.int64)) - - # in py3, timedelta64[ns] are int64 - elif ((compat.PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or - (not compat.PY3 and dtype != _TD_DTYPE)): - - # allow frequency conversions - if dtype.kind == 'm': - mask = isnull(arr) - result = arr.astype(dtype).astype(np.float64) - result[mask] = np.nan - return result - - raise TypeError("cannot astype a timedelta from [%s] to [%s]" % - (arr.dtype, dtype)) - - return arr.astype(_TD_DTYPE) - elif (np.issubdtype(arr.dtype, np.floating) and - np.issubdtype(dtype, np.integer)): - - if np.isnan(arr).any(): - raise ValueError('Cannot convert NA to integer') - elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): - # work around NumPy brokenness, #1987 - return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - - if copy: - return arr.astype(dtype) - return arr.view(dtype) - - def _all_none(*args): for arg in args: if arg is not None: @@ -1988,6 +490,9 @@ class Sentinel(object): return Sentinel() +# 
----------------------------------------------------------------------
+# Detect our environment
+
 def in_interactive_session():
     """ check if we're running in an interactive shell
@@ -2055,21 +560,6 @@ def in_ipython_frontend():
     return False
 
 
-def _maybe_match_name(a, b):
-    a_has = hasattr(a, 'name')
-    b_has = hasattr(b, 'name')
-    if a_has and b_has:
-        if a.name == b.name:
-            return a.name
-        else:
-            return None
-    elif a_has:
-        return a.name
-    elif b_has:
-        return b.name
-    return None
-
-
 def _random_state(state=None):
     """
     Helper function for processing random_state arguments.
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 3ca2c6cd014bc..5cbc968f06fa7 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -366,7 +366,7 @@ def mpl_style_cb(key):
 
 
 def use_inf_as_null_cb(key):
-    from pandas.core.common import _use_inf_as_null
+    from pandas.types.missing import _use_inf_as_null
     _use_inf_as_null(key)
 
 with cf.config_prefix('mode'):
diff --git a/pandas/core/convert.py b/pandas/core/convert.py
deleted file mode 100644
index 7f4fe73c688f8..0000000000000
--- a/pandas/core/convert.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-Functions for converting object to other types
-"""
-
-import numpy as np
-
-import pandas as pd
-from pandas.core.common import (_possibly_cast_to_datetime, is_object_dtype,
-                                isnull)
-import pandas.lib as lib
-
-
-# TODO: Remove in 0.18 or 2017, whichever is sooner
-def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
-                              convert_timedeltas=True, copy=True):
-    """ if we have an object dtype, try to coerce dates and/or numbers """
-
-    # if we have passed in a list or scalar
-    if isinstance(values, (list, tuple)):
-        values = np.array(values, dtype=np.object_)
-    if not hasattr(values, 'dtype'):
-        values = np.array([values], dtype=np.object_)
-
-    # convert dates
-    if convert_dates and values.dtype == np.object_:
-
-        # we take an aggressive stance and convert to datetime64[ns]
-        if convert_dates == 'coerce':
-            new_values = _possibly_cast_to_datetime(values, 'M8[ns]',
-                                                    errors='coerce')
-
-            # if we are all nans then leave me alone
-            if not isnull(new_values).all():
-                values = new_values
-
-        else:
-            values = lib.maybe_convert_objects(values,
-                                               convert_datetime=convert_dates)
-
-    # convert timedeltas
-    if convert_timedeltas and values.dtype == np.object_:
-
-        if convert_timedeltas == 'coerce':
-            from pandas.tseries.timedeltas import to_timedelta
-            new_values = to_timedelta(values, coerce=True)
-
-            # if we are all nans then leave me alone
-            if not isnull(new_values).all():
-                values = new_values
-
-        else:
-            values = lib.maybe_convert_objects(
-                values, convert_timedelta=convert_timedeltas)
-
-    # convert to numeric
-    if values.dtype == np.object_:
-        if convert_numeric:
-            try:
-                new_values = lib.maybe_convert_numeric(values, set(),
-                                                       coerce_numeric=True)
-
-                # if we are all nans then leave me alone
-                if not isnull(new_values).all():
-                    values = new_values
-
-            except:
-                pass
-        else:
-            # soft-conversion
-            values = lib.maybe_convert_objects(values)
-
-    values = values.copy() if copy else values
-
-    return values
-
-
-def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
-                          coerce=False, copy=True):
-    """ if we have an object dtype, try to coerce dates and/or numbers """
-
-    conversion_count = sum((datetime, numeric, timedelta))
-    if conversion_count == 0:
-        raise ValueError('At least one of datetime, numeric or timedelta must '
-                         'be True.')
-    elif conversion_count > 1 and coerce:
-        raise ValueError("Only one of 'datetime', 'numeric' or "
                          "'timedelta' can be True when coerce=True.")
-
-    if isinstance(values, (list, tuple)):
-        # List or scalar
-        values = np.array(values, dtype=np.object_)
-    elif not hasattr(values, 'dtype'):
-        values = np.array([values], dtype=np.object_)
-    elif not is_object_dtype(values.dtype):
-        # If not object, do not attempt conversion
-        values = values.copy() if copy else values
-        return values
-
-    # If coerce is True, only one conversion flag may be set (validated above)
-    if coerce:
-        # Immediate return if coerce
-        if datetime:
-            return pd.to_datetime(values, errors='coerce', box=False)
-        elif timedelta:
-            return pd.to_timedelta(values, errors='coerce', box=False)
-        elif numeric:
-            return pd.to_numeric(values, errors='coerce')
-
-    # Soft conversions
-    if datetime:
-        values = lib.maybe_convert_objects(values, convert_datetime=datetime)
-
-    if timedelta and is_object_dtype(values.dtype):
-        # Object check to ensure only run if previous did not convert
-        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)
-
-    if numeric and is_object_dtype(values.dtype):
-        try:
-            converted = lib.maybe_convert_numeric(values, set(),
-                                                  coerce_numeric=True)
-            # If all NaNs, then do not alter
-            values = converted if not isnull(converted).all() else values
-            values = values.copy() if copy else values
-        except:
-            pass
-
-    return values
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e01fc6dca6be3..334526b424be5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -23,12 +23,43 @@
 import numpy as np
 import numpy.ma as ma
 
-from pandas.core.common import (
-    isnull, notnull, PandasError, _try_sort, _default_index, _maybe_upcast,
-    is_sequence, _infer_dtype_from_scalar, _values_from_object, is_list_like,
-    _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype,
-    is_extension_type, is_datetimetz, _possibly_infer_to_datetimelike,
-    _dict_compat)
+from pandas.types.cast import (_maybe_upcast,
+                               _infer_dtype_from_scalar,
+                               _possibly_cast_to_datetime,
+                               _possibly_infer_to_datetimelike,
+                               _possibly_convert_platform,
+                               _possibly_downcast_to_dtype,
+                               _invalidate_string_dtypes,
+                               _coerce_to_dtypes,
+                               _maybe_upcast_putmask)
+from pandas.types.common import (is_categorical_dtype,
+                                 is_object_dtype,
+                                 is_extension_type,
+                                 is_datetimetz,
+                                 is_datetime64_dtype,
+                                 is_bool_dtype,
+                                 is_integer_dtype,
+                                 is_float_dtype,
+                                 is_integer,
+                                 is_scalar,
+                                 needs_i8_conversion,
+                                 _get_dtype_from_object,
+                                 _lcd_dtypes,
+                                 _ensure_float,
+                                 _ensure_float64,
+                                 _ensure_int64,
+                                 _ensure_platform_int,
+                                 is_list_like,
+                                 is_iterator,
+                                 is_sequence,
+                                 is_named_tuple)
+from pandas.types.missing import isnull, notnull
+
+from pandas.core.common import (PandasError, _try_sort,
+                                _default_index,
+                                _values_from_object,
+                                _maybe_box_datetimelike,
+                                _dict_compat)
 from pandas.core.generic import NDFrame, _shared_docs
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
@@ -268,7 +299,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                     data = list(data)
                 if len(data) > 0:
                     if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
-                        if com.is_named_tuple(data[0]) and columns is None:
+                        if is_named_tuple(data[0]) and columns is None:
                             columns = data[0]._fields
                         arrays, columns = _to_arrays(data, columns, dtype=dtype)
                         columns = _ensure_index(columns)
@@ -940,7 +971,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
 
         if columns is not None:
             columns =
_ensure_index(columns) - if com.is_iterator(data): + if is_iterator(data): if nrows == 0: return cls() @@ -1051,7 +1082,7 @@ def to_records(self, index=True, convert_datetime64=True): y : recarray """ if index: - if com.is_datetime64_dtype(self.index) and convert_datetime64: + if is_datetime64_dtype(self.index) and convert_datetime64: ix_vals = [self.index.to_pydatetime()] else: if isinstance(self.index, MultiIndex): @@ -1920,7 +1951,7 @@ def _ixs(self, i, axis=0): copy = True else: new_values = self._data.fast_xs(i) - if lib.isscalar(new_values): + if is_scalar(new_values): return new_values # if we are a copy, mark as such @@ -2072,7 +2103,7 @@ def _getitem_multilevel(self, key): return self._get_item_cache(key) def _getitem_frame(self, key): - if key.values.size and not com.is_bool_dtype(key.values): + if key.values.size and not is_bool_dtype(key.values): raise ValueError('Must pass DataFrame with boolean values only') return self.where(key) @@ -2289,7 +2320,7 @@ def select_dtypes(self, include=None, exclude=None): 5 False """ include, exclude = include or (), exclude or () - if not (com.is_list_like(include) and com.is_list_like(exclude)): + if not (is_list_like(include) and is_list_like(exclude)): raise TypeError('include and exclude must both be non-string' ' sequences') selection = tuple(map(frozenset, (include, exclude))) @@ -2300,9 +2331,9 @@ def select_dtypes(self, include=None, exclude=None): # convert the myriad valid dtypes object to a single representation include, exclude = map( - lambda x: frozenset(map(com._get_dtype_from_object, x)), selection) + lambda x: frozenset(map(_get_dtype_from_object, x)), selection) for dtypes in (include, exclude): - com._invalidate_string_dtypes(dtypes) + _invalidate_string_dtypes(dtypes) # can't both include AND exclude! if not include.isdisjoint(exclude): @@ -2392,7 +2423,7 @@ def _setitem_array(self, key, value): def _setitem_frame(self, key, value): # support boolean setting with DataFrame input, e.g. 
# df[df > df2] = 0
-        if key.values.size and not com.is_bool_dtype(key.values):
+        if key.values.size and not is_bool_dtype(key.values):
             raise TypeError('Must pass DataFrame with boolean values only')
 
         self._check_inplace_setting(value)
@@ -2586,7 +2617,7 @@ def reindexer(value):
                 value = _sanitize_index(value, self.index, copy=False)
                 if not isinstance(value, (np.ndarray, Index)):
                     if isinstance(value, list) and len(value) > 0:
-                        value = com._possibly_convert_platform(value)
+                        value = _possibly_convert_platform(value)
                     else:
                         value = com._asarray_tuplesafe(value)
                 elif value.ndim == 2:
@@ -2602,7 +2633,7 @@ def reindexer(value):
             # upcast the scalar
             dtype, value = _infer_dtype_from_scalar(value)
             value = np.repeat(value, len(self.index)).astype(dtype)
-            value = com._possibly_cast_to_datetime(value, dtype)
+            value = _possibly_cast_to_datetime(value, dtype)
 
             # return internal types directly
             if is_extension_type(value):
@@ -2916,8 +2947,8 @@ def _maybe_casted_values(index, labels=None):
                     mask = labels == -1
                     values = values.take(labels)
                     if mask.any():
-                        values, changed = com._maybe_upcast_putmask(values, mask,
-                                                                    np.nan)
+                        values, changed = _maybe_upcast_putmask(values, mask,
+                                                                np.nan)
                 return values
 
         new_index = _default_index(len(new_obj))
@@ -3131,14 +3162,14 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
             raise ValueError('When sorting by column, axis must be 0 (rows)')
         if not isinstance(by, list):
             by = [by]
-        if com.is_sequence(ascending) and len(by) != len(ascending):
+        if is_sequence(ascending) and len(by) != len(ascending):
             raise ValueError('Length of ascending (%d) != length of by (%d)' %
                              (len(ascending), len(by)))
         if len(by) > 1:
             from pandas.core.groupby import _lexsort_indexer
 
             def trans(v):
-                if com.needs_i8_conversion(v):
+                if needs_i8_conversion(v):
                     return v.view('i8')
                 return v
 
@@ -3151,7 +3182,7 @@ def trans(v):
                 keys.append(trans(k))
             indexer = _lexsort_indexer(keys, orders=ascending,
                                        na_position=na_position)
-            indexer = com._ensure_platform_int(indexer)
+            indexer = _ensure_platform_int(indexer)
         else:
             from pandas.core.groupby import _nargsort
 
@@ -3320,7 +3351,7 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False,
                                inplace=inplace, sort_remaining=sort_remaining)
 
     def _nsorted(self, columns, n, method, keep):
-        if not com.is_list_like(columns):
+        if not is_list_like(columns):
             columns = [columns]
         columns = list(columns)
         ser = getattr(self[columns[0]], method)(n, keep=keep)
@@ -3658,28 +3689,28 @@ def combine(self, other, func, fill_value=None, overwrite=True):
 
             # if we have different dtypes, possibly promote
             new_dtype = this_dtype
             if this_dtype != other_dtype:
-                new_dtype = com._lcd_dtypes(this_dtype, other_dtype)
+                new_dtype = _lcd_dtypes(this_dtype, other_dtype)
                 series = series.astype(new_dtype)
                 otherSeries = otherSeries.astype(new_dtype)
 
             # see if we need to be represented as i8 (datetimelike)
             # try to keep us at this dtype
-            needs_i8_conversion = com.needs_i8_conversion(new_dtype)
-            if needs_i8_conversion:
+            needs_i8_conversion_i = needs_i8_conversion(new_dtype)
+            if needs_i8_conversion_i:
                 this_dtype = new_dtype
                 arr = func(series, otherSeries, True)
             else:
                 arr = func(series, otherSeries)
 
             if do_fill:
-                arr = com.ensure_float(arr)
+                arr = _ensure_float(arr)
                 arr[this_mask & other_mask] = NA
 
             # try to downcast back to the original dtype
-            if needs_i8_conversion:
-                arr = com._possibly_cast_to_datetime(arr, this_dtype)
+            if needs_i8_conversion_i:
+                arr = _possibly_cast_to_datetime(arr, this_dtype)
             else:
-                arr = com._possibly_downcast_to_dtype(arr, this_dtype)
+                arr =
_possibly_downcast_to_dtype(arr, this_dtype) result[col] = arr @@ -4581,7 +4612,7 @@ def _dict_round(df, decimals): yield vals def _series_round(s, decimals): - if com.is_integer_dtype(s) or com.is_float_dtype(s): + if is_integer_dtype(s) or is_float_dtype(s): return s.round(decimals) return s @@ -4592,7 +4623,7 @@ def _series_round(s, decimals): if not decimals.index.is_unique: raise ValueError("Index of decimals must be unique") new_cols = [col for col in _dict_round(self, decimals)] - elif com.is_integer(decimals): + elif is_integer(decimals): # Dispatch to Series.round new_cols = [_series_round(v, decimals) for _, v in self.iteritems()] @@ -4634,14 +4665,14 @@ def corr(self, method='pearson', min_periods=1): mat = numeric_df.values if method == 'pearson': - correl = _algos.nancorr(com._ensure_float64(mat), minp=min_periods) + correl = _algos.nancorr(_ensure_float64(mat), minp=min_periods) elif method == 'spearman': - correl = _algos.nancorr_spearman(com._ensure_float64(mat), + correl = _algos.nancorr_spearman(_ensure_float64(mat), minp=min_periods) else: if min_periods is None: min_periods = 1 - mat = com._ensure_float64(mat).T + mat = _ensure_float64(mat).T corrf = nanops.get_corr_func(method) K = len(cols) correl = np.empty((K, K), dtype=float) @@ -4696,7 +4727,7 @@ def cov(self, min_periods=None): baseCov = np.cov(mat.T) baseCov = baseCov.reshape((len(cols), len(cols))) else: - baseCov = _algos.nancorr(com._ensure_float64(mat), cov=True, + baseCov = _algos.nancorr(_ensure_float64(mat), cov=True, minp=min_periods) return self._constructor(baseCov, index=cols, columns=cols) @@ -4825,7 +4856,7 @@ def _count_level(self, level, axis=0, numeric_only=False): level = count_axis._get_level_number(level) level_index = count_axis.levels[level] - labels = com._ensure_int64(count_axis.labels[level]) + labels = _ensure_int64(count_axis.labels[level]) counts = lib.count_level_2d(mask, labels, len(level_index), axis=0) result = DataFrame(counts, index=level_index, columns=agg_axis) @@ -4906,7 +4937,7 @@ def f(x): # try to coerce to the original dtypes item by item if we can if axis == 0: - result = com._coerce_to_dtypes(result, self.dtypes) + result = _coerce_to_dtypes(result, self.dtypes) return Series(result, index=labels) @@ -5376,13 +5407,13 @@ def _prep_ndarray(values, copy=True): return np.empty((0, 0), dtype=object) def convert(v): - return com._possibly_convert_platform(v) + return _possibly_convert_platform(v) # we could have a 1-dim or 2-dim list here # this is equiv of np.asarray, but does object conversion # and platform dtype preservation try: - if com.is_list_like(values[0]) or hasattr(values[0], 'len'): + if is_list_like(values[0]) or hasattr(values[0], 'len'): values = np.array([convert(v) for v in values]) else: values = convert(values) @@ -5570,7 +5601,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def convert(arr): if dtype != object and dtype != np.object: arr = lib.maybe_convert_objects(arr, try_float=coerce_float) - arr = com._possibly_cast_to_datetime(arr, dtype) + arr = _possibly_cast_to_datetime(arr, dtype) return arr arrays = [convert(arr) for arr in content] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b4bcae47cbbdf..d6e6f571be53a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,6 +8,29 @@ import pandas.lib as lib import pandas as pd + + +from pandas.types.common import (_coerce_to_dtype, + _ensure_int64, + needs_i8_conversion, + is_scalar, + is_integer, is_bool, + is_bool_dtype, + 
is_numeric_dtype, + is_datetime64_dtype, + is_timedelta64_dtype, + is_list_like, + is_dict_like, + is_re_compilable) +from pandas.types.cast import _maybe_promote, _maybe_upcast_putmask +from pandas.types.missing import isnull, notnull +from pandas.types.generic import ABCSeries, ABCPanel + +from pandas.core.common import (_values_from_object, + _maybe_box_datetimelike, + SettingWithCopyError, SettingWithCopyWarning, + AbstractMethodError) + from pandas.core.base import PandasObject from pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) @@ -25,11 +48,6 @@ from pandas.compat.numpy import function as nv from pandas.compat import (map, zip, lrange, string_types, isidentifier, set_function_name) -from pandas.core.common import (isnull, notnull, is_list_like, - _values_from_object, _maybe_promote, - _maybe_box_datetimelike, ABCSeries, - SettingWithCopyError, SettingWithCopyWarning, - AbstractMethodError) import pandas.core.nanops as nanops from pandas.util.decorators import Appender, Substitution, deprecate_kwarg from pandas.core import config @@ -46,10 +64,6 @@ Name or list of names which refer to the axis items.""") -def is_dictlike(x): - return isinstance(x, (dict, com.ABCSeries)) - - def _single_replace(self, to_replace, method, inplace, limit): if self.ndim != 1: raise TypeError('cannot replace {0} with method {1} on a {2}' @@ -116,7 +130,7 @@ def _validate_dtype(self, dtype): """ validate the passed dtype """ if dtype is not None: - dtype = com._coerce_to_dtype(dtype) + dtype = _coerce_to_dtype(dtype) # a compound dtype if dtype.kind == 'V': @@ -310,7 +324,7 @@ def _from_axes(cls, data, axes, **kwargs): def _get_axis_number(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) - if com.is_integer(axis): + if is_integer(axis): if axis in self._AXIS_NAMES: return axis else: @@ -717,8 +731,8 @@ def rename_axis(self, mapper, axis=0, copy=True, inplace=False): 1 2 5 2 3 6 """ - non_mapper = lib.isscalar(mapper) or (com.is_list_like(mapper) and not - com.is_dict_like(mapper)) + non_mapper = is_scalar(mapper) or (is_list_like(mapper) and not + is_dict_like(mapper)) if non_mapper: return self._set_axis_name(mapper, axis=axis) else: @@ -912,7 +926,7 @@ def bool(self): v = self.squeeze() if isinstance(v, (bool, np.bool_)): return bool(v) - elif lib.isscalar(v): + elif is_scalar(v): raise ValueError("bool cannot act on a non-boolean single element " "{0}".format(self.__class__.__name__)) @@ -1764,10 +1778,10 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True): else: return self.take(loc, axis=axis, convert=True) - if not lib.isscalar(loc): + if not is_scalar(loc): new_index = self.index[loc] - if lib.isscalar(loc): + if is_scalar(loc): new_values = self._data.fast_xs(loc) # may need to box a datelike-scalar @@ -2340,7 +2354,7 @@ def _reindex_with_indexers(self, reindexers, fill_value=np.nan, copy=False, index = _ensure_index(index) if indexer is not None: - indexer = com._ensure_int64(indexer) + indexer = _ensure_int64(indexer) # TODO: speed up on homogeneous DataFrame objects new_data = new_data.reindex_indexer(index, indexer, axis=baxis, @@ -3202,10 +3216,10 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, return self if self.ndim == 1: - if isinstance(value, (dict, com.ABCSeries)): + if isinstance(value, (dict, ABCSeries)): from pandas import Series value = Series(value) - elif not com.is_list_like(value): + elif not is_list_like(value): pass else: raise ValueError("invalid fill value with a %s" % @@ -3215,7 +3229,7 @@ def 
fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, downcast=downcast) - elif isinstance(value, (dict, com.ABCSeries)): + elif isinstance(value, (dict, ABCSeries)): if axis == 1: raise NotImplementedError('Currently only can fill ' 'with dict/Series column ' @@ -3228,7 +3242,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, obj = result[k] obj.fillna(v, limit=limit, inplace=True) return result - elif not com.is_list_like(value): + elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, downcast=downcast) @@ -3354,7 +3368,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, and play with this method to gain intuition about how it works. """ - if not com.is_bool(regex) and to_replace is not None: + if not is_bool(regex) and to_replace is not None: raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") if axis is not None: @@ -3367,15 +3381,15 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if value is None: # passing a single value that is scalar like # when value is None (GH5319), for compat - if not is_dictlike(to_replace) and not is_dictlike(regex): + if not is_dict_like(to_replace) and not is_dict_like(regex): to_replace = [to_replace] if isinstance(to_replace, (tuple, list)): return _single_replace(self, to_replace, method, inplace, limit) - if not is_dictlike(to_replace): - if not is_dictlike(regex): + if not is_dict_like(to_replace): + if not is_dict_like(regex): raise TypeError('If "to_replace" and "value" are both None' ' and "to_replace" is not a list, then ' 'regex must be a mapping') @@ -3385,7 +3399,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, items = list(compat.iteritems(to_replace)) keys, values = zip(*items) - are_mappings = [is_dictlike(v) for v in values] + are_mappings = [is_dict_like(v) for v in values] if any(are_mappings): if not all(are_mappings): @@ -3418,8 +3432,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, return self new_data = self._data - if is_dictlike(to_replace): - if is_dictlike(value): # {'A' : NA} -> {'A' : 0} + if is_dict_like(to_replace): + if is_dict_like(value): # {'A' : NA} -> {'A' : 0} res = self if inplace else self.copy() for c, src in compat.iteritems(to_replace): if c in value and c in self: @@ -3429,7 +3443,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, return None if inplace else res # {'A': NA} -> 0 - elif not com.is_list_like(value): + elif not is_list_like(value): for k, src in compat.iteritems(to_replace): if k in self: new_data = new_data.replace(to_replace=src, @@ -3441,8 +3455,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, raise TypeError('value argument must be scalar, dict, or ' 'Series') - elif com.is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] - if com.is_list_like(value): + elif is_list_like(to_replace): # [NA, ''] -> [0, 'missing'] + if is_list_like(value): if len(to_replace) != len(value): raise ValueError('Replacement lists must match ' 'in length. 
Expecting %d got %d ' % @@ -3458,8 +3472,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value=value, inplace=inplace, regex=regex) elif to_replace is None: - if not (com.is_re_compilable(regex) or - com.is_list_like(regex) or is_dictlike(regex)): + if not (is_re_compilable(regex) or + is_list_like(regex) or is_dict_like(regex)): raise TypeError("'regex' must be a string or a compiled " "regular expression or a list or dict of " "strings or regular expressions, you " @@ -3470,7 +3484,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, else: # dest iterable dict-like - if is_dictlike(value): # NA -> {'A' : 0, 'B' : -1} + if is_dict_like(value): # NA -> {'A' : 0, 'B' : -1} new_data = self._data for k, v in compat.iteritems(value): @@ -3480,7 +3494,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, inplace=inplace, regex=regex) - elif not com.is_list_like(value): # NA -> 0 + elif not is_list_like(value): # NA -> 0 new_data = self._data.replace(to_replace=to_replace, value=value, inplace=inplace, regex=regex) @@ -3792,14 +3806,14 @@ def clip(self, lower=None, upper=None, axis=None, *args, **kwargs): 3 0.230930 0.000000 4 1.100000 0.570967 """ - if isinstance(self, com.ABCPanel): + if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels") axis = nv.validate_clip_with_axis(axis, args, kwargs) # GH 2747 (arguments were reversed) if lower is not None and upper is not None: - if lib.isscalar(lower) and lib.isscalar(upper): + if is_scalar(lower) and is_scalar(upper): lower, upper = min(lower, upper), max(lower, upper) result = self @@ -4485,10 +4499,12 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, new_other = np.array(other, dtype=self.dtype) except ValueError: new_other = np.array(other) + except TypeError: + new_other = other # we can end up comparing integers and m8[ns] # which is a numpy no no - is_i8 = com.needs_i8_conversion(self.dtype) + is_i8 = needs_i8_conversion(self.dtype) if is_i8: matches = False else: @@ -4497,7 +4513,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, if matches is False or not matches.all(): # coerce other to a common dtype if we can - if com.needs_i8_conversion(self.dtype): + if needs_i8_conversion(self.dtype): try: other = np.array(other, dtype=self.dtype) except: @@ -4550,7 +4566,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, dtype, fill_value = _maybe_promote(other.dtype) new_other = np.empty(len(icond), dtype=dtype) new_other.fill(fill_value) - com._maybe_upcast_putmask(new_other, icond, other) + _maybe_upcast_putmask(new_other, icond, other) other = new_other else: @@ -5058,7 +5074,7 @@ def describe_categorical_1d(data): if result[1] > 0: top, freq = objcounts.index[0], objcounts.iloc[0] - if com.is_datetime64_dtype(data): + if is_datetime64_dtype(data): asint = data.dropna().values.view('i8') names += ['top', 'freq', 'first', 'last'] result += [lib.Timestamp(top), freq, @@ -5071,11 +5087,11 @@ def describe_categorical_1d(data): return pd.Series(result, index=names, name=data.name) def describe_1d(data): - if com.is_bool_dtype(data): + if is_bool_dtype(data): return describe_categorical_1d(data) - elif com.is_numeric_dtype(data): + elif is_numeric_dtype(data): return describe_numeric_1d(data) - elif com.is_timedelta64_dtype(data): + elif is_timedelta64_dtype(data): return describe_numeric_1d(data) else: return describe_categorical_1d(data) @@ 
-5162,7 +5178,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) if freq is None: - mask = com.isnull(_values_from_object(self)) + mask = isnull(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) return rs diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 077acc1e81444..6179857978b7b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -13,6 +13,25 @@ from pandas import compat from pandas.compat.numpy import function as nv from pandas.compat.numpy import _np_version_under1p8 + +from pandas.types.common import (_DATELIKE_DTYPES, + is_numeric_dtype, + is_timedelta64_dtype, is_datetime64_dtype, + is_categorical_dtype, + is_datetime_or_timedelta_dtype, + is_bool, is_integer_dtype, + is_complex_dtype, + is_bool_dtype, + is_scalar, + _ensure_float64, + _ensure_platform_int, + _ensure_int64, + _ensure_object, + _ensure_float) +from pandas.types.cast import _possibly_downcast_to_dtype +from pandas.types.missing import isnull, notnull, _maybe_fill + +from pandas.core.common import _values_from_object, AbstractMethodError from pandas.core.base import (PandasObject, SelectionMixin, GroupByError, DataError, SpecificationError) from pandas.core.categorical import Categorical @@ -30,14 +49,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com -from pandas.core.common import(_possibly_downcast_to_dtype, isnull, - notnull, _DATELIKE_DTYPES, is_numeric_dtype, - is_timedelta64_dtype, is_datetime64_dtype, - is_categorical_dtype, _values_from_object, - is_datetime_or_timedelta_dtype, is_bool, - is_bool_dtype, AbstractMethodError, - _maybe_fill) -from pandas.core.config import option_context, is_callable +from pandas.core.config import option_context import pandas.lib as lib from pandas.lib import Timestamp import pandas.tslib as tslib @@ -662,7 +674,7 @@ def apply(self, func, *args, **kwargs): # resolve functions to their callable functions prior, this # wouldn't be needed if args or kwargs: - if is_callable(func): + if callable(func): @wraps(func) def f(g): @@ -752,7 +764,7 @@ def _try_cast(self, result, obj): else: dtype = obj.dtype - if not lib.isscalar(result): + if not is_scalar(result): result = _possibly_downcast_to_dtype(result, dtype) return result @@ -817,7 +829,7 @@ def _python_agg_general(self, func, *args, **kwargs): # since we are masking, make sure that we have a float object values = result if is_numeric_dtype(values.dtype): - values = com.ensure_float(values) + values = _ensure_float(values) output[name] = self._try_cast(values[mask], result) @@ -1595,7 +1607,7 @@ def size(self): """ ids, _, ngroup = self.group_info - ids = com._ensure_platform_int(ids) + ids = _ensure_platform_int(ids) out = np.bincount(ids[ids != -1], minlength=ngroup or None) return Series(out, index=self.result_index, dtype='int64') @@ -1631,7 +1643,7 @@ def group_info(self): comp_ids, obs_group_ids = self._get_compressed_labels() ngroups = len(obs_group_ids) - comp_ids = com._ensure_int64(comp_ids) + comp_ids = _ensure_int64(comp_ids) return comp_ids, obs_group_ids, ngroups def _get_compressed_labels(self): @@ -1671,7 +1683,7 @@ def get_group_levels(self): name_list = [] for ping, labels in zip(self.groupings, self.recons_labels): - labels = com._ensure_platform_int(labels) + labels = _ensure_platform_int(labels) levels = ping.group_index.take(labels) name_list.append(levels) @@ -1780,11 +1792,11 @@ def _cython_operation(self, kind, values, how, 
axis): values = values.view('int64') is_numeric = True elif is_bool_dtype(values.dtype): - values = _algos.ensure_float64(values) - elif com.is_integer_dtype(values): + values = _ensure_float64(values) + elif is_integer_dtype(values): values = values.astype('int64', copy=False) - elif is_numeric and not com.is_complex_dtype(values): - values = _algos.ensure_float64(values) + elif is_numeric and not is_complex_dtype(values): + values = _ensure_float64(values) else: values = values.astype(object) @@ -1793,7 +1805,7 @@ def _cython_operation(self, kind, values, how, axis): kind, how, values, is_numeric) except NotImplementedError: if is_numeric: - values = _algos.ensure_float64(values) + values = _ensure_float64(values) func, dtype_str = self._get_cython_function( kind, how, values, is_numeric) else: @@ -1821,7 +1833,7 @@ def _cython_operation(self, kind, values, how, axis): result = self._transform( result, accum, values, labels, func, is_numeric) - if com.is_integer_dtype(result): + if is_integer_dtype(result): if len(result[result == tslib.iNaT]) > 0: result = result.astype('float64') result[result == tslib.iNaT] = np.nan @@ -1834,7 +1846,7 @@ def _cython_operation(self, kind, values, how, axis): result, (counts > 0).view(np.uint8)) except ValueError: result = lib.row_bool_subset_object( - com._ensure_object(result), + _ensure_object(result), (counts > 0).view(np.uint8)) else: result = result[counts > 0] @@ -1996,7 +2008,7 @@ def generate_bins_generic(values, binner, closed): class BinGrouper(BaseGrouper): def __init__(self, bins, binlabels, filter_empty=False, mutated=False): - self.bins = com._ensure_int64(bins) + self.bins = _ensure_int64(bins) self.binlabels = _ensure_index(binlabels) self._filter_empty_groups = filter_empty self.mutated = mutated @@ -2061,7 +2073,7 @@ def group_info(self): obs_group_ids = np.arange(ngroups) rep = np.diff(np.r_[0, self.bins]) - rep = com._ensure_platform_int(rep) + rep = _ensure_platform_int(rep) if ngroups == len(self.bins): comp_ids = np.repeat(np.arange(ngroups), rep) else: @@ -2449,7 +2461,7 @@ def is_in_obj(gpr): def _is_label_like(val): return (isinstance(val, compat.string_types) or - (val is not None and lib.isscalar(val))) + (val is not None and is_scalar(val))) def _convert_grouper(axis, grouper): @@ -2671,7 +2683,7 @@ def _aggregate_multiple_funcs(self, arg, _level): results[name] = obj.aggregate(func) if isinstance(list(compat.itervalues(results))[0], - com.ABCDataFrame): + DataFrame): # let higher level handle if _level: @@ -2870,9 +2882,9 @@ def nunique(self, dropna=True): 'val.dtype must be object, got %s' % val.dtype val, _ = algos.factorize(val, sort=False) sorter = np.lexsort((val, ids)) - isnull = lambda a: a == -1 + _isnull = lambda a: a == -1 else: - isnull = com.isnull + _isnull = isnull ids, val = ids[sorter], val[sorter] @@ -2882,7 +2894,7 @@ def nunique(self, dropna=True): inc = np.r_[1, val[1:] != val[:-1]] # 1st item of each group is a new unique observation - mask = isnull(val) + mask = _isnull(val) if dropna: inc[idx] = 1 inc[mask] = 0 @@ -2998,8 +3010,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, mi = MultiIndex(levels=levels, labels=labels, names=names, verify_integrity=False) - if com.is_integer_dtype(out): - out = com._ensure_int64(out) + if is_integer_dtype(out): + out = _ensure_int64(out) return Series(out, index=mi, name=self.name) # for compat. 
with algos.value_counts need to ensure every
@@ -3029,8 +3041,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
         mi = MultiIndex(levels=levels, labels=labels, names=names,
                         verify_integrity=False)
 
-        if com.is_integer_dtype(out):
-            out = com._ensure_int64(out)
+        if is_integer_dtype(out):
+            out = _ensure_int64(out)
         return Series(out, index=mi, name=self.name)
 
     def count(self):
@@ -3039,7 +3051,7 @@
         """ Compute count of group, excluding missing values """
         val = self.obj.get_values()
         mask = (ids != -1) & ~isnull(val)
-        ids = com._ensure_platform_int(ids)
+        ids = _ensure_platform_int(ids)
 
         out = np.bincount(ids[mask], minlength=ngroups or None)
         return Series(out,
@@ -3616,7 +3628,7 @@ def filter(self, func, dropna=True, *args, **kwargs):  # noqa
                 pass
 
             # interpret the result of the filter
-            if is_bool(res) or (lib.isscalar(res) and isnull(res)):
+            if is_bool(res) or (is_scalar(res) and isnull(res)):
                 if res and notnull(res):
                     indices.append(self._get_index(name))
             else:
@@ -3813,7 +3825,7 @@ def count(self):
         """ Compute count of group, excluding missing values """
         from functools import partial
         from pandas.lib import count_level_2d
-        from pandas.core.common import _isnull_ndarraylike as isnull
+        from pandas.types.missing import _isnull_ndarraylike as isnull
 
         data, _ = self._get_data_to_aggregate()
         ids, _, ngroups = self.grouper.group_info
@@ -3934,7 +3946,7 @@ class DataSplitter(object):
 
     def __init__(self, data, labels, ngroups, axis=0):
         self.data = data
-        self.labels = com._ensure_int64(labels)
+        self.labels = _ensure_int64(labels)
         self.ngroups = ngroups
         self.axis = axis
@@ -4115,7 +4127,7 @@ def loop(labels, shape):
     def maybe_lift(lab, size):  # promote nan values
         return (lab + 1, size + 1) if (lab == -1).any() else (lab, size)
 
-    labels = map(com._ensure_int64, labels)
+    labels = map(_ensure_int64, labels)
     if not xnull:
         labels, shape = map(list, zip(*map(maybe_lift, labels, shape)))
 
@@ -4331,9 +4343,9 @@ def _get_group_index_sorter(group_index, ngroups):
     alpha = 0.0  # taking complexities literally; there may be
    beta = 1.0  # some room for fine-tuning these parameters
     if alpha + beta * ngroups < count * np.log(count):
-        sorter, _ = _algos.groupsort_indexer(com._ensure_int64(group_index),
+        sorter, _ = _algos.groupsort_indexer(_ensure_int64(group_index),
                                              ngroups)
-        return com._ensure_platform_int(sorter)
+        return _ensure_platform_int(sorter)
     else:
         return group_index.argsort(kind='mergesort')
 
@@ -4348,7 +4360,7 @@ def _compress_group_index(group_index, sort=True):
     size_hint = min(len(group_index), _hash._SIZE_HINT_LIMIT)
     table = _hash.Int64HashTable(size_hint)
 
-    group_index = com._ensure_int64(group_index)
+    group_index = _ensure_int64(group_index)
 
     # note, group labels come out ascending (ie, 1,2,3 etc)
     comp_ids, obs_group_ids = table.get_labels_groupby(group_index)
@@ -4390,7 +4402,7 @@ def _groupby_indices(values):
         _, counts = _hash.value_count_scalar64(codes, False)
     else:
         reverse, codes, counts = _algos.group_labels(
-            _values_from_object(com._ensure_object(values)))
+            _values_from_object(_ensure_object(values)))
 
     return _algos.groupby_indices(reverse, codes, counts)
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 9485f50ed07f1..0cba8308c1c53 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1,17 +1,24 @@
 # pylint: disable=W0223
 
-from pandas.core.index import Index, MultiIndex
+import numpy as np
 from pandas.compat import range, zip
 import pandas.compat as compat
+from pandas.types.generic import ABCDataFrame, ABCPanel, ABCSeries
+from pandas.types.common import (is_integer_dtype,
is_integer, is_float, + is_categorical_dtype, + is_list_like, + is_sequence, + is_scalar, + _ensure_platform_int) +from pandas.types.missing import isnull, _infer_fill_value + +from pandas.core.index import Index, MultiIndex + import pandas.core.common as com -import pandas.lib as lib -from pandas.core.common import (is_bool_indexer, is_integer_dtype, - _asarray_tuplesafe, is_list_like, isnull, - is_null_slice, is_full_slice, ABCSeries, - ABCDataFrame, ABCPanel, is_float, - _values_from_object, _infer_fill_value, - is_integer) -import numpy as np +from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, + is_null_slice, is_full_slice, + _values_from_object) # the supported indexers @@ -67,7 +74,7 @@ def __getitem__(self, key): key = tuple(com._apply_if_callable(x, self.obj) for x in key) try: values = self.obj.get_value(*key) - if lib.isscalar(values): + if is_scalar(values): return values except Exception: pass @@ -625,7 +632,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): # we have a frame, with multiple indexers on both axes; and a # series, so need to broadcast (see GH5206) if (sum_aligners == self.ndim and - all([com.is_sequence(_) for _ in indexer])): + all([is_sequence(_) for _ in indexer])): ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values # single indexer @@ -639,7 +646,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): ax = obj.axes[i] # multiple aligners (or null slices) - if com.is_sequence(idx) or isinstance(idx, slice): + if is_sequence(idx) or isinstance(idx, slice): if single_aligner and is_null_slice(idx): continue new_ix = ax[idx] @@ -685,7 +692,7 @@ def _align_series(self, indexer, ser, multiindex_indexer=False): return ser - elif lib.isscalar(indexer): + elif is_scalar(indexer): ax = self.obj._get_axis(1) if ser.index.equals(ax): @@ -710,7 +717,7 @@ def _align_frame(self, indexer, df): sindexers = [] for i, ix in enumerate(indexer): ax = self.obj.axes[i] - if com.is_sequence(ix) or isinstance(ix, slice): + if is_sequence(ix) or isinstance(ix, slice): if idx is None: idx = ax[ix].ravel() elif cols is None: @@ -761,7 +768,7 @@ def _align_frame(self, indexer, df): val = df.reindex(index=ax)._values return val - elif lib.isscalar(indexer) and is_panel: + elif is_scalar(indexer) and is_panel: idx = self.obj.axes[1] cols = self.obj.axes[2] @@ -857,7 +864,7 @@ def _convert_for_reindex(self, key, axis=0): keyarr = _asarray_tuplesafe(key) if is_integer_dtype(keyarr) and not labels.is_integer(): - keyarr = com._ensure_platform_int(keyarr) + keyarr = _ensure_platform_int(keyarr) return labels.take(keyarr) return keyarr @@ -968,7 +975,7 @@ def _getitem_nested_tuple(self, tup): axis += 1 # if we have a scalar, we are done - if lib.isscalar(obj) or not hasattr(obj, 'ndim'): + if is_scalar(obj) or not hasattr(obj, 'ndim'): break # has the dim of the obj changed? 
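[Editor's note] The indexing hunks here swap lib.isscalar and the com.-prefixed
introspection helpers for their relocated pandas.types.common equivalents. As a
minimal illustrative sketch (not the shipped implementations, which are the ones
moved earlier in this patch; str/bytes stand in for compat.string_and_binary_types),
the two sequence predicates behave roughly like this:

    def is_list_like(arg):
        # iterable, but not a plain string/bytes scalar
        return hasattr(arg, '__iter__') and not isinstance(arg, (str, bytes))

    def is_sequence(x):
        # iterable *and* sized; a generator passes iter() but fails len()
        try:
            iter(x)
            len(x)
            return not isinstance(x, (str, bytes))
        except (TypeError, AttributeError):
            return False

    print(is_list_like([1, 2]), is_list_like('ab'))     # True False
    print(is_sequence((1, 2)), is_sequence(iter([1])))  # True False

This distinction is why _align_series above treats sequences and slices together
but falls back to the scalar path for bare indexers.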
@@ -1038,7 +1045,7 @@ def _getitem_iterable(self, key, axis=0): # asarray can be unsafe, NumPy strings are weird keyarr = _asarray_tuplesafe(key) - if com.is_categorical_dtype(labels): + if is_categorical_dtype(labels): keyarr = labels._shallow_copy(keyarr) # have the index handle the indexer and possibly return @@ -1799,7 +1806,7 @@ def check_bool_indexer(ax, key): result = key if isinstance(key, ABCSeries) and not key.index.equals(ax): result = result.reindex(ax) - mask = com.isnull(result._values) + mask = isnull(result._values) if mask.any(): raise IndexingError('Unalignable boolean Series key provided') @@ -1941,9 +1948,9 @@ def _non_reducing_slice(slice_): def pred(part): # true when slice does *not* reduce - return isinstance(part, slice) or com.is_list_like(part) + return isinstance(part, slice) or is_list_like(part) - if not com.is_list_like(slice_): + if not is_list_like(slice_): if not isinstance(slice_, slice): # a 1-d slice, like df.loc[1] slice_ = [[slice_]] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 1ea567f15cb7f..363ac8249eb06 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -10,29 +10,48 @@ from pandas.core.base import PandasObject -from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, - _TD_DTYPE, ABCSeries, is_list_like, - _infer_dtype_from_scalar, is_null_slice, - is_dtype_equal, is_null_datelike_scalar, - _maybe_promote, is_timedelta64_dtype, - is_datetime64_dtype, is_datetimetz, is_sparse, - array_equivalent, _is_na_compat, - _maybe_convert_string_to_object, - _maybe_convert_scalar, - is_categorical, is_datetimelike_v_numeric, - is_numeric_v_string_like, is_extension_type) +from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.common import (_TD_DTYPE, _NS_DTYPE, + _ensure_int64, _ensure_platform_int, + is_integer, + is_dtype_equal, + is_timedelta64_dtype, + is_datetime64_dtype, is_datetimetz, is_sparse, + is_categorical, is_categorical_dtype, + is_integer_dtype, + is_datetime64tz_dtype, + is_object_dtype, + is_datetimelike_v_numeric, + is_numeric_v_string_like, is_extension_type, + is_list_like, + is_re, + is_re_compilable, + is_scalar, + _get_dtype) +from pandas.types.cast import (_possibly_downcast_to_dtype, + _maybe_convert_string_to_object, + _maybe_upcast, + _maybe_convert_scalar, _maybe_promote, + _infer_dtype_from_scalar, + _soft_convert_objects, + _possibly_convert_objects, + _astype_nansafe) +from pandas.types.missing import (isnull, array_equivalent, + _is_na_compat, + is_null_datelike_scalar) +import pandas.types.concat as _concat + +from pandas.types.generic import ABCSeries +from pandas.core.common import is_null_slice import pandas.core.algorithms as algos -from pandas.types.api import DatetimeTZDtype from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer from pandas.core.categorical import Categorical, maybe_to_categorical from pandas.tseries.index import DatetimeIndex from pandas.formats.printing import pprint_thing -import pandas.core.common as com -import pandas.types.concat as _concat + import pandas.core.missing as missing -import pandas.core.convert as convert from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib import pandas.tslib as tslib @@ -112,8 +131,8 @@ def is_categorical_astype(self, dtype): validate that we have a astypeable to categorical, returns a boolean if we are a categorical """ - if 
com.is_categorical_dtype(dtype):
-            if dtype == com.CategoricalDtype():
+        if is_categorical_dtype(dtype):
+            if dtype == CategoricalDtype():
                 return True
 
             # this is a pd.Categorical, but is not
@@ -137,7 +156,7 @@ def get_values(self, dtype=None):
         return an internal format, currently just the ndarray
         this is often overridden to handle to_dense like operations
         """
-        if com.is_object_dtype(dtype):
+        if is_object_dtype(dtype):
             return self.values.astype(object)
         return self.values
 
@@ -481,7 +500,7 @@ def _astype(self, dtype, copy=False, raise_on_error=True, values=None,
             values = self.get_values(dtype=dtype)
 
             # _astype_nansafe works fine with 1-d only
-            values = com._astype_nansafe(values.ravel(), dtype, copy=True)
+            values = _astype_nansafe(values.ravel(), dtype, copy=True)
             values = values.reshape(self.shape)
 
             newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
@@ -651,7 +670,7 @@ def setitem(self, indexer, value, mgr=None):
 
         # cast the values to a type that can hold nan (if necessary)
         if not self._can_hold_element(value):
-            dtype, _ = com._maybe_promote(arr_value.dtype)
+            dtype, _ = _maybe_promote(arr_value.dtype)
             values = values.astype(dtype)
 
         transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
@@ -684,7 +703,7 @@ def _is_scalar_indexer(indexer):
             if arr_value.ndim == 1:
                 if not isinstance(indexer, tuple):
                     indexer = tuple([indexer])
-                return all([lib.isscalar(idx) for idx in indexer])
+                return all([is_scalar(idx) for idx in indexer])
             return False
 
         def _is_empty_indexer(indexer):
@@ -724,7 +743,7 @@ def _is_empty_indexer(indexer):
         if hasattr(value, 'dtype') and is_dtype_equal(values.dtype,
                                                       value.dtype):
             dtype = value.dtype
-        elif lib.isscalar(value):
+        elif is_scalar(value):
             dtype, _ = _infer_dtype_from_scalar(value)
         else:
             dtype = 'infer'
@@ -838,7 +857,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0,
                 n = np.array(new)
 
                 # type of the new block
-                dtype, _ = com._maybe_promote(n.dtype)
+                dtype, _ = _maybe_promote(n.dtype)
 
                 # we need to explicitly astype here to make a copy
                 n = n.astype(dtype)
@@ -1027,7 +1046,7 @@ def shift(self, periods, axis=0, mgr=None):
         # convert integer to float if necessary.
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = com._maybe_upcast(self.values) + new_values, fill_value = _maybe_upcast(self.values) # make sure array sent to np.roll is c_contiguous f_ordered = new_values.flags.f_contiguous @@ -1036,7 +1055,7 @@ def shift(self, periods, axis=0, mgr=None): axis = new_values.ndim - axis - 1 if np.prod(new_values.shape): - new_values = np.roll(new_values, com._ensure_platform_int(periods), + new_values = np.roll(new_values, _ensure_platform_int(periods), axis=axis) axis_indexer = [slice(None)] * self.ndim @@ -1306,7 +1325,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): from pandas import Float64Index is_empty = values.shape[axis] == 0 - if com.is_list_like(qs): + if is_list_like(qs): ax = Float64Index(qs) if is_empty: @@ -1350,7 +1369,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): ndim = getattr(result, 'ndim', None) or 0 result = self._try_coerce_result(result) - if lib.isscalar(result): + if is_scalar(result): return ax, self.make_block_scalar(result) return ax, make_block(result, placement=np.arange(len(result)), @@ -1591,7 +1610,7 @@ def _can_hold_element(self, element): tipo = element.dtype.type return (issubclass(tipo, np.integer) and not issubclass(tipo, (np.datetime64, np.timedelta64))) - return com.is_integer(element) + return is_integer(element) def _try_cast(self, element): try: @@ -1600,7 +1619,7 @@ def _try_cast(self, element): return element def should_store(self, value): - return com.is_integer_dtype(value) and value.dtype == self.dtype + return is_integer_dtype(value) and value.dtype == self.dtype class DatetimeLikeBlockMixin(object): @@ -1621,7 +1640,7 @@ def get_values(self, dtype=None): """ return object dtype as boxed values, such as Timestamps/Timedelta """ - if com.is_object_dtype(dtype): + if is_object_dtype(dtype): return lib.map_infer(self.values.ravel(), self._box_func).reshape(self.values.shape) return self.values @@ -1641,7 +1660,7 @@ def fillna(self, value, **kwargs): # allow filling with integers to be # interpreted as seconds - if not isinstance(value, np.timedelta64) and com.is_integer(value): + if not isinstance(value, np.timedelta64) and is_integer(value): value = Timedelta(value, unit='s') return super(TimeDeltaBlock, self).fillna(value, **kwargs) @@ -1795,10 +1814,10 @@ def convert(self, *args, **kwargs): new_style |= kw in kwargs if new_style: - fn = convert._soft_convert_objects + fn = _soft_convert_objects fn_inputs = new_inputs else: - fn = convert._possibly_convert_objects + fn = _possibly_convert_objects fn_inputs = ['convert_dates', 'convert_numeric', 'convert_timedeltas'] fn_inputs += ['copy'] @@ -1884,15 +1903,15 @@ def should_store(self, value): def replace(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, mgr=None): - to_rep_is_list = com.is_list_like(to_replace) - value_is_list = com.is_list_like(value) + to_rep_is_list = is_list_like(to_replace) + value_is_list = is_list_like(value) both_lists = to_rep_is_list and value_is_list either_list = to_rep_is_list or value_is_list result_blocks = [] blocks = [self] - if not either_list and com.is_re(to_replace): + if not either_list and is_re(to_replace): return self._replace_single(to_replace, value, inplace=inplace, filter=filter, regex=True, convert=convert, mgr=mgr) @@ -1930,10 +1949,10 @@ def replace(self, to_replace, value, inplace=False, filter=None, def _replace_single(self, to_replace, value, inplace=False, filter=None, regex=False, convert=True, 
mgr=None): # to_replace is regex compilable - to_rep_re = regex and com.is_re_compilable(to_replace) + to_rep_re = regex and is_re_compilable(to_replace) # regex is regex compilable - regex_re = com.is_re_compilable(regex) + regex_re = is_re_compilable(regex) # only one will survive if to_rep_re and regex_re: @@ -2046,7 +2065,7 @@ def _try_coerce_result(self, result): # GH12564: CategoricalBlock is 1-dim only # while returned results could be any dim - if ((not com.is_categorical_dtype(result)) and + if ((not is_categorical_dtype(result)) and isinstance(result, np.ndarray)): result = _block_shape(result, ndim=self.ndim) @@ -2151,7 +2170,7 @@ def _astype(self, dtype, mgr=None, **kwargs): """ # if we are passed a datetime64[ns, tz] - if com.is_datetime64tz_dtype(dtype): + if is_datetime64tz_dtype(dtype): dtype = DatetimeTZDtype(dtype) values = self.values @@ -2167,7 +2186,7 @@ def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 - return (com.is_integer(element) or isinstance(element, datetime) or + return (is_integer(element) or isinstance(element, datetime) or isnull(element)) def _try_cast(self, element): @@ -2209,7 +2228,7 @@ def _try_coerce_args(self, values, other): "naive Block") other_mask = isnull(other) other = other.asm8.view('i8') - elif hasattr(other, 'dtype') and com.is_integer_dtype(other): + elif hasattr(other, 'dtype') and is_integer_dtype(other): other = other.view('i8') else: try: @@ -2315,7 +2334,7 @@ def external_values(self): def get_values(self, dtype=None): # return object dtype as Timestamps with the zones - if com.is_object_dtype(dtype): + if is_object_dtype(dtype): f = lambda x: lib.Timestamp(x, tz=self.values.tz) return lib.map_infer( self.values.ravel(), f).reshape(self.values.shape) @@ -2561,7 +2580,7 @@ def shift(self, periods, axis=0, mgr=None): new_values = self.values.to_dense().take(indexer) # convert integer to float if necessary. 
need to do a lot more than # that, handle boolean etc also - new_values, fill_value = com._maybe_upcast(new_values) + new_values, fill_value = _maybe_upcast(new_values) if periods > 0: new_values[:periods] = fill_value else: @@ -3491,7 +3510,7 @@ def get(self, item, fastpath=True): indexer = np.arange(len(self.items))[isnull(self.items)] # allow a single nan location indexer - if not lib.isscalar(indexer): + if not is_scalar(indexer): if len(indexer) == 1: loc = indexer.item() else: @@ -3823,7 +3842,7 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_tuple[0] is None: - _, fill_value = com._maybe_promote(blk.dtype) + _, fill_value = _maybe_promote(blk.dtype) fill_tuple = (fill_value, ) return [blk.take_nd(slobj, axis=0, @@ -3881,7 +3900,7 @@ def _make_na_block(self, placement, fill_value=None): block_shape = list(self.shape) block_shape[0] = len(placement) - dtype, fill_value = com._infer_dtype_from_scalar(fill_value) + dtype, fill_value = _infer_dtype_from_scalar(fill_value) block_values = np.empty(block_shape, dtype=dtype) block_values.fill(fill_value) return make_block(block_values, placement=placement) @@ -4560,7 +4579,7 @@ def _possibly_compare(a, b, op): else: result = op(a, b) - if lib.isscalar(result) and (is_a_array or is_b_array): + if is_scalar(result) and (is_a_array or is_b_array): type_names = [type(a).__name__, type(b).__name__] if is_a_array: @@ -4611,7 +4630,7 @@ def _factor_indexer(shape, labels): expanded label indexer """ mult = np.array(shape)[::-1].cumprod()[::-1] - return com._ensure_platform_int( + return _ensure_platform_int( np.sum(np.array(labels).T * np.append(mult, [1]), axis=1).T) @@ -4631,7 +4650,7 @@ def _get_blkno_placements(blknos, blk_count, group=True): """ - blknos = com._ensure_int64(blknos) + blknos = _ensure_int64(blknos) # FIXME: blk_count is unused, but it may avoid the use of dicts in cython for blkno, indexer in lib.get_blkno_indexers(blknos, group): @@ -4721,7 +4740,7 @@ def _putmask_smart(v, m, n): pass # change the dtype - dtype, _ = com._maybe_promote(n.dtype) + dtype, _ = _maybe_promote(n.dtype) nv = v.astype(dtype) try: nv[m] = n[m] @@ -4787,9 +4806,9 @@ def get_empty_dtype_and_na(join_units): if dtype is None: continue - if com.is_categorical_dtype(dtype): + if is_categorical_dtype(dtype): upcast_cls = 'category' - elif com.is_datetimetz(dtype): + elif is_datetimetz(dtype): upcast_cls = 'datetimetz' elif issubclass(dtype.type, np.bool_): upcast_cls = 'bool' @@ -5062,8 +5081,8 @@ def dtype(self): if not self.needs_filling: return self.block.dtype else: - return com._get_dtype(com._maybe_promote(self.block.dtype, - self.block.fill_value)[0]) + return _get_dtype(_maybe_promote(self.block.dtype, + self.block.fill_value)[0]) return self._dtype diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 911fcaf529f98..b847415f274db 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -5,10 +5,15 @@ import numpy as np from distutils.version import LooseVersion -import pandas.core.common as com import pandas.algos as algos import pandas.lib as lib from pandas.compat import range, string_types +from pandas.types.common import (is_numeric_v_string_like, + is_float_dtype, is_datetime64_dtype, + is_integer_dtype, _ensure_float64, + is_scalar, + _DATELIKE_DTYPES) +from pandas.types.missing import isnull def mask_missing(arr, values_to_mask): @@ -24,7 +29,7 @@ def mask_missing(arr, 
values_to_mask): except Exception: values_to_mask = np.array(values_to_mask, dtype=object) - na_mask = com.isnull(values_to_mask) + na_mask = isnull(values_to_mask) nonna = values_to_mask[~na_mask] mask = None @@ -32,28 +37,28 @@ def mask_missing(arr, values_to_mask): if mask is None: # numpy elementwise comparison warning - if com.is_numeric_v_string_like(arr, x): + if is_numeric_v_string_like(arr, x): mask = False else: mask = arr == x # if x is a string and arr is not, then we get False and we must # expand the mask to size arr.shape - if lib.isscalar(mask): + if is_scalar(mask): mask = np.zeros(arr.shape, dtype=bool) else: # numpy elementwise comparison warning - if com.is_numeric_v_string_like(arr, x): + if is_numeric_v_string_like(arr, x): mask |= False else: mask |= arr == x if na_mask.any(): if mask is None: - mask = com.isnull(arr) + mask = isnull(arr) else: - mask |= com.isnull(arr) + mask |= isnull(arr) return mask @@ -110,7 +115,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, """ # Treat the original, non-scipy methods first. - invalid = com.isnull(yvalues) + invalid = isnull(yvalues) valid = ~invalid if not valid.any(): @@ -442,12 +447,12 @@ def pad_1d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _pad_1d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.pad_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_inplace_object @@ -456,7 +461,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) return values @@ -467,12 +472,12 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _backfill_1d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.backfill_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_inplace_object @@ -481,7 +486,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) @@ -493,12 +498,12 @@ def pad_2d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _pad_2d_datetime - elif 
com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.pad_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.pad_2d_inplace_object @@ -507,7 +512,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -523,12 +528,12 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): if dtype is None: dtype = values.dtype _method = None - if com.is_float_dtype(values): + if is_float_dtype(values): _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) - elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values): + elif dtype in _DATELIKE_DTYPES or is_datetime64_dtype(values): _method = _backfill_2d_datetime - elif com.is_integer_dtype(values): - values = com._ensure_float64(values) + elif is_integer_dtype(values): + values = _ensure_float64(values) _method = algos.backfill_2d_inplace_float64 elif values.dtype == np.object_: _method = algos.backfill_2d_inplace_object @@ -537,7 +542,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) if mask is None: - mask = com.isnull(values) + mask = isnull(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -570,22 +575,22 @@ def fill_zeros(result, x, y, name, fill): mask the nan's from x """ - if fill is None or com.is_float_dtype(result): + if fill is None or is_float_dtype(result): return result if name.startswith(('r', '__r')): x, y = y, x - is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type')) - is_scalar = lib.isscalar(y) + is_variable_type = (hasattr(y, 'dtype') or hasattr(y, 'type')) + is_scalar_type = is_scalar(y) - if not is_typed_variable and not is_scalar: + if not is_variable_type and not is_scalar_type: return result - if is_scalar: + if is_scalar_type: y = np.array(y) - if com.is_integer_dtype(y): + if is_integer_dtype(y): if (y == 0).any(): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index f390e3f04a6c3..7b89373dda7ba 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,16 +11,19 @@ import pandas.hashtable as _hash from pandas import compat, lib, algos, tslib -from pandas.core.common import (isnull, notnull, _values_from_object, - _maybe_upcast_putmask, _ensure_float64, - _ensure_int64, _ensure_object, is_float, - is_integer, is_complex, is_float_dtype, - is_complex_dtype, is_integer_dtype, - is_bool_dtype, is_object_dtype, - is_datetime64_dtype, is_timedelta64_dtype, - is_datetime_or_timedelta_dtype, _get_dtype, - is_int_or_datetime_dtype, is_any_int_dtype, - _int64_max) +from pandas.types.common import (_ensure_int64, _ensure_object, + _ensure_float64, _get_dtype, + is_float, is_scalar, + is_integer, is_complex, is_float_dtype, + is_complex_dtype, is_integer_dtype, + is_bool_dtype, is_object_dtype, + is_datetime64_dtype, is_timedelta64_dtype, + is_datetime_or_timedelta_dtype, + is_int_or_datetime_dtype, is_any_int_dtype) +from pandas.types.cast import _int64_max, _maybe_upcast_putmask +from pandas.types.missing import isnull, notnull + +from pandas.core.common import _values_from_object class disallow(object): @@ -351,7 +354,7 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): d = count - dtype.type(ddof) # always return NaN, never inf - if lib.isscalar(count): + if 
is_scalar(count): if count <= ddof: count = np.nan d = np.nan @@ -623,7 +626,7 @@ def _get_counts(mask, axis, dtype=float): return dtype.type(mask.size - mask.sum()) count = mask.shape[axis] - mask.sum(axis) - if lib.isscalar(count): + if is_scalar(count): return dtype.type(count) try: return count.astype(dtype) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 3aaca1eea486e..d76f011df3dd8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -13,21 +13,25 @@ from pandas import compat, lib, tslib import pandas.index as _index from pandas.util.decorators import Appender -import pandas.core.common as com import pandas.computation.expressions as expressions from pandas.lib import isscalar from pandas.tslib import iNaT from pandas.compat import bind_method import pandas.core.missing as missing import pandas.algos as _algos -from pandas.core.common import (is_list_like, notnull, isnull, - _values_from_object, _maybe_match_name, - needs_i8_conversion, is_datetimelike_v_numeric, - is_integer_dtype, is_categorical_dtype, - is_object_dtype, is_timedelta64_dtype, - is_datetime64_dtype, is_datetime64tz_dtype, - is_bool_dtype, PerformanceWarning, - ABCSeries, ABCIndex) +from pandas.core.common import (_values_from_object, _maybe_match_name, + PerformanceWarning) +from pandas.types.missing import notnull, isnull +from pandas.types.common import (needs_i8_conversion, + is_datetimelike_v_numeric, + is_integer_dtype, is_categorical_dtype, + is_object_dtype, is_timedelta64_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_bool_dtype, is_datetimetz, + is_list_like, + _ensure_object) +from pandas.types.cast import _maybe_upcast_putmask +from pandas.types.generic import ABCSeries, ABCIndex # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory @@ -446,7 +450,7 @@ def _convert_to_array(self, values, name=None, other=None): supplied_dtype = values.dtype inferred_type = supplied_dtype or lib.infer_dtype(values) if (inferred_type in ('datetime64', 'datetime', 'date', 'time') or - com.is_datetimetz(inferred_type)): + is_datetimetz(inferred_type)): # if we have a other of timedelta, but use pd.NaT here we # we are in the wrong path if (supplied_dtype is None and other is not None and @@ -463,7 +467,7 @@ def _convert_to_array(self, values, name=None, other=None): hasattr(ovalues, 'tz')): values = pd.DatetimeIndex(values) # datetime array with tz - elif com.is_datetimetz(values): + elif is_datetimetz(values): if isinstance(values, ABCSeries): values = values._values elif not (isinstance(values, (np.ndarray, ABCSeries)) and @@ -625,7 +629,7 @@ def na_op(x, y): "{op}".format(typ=type(x).__name__, op=str_rep)) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result @@ -820,8 +824,8 @@ def na_op(x, y): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? 
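[editor's note, not part of the patch] A recurring pattern in these ops.py hunks is masking invalid entries with NaN after an arithmetic op. `_maybe_upcast_putmask`, now imported from `pandas.types.cast` instead of `pandas.core.common`, is a private helper; as I understand it, it upcasts the result dtype when needed so the NaN assignment can succeed. A conceptual sketch of that effect using only public NumPy:

    import numpy as np

    result = np.array([1, 2, 3])            # integer result of some na_op
    mask = np.array([True, False, True])    # True where x was not null

    # int64 cannot hold NaN, so upcast first, then put NaN at the
    # positions that were null; this is the effect the helper provides
    if not np.issubdtype(result.dtype, np.floating):
        result = result.astype(np.float64)
    np.putmask(result, ~mask, np.nan)
    print(result)                           # [  1.  nan   3.]
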
else: - x = com._ensure_object(x) - y = com._ensure_object(y) + x = _ensure_object(x) + y = _ensure_object(y) result = lib.vec_binop(x, y, op) else: try: @@ -1095,7 +1099,7 @@ def na_op(x, y): "objects of type {x} and {y}".format( op=name, x=type(x), y=type(y))) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, name, fill_zeros) @@ -1220,7 +1224,7 @@ def na_op(x, y): result = np.empty(len(x), dtype=x.dtype) mask = notnull(x) result[mask] = op(x[mask], y) - result, changed = com._maybe_upcast_putmask(result, ~mask, np.nan) + result, changed = _maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, name, fill_zeros) return result diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 7d0bedcc2b381..4d61563cccce5 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -8,17 +8,21 @@ import numpy as np +from pandas.types.cast import (_infer_dtype_from_scalar, + _possibly_cast_item) +from pandas.types.common import (is_integer, is_list_like, + is_string_like, is_scalar) +from pandas.types.missing import notnull + import pandas.computation.expressions as expressions import pandas.core.common as com import pandas.core.ops as ops import pandas.core.missing as missing from pandas import compat -from pandas import lib from pandas.compat import (map, zip, range, u, OrderedDict, OrderedDefaultdict) from pandas.compat.numpy import function as nv from pandas.core.categorical import Categorical -from pandas.core.common import (PandasError, _try_sort, _default_index, - _infer_dtype_from_scalar, is_list_like) +from pandas.core.common import PandasError, _try_sort, _default_index from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, @@ -168,7 +172,7 @@ def _init_data(self, data, copy, dtype, **kwargs): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None - elif lib.isscalar(data) and all(x is not None for x in passed_axes): + elif is_scalar(data) and all(x is not None for x in passed_axes): if dtype is None: dtype, data = _infer_dtype_from_scalar(data) values = np.empty([len(x) for x in passed_axes], dtype=dtype) @@ -552,7 +556,7 @@ def set_value(self, *args, **kwargs): made_bigger = not np.array_equal(axes[0], self._info_axis) # how to make this logic simpler? 
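[editor's note, not part of the patch] The panel.py hunks route scalar detection through `is_scalar` and scalar dtype inference through `pandas.types.cast._infer_dtype_from_scalar`. A sketch of the code path they serve, assuming Panel as it exists at this point in pandas' history:

    import pandas as pd

    # Panel(scalar, ...) with all axes supplied takes the is_scalar
    # branch in _init_data: the dtype is inferred from the scalar and
    # the underlying block is filled with it
    p = pd.Panel(4, items=['a'], major_axis=[0, 1], minor_axis=['x', 'y'])
    print(p['a'])    # 2x2 DataFrame of 4s, dtype int64
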
if made_bigger: - com._possibly_cast_item(result, args[0], likely_dtype) + _possibly_cast_item(result, args[0], likely_dtype) return result.set_value(*args) @@ -582,7 +586,7 @@ def __setitem__(self, key, value): 'object was {1}'.format( shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) - elif lib.isscalar(value): + elif is_scalar(value): dtype, value = _infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) @@ -653,7 +657,7 @@ def round(self, decimals=0, *args, **kwargs): """ nv.validate_round(args, kwargs) - if com.is_integer(decimals): + if is_integer(decimals): result = np.apply_along_axis(np.round, 0, self.values) return self._wrap_result(result, axis=0) raise TypeError("decimals must be an integer") @@ -687,7 +691,7 @@ def dropna(self, axis=0, how='any', inplace=False): axis = self._get_axis_number(axis) values = self.values - mask = com.notnull(values) + mask = notnull(values) for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) @@ -711,7 +715,7 @@ def _combine(self, other, func, axis=0): return self._combine_panel(other, func) elif isinstance(other, DataFrame): return self._combine_frame(other, func, axis=axis) - elif lib.isscalar(other): + elif is_scalar(other): return self._combine_const(other, func) else: raise NotImplementedError("%s is not supported in combine " @@ -924,7 +928,7 @@ def to_frame(self, filter_observations=True): if filter_observations: # shaped like the return DataFrame - mask = com.notnull(self.values).all(axis=0) + mask = notnull(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: @@ -1218,7 +1222,7 @@ def transpose(self, *args, **kwargs): # check if a list of axes was passed in instead as a # single *args element if (len(args) == 1 and hasattr(args[0], '__iter__') and - not com.is_string_like(args[0])): + not is_string_like(args[0])): axes = args[0] else: axes = args diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 8d237016d1b33..4f601a2d377a6 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -6,6 +6,11 @@ import numpy as np +from pandas.types.common import _ensure_platform_int, is_list_like +from pandas.types.cast import _maybe_promote +from pandas.types.missing import notnull +import pandas.types.concat as _concat + from pandas.core.series import Series from pandas.core.frame import DataFrame @@ -14,11 +19,8 @@ from pandas._sparse import IntIndex from pandas.core.categorical import Categorical -from pandas.core.common import notnull, _ensure_platform_int, _maybe_promote from pandas.core.groupby import get_group_index, _compress_group_index -import pandas.core.common as com -import pandas.types.concat as _concat import pandas.core.algorithms as algos import pandas.algos as _algos @@ -1063,7 +1065,7 @@ def check_len(item, name): length_msg = ("Length of '{0}' ({1}) did not match the length of " "the columns being encoded ({2}).") - if com.is_list_like(item): + if is_list_like(item): if not len(item) == len(columns_to_encode): raise ValueError(length_msg.format(name, len(item), len(columns_to_encode))) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8015670212181..2c7f298dde2ec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,18 +13,33 @@ import numpy as np import numpy.ma as ma -from pandas.core.common import (isnull, notnull, is_bool_indexer, - _default_index, _maybe_upcast, - _asarray_tuplesafe, _infer_dtype_from_scalar, - is_list_like, _values_from_object, - is_categorical_dtype, 
- _possibly_cast_to_datetime, - _possibly_castable, _possibly_convert_platform, - _try_sort, is_extension_type, is_datetimetz, - _maybe_match_name, ABCSparseArray, - _coerce_to_dtype, SettingWithCopyError, - _maybe_box_datetimelike, ABCDataFrame, - _dict_compat, is_integer) +from pandas.types.common import (_coerce_to_dtype, is_categorical_dtype, + is_integer, is_integer_dtype, + is_float_dtype, + is_extension_type, is_datetimetz, + is_datetimelike, + is_timedelta64_dtype, + is_list_like, + is_hashable, + is_iterator, + is_dict_like, + is_scalar, + _ensure_platform_int) +from pandas.types.generic import ABCSparseArray, ABCDataFrame +from pandas.types.cast import (_maybe_upcast, _infer_dtype_from_scalar, + _possibly_convert_platform, + _possibly_cast_to_datetime, _possibly_castable) +from pandas.types.missing import isnull, notnull + +from pandas.core.common import (is_bool_indexer, + _default_index, + _asarray_tuplesafe, + _values_from_object, + _try_sort, + _maybe_match_name, + SettingWithCopyError, + _maybe_box_datetimelike, + _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, Float64Index, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -303,7 +318,7 @@ def name(self): @name.setter def name(self, value): - if value is not None and not com.is_hashable(value): + if value is not None and not is_hashable(value): raise TypeError('Series.name must be a hashable type') object.__setattr__(self, '_name', value) @@ -580,7 +595,7 @@ def __getitem__(self, key): try: result = self.index.get_value(self, key) - if not lib.isscalar(result): + if not is_scalar(result): if is_list_like(result) and not isinstance(result, Series): # we need to box if we have a non-unique index here @@ -613,10 +628,10 @@ def __getitem__(self, key): except Exception: raise - if com.is_iterator(key): + if is_iterator(key): key = list(key) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) return self._get_with(key) @@ -710,9 +725,9 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - elif is_bool_indexer(key): + elif com.is_bool_indexer(key): pass - elif com.is_timedelta64_dtype(self.dtype): + elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT if isnull(value): value = tslib.iNaT @@ -736,7 +751,7 @@ def setitem(key, value): if 'unorderable' in str(e): # pragma: no cover raise IndexError(key) - if is_bool_indexer(key): + if com.is_bool_indexer(key): key = check_bool_indexer(self.index, key) try: self._where(~key, value, inplace=True) @@ -1060,7 +1075,7 @@ def _get_repr(self, name=False, header=True, index=True, length=True, def __iter__(self): """ provide iteration over the values of the Series box values if necessary """ - if com.is_datetimelike(self): + if is_datetimelike(self): return (_maybe_box_datetimelike(x) for x in self._values) else: return iter(self._values) @@ -1349,7 +1364,7 @@ def quantile(self, q=0.5, interpolation='linear'): result = self._data.quantile(qs=q, interpolation=interpolation) - if com.is_list_like(q): + if is_list_like(q): return self._constructor(result, index=Float64Index(q), name=self.name) @@ -1481,7 +1496,7 @@ def dot(self, other): @Appender(base._shared_docs['searchsorted']) def searchsorted(self, v, side='left', sorter=None): if sorter is not None: - sorter = com._ensure_platform_int(sorter) + sorter = _ensure_platform_int(sorter) return self._values.searchsorted(Series(v)._values, side=side, sorter=sorter) @@ -1727,7 +1742,7 
@@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer indexer = _lexsort_indexer(index.labels, orders=ascending) - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) else: new_index, indexer = index.sort_values(return_indexer=True, @@ -2265,8 +2280,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, @Appender(generic._shared_docs['rename'] % _shared_doc_kwargs) def rename(self, index=None, **kwargs): - non_mapping = lib.isscalar(index) or (com.is_list_like(index) and - not com.is_dict_like(index)) + non_mapping = is_scalar(index) or (is_list_like(index) and + not is_dict_like(index)) if non_mapping: return self._set_name(index, inplace=kwargs.get('inplace')) return super(Series, self).rename(index=index, **kwargs) @@ -2345,7 +2360,7 @@ def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): if convert: indices = maybe_convert_indices(indices, len(self._get_axis(axis))) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) return self._constructor(new_values, @@ -2771,7 +2786,7 @@ def _try_cast(arr, take_fast_path): subarr = np.array(data, copy=False) # possibility of nan -> garbage - if com.is_float_dtype(data.dtype) and com.is_integer_dtype(dtype): + if is_float_dtype(data.dtype) and is_integer_dtype(dtype): if not isnull(data).any(): subarr = _try_cast(data, True) elif copy: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a3f687b7fd73c..6ec28f9735850 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,14 +1,19 @@ import numpy as np from pandas.compat import zip -from pandas.core.common import (isnull, notnull, _values_from_object, - is_bool_dtype, - is_list_like, is_categorical_dtype, - is_object_dtype, is_string_like) +from pandas.types.generic import ABCSeries, ABCIndex +from pandas.types.missing import isnull, notnull +from pandas.types.common import (is_bool_dtype, + is_categorical_dtype, + is_object_dtype, + is_string_like, + is_list_like, + is_scalar) +from pandas.core.common import _values_from_object + from pandas.core.algorithms import take_1d import pandas.compat as compat from pandas.core.base import AccessorProperty, NoNewAttributesMixin -from pandas.types import api as gt from pandas.util.decorators import Appender, deprecate_kwarg import re import pandas.lib as lib @@ -152,7 +157,7 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object): if not len(arr): return np.ndarray(0, dtype=dtype) - if isinstance(arr, gt.ABCSeries): + if isinstance(arr, ABCSeries): arr = arr.values if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) @@ -343,7 +348,7 @@ def str_repeat(arr, repeats): ------- repeated : Series/Index of objects """ - if lib.isscalar(repeats): + if is_scalar(repeats): def rep(x): try: @@ -696,7 +701,7 @@ def str_extractall(arr, pat, flags=0): if regex.groups == 0: raise ValueError("pattern contains no capture groups") - if isinstance(arr, gt.ABCIndex): + if isinstance(arr, ABCIndex): arr = arr.to_series().reset_index(drop=True) names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -1538,7 +1543,7 @@ def rjust(self, width, fillchar=' '): return self.pad(width, side='left', fillchar=fillchar) def zfill(self, width): - """" + """ Filling left side of strings in the 
Series/Index with 0. Equivalent to :meth:`str.zfill`. @@ -1820,7 +1825,7 @@ class StringAccessorMixin(object): def _make_str_accessor(self): from pandas.core.index import Index - if (isinstance(self, gt.ABCSeries) and + if (isinstance(self, ABCSeries) and not ((is_categorical_dtype(self.dtype) and is_object_dtype(self.values.categories)) or (is_object_dtype(self.dtype)))): diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e34d18fe3e54..bc4d34529287b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -11,6 +11,15 @@ import numpy as np from collections import defaultdict +from pandas.types.generic import ABCSeries, ABCDataFrame +from pandas.types.common import (is_integer, + is_bool, + is_float_dtype, + is_integer_dtype, + needs_i8_conversion, + is_timedelta64_dtype, + is_list_like, + _ensure_float64) import pandas as pd from pandas.lib import isscalar from pandas.core.base import (PandasObject, SelectionMixin, @@ -64,10 +73,10 @@ def _constructor(self): return Window def validate(self): - if self.center is not None and not com.is_bool(self.center): + if self.center is not None and not is_bool(self.center): raise ValueError("center must be a boolean") if self.min_periods is not None and not \ - com.is_integer(self.min_periods): + is_integer(self.min_periods): raise ValueError("min_periods must be an integer") def _convert_freq(self, how=None): @@ -75,7 +84,7 @@ def _convert_freq(self, how=None): obj = self._selected_obj if (self.freq is not None and - isinstance(obj, (com.ABCSeries, com.ABCDataFrame))): + isinstance(obj, (ABCSeries, ABCDataFrame))): if how is not None: warnings.warn("The how kw argument is deprecated and removed " "in a future version. You can resample prior " @@ -111,7 +120,7 @@ def _gotitem(self, key, ndim, subset=None): self = self._shallow_copy(subset) self._reset_cache() if subset.ndim == 2: - if isscalar(key) and key in subset or com.is_list_like(key): + if isscalar(key) and key in subset or is_list_like(key): self._selection = key return self @@ -150,11 +159,11 @@ def _prep_values(self, values=None, kill_inf=True, how=None): # GH #12373 : rolling functions error on float32 data # make sure the data is coerced to float64 - if com.is_float_dtype(values.dtype): - values = com._ensure_float64(values) - elif com.is_integer_dtype(values.dtype): - values = com._ensure_float64(values) - elif com.needs_i8_conversion(values.dtype): + if is_float_dtype(values.dtype): + values = _ensure_float64(values) + elif is_integer_dtype(values.dtype): + values = _ensure_float64(values) + elif needs_i8_conversion(values.dtype): raise NotImplementedError("ops for {action} for this " "dtype {dtype} are not " "implemented".format( @@ -162,7 +171,7 @@ def _prep_values(self, values=None, kill_inf=True, how=None): dtype=values.dtype)) else: try: - values = com._ensure_float64(values) + values = _ensure_float64(values) except (ValueError, TypeError): raise TypeError("cannot handle this type -> {0}" "".format(values.dtype)) @@ -184,7 +193,7 @@ def _wrap_result(self, result, block=None, obj=None): # coerce if necessary if block is not None: - if com.is_timedelta64_dtype(block.values.dtype): + if is_timedelta64_dtype(block.values.dtype): result = pd.to_timedelta( result.ravel(), unit='ns').values.reshape(result.shape) @@ -345,7 +354,7 @@ def _prep_window(self, **kwargs): window = self._get_window() if isinstance(window, (list, tuple, np.ndarray)): return com._asarray_tuplesafe(window).astype(float) - elif com.is_integer(window): + elif is_integer(window): import 
scipy.signal as sig # the below may pop from kwargs @@ -543,7 +552,7 @@ def _apply(self, func, name=None, window=None, center=None, def func(arg, window, min_periods=None): minp = check_minp(min_periods, window) # GH #12373: rolling functions error on float32 data - return cfunc(com._ensure_float64(arg), + return cfunc(_ensure_float64(arg), window, minp, **kwargs) # calculation function @@ -586,7 +595,7 @@ def count(self): results = [] for b in blocks: - if com.needs_i8_conversion(b.values): + if needs_i8_conversion(b.values): result = b.notnull().astype(int) else: try: @@ -850,7 +859,7 @@ class Rolling(_Rolling_and_Expanding): def validate(self): super(Rolling, self).validate() - if not com.is_integer(self.window): + if not is_integer(self.window): raise ValueError("window must be an integer") elif self.window < 0: raise ValueError("window must be non-negative") @@ -1484,7 +1493,7 @@ def _get_center_of_mass(com, span, halflife, alpha): def _offset(window, center): - if not com.is_integer(window): + if not is_integer(window): window = len(window) offset = (window - 1) / 2. if center else 0 try: diff --git a/pandas/formats/format.py b/pandas/formats/format.py index cc46ed57aeff0..436a9d5d5d4c8 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -10,8 +10,19 @@ import sys +from pandas.types.missing import isnull, notnull +from pandas.types.common import (is_categorical_dtype, + is_float_dtype, + is_period_arraylike, + is_integer_dtype, + is_datetimetz, + is_integer, + is_float, + is_numeric_dtype, + is_datetime64_dtype, + is_timedelta64_dtype) + from pandas.core.base import PandasObject -from pandas.core.common import isnull, notnull, is_numeric_dtype from pandas.core.index import Index, MultiIndex, _ensure_index from pandas import compat from pandas.compat import (StringIO, lzip, range, map, zip, reduce, u, @@ -194,7 +205,7 @@ def _get_footer(self): # level infos are added to the end and in a new line, like it is done # for Categoricals - if com.is_categorical_dtype(self.tr_series.dtype): + if is_categorical_dtype(self.tr_series.dtype): level_info = self.tr_series._values._repr_categories_info() if footer: footer += "\n" @@ -316,12 +327,12 @@ def should_show_dimensions(self): def _get_formatter(self, i): if isinstance(self.formatters, (list, tuple)): - if com.is_integer(i): + if is_integer(i): return self.formatters[i] else: return None else: - if com.is_integer(i) and i not in self.columns: + if is_integer(i) and i not in self.columns: i = self.columns[i] return self.formatters.get(i, None) @@ -1646,7 +1657,7 @@ def __init__(self, df, na_rep='', float_format=None, cols=None, def _format_value(self, val): if lib.checknull(val): val = self.na_rep - elif com.is_float(val): + elif is_float(val): if lib.isposinf_scalar(val): val = self.inf_rep elif lib.isneginf_scalar(val): @@ -1867,19 +1878,19 @@ def get_formatted_cells(self): def format_array(values, formatter, float_format=None, na_rep='NaN', digits=None, space=None, justify='right', decimal='.'): - if com.is_categorical_dtype(values): + if is_categorical_dtype(values): fmt_klass = CategoricalArrayFormatter - elif com.is_float_dtype(values.dtype): + elif is_float_dtype(values.dtype): fmt_klass = FloatArrayFormatter - elif com.is_period_arraylike(values): + elif is_period_arraylike(values): fmt_klass = PeriodArrayFormatter - elif com.is_integer_dtype(values.dtype): + elif is_integer_dtype(values.dtype): fmt_klass = IntArrayFormatter - elif com.is_datetimetz(values): + elif is_datetimetz(values): fmt_klass = 
Datetime64TZFormatter - elif com.is_datetime64_dtype(values.dtype): + elif is_datetime64_dtype(values.dtype): fmt_klass = Datetime64Formatter - elif com.is_timedelta64_dtype(values.dtype): + elif is_timedelta64_dtype(values.dtype): fmt_klass = Timedelta64Formatter else: fmt_klass = GenericArrayFormatter @@ -1949,14 +1960,14 @@ def _format(x): if isinstance(vals, Index): vals = vals._values - is_float = lib.map_infer(vals, com.is_float) & notnull(vals) - leading_space = is_float.any() + is_float_type = lib.map_infer(vals, is_float) & notnull(vals) + leading_space = is_float_type.any() fmt_values = [] for i, v in enumerate(vals): - if not is_float[i] and leading_space: + if not is_float_type[i] and leading_space: fmt_values.append(' %s' % _format(v)) - elif is_float[i]: + elif is_float_type[i]: fmt_values.append(float_format(v)) else: fmt_values.append(' %s' % _format(v)) diff --git a/pandas/formats/printing.py b/pandas/formats/printing.py index a4eaec8d5334b..37bd4b63d6f7a 100644 --- a/pandas/formats/printing.py +++ b/pandas/formats/printing.py @@ -2,9 +2,9 @@ printing tools """ +from pandas.types.inference import is_sequence from pandas import compat from pandas.compat import u -import pandas.core.common as com from pandas.core.config import get_option @@ -213,7 +213,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) - elif (com.is_sequence(thing) and + elif (is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, diff --git a/pandas/formats/style.py b/pandas/formats/style.py index 477ecccc03f4f..472fd958d35eb 100644 --- a/pandas/formats/style.py +++ b/pandas/formats/style.py @@ -17,10 +17,11 @@ "or `pip install Jinja2`" raise ImportError(msg) +from pandas.types.common import is_float, is_string_like + import numpy as np import pandas as pd from pandas.compat import lzip, range -import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice try: import matplotlib.pyplot as plt @@ -153,7 +154,7 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None, # display_funcs maps (row, col) -> formatting function def default_display_func(x): - if com.is_float(x): + if is_float(x): return '{:>.{precision}g}'.format(x, precision=self.precision) else: return x @@ -893,7 +894,7 @@ def _highlight_extrema(data, color='yellow', max_=True): def _maybe_wrap_formatter(formatter): - if com.is_string_like(formatter): + if is_string_like(formatter): return lambda x: formatter.format(x) elif callable(formatter): return formatter diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 0bb80be013275..5c9938c932da2 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -12,6 +12,28 @@ from pandas.compat import range, u from pandas.compat.numpy import function as nv from pandas import compat + + +from pandas.types.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex +from pandas.types.missing import isnull, array_equivalent +from pandas.types.common import (_ensure_int64, _ensure_object, + _ensure_platform_int, + is_datetimetz, + is_integer, + is_float, + is_dtype_equal, + is_object_dtype, + is_categorical_dtype, + is_bool_dtype, + is_integer_dtype, is_float_dtype, + needs_i8_conversion, + is_iterator, is_list_like, + is_scalar) +from pandas.types.cast import 
_coerce_indexer_dtype +from pandas.core.common import (is_bool_indexer, + _values_from_object, + _asarray_tuplesafe) + from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin) import pandas.core.base as base @@ -22,15 +44,6 @@ import pandas.core.missing as missing import pandas.core.algorithms as algos from pandas.formats.printing import pprint_thing -from pandas.core.common import (isnull, array_equivalent, - is_object_dtype, is_datetimetz, ABCSeries, - ABCPeriodIndex, ABCMultiIndex, - _values_from_object, is_float, is_integer, - is_iterator, is_categorical_dtype, - _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, - is_integer_dtype, is_float_dtype, - needs_i8_conversion) from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin @@ -223,7 +236,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): subarr = data.astype('object') else: - subarr = com._asarray_tuplesafe(data, dtype=object) + subarr = _asarray_tuplesafe(data, dtype=object) # _asarray_tuplesafe does not always copy underlying data, # so need to make sure that this happens @@ -264,7 +277,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif hasattr(data, '__array__'): return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) - elif data is None or lib.isscalar(data): + elif data is None or is_scalar(data): cls._scalar_data_error(data) else: if (tupleize_cols and isinstance(data, list) and data and @@ -284,7 +297,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # python2 - MultiIndex fails on mixed types pass # other iterable of some kind - subarr = com._asarray_tuplesafe(data, dtype=object) + subarr = _asarray_tuplesafe(data, dtype=object) return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) """ @@ -539,7 +552,7 @@ def _coerce_to_ndarray(cls, data): """ if not isinstance(data, (np.ndarray, Index)): - if data is None or lib.isscalar(data): + if data is None or is_scalar(data): cls._scalar_data_error(data) # other iterable of some kind @@ -841,7 +854,7 @@ def to_datetime(self, dayfirst=False): return DatetimeIndex(self.values) def _assert_can_do_setop(self, other): - if not com.is_list_like(other): + if not is_list_like(other): raise TypeError('Input must be Index or array-like') return True @@ -1325,7 +1338,7 @@ def __getitem__(self, key): getitem = self._data.__getitem__ promote = self._shallow_copy - if lib.isscalar(key): + if is_scalar(key): return getitem(key) if isinstance(key, slice): @@ -1338,7 +1351,7 @@ def __getitem__(self, key): key = _values_from_object(key) result = getitem(key) - if not lib.isscalar(result): + if not is_scalar(result): return promote(result) else: return result @@ -1426,7 +1439,7 @@ def _ensure_compat_concat(indexes): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) if self._can_hold_na: taken = self._assert_take_fillable(self.values, indices, allow_fill=allow_fill, @@ -1442,7 +1455,7 @@ def take(self, indices, axis=0, allow_fill=True, def _assert_take_fillable(self, values, indices, allow_fill=True, fill_value=None, na_value=np.nan): """ Internal method to handle NA filling of take """ - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) # only fill if we are 
passing a non-None fill_value if allow_fill and fill_value is not None: @@ -1491,7 +1504,7 @@ def _convert_for_op(self, value): def _assert_can_do_op(self, value): """ Check value is valid for scalar op """ - if not lib.isscalar(value): + if not is_scalar(value): msg = "'value' must be a scalar, passed: {0}" raise TypeError(msg.format(type(value).__name__)) @@ -1706,7 +1719,7 @@ def argsort(self, *args, **kwargs): return result.argsort(*args, **kwargs) def __add__(self, other): - if com.is_list_like(other): + if is_list_like(other): warnings.warn("using '+' to provide set union with Indexes is " "deprecated, use '|' or .union()", FutureWarning, stacklevel=2) @@ -1783,7 +1796,7 @@ def union(self, other): if len(self) == 0: return other._get_consensus_name(self) - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) @@ -1866,7 +1879,7 @@ def intersection(self, other): if self.equals(other): return self._get_consensus_name(other) - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.intersection(other) @@ -2028,7 +2041,7 @@ def get_value(self, series, key): # if we have something that is Index-like, then # use this, e.g. DatetimeIndex s = getattr(series, '_values', None) - if isinstance(s, Index) and lib.isscalar(key): + if isinstance(s, Index) and is_scalar(key): try: return s[key] except (IndexError, ValueError): @@ -2061,7 +2074,7 @@ def get_value(self, series, key): raise e1 except TypeError: # python 3 - if lib.isscalar(key): # pragma: no cover + if is_scalar(key): # pragma: no cover raise IndexError(key) raise InvalidIndexError(key) @@ -2137,7 +2150,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): return pself.get_indexer(ptarget, method=method, limit=limit, tolerance=tolerance) - if not com.is_dtype_equal(self.dtype, target.dtype): + if not is_dtype_equal(self.dtype, target.dtype): this = self.astype(object) target = target.astype(object) return this.get_indexer(target, method=method, limit=limit, @@ -2161,7 +2174,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): indexer = self._engine.get_indexer(target._values) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def _convert_tolerance(self, tolerance): # override this method on subclasses @@ -2443,7 +2456,7 @@ def _reindex_non_unique(self, target): if len(missing): l = np.arange(len(indexer)) - missing = com._ensure_platform_int(missing) + missing = _ensure_platform_int(missing) missing_labels = target.take(missing) missing_indexer = _ensure_int64(l[~check]) cur_labels = self.take(indexer[check])._values @@ -2541,7 +2554,7 @@ def join(self, other, how='left', level=None, return_indexers=False): result = x, z, y return result - if not com.is_dtype_equal(self.dtype, other.dtype): + if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.join(other, how=how, return_indexers=return_indexers) @@ -2637,8 +2650,8 @@ def _join_non_unique(self, other, how='left', return_indexers=False): [other._values], how=how, sort=True) - left_idx = com._ensure_platform_int(left_idx) - right_idx = com._ensure_platform_int(right_idx) + left_idx = _ensure_platform_int(left_idx) + right_idx = _ensure_platform_int(right_idx) join_index = self.values.take(left_idx) mask = left_idx == -1 @@ 
-2850,9 +2863,9 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): kind=kind) # return a slice - if not lib.isscalar(start_slice): + if not is_scalar(start_slice): raise AssertionError("Start slice bound is non-scalar") - if not lib.isscalar(end_slice): + if not is_scalar(end_slice): raise AssertionError("End slice bound is non-scalar") return slice(start_slice, end_slice, step) @@ -3483,7 +3496,7 @@ def _get_na_value(dtype): def _ensure_frozen(array_like, categories, copy=False): - array_like = com._coerce_indexer_dtype(array_like, categories) + array_like = _coerce_indexer_dtype(array_like, categories) array_like = array_like.view(FrozenNDArray) if copy: array_like = array_like.copy() diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 84b8926f4177f..f1d4fe2f26bdd 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -1,15 +1,21 @@ import numpy as np -import pandas.lib as lib import pandas.index as _index from pandas import compat from pandas.compat.numpy import function as nv +from pandas.types.generic import ABCCategorical, ABCSeries +from pandas.types.common import (is_categorical_dtype, + _ensure_platform_int, + is_list_like, + is_scalar) +from pandas.types.missing import array_equivalent + + from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg) from pandas.core.config import get_option from pandas.indexes.base import Index, _index_shared_docs import pandas.core.base as base -import pandas.core.common as com import pandas.core.missing as missing import pandas.indexes.base as ibase @@ -49,7 +55,7 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, if name is None and hasattr(data, 'name'): name = data.name - if isinstance(data, com.ABCCategorical): + if isinstance(data, ABCCategorical): data = cls._create_categorical(cls, data, categories, ordered) elif isinstance(data, CategoricalIndex): data = data._data @@ -58,7 +64,7 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, # don't allow scalars # if data is None, then categories must be provided - if lib.isscalar(data): + if is_scalar(data): if data is not None or categories is None: cls._scalar_data_error(data) data = [] @@ -116,7 +122,7 @@ def _create_categorical(self, data, categories=None, ordered=None): ------- Categorical """ - if not isinstance(data, com.ABCCategorical): + if not isinstance(data, ABCCategorical): from pandas.core.categorical import Categorical data = Categorical(data, categories=categories, ordered=ordered) else: @@ -164,7 +170,7 @@ def _is_dtype_compat(self, other): ------ TypeError if the dtypes are not compatible """ - if com.is_categorical_dtype(other): + if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): other = other._values if not other.is_dtype_equal(self): @@ -172,7 +178,7 @@ def _is_dtype_compat(self, other): "when appending") else: values = other - if not com.is_list_like(values): + if not is_list_like(values): values = [values] other = CategoricalIndex(self._create_categorical( self, other, categories=self.categories, ordered=self.ordered)) @@ -191,7 +197,7 @@ def equals(self, other): try: other = self._is_dtype_compat(other) - return com.array_equivalent(self._data, other) + return array_equivalent(self._data, other) except (TypeError, ValueError): pass @@ -360,7 +366,7 @@ def reindex(self, target, method=None, level=None, limit=None, target = ibase._ensure_index(target) - if not com.is_categorical_dtype(target) and not target.is_unique: + if not 
is_categorical_dtype(target) and not target.is_unique: raise ValueError("cannot reindex with a non-unique indexer") indexer, missing = self.get_indexer_non_unique(np.array(target)) @@ -388,7 +394,7 @@ def reindex(self, target, method=None, level=None, limit=None, # unless we had an inital Categorical to begin with # in which case we are going to conform to the passed Categorical new_target = np.asarray(new_target) - if com.is_categorical_dtype(target): + if is_categorical_dtype(target): new_target = target._shallow_copy(new_target, name=self.name) else: new_target = Index(new_target, name=self.name) @@ -460,7 +466,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): codes = self.categories.get_indexer(target) indexer, _ = self._engine.get_indexer_non_unique(codes) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def get_indexer_non_unique(self, target): """ this is the same for a CategoricalIndex for get_indexer; the API @@ -491,7 +497,7 @@ def _convert_list_indexer(self, keyarr, kind=None): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) taken = self._assert_take_fillable(self.codes, indices, allow_fill=allow_fill, fill_value=fill_value, @@ -591,12 +597,12 @@ def _evaluate_compare(self, other): self, other._values, categories=self.categories, ordered=self.ordered) - if isinstance(other, (com.ABCCategorical, np.ndarray, - com.ABCSeries)): + if isinstance(other, (ABCCategorical, np.ndarray, + ABCSeries)): if len(self.values) != len(other): raise ValueError("Lengths must match to compare") - if isinstance(other, com.ABCCategorical): + if isinstance(other, ABCCategorical): if not self.values.is_dtype_equal(other): raise TypeError("categorical index comparisions must " "have the same categories and ordered " @@ -619,7 +625,7 @@ def _delegate_method(self, name, *args, **kwargs): if 'inplace' in kwargs: raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) - if lib.isscalar(res): + if is_scalar(res): return res return CategoricalIndex(res, name=self.name) diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 05b2045a4850f..365a971f82a3b 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -13,6 +13,21 @@ from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv from pandas import compat + + +from pandas.types.common import (_ensure_int64, + _ensure_platform_int, + is_object_dtype, + is_iterator, + is_list_like, + is_scalar) +from pandas.types.missing import isnull, array_equivalent +from pandas.core.common import (_values_from_object, + is_bool_indexer, + is_null_slice, + PerformanceWarning) + + from pandas.core.base import FrozenList import pandas.core.base as base from pandas.util.decorators import (Appender, cache_readonly, @@ -21,13 +36,6 @@ import pandas.core.missing as missing import pandas.core.algorithms as algos from pandas.formats.printing import pprint_thing -from pandas.core.common import (isnull, array_equivalent, - is_object_dtype, - _values_from_object, - is_iterator, - _ensure_int64, is_bool_indexer, - is_list_like, is_null_slice, - PerformanceWarning) from pandas.core.config import get_option @@ -798,7 +806,7 @@ def lexsort_depth(self): else: return 0 - int64_labels = [com._ensure_int64(lab) for lab in self.labels] + int64_labels = [_ensure_int64(lab) for lab in 
self.labels] for k in range(self.nlevels, 0, -1): if lib.is_lexsorted(int64_labels[:k]): return k @@ -984,7 +992,7 @@ def __setstate__(self, state): self._reset_identity() def __getitem__(self, key): - if lib.isscalar(key): + if is_scalar(key): retval = [] for lev, lab in zip(self.levels, self.labels): if lab[key] == -1: @@ -1011,7 +1019,7 @@ def __getitem__(self, key): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_platform_int(indices) + indices = _ensure_platform_int(indices) taken = self._assert_take_fillable(self.labels, indices, allow_fill=allow_fill, fill_value=fill_value, @@ -1313,7 +1321,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): if not ascending: indexer = indexer[::-1] - indexer = com._ensure_platform_int(indexer) + indexer = _ensure_platform_int(indexer) new_labels = [lab.take(indexer) for lab in self.labels] new_index = MultiIndex(labels=new_labels, levels=self.levels, @@ -1377,7 +1385,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): else: indexer = self_index._engine.get_indexer(target._values) - return com._ensure_platform_int(indexer) + return _ensure_platform_int(indexer) def reindex(self, target, method=None, level=None, limit=None, tolerance=None): @@ -1759,7 +1767,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # selected from pandas import Series mapper = Series(indexer) - indexer = labels.take(com._ensure_platform_int(indexer)) + indexer = labels.take(_ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) m = result.map(mapper)._values diff --git a/pandas/indexes/numeric.py b/pandas/indexes/numeric.py index 89fc05fdcc5f5..86d22e141f781 100644 --- a/pandas/indexes/numeric.py +++ b/pandas/indexes/numeric.py @@ -3,13 +3,15 @@ import pandas.algos as _algos import pandas.index as _index +from pandas.types.common import (is_dtype_equal, pandas_dtype, + is_float_dtype, is_object_dtype, + is_integer_dtype, is_scalar) +from pandas.types.missing import array_equivalent, isnull +from pandas.core.common import _values_from_object + from pandas import compat from pandas.indexes.base import Index, InvalidIndexError, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.core.common as com -from pandas.core.common import (is_dtype_equal, isnull, pandas_dtype, - is_float_dtype, is_object_dtype, - is_integer_dtype) import pandas.indexes.base as ibase @@ -164,8 +166,8 @@ def equals(self, other): if self.is_(other): return True - return com.array_equivalent(com._values_from_object(self), - com._values_from_object(other)) + return array_equivalent(_values_from_object(self), + _values_from_object(other)) def _wrap_joined_index(self, joined, other): name = self.name if self.name == other.name else None @@ -287,17 +289,17 @@ def _format_native_types(self, na_rep='', float_format=None, decimal='.', def get_value(self, series, key): """ we always want to get an index value, never a value """ - if not lib.isscalar(key): + if not is_scalar(key): raise InvalidIndexError from pandas.core.indexing import maybe_droplevels from pandas.core.series import Series - k = com._values_from_object(key) + k = _values_from_object(key) loc = self.get_loc(k) - new_values = com._values_from_object(series)[loc] + new_values = _values_from_object(series)[loc] - if lib.isscalar(new_values) or new_values is None: + if is_scalar(new_values) or new_values is None: return 
new_values new_index = self[loc] diff --git a/pandas/indexes/range.py b/pandas/indexes/range.py index 168143fdea047..f680d2da0161e 100644 --- a/pandas/indexes/range.py +++ b/pandas/indexes/range.py @@ -4,14 +4,16 @@ import numpy as np import pandas.index as _index +from pandas.types.common import (is_integer, + is_scalar, + is_int64_dtype) + from pandas import compat from pandas.compat import lrange, range from pandas.compat.numpy import function as nv from pandas.indexes.base import Index, _index_shared_docs from pandas.util.decorators import Appender, cache_readonly -import pandas.core.common as com import pandas.indexes.base as ibase -import pandas.lib as lib from pandas.indexes.numeric import Int64Index @@ -120,7 +122,7 @@ def _simple_new(cls, start, stop=None, step=None, name=None, result = object.__new__(cls) # handle passed None, non-integers - if start is None or not com.is_integer(start): + if start is None or not is_integer(start): try: return RangeIndex(start, stop, step, name=name, **kwargs) except TypeError: @@ -139,7 +141,7 @@ def _simple_new(cls, start, stop=None, step=None, name=None, @staticmethod def _validate_dtype(dtype): """ require dtype to be None or int64 """ - if not (dtype is None or com.is_int64_dtype(dtype)): + if not (dtype is None or is_int64_dtype(dtype)): raise TypeError('Invalid to pass a non-int64 dtype to RangeIndex') @cache_readonly @@ -448,7 +450,7 @@ def __getitem__(self, key): """ super_getitem = super(RangeIndex, self).__getitem__ - if lib.isscalar(key): + if is_scalar(key): n = int(key) if n != key: return super_getitem(key) @@ -510,7 +512,7 @@ def __getitem__(self, key): return super_getitem(key) def __floordiv__(self, other): - if com.is_integer(other): + if is_integer(other): if (len(self) == 0 or self._start % other == 0 and self._step % other == 0): @@ -560,7 +562,7 @@ def _evaluate_numeric_binop(self, other): # we don't have a representable op # so return a base index - if not com.is_integer(rstep) or not rstep: + if not is_integer(rstep) or not rstep: raise ValueError else: @@ -577,7 +579,7 @@ def _evaluate_numeric_binop(self, other): # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return # as a Float64Index if we have float-like descriptors - if not all([com.is_integer(x) for x in + if not all([is_integer(x) for x in [rstart, rstop, rstep]]): result = result.astype('float64') diff --git a/pandas/io/common.py b/pandas/io/common.py index 76395928eb011..6f9bddd0fdf9b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -11,8 +11,8 @@ from pandas.compat import StringIO, BytesIO, string_types, text_type from pandas import compat from pandas.formats.printing import pprint_thing -from pandas.core.common import is_number, AbstractMethodError - +from pandas.core.common import AbstractMethodError +from pandas.types.common import is_number try: import pathlib diff --git a/pandas/io/data.py b/pandas/io/data.py index 5fa440e7bb1ff..68151fbb091fa 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -19,7 +19,9 @@ ) import pandas.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat, to_datetime, DatetimeIndex, DateOffset -from pandas.core.common import is_list_like, PandasError + +from pandas.types.common import is_list_like +from pandas.core.common import PandasError from pandas.io.common import urlopen, ZipFile, urlencode from pandas.tseries.offsets import MonthEnd from pandas.util.testing import _network_error_classes diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 
775465ea9372d..703cdbeaa7a8f 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -10,6 +10,9 @@ import abc import numpy as np +from pandas.types.common import (is_integer, is_float, + is_bool, is_list_like) + from pandas.core.frame import DataFrame from pandas.io.parsers import TextParser from pandas.io.common import (_is_url, _urlopen, _validate_header_arg, @@ -22,7 +25,6 @@ from pandas.formats.printing import pprint_thing import pandas.compat as compat import pandas.compat.openpyxl_compat as openpyxl_compat -import pandas.core.common as com from warnings import warn from distutils.version import LooseVersion @@ -423,17 +425,17 @@ def _parse_cell(cell_contents, cell_typ): output[asheetname] = DataFrame() continue - if com.is_list_like(header) and len(header) == 1: + if is_list_like(header) and len(header) == 1: header = header[0] # forward fill and pull out names for MultiIndex column header_names = None if header is not None: - if com.is_list_like(header): + if is_list_like(header): header_names = [] control_row = [True for x in data[0]] for row in header: - if com.is_integer(skiprows): + if is_integer(skiprows): row += skiprows data[row], control_row = _fill_mi_header( @@ -444,9 +446,9 @@ def _parse_cell(cell_contents, cell_typ): else: data[header] = _trim_excel_header(data[header]) - if com.is_list_like(index_col): + if is_list_like(index_col): # forward fill values for MultiIndex index - if not com.is_list_like(header): + if not is_list_like(header): offset = 1 + header else: offset = 1 + max(header) @@ -459,7 +461,7 @@ def _parse_cell(cell_contents, cell_typ): else: last = data[row][col] - if com.is_list_like(header) and len(header) > 1: + if is_list_like(header) and len(header) > 1: has_index_names = True # GH 12292 : error when read one empty column from excel file @@ -556,21 +558,21 @@ def _pop_header_name(row, index_col): return none_fill(row[0]), row[1:] else: # pop out header name and fill w/ blank - i = index_col if not com.is_list_like(index_col) else max(index_col) + i = index_col if not is_list_like(index_col) else max(index_col) return none_fill(row[i]), row[:i] + [''] + row[i + 1:] def _conv_value(val): # Convert numpy types to Python types for the Excel writers. 
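[editor's note, not part of the patch] `_conv_value` in the next hunk is why the scalar predicates matter for the Excel writers: numpy scalar types confuse the underlying engines, so values are coerced to builtin int/float/bool first. An illustrative mirror of that dispatch with the relocated helpers (simplified; the real function also handles Period and list-likes):

    import numpy as np
    from pandas.types.common import is_integer, is_float, is_bool

    def conv(val):
        # same branch order as _conv_value in the hunk below
        if is_integer(val):
            return int(val)
        elif is_float(val):
            return float(val)
        elif is_bool(val):
            return bool(val)
        return val

    print(type(conv(np.int64(5))))    # <class 'int'>
    print(type(conv(np.float64(2)))) # <class 'float'>
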
-    if com.is_integer(val):
+    if is_integer(val):
         val = int(val)
-    elif com.is_float(val):
+    elif is_float(val):
         val = float(val)
-    elif com.is_bool(val):
+    elif is_bool(val):
         val = bool(val)
     elif isinstance(val, Period):
         val = "%s" % val
-    elif com.is_list_like(val):
+    elif is_list_like(val):
         val = str(val)

     return val
diff --git a/pandas/io/html.py b/pandas/io/html.py
index 609642e248eda..e0d84a9617ae4 100644
--- a/pandas/io/html.py
+++ b/pandas/io/html.py
@@ -12,12 +12,12 @@

 import numpy as np

+from pandas.types.common import is_list_like
 from pandas.io.common import (EmptyDataError, _is_url, urlopen,
                               parse_url, _validate_header_arg)
 from pandas.io.parsers import TextParser
 from pandas.compat import (lrange, lmap, u, string_types, iteritems,
                            raise_with_traceback, binary_type)
-from pandas.core import common as com
 from pandas import Series
 from pandas.core.common import AbstractMethodError
 from pandas.formats.printing import pprint_thing
@@ -107,7 +107,7 @@ def _get_skiprows(skiprows):
     """
     if isinstance(skiprows, slice):
         return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1)
-    elif isinstance(skiprows, numbers.Integral) or com.is_list_like(skiprows):
+    elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows):
         return skiprows
     elif skiprows is None:
         return 0
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index ff06a5f212f8b..14e2c9b371296 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -47,6 +47,10 @@
 import numpy as np
 from pandas import compat
 from pandas.compat import u, u_safe
+
+from pandas.types.common import (is_categorical_dtype, is_object_dtype,
+                                 needs_i8_conversion, pandas_dtype)
+
 from pandas import (Timestamp, Period, Series, DataFrame,  # noqa
                     Index, MultiIndex, Float64Index, Int64Index,
                     Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT,
@@ -55,9 +59,7 @@
 from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
 from pandas.sparse.array import BlockIndex, IntIndex
 from pandas.core.generic import NDFrame
-from pandas.core.common import (PerformanceWarning,
-                                is_categorical_dtype, is_object_dtype,
-                                needs_i8_conversion, pandas_dtype)
+from pandas.core.common import PerformanceWarning
 from pandas.io.common import get_filepath_or_buffer
 from pandas.core.internals import BlockManager, make_block
 import pandas.core.internals as internals
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index dc9455289b757..84ea2a92b8026 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -2,20 +2,22 @@
 Module contains tools for processing files into DataFrames or other objects
 """
 from __future__ import print_function
-from pandas.compat import (range, lrange, StringIO, lzip, zip,
-                           string_types, map, OrderedDict)
-from pandas import compat
 from collections import defaultdict
 import re
 import csv
 import warnings
+import datetime

 import numpy as np

+from pandas import compat
+from pandas.compat import range, lrange, StringIO, lzip, zip, string_types, map
+from pandas.types.common import (is_integer, _ensure_object,
+                                 is_list_like, is_integer_dtype,
+                                 is_float,
+                                 is_scalar)
 from pandas.core.index import Index, MultiIndex
 from pandas.core.frame import DataFrame
-import datetime
-import pandas.core.common as com
 from pandas.core.common import AbstractMethodError
 from pandas.core.config import get_option
 from pandas.io.date_converters import generic_parser
@@ -326,11 +328,11 @@ def _validate_nrows(nrows):
     msg = "'nrows' must be an integer"

     if nrows is not None:
-        if com.is_float(nrows):
+        if is_float(nrows):
             if int(nrows) != nrows:
                 raise ValueError(msg)
             nrows = int(nrows)
-        elif not com.is_integer(nrows):
+        elif not is_integer(nrows):
             raise ValueError(msg)

     return nrows
@@ -869,7 +871,7 @@ def _clean_options(self, options, engine):
         # handle skiprows; this is internally handled by the
         # c-engine, so only need for python parsers
         if engine != 'c':
-            if com.is_integer(skiprows):
+            if is_integer(skiprows):
                 skiprows = lrange(skiprows)
             skiprows = set() if skiprows is None else set(skiprows)
@@ -961,7 +963,7 @@ def _validate_parse_dates_arg(parse_dates):
            "for the 'parse_dates' parameter")

     if parse_dates is not None:
-        if lib.isscalar(parse_dates):
+        if is_scalar(parse_dates):
             if not lib.is_bool(parse_dates):
                 raise TypeError(msg)
@@ -1021,8 +1023,8 @@ def __init__(self, kwds):
             is_sequence = isinstance(self.index_col, (list, tuple,
                                                       np.ndarray))
             if not (is_sequence and
-                    all(map(com.is_integer, self.index_col)) or
-                    com.is_integer(self.index_col)):
+                    all(map(is_integer, self.index_col)) or
+                    is_integer(self.index_col)):
                 raise ValueError("index_col must only contain row numbers "
                                  "when specifying a multi-index header")
@@ -1047,7 +1049,7 @@ def _should_parse_dates(self, i):
             name = self.index_names[i]
             j = self.index_col[i]

-            if lib.isscalar(self.parse_dates):
+            if is_scalar(self.parse_dates):
                 return (j == self.parse_dates) or (name == self.parse_dates)
             else:
                 return (j in self.parse_dates) or (name in self.parse_dates)
@@ -1281,7 +1283,7 @@ def _convert_types(self, values, na_values, try_num_bool=True):
             mask = lib.ismember(values, na_values)
             na_count = mask.sum()
             if na_count > 0:
-                if com.is_integer_dtype(values):
+                if is_integer_dtype(values):
                     values = values.astype(np.float64)
                 np.putmask(values, mask, np.nan)
                 return values, na_count
@@ -1407,10 +1409,10 @@ def _set_noconvert_columns(self):
         usecols = self.usecols

         def _set(x):
-            if usecols and com.is_integer(x):
+            if usecols and is_integer(x):
                 x = list(usecols)[x]

-            if not com.is_integer(x):
+            if not is_integer(x):
                 x = names.index(x)

             self._reader.set_noconvert(x)
@@ -1790,7 +1792,7 @@ def _set_no_thousands_columns(self):
         noconvert_columns = set()

         def _set(x):
-            if com.is_integer(x):
+            if is_integer(x):
                 noconvert_columns.add(x)
             else:
                 noconvert_columns.add(self.columns.index(x))
@@ -1954,7 +1956,7 @@ def _convert_data(self, data):

     def _to_recarray(self, data, columns):
         dtypes = []
-        o = OrderedDict()
+        o = compat.OrderedDict()

         # use the columns to "order" the keys
         # in the unordered 'data' dictionary
@@ -2439,7 +2441,7 @@ def converter(*date_cols):

             try:
                 return tools._to_datetime(
-                    com._ensure_object(strs),
+                    _ensure_object(strs),
                     utc=None,
                     box=False,
                     dayfirst=dayfirst,
@@ -2492,7 +2494,7 @@ def _isindex(colspec):
     if isinstance(parse_spec, list):
         # list of column lists
         for colspec in parse_spec:
-            if lib.isscalar(colspec):
+            if is_scalar(colspec):
                 if isinstance(colspec, int) and colspec not in data_dict:
                     colspec = orig_names[colspec]
                 if _isindex(colspec):
@@ -2569,7 +2571,7 @@ def _clean_na_values(na_values, keep_default_na=True):
                 (k, _floatify_na_values(v)) for k, v in na_values.items()  # noqa
             ])
     else:
-        if not com.is_list_like(na_values):
+        if not is_list_like(na_values):
             na_values = [na_values]
         na_values = _stringify_na_values(na_values)
         if keep_default_na:
@@ -2622,7 +2624,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
     if not isinstance(dtype, dict):
         dtype = defaultdict(lambda: dtype)
         # Convert column indexes to column names.
-        dtype = dict((columns[k] if com.is_integer(k) else k, v)
+        dtype = dict((columns[k] if is_integer(k) else k, v)
                      for k, v in compat.iteritems(dtype))

     if index_col is None or index_col is False:
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py
index c19dae7f3545e..2358c296f782e 100644
--- a/pandas/io/pickle.py
+++ b/pandas/io/pickle.py
@@ -3,7 +3,7 @@
 import numpy as np
 from numpy.lib.format import read_array, write_array
 from pandas.compat import BytesIO, cPickle as pkl, pickle_compat as pc, PY3
-import pandas.core.common as com
+from pandas.types.common import is_datetime64_dtype, _NS_DTYPE


 def to_pickle(obj, path):
@@ -86,7 +86,7 @@ def _unpickle_array(bytes):
     # All datetimes should be stored as M8[ns].  When unpickling with
     # numpy1.6, it will read these as M8[us].  So this ensures all
     # datetime64 types are read as MS[ns]
-    if com.is_datetime64_dtype(arr):
-        arr = arr.view(com._NS_DTYPE)
+    if is_datetime64_dtype(arr):
+        arr = arr.view(_NS_DTYPE)

     return arr
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index d4ca717ddbc4e..038ca7ac7775b 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -12,11 +12,21 @@
 import warnings
 import os

+from pandas.types.common import (is_list_like,
+                                 is_categorical_dtype,
+                                 is_timedelta64_dtype,
+                                 is_datetime64tz_dtype,
+                                 is_datetime64_dtype,
+                                 _ensure_object,
+                                 _ensure_int64,
+                                 _ensure_platform_int)
+from pandas.types.missing import array_equivalent
+
 import numpy as np

 import pandas as pd
 from pandas import (Series, DataFrame, Panel, Panel4D, Index,
-                    MultiIndex, Int64Index)
+                    MultiIndex, Int64Index, isnull)
 from pandas.core import config
 from pandas.io.common import _stringify_path
 from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
@@ -32,7 +42,6 @@
                                  _block2d_to_blocknd, _factor_indexer,
                                  _block_shape)
 from pandas.core.index import _ensure_index
-import pandas.core.common as com
 from pandas.tools.merge import concat
 from pandas import compat
 from pandas.compat import u_safe as u, PY3, range, lrange, string_types, filter
@@ -1677,7 +1686,7 @@ def validate_metadata(self, handler):
             new_metadata = self.metadata
             cur_metadata = handler.read_metadata(self.cname)
             if new_metadata is not None and cur_metadata is not None \
-                    and not com.array_equivalent(new_metadata, cur_metadata):
+                    and not array_equivalent(new_metadata, cur_metadata):
                 raise ValueError("cannot append a categorical with "
                                  "different categories to the existing")
@@ -2566,7 +2575,7 @@ def write_array(self, key, value, items=None):
         empty_array = self._is_empty_array(value.shape)
         transposed = False

-        if com.is_categorical_dtype(value):
+        if is_categorical_dtype(value):
             raise NotImplementedError('Cannot store a category dtype in '
                                       'a HDF5 dataset that uses format='
                                       '"fixed". Use format="table".')
@@ -2621,12 +2630,12 @@ def write_array(self, key, value, items=None):
         if empty_array:
             self.write_array_empty(key, value)
         else:
-            if com.is_datetime64_dtype(value.dtype):
+            if is_datetime64_dtype(value.dtype):
                 self._handle.create_array(
                     self.group, key, value.view('i8'))
                 getattr(
                     self.group, key)._v_attrs.value_type = 'datetime64'
-            elif com.is_datetime64tz_dtype(value.dtype):
+            elif is_datetime64tz_dtype(value.dtype):
                 # store as UTC
                 # with a zone
                 self._handle.create_array(self.group, key,
@@ -2635,7 +2644,7 @@ def write_array(self, key, value, items=None):
                 node = getattr(self.group, key)
                 node._v_attrs.tz = _get_tz(value.tz)
                 node._v_attrs.value_type = 'datetime64'
-            elif com.is_timedelta64_dtype(value.dtype):
+            elif is_timedelta64_dtype(value.dtype):
                 self._handle.create_array(
                     self.group, key, value.view('i8'))
                 getattr(
@@ -3756,8 +3765,8 @@ def read(self, where=None, columns=None, **kwargs):

             if len(unique(key)) == len(key):
                 sorter, _ = algos.groupsort_indexer(
-                    com._ensure_int64(key), np.prod(N))
-                sorter = com._ensure_platform_int(sorter)
+                    _ensure_int64(key), np.prod(N))
+                sorter = _ensure_platform_int(sorter)

                 # create the objs
                 for c in self.values_axes:
@@ -3802,7 +3811,7 @@ def read(self, where=None, columns=None, **kwargs):
                 unique_tuples = _asarray_tuplesafe(unique_tuples)

                 indexer = match(unique_tuples, tuple_index)
-                indexer = com._ensure_platform_int(indexer)
+                indexer = _ensure_platform_int(indexer)

                 new_index = long_index.take(indexer)
                 new_values = lp.values.take(indexer, axis=0)
@@ -3903,7 +3912,7 @@ def write_data(self, chunksize, dropna=False):

             # figure the mask: only do if we can successfully process this
             # column, otherwise ignore the mask
-            mask = com.isnull(a.data).all(axis=0)
+            mask = isnull(a.data).all(axis=0)
             if isinstance(mask, np.ndarray):
                 masks.append(mask.astype('u1', copy=False))
@@ -4522,7 +4531,7 @@ def _convert_string_array(data, encoding, itemsize=None):

     # create the sized dtype
     if itemsize is None:
-        itemsize = lib.max_len_string_array(com._ensure_object(data.ravel()))
+        itemsize = lib.max_len_string_array(_ensure_object(data.ravel()))

     data = np.asarray(data, dtype="S%d" % itemsize)
     return data
@@ -4551,7 +4560,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
     encoding = _ensure_encoding(encoding)
     if encoding is not None and len(data):

-        itemsize = lib.max_len_string_array(com._ensure_object(data))
+        itemsize = lib.max_len_string_array(_ensure_object(data))
         if compat.PY3:
             dtype = "U{0}".format(itemsize)
         else:
@@ -4619,7 +4628,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs):
         self.terms = None
         self.coordinates = None

-        if com.is_list_like(where):
+        if is_list_like(where):

             # see if we have a passed coordinate like
             try:
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 324988360c9fe..8485a3f13f047 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -13,13 +13,15 @@
 import numpy as np

 import pandas.lib as lib
-import pandas.core.common as com
+from pandas.types.missing import isnull
+from pandas.types.dtypes import DatetimeTZDtype
+from pandas.types.common import (is_list_like,
+                                 is_datetime64tz_dtype)
+
 from pandas.compat import (lzip, map, zip, raise_with_traceback,
                            string_types, text_type)
 from pandas.core.api import DataFrame, Series
-from pandas.core.common import isnull
 from pandas.core.base import PandasObject
-from pandas.types.api import DatetimeTZDtype
 from pandas.tseries.tools import to_datetime

 from contextlib import contextmanager
@@ -90,7 +92,7 @@ def _handle_date_column(col, format=None):
             # parse dates as timestamp
             format = 's' if format is None else format
             return to_datetime(col, errors='coerce', unit=format, utc=True)
-        elif com.is_datetime64tz_dtype(col):
+        elif is_datetime64tz_dtype(col):
             # coerce to UTC timezone
             # GH11216
             return (to_datetime(col, errors='coerce')
@@ -123,7 +125,7 @@ def _parse_date_columns(data_frame, parse_dates):
     # we could in theory do a 'nice' conversion from a FixedOffset tz
     # GH11216
     for col_name, df_col in data_frame.iteritems():
-        if com.is_datetime64tz_dtype(df_col):
+        if is_datetime64tz_dtype(df_col):
             data_frame[col_name] = _handle_date_column(df_col)

     return data_frame
@@ -876,7 +878,7 @@ def _create_table_setup(self):
                    for name, typ, is_index in column_names_and_types]

         if self.keys is not None:
-            if not com.is_list_like(self.keys):
+            if not is_list_like(self.keys):
                 keys = [self.keys]
             else:
                 keys = self.keys
@@ -1465,7 +1467,7 @@ def _create_table_setup(self):
                        for cname, ctype, _ in column_names_and_types]

         if self.keys is not None and len(self.keys):
-            if not com.is_list_like(self.keys):
+            if not is_list_like(self.keys):
                 keys = [self.keys]
             else:
                 keys = self.keys
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index c7390cf240f8a..bd19102c7f18c 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -14,6 +14,10 @@
 import sys
 import struct
 from dateutil.relativedelta import relativedelta
+
+from pandas.types.common import (is_categorical_dtype, is_datetime64_dtype,
+                                 _ensure_object)
+
 from pandas.core.base import StringMixin
 from pandas.core.categorical import Categorical
 from pandas.core.frame import DataFrame
@@ -24,7 +28,7 @@
     zip, BytesIO
 from pandas.util.decorators import Appender
 import pandas as pd
-import pandas.core.common as com
+
 from pandas.io.common import get_filepath_or_buffer, BaseIterator
 from pandas.lib import max_len_string_array, infer_dtype
 from pandas.tslib import NaT, Timestamp
@@ -358,7 +362,7 @@ def _datetime_to_stata_elapsed_vec(dates, fmt):

     def parse_dates_safe(dates, delta=False, year=False, days=False):
         d = {}
-        if com.is_datetime64_dtype(dates.values):
+        if is_datetime64_dtype(dates.values):
             if delta:
                 delta = dates - stata_epoch
                 d['delta'] = delta.values.astype(
@@ -396,7 +400,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False):
         index = dates.index
         if bad_loc.any():
             dates = Series(dates)
-            if com.is_datetime64_dtype(dates):
+            if is_datetime64_dtype(dates):
                 dates[bad_loc] = to_datetime(stata_epoch)
             else:
                 dates[bad_loc] = stata_epoch
@@ -1746,7 +1750,7 @@ def _dtype_to_stata_type(dtype, column):
     elif dtype.type == np.object_:  # try to coerce it to the biggest string
                                     # not memory efficient, what else could we
                                     # do?
-        itemsize = max_len_string_array(com._ensure_object(column.values))
+        itemsize = max_len_string_array(_ensure_object(column.values))
         return chr(max(itemsize, 1))
     elif dtype == np.float64:
         return chr(255)
@@ -1784,7 +1788,7 @@ def _dtype_to_default_stata_fmt(dtype, column):
         if not (inferred_dtype in ('string', 'unicode') or
                 len(column) == 0):
             raise ValueError('Writing general object arrays is not supported')
-        itemsize = max_len_string_array(com._ensure_object(column.values))
+        itemsize = max_len_string_array(_ensure_object(column.values))
         if itemsize > 244:
             raise ValueError(excessive_string_length_error % column.name)
         return "%" + str(max(itemsize, 1)) + "s"
@@ -1880,7 +1884,7 @@ def _prepare_categoricals(self, data):
         """Check for categorical columns, retain categorical information for
        Stata file and convert categorical data to int"""

-        is_cat = [com.is_categorical_dtype(data[col]) for col in data]
+        is_cat = [is_categorical_dtype(data[col]) for col in data]
         self._is_col_cat = is_cat
         self._value_labels = []
         if not any(is_cat):
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 9a995c17f0445..e5a49c5213a48 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -31,11 +31,12 @@

 from datetime import datetime, date, time

+from pandas.types.common import (is_object_dtype, is_datetime64_dtype,
+                                 is_datetime64tz_dtype)
 from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat
 from pandas import date_range, to_datetime, to_timedelta, Timestamp
 import pandas.compat as compat
 from pandas.compat import StringIO, range, lrange, string_types
-from pandas.core import common as com
 from pandas.core.datetools import format as date_format

 import pandas.io.sql as sql
@@ -1275,7 +1276,7 @@ def test_datetime_with_timezone(self):
         def check(col):
             # check that a column is either datetime64[ns]
             # or datetime64[ns, UTC]
-            if com.is_datetime64_dtype(col.dtype):
+            if is_datetime64_dtype(col.dtype):

                 # "2000-01-01 00:00:00-08:00" should convert to
                 # "2000-01-01 08:00:00"
@@ -1285,7 +1286,7 @@ def check(col):
                 # "2000-06-01 07:00:00"
                 self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00'))

-            elif com.is_datetime64tz_dtype(col.dtype):
+            elif is_datetime64tz_dtype(col.dtype):
                 self.assertTrue(str(col.dt.tz) == 'UTC')

                 # "2000-01-01 00:00:00-08:00" should convert to
@@ -1311,9 +1312,9 @@ def check(col):
         # even with the same versions of psycopg2 & sqlalchemy, possibly a
         # Postgrsql server version difference
         col = df.DateColWithTz
-        self.assertTrue(com.is_object_dtype(col.dtype) or
-                        com.is_datetime64_dtype(col.dtype) or
-                        com.is_datetime64tz_dtype(col.dtype),
+        self.assertTrue(is_object_dtype(col.dtype) or
+                        is_datetime64_dtype(col.dtype) or
+                        is_datetime64tz_dtype(col.dtype),
                         "DateCol loaded with incorrect type -> {0}"
                         .format(col.dtype))
@@ -1327,7 +1328,7 @@ def check(col):
                                  self.conn, chunksize=1)),
                        ignore_index=True)
         col = df.DateColWithTz
-        self.assertTrue(com.is_datetime64tz_dtype(col.dtype),
+        self.assertTrue(is_datetime64tz_dtype(col.dtype),
                         "DateCol loaded with incorrect type -> {0}"
                         .format(col.dtype))
         self.assertTrue(str(col.dt.tz) == 'UTC')
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
index 830c68d62efad..5f45d1b547e62 100644
--- a/pandas/io/tests/test_stata.py
+++ b/pandas/io/tests/test_stata.py
@@ -15,7 +15,7 @@
 import pandas as pd
 from pandas.compat import iterkeys
 from pandas.core.frame import DataFrame, Series
-from pandas.core.common import is_categorical_dtype
+from pandas.types.common import is_categorical_dtype
 from pandas.io.parsers import read_csv
 from pandas.io.stata import (read_stata, StataReader, InvalidColumnName,
                              PossiblePrecisionLoss, StataMissingValue)
diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py
index 0312fb023f7fd..35233d1b6ba94 100644
--- a/pandas/sparse/array.py
+++ b/pandas/sparse/array.py
@@ -15,6 +15,14 @@
 from pandas.compat import range
 from pandas.compat.numpy import function as nv

+from pandas.types.generic import ABCSparseArray, ABCSparseSeries
+from pandas.types.common import (is_float, is_integer,
+                                 is_integer_dtype, _ensure_platform_int,
+                                 is_list_like,
+                                 is_scalar)
+from pandas.types.cast import _possibly_convert_platform
+from pandas.types.missing import isnull, notnull
+
 from pandas._sparse import SparseIndex, BlockIndex, IntIndex
 import pandas._sparse as splib
 import pandas.index as _index
@@ -40,13 +48,13 @@ def wrapper(self, other):
             if len(self) != len(other):
                 raise AssertionError("length mismatch: %d vs. %d" %
                                      (len(self), len(other)))
-            if not isinstance(other, com.ABCSparseArray):
+            if not isinstance(other, ABCSparseArray):
                 other = SparseArray(other, fill_value=self.fill_value)
             if name[0] == 'r':
                 return _sparse_array_op(other, self, op, name[1:])
             else:
                 return _sparse_array_op(self, other, op, name)
-        elif lib.isscalar(other):
+        elif is_scalar(other):
             new_fill_value = op(np.float64(self.fill_value),
                                 np.float64(other))
             return _wrap_result(name, op(self.sp_values, other),
@@ -120,7 +128,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
         if index is not None:
             if data is None:
                 data = np.nan
-            if not lib.isscalar(data):
+            if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
             values = np.empty(len(index), dtype='float64')
             values.fill(data)
@@ -177,7 +185,7 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',

     @classmethod
     def _simple_new(cls, data, sp_index, fill_value):
-        if (com.is_integer_dtype(data) and com.is_float(fill_value) and
+        if (is_integer_dtype(data) and is_float(fill_value) and
                 sp_index.ngaps > 0):
             # if float fill_value is being included in dense repr,
             # convert values to float
@@ -288,7 +296,7 @@ def __getitem__(self, key):
         """

         """
-        if com.is_integer(key):
+        if is_integer(key):
             return self._get_val_at(key)
         elif isinstance(key, tuple):
             data_slice = self.values[key]
@@ -340,11 +348,11 @@ def take(self, indices, axis=0, allow_fill=True,
         if axis:
             raise ValueError("axis must be 0, input was {0}".format(axis))

-        if com.is_integer(indices):
+        if is_integer(indices):
             # return scalar
             return self[indices]

-        indices = com._ensure_platform_int(indices)
+        indices = _ensure_platform_int(indices)
         n = len(self)
         if allow_fill and fill_value is not None:
             # allow -1 to indicate self.fill_value,
@@ -380,7 +388,7 @@ def take(self, indices, axis=0, allow_fill=True,
         return self._simple_new(new_values, sp_index, self.fill_value)

     def __setitem__(self, key, value):
-        # if com.is_integer(key):
+        # if is_integer(key):
         #    self.values[key] = value
         # else:
         #    raise Exception("SparseArray does not support seting non-scalars
@@ -395,7 +403,7 @@ def __setslice__(self, i, j, value):
             j = 0
         slobj = slice(i, j)  # noqa

-        # if not lib.isscalar(value):
+        # if not is_scalar(value):
         #    raise Exception("SparseArray does not support seting non-scalars
         #    via slices")
@@ -445,12 +453,12 @@ def count(self):

     @property
     def _null_fill_value(self):
-        return com.isnull(self.fill_value)
+        return isnull(self.fill_value)

     @property
     def _valid_sp_values(self):
         sp_vals = self.sp_values
-        mask = com.notnull(sp_vals)
+        mask = notnull(sp_vals)
         return sp_vals[mask]

     @Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs)
@@ -466,7 +474,7 @@ def fillna(self, value, downcast=None):
                                     fill_value=value)
         else:
             new_values = self.sp_values.copy()
-            new_values[com.isnull(new_values)] = value
+            new_values[isnull(new_values)] = value
             return self._simple_new(new_values, self.sp_index,
                                     fill_value=self.fill_value)
@@ -498,7 +506,7 @@ def cumsum(self, axis=0, *args, **kwargs):
         nv.validate_cumsum(args, kwargs)

         # TODO: gh-12855 - return a SparseArray here
-        if com.notnull(self.fill_value):
+        if notnull(self.fill_value):
             return self.to_dense().cumsum()

         # TODO: what if sp_values contains NaN??
@@ -569,7 +577,7 @@ def _maybe_to_dense(obj):


 def _maybe_to_sparse(array):
-    if isinstance(array, com.ABCSparseSeries):
+    if isinstance(array, ABCSparseSeries):
         array = SparseArray(array.values, sparse_index=array.sp_index,
                             fill_value=array.fill_value, copy=True)
     if not isinstance(array, SparseArray):
@@ -588,15 +596,15 @@ def _sanitize_values(arr):
     else:

         # scalar
-        if lib.isscalar(arr):
+        if is_scalar(arr):
             arr = [arr]

         # ndarray
         if isinstance(arr, np.ndarray):
             pass

-        elif com.is_list_like(arr) and len(arr) > 0:
-            arr = com._possibly_convert_platform(arr)
+        elif is_list_like(arr) and len(arr) > 0:
+            arr = _possibly_convert_platform(arr)

         else:
             arr = np.asarray(arr)
@@ -624,8 +632,8 @@ def make_sparse(arr, kind='block', fill_value=nan):
     if arr.ndim > 1:
         raise TypeError("expected dimension <= 1 data")

-    if com.isnull(fill_value):
-        mask = com.notnull(arr)
+    if isnull(fill_value):
+        mask = notnull(arr)
     else:
         mask = arr != fill_value
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index 52a6e6edf0896..811d8019c7fee 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -10,13 +10,15 @@
 from pandas import compat
 import numpy as np

+from pandas.types.missing import isnull, notnull
+from pandas.types.common import _ensure_platform_int
+
+from pandas.core.common import _try_sort
 from pandas.compat.numpy import function as nv
-from pandas.core.common import isnull, _try_sort
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.series import Series
 from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
                                _default_index)
-import pandas.core.common as com
 import pandas.core.algorithms as algos
 from pandas.core.internals import (BlockManager,
                                    create_block_manager_from_arrays)
@@ -520,7 +522,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan,
             return SparseDataFrame(index=index, columns=self.columns)

         indexer = self.index.get_indexer(index, method, limit=limit)
-        indexer = com._ensure_platform_int(indexer)
+        indexer = _ensure_platform_int(indexer)
         mask = indexer == -1
         need_mask = mask.any()
@@ -546,7 +548,7 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None,
         if level is not None:
             raise TypeError('Reindex by level not supported for sparse')

-        if com.notnull(fill_value):
+        if notnull(fill_value):
             raise NotImplementedError("'fill_value' argument is not supported")

         if limit:
diff --git a/pandas/sparse/list.py b/pandas/sparse/list.py
index bc10b73a47723..666dae8071053 100644
--- a/pandas/sparse/list.py
+++ b/pandas/sparse/list.py
@@ -2,9 +2,9 @@

 from pandas.core.base import PandasObject
 from pandas.formats.printing import pprint_thing
+from pandas.types.common import is_scalar
 from pandas.sparse.array import SparseArray
 import pandas._sparse as splib
-import pandas.lib as lib


 class SparseList(PandasObject):
@@ -121,7 +121,7 @@ def append(self, value):
         ----------
         value: scalar or array-like
         """
-        if lib.isscalar(value):
+        if is_scalar(value):
             value = [value]

         sparr = SparseArray(value, fill_value=self.fill_value)
diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
index 88f396d20a91e..0996cd3bd826a 100644
--- a/pandas/sparse/panel.py
+++ b/pandas/sparse/panel.py
@@ -10,6 +10,7 @@
 from pandas import compat
 import numpy as np

+from pandas.types.common import is_list_like, is_scalar
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.frame import DataFrame
 from pandas.core.panel import Panel
@@ -18,7 +19,6 @@

 import pandas.core.common as com
 import pandas.core.ops as ops
-import pandas.lib as lib


 class SparsePanelAxis(object):
@@ -186,7 +186,7 @@ def _ixs(self, i, axis=0):
         key = self._get_axis(axis)[i]

         # xs cannot handle a non-scalar key, so just reindex here
-        if com.is_list_like(key):
+        if is_list_like(key):
             return self.reindex(**{self._get_axis_name(axis): key})

         return self.xs(key, axis=axis)
@@ -393,7 +393,7 @@ def _combine(self, other, func, axis=0):
             return self._combineFrame(other, func, axis=axis)
         elif isinstance(other, Panel):
             return self._combinePanel(other, func)
-        elif lib.isscalar(other):
+        elif is_scalar(other):
             new_frames = dict((k, func(v, other))
                               for k, v in self.iteritems())
             return self._new_like(new_frames)
diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py
index 5c7762c56ec6d..951c2ae0c0d5a 100644
--- a/pandas/sparse/series.py
+++ b/pandas/sparse/series.py
@@ -8,8 +8,11 @@
 import numpy as np
 import warnings

+from pandas.types.missing import isnull
+from pandas.types.common import is_scalar
+from pandas.core.common import _values_from_object, _maybe_match_name
+
 from pandas.compat.numpy import function as nv
-from pandas.core.common import isnull, _values_from_object, _maybe_match_name
 from pandas.core.index import Index, _ensure_index, InvalidIndexError
 from pandas.core.series import Series
 from pandas.core.frame import DataFrame
@@ -18,7 +21,6 @@
 import pandas.core.common as com
 import pandas.core.ops as ops
 import pandas.index as _index
-import pandas.lib as lib
 from pandas.util.decorators import Appender

 from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray,
@@ -54,7 +56,7 @@ def wrapper(self, other):
         return _sparse_series_op(self, other, op, name)
     elif isinstance(other, DataFrame):
         return NotImplemented
-    elif lib.isscalar(other):
+    elif is_scalar(other):
         if isnull(other) or isnull(self.fill_value):
             new_fill_value = np.nan
         else:
diff --git a/pandas/src/testing.pyx b/pandas/src/testing.pyx
index 6780cf311c244..e9563d9168206 100644
--- a/pandas/src/testing.pyx
+++ b/pandas/src/testing.pyx
@@ -1,7 +1,8 @@
 import numpy as np

 from pandas import compat
-from pandas.core.common import isnull, array_equivalent, is_dtype_equal
+from pandas.types.missing import isnull, array_equivalent
+from pandas.types.common import is_dtype_equal

 cdef NUMERIC_TYPES = (
     bool,
@@ -145,8 +146,15 @@ cpdef assert_almost_equal(a, b,

         if na != nb:
             from pandas.util.testing import raise_assert_detail
+
+            # if we have a small diff set, print it
+            if abs(na-nb) < 10:
+                r = list(set(a) ^ set(b))
+            else:
+                r = None
+
             raise_assert_detail(obj, '{0} length are different'.format(obj),
-                                na, nb)
+                                na, nb, r)

         for i in xrange(len(a)):
             try:
diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index 46d30ab7fe313..bb475e47206c2 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -6,7 +6,7 @@
 import warnings
 import numpy as np

-from pandas import lib
+from pandas.types.common import is_scalar
 from pandas.core.api import DataFrame, Series
 from pandas.util.decorators import Substitution, Appender
@@ -226,7 +226,7 @@ def ensure_compat(dispatch, name, arg, func_kw=None, *args, **kwargs):
             aargs += ','

         def f(a, b):
-            if lib.isscalar(b):
+            if is_scalar(b):
                 return "{a}={b}".format(a=a, b=b)
             return "{a}=<{b}>".format(a=a, b=type(b).__name__)
         aargs = ','.join([f(a, b) for a, b in kwds.items() if b is not None])
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
index 678689f2d2b30..b533d255bd196 100644
--- a/pandas/stats/ols.py
+++ b/pandas/stats/ols.py
@@ -13,7 +13,7 @@

 from pandas.core.api import DataFrame, Series, isnull
 from pandas.core.base import StringMixin
-from pandas.core.common import _ensure_float64
+from pandas.types.common import _ensure_float64
 from pandas.core.index import MultiIndex
 from pandas.core.panel import Panel
 from pandas.util.decorators import cache_readonly
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
index 2b619b84a5994..020b7f1f1ab9d 100644
--- a/pandas/tests/frame/test_apply.py
+++ b/pandas/tests/frame/test_apply.py
@@ -10,7 +10,7 @@
 from pandas import (notnull, DataFrame, Series, MultiIndex, date_range,
                     Timestamp, compat)
 import pandas as pd
-import pandas.core.common as com
+from pandas.types.dtypes import CategoricalDtype
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal)
 import pandas.util.testing as tm
@@ -45,8 +45,8 @@ def test_apply(self):
                            'c1': ['C', 'C', 'D', 'D']})
         df = df.apply(lambda ts: ts.astype('category'))
         self.assertEqual(df.shape, (4, 2))
-        self.assertTrue(isinstance(df['c0'].dtype, com.CategoricalDtype))
-        self.assertTrue(isinstance(df['c1'].dtype, com.CategoricalDtype))
+        self.assertTrue(isinstance(df['c0'].dtype, CategoricalDtype))
+        self.assertTrue(isinstance(df['c1'].dtype, CategoricalDtype))

     def test_apply_mixed_datetimelike(self):
         # mixed datetimelike
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index b42aef9447373..d21db5ba52a45 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -14,6 +14,7 @@
 import numpy.ma as ma
 import numpy.ma.mrecords as mrecords

+from pandas.types.common import is_integer_dtype
 from pandas.compat import (lmap, long, zip, range, lrange, lzip,
                            OrderedDict, is_platform_little_endian)
 from pandas import compat
@@ -809,7 +810,7 @@ def test_constructor_list_of_lists(self):
         # GH #484
         l = [[1, 'a'], [2, 'b']]
         df = DataFrame(data=l, columns=["num", "str"])
-        self.assertTrue(com.is_integer_dtype(df['num']))
+        self.assertTrue(is_integer_dtype(df['num']))
         self.assertEqual(df['str'].dtype, np.object_)

         # GH 4851
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
index 5f95ff6b6b601..c650436eefaf3 100644
--- a/pandas/tests/frame/test_dtypes.py
+++ b/pandas/tests/frame/test_dtypes.py
@@ -1,15 +1,13 @@
 # -*- coding: utf-8 -*-

 from __future__ import print_function
-
 from datetime import timedelta

 import numpy as np
-
 from pandas import (DataFrame, Series, date_range, Timedelta, Timestamp,
                     compat, option_context)
 from pandas.compat import u
-from pandas.core import common as com
+from pandas.types.dtypes import DatetimeTZDtype
 from pandas.tests.frame.common import TestData
 from pandas.util.testing import (assert_series_equal,
                                  assert_frame_equal,
@@ -84,8 +82,8 @@ def test_datetime_with_tz_dtypes(self):
         tzframe.iloc[1, 2] = pd.NaT
         result = tzframe.dtypes.sort_index()
         expected = Series([np.dtype('datetime64[ns]'),
-                           com.DatetimeTZDtype('datetime64[ns, US/Eastern]'),
-                           com.DatetimeTZDtype('datetime64[ns, CET]')],
+                           DatetimeTZDtype('datetime64[ns, US/Eastern]'),
+                           DatetimeTZDtype('datetime64[ns, CET]')],
                           ['A', 'B', 'C'])
         assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py
index d7fed8131a4f4..578df5ba9101e 100644
--- a/pandas/tests/frame/test_indexing.py
+++ b/pandas/tests/frame/test_indexing.py
@@ -17,6 +17,9 @@
                     date_range)
 import pandas as pd

+from pandas.types.common import (is_float_dtype,
+                                 is_integer,
+                                 is_scalar)
 from pandas.util.testing import (assert_almost_equal,
                                  assert_numpy_array_equal,
                                  assert_series_equal,
@@ -26,7 +29,6 @@

 from pandas.core.indexing import IndexingError
 import pandas.util.testing as tm
-import pandas.lib as lib

 from pandas.tests.frame.common import TestData
@@ -1419,15 +1421,15 @@ def test_setitem_single_column_mixed_datetime(self):
         # set an allowable datetime64 type
         from pandas import tslib
         df.ix['b', 'timestamp'] = tslib.iNaT
-        self.assertTrue(com.isnull(df.ix['b', 'timestamp']))
+        self.assertTrue(isnull(df.ix['b', 'timestamp']))

         # allow this syntax
         df.ix['c', 'timestamp'] = nan
-        self.assertTrue(com.isnull(df.ix['c', 'timestamp']))
+        self.assertTrue(isnull(df.ix['c', 'timestamp']))

         # allow this syntax
         df.ix['d', :] = nan
-        self.assertTrue(com.isnull(df.ix['c', :]).all() == False)  # noqa
+        self.assertTrue(isnull(df.ix['c', :]).all() == False)  # noqa

         # as of GH 3216 this will now work!
         # try to set with a list like item
@@ -1619,7 +1621,7 @@ def test_set_value_resize(self):

         res = self.frame.copy()
         res3 = res.set_value('foobar', 'baz', 5)
-        self.assertTrue(com.is_float_dtype(res3['baz']))
+        self.assertTrue(is_float_dtype(res3['baz']))
         self.assertTrue(isnull(res3['baz'].drop(['foobar'])).all())
         self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')
@@ -1662,7 +1664,7 @@ def test_single_element_ix_dont_upcast(self):
                                    (int, np.integer)))

         result = self.frame.ix[self.frame.index[5], 'E']
-        self.assertTrue(com.is_integer(result))
+        self.assertTrue(is_integer(result))

     def test_irow(self):
         df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))
@@ -2268,7 +2270,7 @@ def _check_align(df, cond, other, check_dtypes=True):
                 d = df[k].values
                 c = cond[k].reindex(df[k].index).fillna(False).values

-                if lib.isscalar(other):
+                if is_scalar(other):
                     o = other
                 else:
                     if isinstance(other, np.ndarray):
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index a6246790f83cb..44c7f2277293d 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -6,6 +6,9 @@
 import warnings
 from datetime import datetime

+from pandas.types.common import (is_integer_dtype,
+                                 is_float_dtype,
+                                 is_scalar)
 from pandas.compat import range, lrange, lzip, StringIO, lmap, map
 from pandas.tslib import NaT
 from numpy import nan
@@ -22,7 +25,7 @@
                                  assert_frame_equal, assert_panel_equal,
                                  assert_attr_equal, slow)
 from pandas.formats.printing import pprint_thing
-from pandas import concat, lib
+from pandas import concat
 from pandas.core.common import PerformanceWarning

 import pandas.util.testing as tm
@@ -200,7 +203,7 @@ def _print(result, error=None):
                 return

             try:
-                if lib.isscalar(rs) and lib.isscalar(xp):
+                if is_scalar(rs) and is_scalar(xp):
                     self.assertEqual(rs, xp)
                 elif xp.ndim == 1:
                     assert_series_equal(rs, xp)
@@ -775,7 +778,7 @@ def test_ix_loc_consistency(self):

         # this is not an exhaustive case
         def compare(result, expected):
-            if lib.isscalar(expected):
+            if is_scalar(expected):
                 self.assertEqual(result, expected)
             else:
                 self.assertTrue(expected.equals(result))
@@ -2888,8 +2891,8 @@ def test_setitem_dtype_upcast(self):
                              columns=['foo', 'bar', 'baz'])
         assert_frame_equal(left, right)
-        self.assertTrue(com.is_integer_dtype(left['foo']))
-        self.assertTrue(com.is_integer_dtype(left['baz']))
+        self.assertTrue(is_integer_dtype(left['foo']))
+        self.assertTrue(is_integer_dtype(left['baz']))

         left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0,
                          index=list('ab'),
@@ -2900,8 +2903,8 @@ def test_setitem_dtype_upcast(self):
                              columns=['foo', 'bar', 'baz'])
         assert_frame_equal(left, right)
-        self.assertTrue(com.is_float_dtype(left['foo']))
-        self.assertTrue(com.is_float_dtype(left['baz']))
+        self.assertTrue(is_float_dtype(left['foo']))
+        self.assertTrue(is_float_dtype(left['baz']))

     def test_setitem_iloc(self):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 2a7e8a957977f..b7ec4d570f18b 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -8,10 +8,11 @@
 import numpy.ma as ma

 import pandas as pd
+from pandas.types.common import is_categorical_dtype, is_datetime64tz_dtype
 from pandas import Index, Series, isnull, date_range, period_range
 from pandas.core.index import MultiIndex
 from pandas.tseries.index import Timestamp, DatetimeIndex
-import pandas.core.common as com
+
 import pandas.lib as lib
 from pandas.compat import lrange, range, zip, OrderedDict, long
@@ -144,11 +145,11 @@ def test_constructor_categorical(self):
             ValueError, lambda: Series(pd.Categorical([1, 2, 3]),
                                        dtype='int64'))
         cat = Series(pd.Categorical([1, 2, 3]), dtype='category')
-        self.assertTrue(com.is_categorical_dtype(cat))
-        self.assertTrue(com.is_categorical_dtype(cat.dtype))
+        self.assertTrue(is_categorical_dtype(cat))
+        self.assertTrue(is_categorical_dtype(cat.dtype))
         s = Series([1, 2, 3], dtype='category')
-        self.assertTrue(com.is_categorical_dtype(s))
-        self.assertTrue(com.is_categorical_dtype(s.dtype))
+        self.assertTrue(is_categorical_dtype(s))
+        self.assertTrue(is_categorical_dtype(s.dtype))

     def test_constructor_maskedarray(self):
         data = ma.masked_all((3, ), dtype=float)
@@ -429,7 +430,7 @@ def test_constructor_with_datetime_tz(self):
         s = Series(dr)
         self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]')
         self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]')
-        self.assertTrue(com.is_datetime64tz_dtype(s.dtype))
+        self.assertTrue(is_datetime64tz_dtype(s.dtype))
         self.assertTrue('datetime64[ns, US/Eastern]' in str(s))

         # export
diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py
index 6e82f81f901a9..c25895548dcb9 100644
--- a/pandas/tests/series/test_datetime_values.py
+++ b/pandas/tests/series/test_datetime_values.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pandas as pd

+from pandas.types.common import is_integer_dtype, is_list_like
 from pandas import (Index, Series, DataFrame, bdate_range,
                     date_range, period_range, timedelta_range)
 from pandas.tseries.period import PeriodIndex
@@ -49,16 +50,16 @@ def test_dt_namespace_accessor(self):
         def get_expected(s, name):
             result = getattr(Index(s._values), prop)
             if isinstance(result, np.ndarray):
-                if com.is_integer_dtype(result):
+                if is_integer_dtype(result):
                     result = result.astype('int64')
-            elif not com.is_list_like(result):
+            elif not is_list_like(result):
                 return result
             return Series(result, index=s.index, name=s.name)

         def compare(s, name):
             a = getattr(s.dt, prop)
             b = get_expected(s, prop)
-            if not (com.is_list_like(a) and com.is_list_like(b)):
+            if not (is_list_like(a) and is_list_like(b)):
                 self.assertEqual(a, b)
             else:
                 tm.assert_series_equal(a, b)
diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py
index 15ca238ee32a0..64ebaa63cc10f 100644
--- a/pandas/tests/series/test_indexing.py
+++ b/pandas/tests/series/test_indexing.py
@@ -7,16 +7,14 @@
 import numpy as np
 import pandas as pd

+from pandas.types.common import is_integer, is_scalar
 from pandas import Index, Series, DataFrame, isnull, date_range
 from pandas.core.index import MultiIndex
 from pandas.core.indexing import IndexingError
 from pandas.tseries.index import Timestamp
 from pandas.tseries.tdi import Timedelta

-import pandas.core.common as com
 import pandas.core.datetools as datetools
-import pandas.lib as lib
-
 from pandas.compat import lrange, range
 from pandas import compat
 from pandas.util.testing import assert_series_equal, assert_almost_equal
@@ -375,7 +373,7 @@ def test_getitem_ambiguous_keyerror(self):

     def test_getitem_unordered_dup(self):
         obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b'])
-        self.assertTrue(lib.isscalar(obj['c']))
+        self.assertTrue(is_scalar(obj['c']))
         self.assertEqual(obj['c'], 0)

     def test_getitem_dups_with_missing(self):
@@ -1174,23 +1172,23 @@ def test_where_numeric_with_string(self):
         s = pd.Series([1, 2, 3])
         w = s.where(s > 1, 'X')

-        self.assertFalse(com.is_integer(w[0]))
-        self.assertTrue(com.is_integer(w[1]))
-        self.assertTrue(com.is_integer(w[2]))
+        self.assertFalse(is_integer(w[0]))
+        self.assertTrue(is_integer(w[1]))
+        self.assertTrue(is_integer(w[2]))
         self.assertTrue(isinstance(w[0], str))
         self.assertTrue(w.dtype == 'object')

         w = s.where(s > 1, ['X', 'Y', 'Z'])
-        self.assertFalse(com.is_integer(w[0]))
-        self.assertTrue(com.is_integer(w[1]))
-        self.assertTrue(com.is_integer(w[2]))
+        self.assertFalse(is_integer(w[0]))
+        self.assertTrue(is_integer(w[1]))
+        self.assertTrue(is_integer(w[2]))
         self.assertTrue(isinstance(w[0], str))
         self.assertTrue(w.dtype == 'object')

         w = s.where(s > 1, np.array(['X', 'Y', 'Z']))
-        self.assertFalse(com.is_integer(w[0]))
-        self.assertTrue(com.is_integer(w[1]))
-        self.assertTrue(com.is_integer(w[2]))
+        self.assertFalse(is_integer(w[0]))
+        self.assertTrue(is_integer(w[1]))
+        self.assertTrue(is_integer(w[2]))
         self.assertTrue(isinstance(w[0], str))
         self.assertTrue(w.dtype == 'object')
diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py
index e0bff7fbd39e4..7d2517987e526 100644
--- a/pandas/tests/series/test_quantile.py
+++ b/pandas/tests/series/test_quantile.py
@@ -7,7 +7,7 @@

 from pandas import (Index, Series, _np_version_under1p9)
 from pandas.tseries.index import Timestamp
-import pandas.core.common as com
+from pandas.types.common import is_integer
 import pandas.util.testing as tm

 from .common import TestData
@@ -96,11 +96,11 @@ def test_quantile_interpolation_dtype(self):
         # interpolation = linear (default case)
         q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
         self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
-        self.assertTrue(com.is_integer(q))
+        self.assertTrue(is_integer(q))

         q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
         self.assertEqual(q, percentile(np.array([1, 3, 4]), 50))
-        self.assertTrue(com.is_integer(q))
+        self.assertTrue(is_integer(q))

     def test_quantile_interpolation_np_lt_1p9(self):
         # GH #10174
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index 77ae3ca20d123..2721d8d0e5e69 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -9,7 +9,7 @@

 import pandas as pd
 import pandas.compat as compat
-import pandas.core.common as com
+from pandas.types.common import is_object_dtype, is_datetimetz
 import pandas.util.testing as tm
 from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex,
                     Timedelta)
@@ -517,7 +517,7 @@ def test_value_counts_unique_nunique(self):
                 continue

             # special assign to the numpy array
-            if com.is_datetimetz(o):
+            if is_datetimetz(o):
                 if isinstance(o, DatetimeIndex):
                     v = o.asi8
                     v[0:2] = pd.tslib.iNaT
@@ -982,8 +982,8 @@ def test_memory_usage(self):
             res = o.memory_usage()
             res_deep = o.memory_usage(deep=True)

-            if (com.is_object_dtype(o) or (isinstance(o, Series) and
-                                           com.is_object_dtype(o.index))):
+            if (is_object_dtype(o) or (isinstance(o, Series) and
+                                       is_object_dtype(o.index))):
                 # if there are objects, only deep will pick them up
                 self.assertTrue(res_deep > res)
             else:
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 90876a4541da6..2ca1fc71df20a 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -8,12 +8,17 @@

 import numpy as np

+from pandas.types.dtypes import CategoricalDtype
+from pandas.types.common import (is_categorical_dtype,
+                                 is_object_dtype,
+                                 is_float_dtype,
+                                 is_integer_dtype)
+
 import pandas as pd
 import pandas.compat as compat
-import pandas.core.common as com
 import pandas.util.testing as tm
 from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex,
-                    Timestamp, CategoricalIndex)
+                    Timestamp, CategoricalIndex, isnull)
 from pandas.compat import range, lrange, u, PY3
 from pandas.core.config import option_context
@@ -195,18 +200,18 @@ def f():

         # This should result in integer categories, not float!
         cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
-        self.assertTrue(com.is_integer_dtype(cat.categories))
+        self.assertTrue(is_integer_dtype(cat.categories))

         # https://github.com/pydata/pandas/issues/3678
         cat = pd.Categorical([np.nan, 1, 2, 3])
-        self.assertTrue(com.is_integer_dtype(cat.categories))
+        self.assertTrue(is_integer_dtype(cat.categories))

         # this should result in floats
         cat = pd.Categorical([np.nan, 1, 2., 3])
-        self.assertTrue(com.is_float_dtype(cat.categories))
+        self.assertTrue(is_float_dtype(cat.categories))

         cat = pd.Categorical([np.nan, 1., 2., 3.])
-        self.assertTrue(com.is_float_dtype(cat.categories))
+        self.assertTrue(is_float_dtype(cat.categories))

         # Deprecating NaNs in categoires (GH #10748)
         # preserve int as far as possible by converting to object if NaN is in
@@ -214,23 +219,23 @@ def f():
         with tm.assert_produces_warning(FutureWarning):
             cat = pd.Categorical([np.nan, 1, 2, 3],
                                  categories=[np.nan, 1, 2, 3])
-        self.assertTrue(com.is_object_dtype(cat.categories))
+        self.assertTrue(is_object_dtype(cat.categories))

         # This doesn't work -> this would probably need some kind of "remember
         # the original type" feature to try to cast the array interface result
         # to...

         # vals = np.asarray(cat[cat.notnull()])
-        # self.assertTrue(com.is_integer_dtype(vals))
+        # self.assertTrue(is_integer_dtype(vals))
         with tm.assert_produces_warning(FutureWarning):
             cat = pd.Categorical([np.nan, "a", "b", "c"],
                                  categories=[np.nan, "a", "b", "c"])
-        self.assertTrue(com.is_object_dtype(cat.categories))
+        self.assertTrue(is_object_dtype(cat.categories))

         # but don't do it for floats
         with tm.assert_produces_warning(FutureWarning):
             cat = pd.Categorical([np.nan, 1., 2., 3.],
                                  categories=[np.nan, 1., 2., 3.])
-        self.assertTrue(com.is_float_dtype(cat.categories))
+        self.assertTrue(is_float_dtype(cat.categories))

         # corner cases
         cat = pd.Categorical([1])
@@ -552,7 +557,7 @@ def test_na_flags_int_categories(self):
         cat = Categorical(labels, categories, fastpath=True)
         repr(cat)

-        self.assert_numpy_array_equal(com.isnull(cat), labels == -1)
+        self.assert_numpy_array_equal(isnull(cat), labels == -1)

     def test_categories_none(self):
         factor = Categorical(['a', 'b', 'b', 'a',
@@ -2076,15 +2081,15 @@ def test_assignment_to_dataframe(self):

         result = df.dtypes
         expected = Series(
-            [np.dtype('int32'), com.CategoricalDtype()], index=['value', 'D'])
+            [np.dtype('int32'), CategoricalDtype()], index=['value', 'D'])
         tm.assert_series_equal(result, expected)

         df['E'] = s
         str(df)

         result = df.dtypes
-        expected = Series([np.dtype('int32'), com.CategoricalDtype(),
-                           com.CategoricalDtype()],
+        expected = Series([np.dtype('int32'), CategoricalDtype(),
+                           CategoricalDtype()],
                           index=['value', 'D', 'E'])
         tm.assert_series_equal(result, expected)
@@ -3234,7 +3239,7 @@ def test_slicing_and_getting_ops(self):
         # frame
         res_df = df.iloc[2:4, :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         # row
         res_row = df.iloc[2, :]
@@ -3244,7 +3249,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.iloc[:, 0]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(com.is_categorical_dtype(res_col))
+        self.assertTrue(is_categorical_dtype(res_col))

         # single value
         res_val = df.iloc[2, 0]
@@ -3254,7 +3259,7 @@ def test_slicing_and_getting_ops(self):
         # frame
         res_df = df.loc["j":"k", :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         # row
         res_row = df.loc["j", :]
@@ -3264,7 +3269,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.loc[:, "cats"]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(com.is_categorical_dtype(res_col))
+        self.assertTrue(is_categorical_dtype(res_col))

         # single value
         res_val = df.loc["j", "cats"]
@@ -3275,7 +3280,7 @@ def test_slicing_and_getting_ops(self):
         #   res_df = df.ix["j":"k",[0,1]] # doesn't work?
         res_df = df.ix["j":"k", :]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         # row
         res_row = df.ix["j", :]
@@ -3285,7 +3290,7 @@ def test_slicing_and_getting_ops(self):
         # col
         res_col = df.ix[:, "cats"]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(com.is_categorical_dtype(res_col))
+        self.assertTrue(is_categorical_dtype(res_col))

         # single value
         res_val = df.ix["j", 0]
@@ -3318,23 +3323,23 @@ def test_slicing_and_getting_ops(self):

         res_df = df.iloc[slice(2, 4)]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         res_df = df.iloc[[2, 3]]
         tm.assert_frame_equal(res_df, exp_df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         res_col = df.iloc[:, 0]
         tm.assert_series_equal(res_col, exp_col)
-        self.assertTrue(com.is_categorical_dtype(res_col))
+        self.assertTrue(is_categorical_dtype(res_col))

         res_df = df.iloc[:, slice(0, 2)]
         tm.assert_frame_equal(res_df, df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

         res_df = df.iloc[:, [0, 1]]
         tm.assert_frame_equal(res_df, df)
-        self.assertTrue(com.is_categorical_dtype(res_df["cats"]))
+        self.assertTrue(is_categorical_dtype(res_df["cats"]))

     def test_slicing_doc_examples(self):
@@ -4114,7 +4119,7 @@ def test_astype_to_other(self):

         s = self.cat['value_group']
         expected = s
         tm.assert_series_equal(s.astype('category'), expected)
-        tm.assert_series_equal(s.astype(com.CategoricalDtype()), expected)
+        tm.assert_series_equal(s.astype(CategoricalDtype()), expected)
         self.assertRaises(ValueError, lambda: s.astype('float64'))

         cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']))
@@ -4139,10 +4144,10 @@ def cmp(a, b):

         # valid conversion
         for valid in [lambda x: x.astype('category'),
-                      lambda x: x.astype(com.CategoricalDtype()),
+                      lambda x: x.astype(CategoricalDtype()),
                       lambda x: x.astype('object').astype('category'),
                       lambda x: x.astype('object').astype(
-                          com.CategoricalDtype())
+                          CategoricalDtype())
                       ]:
             result = valid(s)
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index 56b1b542d547e..09dd3f7ab517c 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -1,21 +1,12 @@
 # -*- coding: utf-8 -*-
-import collections
-from datetime import datetime, timedelta
-import re

 import nose
 import numpy as np
-import pandas as pd
-from pandas.tslib import iNaT, NaT
-from pandas import (Series, DataFrame, date_range, DatetimeIndex,
-                    TimedeltaIndex, Timestamp, Float64Index)
-from pandas import compat
-from pandas.compat import range, lrange, lmap, u
-from pandas.core.common import notnull, isnull, array_equivalent
+
+from pandas import Series, Timestamp
+from pandas.compat import range, lmap
 import pandas.core.common as com
-import pandas.core.convert as convert
 import pandas.util.testing as tm
-import pandas.core.config as cf

 _multiprocess_can_split_ = True
@@ -28,22 +19,6 @@ def test_mut_exclusive():
     assert com._mut_exclusive(major=None, major_axis=None) is None


-def test_is_sequence():
-    is_seq = com.is_sequence
-    assert (is_seq((1, 2)))
-    assert (is_seq([1, 2]))
-    assert (not is_seq("abcd"))
-    assert (not is_seq(u("abcd")))
-    assert (not is_seq(np.int64))
-
-    class A(object):
-
-        def __getitem__(self):
-            return 1
-
-    assert (not is_seq(A()))
-
-
 def test_get_callable_name():
     from functools import partial
     getname = com._get_callable_name
@@ -68,407 +43,6 @@ def __call__(self):
     assert getname(1) is None


-class TestInferDtype(tm.TestCase):
-
-    def test_infer_dtype_from_scalar(self):
-        # Test that _infer_dtype_from_scalar is returning correct dtype for int
-        # and float.
-
-        for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32,
-                       np.int32, np.uint64, np.int64]:
-            data = dtypec(12)
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, type(data))
-
-        data = 12
-        dtype, val = com._infer_dtype_from_scalar(data)
-        self.assertEqual(dtype, np.int64)
-
-        for dtypec in [np.float16, np.float32, np.float64]:
-            data = dtypec(12)
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, dtypec)
-
-        data = np.float(12)
-        dtype, val = com._infer_dtype_from_scalar(data)
-        self.assertEqual(dtype, np.float64)
-
-        for data in [True, False]:
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, np.bool_)
-
-        for data in [np.complex64(1), np.complex128(1)]:
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, np.complex_)
-
-        import datetime
-        for data in [np.datetime64(1, 'ns'), pd.Timestamp(1),
-                     datetime.datetime(2000, 1, 1, 0, 0)]:
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, 'M8[ns]')
-
-        for data in [np.timedelta64(1, 'ns'), pd.Timedelta(1),
-                     datetime.timedelta(1)]:
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, 'm8[ns]')
-
-        for data in [datetime.date(2000, 1, 1),
-                     pd.Timestamp(1, tz='US/Eastern'), 'foo']:
-            dtype, val = com._infer_dtype_from_scalar(data)
-            self.assertEqual(dtype, np.object_)
-
-
-def test_notnull():
-    assert notnull(1.)
-    assert not notnull(None)
-    assert not notnull(np.NaN)
-
-    with cf.option_context("mode.use_inf_as_null", False):
-        assert notnull(np.inf)
-        assert notnull(-np.inf)
-
-        arr = np.array([1.5, np.inf, 3.5, -np.inf])
-        result = notnull(arr)
-        assert result.all()
-
-    with cf.option_context("mode.use_inf_as_null", True):
-        assert not notnull(np.inf)
-        assert not notnull(-np.inf)
-
-        arr = np.array([1.5, np.inf, 3.5, -np.inf])
-        result = notnull(arr)
-        assert result.sum() == 2
-
-    with cf.option_context("mode.use_inf_as_null", False):
-        for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
-                  tm.makeObjectSeries(), tm.makeTimeSeries(),
-                  tm.makePeriodSeries()]:
-            assert (isinstance(isnull(s), Series))
-
-
-def test_isnull():
-    assert not isnull(1.)
-    assert isnull(None)
-    assert isnull(np.NaN)
-    assert not isnull(np.inf)
-    assert not isnull(-np.inf)
-
-    # series
-    for s in [tm.makeFloatSeries(), tm.makeStringSeries(),
-              tm.makeObjectSeries(), tm.makeTimeSeries(),
-              tm.makePeriodSeries()]:
-        assert (isinstance(isnull(s), Series))
-
-    # frame
-    for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(),
-               tm.makeMixedDataFrame()]:
-        result = isnull(df)
-        expected = df.apply(isnull)
-        tm.assert_frame_equal(result, expected)
-
-    # panel
-    for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())
-              ]:
-        result = isnull(p)
-        expected = p.apply(isnull)
-        tm.assert_panel_equal(result, expected)
-
-    # panel 4d
-    for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]:
-        result = isnull(p)
-        expected = p.apply(isnull)
-        tm.assert_panel4d_equal(result, expected)
-
-
-def test_isnull_lists():
-    result = isnull([[False]])
-    exp = np.array([[False]])
-    assert (np.array_equal(result, exp))
-
-    result = isnull([[1], [2]])
-    exp = np.array([[False], [False]])
-    assert (np.array_equal(result, exp))
-
-    # list of strings / unicode
-    result = isnull(['foo', 'bar'])
-    assert (not result.any())
-
-    result = isnull([u('foo'), u('bar')])
-    assert (not result.any())
-
-
-def test_isnull_nat():
-    result = isnull([NaT])
-    exp = np.array([True])
-    assert (np.array_equal(result, exp))
-
-    result = isnull(np.array([NaT], dtype=object))
-    exp = np.array([True])
-    assert (np.array_equal(result, exp))
-
-
-def test_isnull_numpy_nat():
-    arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'),
-                    np.datetime64('NaT', 's')])
-    result = isnull(arr)
-    expected = np.array([True] * 4)
-    tm.assert_numpy_array_equal(result, expected)
-
-
-def test_isnull_datetime():
-    assert (not isnull(datetime.now()))
-    assert notnull(datetime.now())
-
-    idx = date_range('1/1/1990', periods=20)
-    assert (notnull(idx).all())
-
-    idx = np.asarray(idx)
-    idx[0] = iNaT
-    idx = DatetimeIndex(idx)
-    mask = isnull(idx)
-    assert (mask[0])
-    assert (not mask[1:].any())
-
-    # GH 9129
-    pidx = idx.to_period(freq='M')
-    mask = isnull(pidx)
-    assert (mask[0])
-    assert (not mask[1:].any())
-
-    mask = isnull(pidx[1:])
-    assert (not mask.any())
-
-
-class TestIsNull(tm.TestCase):
-
-    def test_0d_array(self):
-        self.assertTrue(isnull(np.array(np.nan)))
-        self.assertFalse(isnull(np.array(0.0)))
-        self.assertFalse(isnull(np.array(0)))
-        # test object dtype
-        self.assertTrue(isnull(np.array(np.nan, dtype=object)))
-        self.assertFalse(isnull(np.array(0.0, dtype=object)))
-        self.assertFalse(isnull(np.array(0, dtype=object)))
-
-
-class TestNumberScalar(tm.TestCase):
-
-    def test_is_number(self):
-
-        self.assertTrue(com.is_number(True))
-        self.assertTrue(com.is_number(1))
-        self.assertTrue(com.is_number(1.1))
-        self.assertTrue(com.is_number(1 + 3j))
-        self.assertTrue(com.is_number(np.bool(False)))
-        self.assertTrue(com.is_number(np.int64(1)))
-        self.assertTrue(com.is_number(np.float64(1.1)))
-        self.assertTrue(com.is_number(np.complex128(1 + 3j)))
-        self.assertTrue(com.is_number(np.nan))
-
-        self.assertFalse(com.is_number(None))
-        self.assertFalse(com.is_number('x'))
-        self.assertFalse(com.is_number(datetime(2011, 1, 1)))
-        self.assertFalse(com.is_number(np.datetime64('2011-01-01')))
-        self.assertFalse(com.is_number(pd.Timestamp('2011-01-01')))
-        self.assertFalse(com.is_number(pd.Timestamp('2011-01-01',
-                                                    tz='US/Eastern')))
-        self.assertFalse(com.is_number(timedelta(1000)))
-        self.assertFalse(com.is_number(pd.Timedelta('1 days')))
-
-        # questionable
-        self.assertFalse(com.is_number(np.bool_(False)))
-        self.assertTrue(com.is_number(np.timedelta64(1, 'D')))
-
-    def test_is_bool(self):
-        self.assertTrue(com.is_bool(True))
-        self.assertTrue(com.is_bool(np.bool(False)))
-        self.assertTrue(com.is_bool(np.bool_(False)))
-
-        self.assertFalse(com.is_bool(1))
-        self.assertFalse(com.is_bool(1.1))
-        self.assertFalse(com.is_bool(1 + 3j))
-        self.assertFalse(com.is_bool(np.int64(1)))
-        self.assertFalse(com.is_bool(np.float64(1.1)))
-        self.assertFalse(com.is_bool(np.complex128(1 + 3j)))
-        self.assertFalse(com.is_bool(np.nan))
-        self.assertFalse(com.is_bool(None))
-        self.assertFalse(com.is_bool('x'))
-        self.assertFalse(com.is_bool(datetime(2011, 1, 1)))
-        self.assertFalse(com.is_bool(np.datetime64('2011-01-01')))
-        self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01')))
-        self.assertFalse(com.is_bool(pd.Timestamp('2011-01-01',
-                                                  tz='US/Eastern')))
-        self.assertFalse(com.is_bool(timedelta(1000)))
-        self.assertFalse(com.is_bool(np.timedelta64(1, 'D')))
-        self.assertFalse(com.is_bool(pd.Timedelta('1 days')))
-
-    def test_is_integer(self):
-        self.assertTrue(com.is_integer(1))
-        self.assertTrue(com.is_integer(np.int64(1)))
-
-        self.assertFalse(com.is_integer(True))
-        self.assertFalse(com.is_integer(1.1))
-        self.assertFalse(com.is_integer(1 + 3j))
-        self.assertFalse(com.is_integer(np.bool(False)))
-        self.assertFalse(com.is_integer(np.bool_(False)))
-        self.assertFalse(com.is_integer(np.float64(1.1)))
-        self.assertFalse(com.is_integer(np.complex128(1 + 3j)))
-        self.assertFalse(com.is_integer(np.nan))
-        self.assertFalse(com.is_integer(None))
-        self.assertFalse(com.is_integer('x'))
-        self.assertFalse(com.is_integer(datetime(2011, 1, 1)))
-        self.assertFalse(com.is_integer(np.datetime64('2011-01-01')))
-        self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01')))
-        self.assertFalse(com.is_integer(pd.Timestamp('2011-01-01',
-                                                     tz='US/Eastern')))
-        self.assertFalse(com.is_integer(timedelta(1000)))
-        self.assertFalse(com.is_integer(pd.Timedelta('1 days')))
-
-        # questionable
-        self.assertTrue(com.is_integer(np.timedelta64(1, 'D')))
-
-    def test_is_float(self):
-        self.assertTrue(com.is_float(1.1))
-        self.assertTrue(com.is_float(np.float64(1.1)))
-        self.assertTrue(com.is_float(np.nan))
-
-        self.assertFalse(com.is_float(True))
-        self.assertFalse(com.is_float(1))
-        self.assertFalse(com.is_float(1 + 3j))
-        self.assertFalse(com.is_float(np.bool(False)))
-        self.assertFalse(com.is_float(np.bool_(False)))
-        self.assertFalse(com.is_float(np.int64(1)))
-        self.assertFalse(com.is_float(np.complex128(1 + 3j)))
-        self.assertFalse(com.is_float(None))
-        self.assertFalse(com.is_float('x'))
-        self.assertFalse(com.is_float(datetime(2011, 1, 1)))
-        self.assertFalse(com.is_float(np.datetime64('2011-01-01')))
-        self.assertFalse(com.is_float(pd.Timestamp('2011-01-01')))
-        self.assertFalse(com.is_float(pd.Timestamp('2011-01-01',
-                                                   tz='US/Eastern')))
-        self.assertFalse(com.is_float(timedelta(1000)))
-        self.assertFalse(com.is_float(np.timedelta64(1, 'D')))
-        self.assertFalse(com.is_float(pd.Timedelta('1 days')))
-
-
-def test_downcast_conv():
-    # test downcasting
-
-    arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
-    result = com._possibly_downcast_to_dtype(arr, 'infer')
-    assert (np.array_equal(result, arr))
-
-    arr = np.array([8., 8., 8., 8., 8.9999999999995])
-    result = com._possibly_downcast_to_dtype(arr, 'infer')
-    expected = np.array([8, 8, 8, 8, 9])
-    assert (np.array_equal(result, expected))
-
-    arr = np.array([8., 8., 8., 8., 9.0000000000005])
-    result = com._possibly_downcast_to_dtype(arr, 'infer')
-    expected = np.array([8, 8, 8, 8, 9])
-    assert (np.array_equal(result, expected))
-
-    # conversions
-
-    expected = np.array([1, 2])
-    for dtype in [np.float64, object, np.int64]:
-        arr = np.array([1.0, 2.0], dtype=dtype)
-        result = com._possibly_downcast_to_dtype(arr, 'infer')
-        tm.assert_almost_equal(result, expected, check_dtype=False)
-
-    for dtype in [np.float64, object]:
-        expected = np.array([1.0, 2.0, np.nan], dtype=dtype)
-        arr = np.array([1.0, 2.0, np.nan], dtype=dtype)
-        result = com._possibly_downcast_to_dtype(arr, 'infer')
-        tm.assert_almost_equal(result, expected)
-
-    # empties
-    for dtype in [np.int32, np.float64, np.float32, np.bool_,
-                  np.int64, object]:
-        arr = np.array([], dtype=dtype)
-        result = com._possibly_downcast_to_dtype(arr, 'int64')
-        tm.assert_almost_equal(result, np.array([], dtype=np.int64))
-        assert result.dtype == np.int64
-
-
-def test_array_equivalent():
-    assert array_equivalent(np.array([np.nan, np.nan]),
-                            np.array([np.nan, np.nan]))
-    assert array_equivalent(np.array([np.nan, 1, np.nan]),
-                            np.array([np.nan, 1, np.nan]))
-    assert array_equivalent(np.array([np.nan, None], dtype='object'),
-                            np.array([np.nan, None], dtype='object'))
-    assert array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'),
-                            np.array([np.nan, 1 + 1j], dtype='complex'))
-    assert not array_equivalent(
-        np.array([np.nan, 1 + 1j], dtype='complex'), np.array(
-            [np.nan, 1 + 2j], dtype='complex'))
-    assert not array_equivalent(
-        np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan]))
-    assert not array_equivalent(
-        np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e']))
-    assert array_equivalent(Float64Index([0, np.nan]),
-                            Float64Index([0, np.nan]))
-    assert not array_equivalent(
-        Float64Index([0, np.nan]), Float64Index([1, np.nan]))
-    assert array_equivalent(DatetimeIndex([0, np.nan]),
-                            DatetimeIndex([0, np.nan]))
-    assert not array_equivalent(
-        DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan]))
-    assert array_equivalent(TimedeltaIndex([0, np.nan]),
-                            TimedeltaIndex([0, np.nan]))
-    assert not array_equivalent(
-        TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan]))
-    assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'),
-                            DatetimeIndex([0, np.nan], tz='US/Eastern'))
-    assert not array_equivalent(
-        DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex(
-            [1, np.nan], tz='US/Eastern'))
-    assert not array_equivalent(
-        DatetimeIndex([0, np.nan]), DatetimeIndex(
-            [0, np.nan], tz='US/Eastern'))
-    assert not array_equivalent(
-        DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex(
-            [0, np.nan], tz='US/Eastern'))
-    assert not array_equivalent(
-        DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan]))
-
-
-def test_array_equivalent_str():
-    for dtype in ['O', 'S', 'U']:
-        assert array_equivalent(np.array(['A', 'B'], dtype=dtype),
-                                np.array(['A', 'B'], dtype=dtype))
-        assert not array_equivalent(np.array(['A', 'B'], dtype=dtype),
-                                    np.array(['A', 'X'], dtype=dtype))
-
-
-def test_datetimeindex_from_empty_datetime64_array():
-    for unit in ['ms', 'us', 'ns']:
-        idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit))
-        assert (len(idx) == 0)
-
-
-def test_nan_to_nat_conversions():
-
-    df = DataFrame(dict({
-        'A': np.asarray(
-            lrange(10), dtype='float64'),
-        'B': Timestamp('20010101')
-    }))
-    df.iloc[3:6, :] = np.nan
-    result = df.loc[4, 'B'].value
-    assert (result == iNaT)
-
-    s = df['B'].copy()
-    s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan)
-    assert (isnull(s[8]))
-
- # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= '1.7.0': - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) - - def test_any_none(): assert (com._any_none(1, 2, 3, None)) assert (not com._any_none(1, 2, 3, 4)) @@ -567,122 +141,6 @@ def test_groupby(): assert v == expected[k] -def test_is_list_like(): - passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), - Series([]), Series(['a']).str) - fails = (1, '2', object()) - - for p in passes: - assert com.is_list_like(p) - - for f in fails: - assert not com.is_list_like(f) - - -def test_is_dict_like(): - passes = [{}, {'A': 1}, pd.Series([1])] - fails = ['1', 1, [1, 2], (1, 2), range(2), pd.Index([1])] - - for p in passes: - assert com.is_dict_like(p) - - for f in fails: - assert not com.is_dict_like(f) - - -def test_is_named_tuple(): - passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) - fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) - - for p in passes: - assert com.is_named_tuple(p) - - for f in fails: - assert not com.is_named_tuple(f) - - -def test_is_hashable(): - - # all new-style classes are hashable by default - class HashableClass(object): - pass - - class UnhashableClass1(object): - __hash__ = None - - class UnhashableClass2(object): - - def __hash__(self): - raise TypeError("Not hashable") - - hashable = (1, - 3.14, - np.float64(3.14), - 'a', - tuple(), - (1, ), - HashableClass(), ) - not_hashable = ([], UnhashableClass1(), ) - abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) - - for i in hashable: - assert com.is_hashable(i) - for i in not_hashable: - assert not com.is_hashable(i) - for i in abc_hashable_not_really_hashable: - assert not com.is_hashable(i) - - # numpy.array is no longer collections.Hashable as of - # https://github.com/numpy/numpy/pull/5326, just test - # pandas.common.is_hashable() - assert not com.is_hashable(np.array([])) - - # old-style classes in Python 2 don't appear hashable to - # collections.Hashable but also seem to support hash() by default - if compat.PY2: - - class OldStyleClass(): - pass - - c = OldStyleClass() - assert not isinstance(c, collections.Hashable) - assert com.is_hashable(c) - hash(c) # this will not raise - - -def test_ensure_int32(): - values = np.arange(10, dtype=np.int32) - result = com._ensure_int32(values) - assert (result.dtype == np.int32) - - values = np.arange(10, dtype=np.int64) - result = com._ensure_int32(values) - assert (result.dtype == np.int32) - - -def test_is_re(): - passes = re.compile('ad'), - fails = 'x', 2, 3, object() - - for p in passes: - assert com.is_re(p) - - for f in fails: - assert not com.is_re(f) - - -def test_is_recompilable(): - passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), - re.compile(r'')) - fails = 1, [], object() - - for p in passes: - assert com.is_re_compilable(p) - - for f in fails: - assert not com.is_re_compilable(f) - - def test_random_state(): import numpy.random as npr # Check with seed @@ -730,83 +188,6 @@ def test_maybe_match_name(): assert (matched == 'y') -class TestMaybe(tm.TestCase): - - def test_maybe_convert_string_to_array(self): - result = com._maybe_convert_string_to_object('x') - tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object)) - self.assertTrue(result.dtype == object) - - result = com._maybe_convert_string_to_object(1) - self.assertEqual(result, 1) - - arr = np.array(['x', 'y'], dtype=str) - result = com._maybe_convert_string_to_object(arr) - 
tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # unicode - arr = np.array(['x', 'y']).astype('U') - result = com._maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object)) - self.assertTrue(result.dtype == object) - - # object - arr = np.array(['x', 2], dtype=object) - result = com._maybe_convert_string_to_object(arr) - tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) - self.assertTrue(result.dtype == object) - - def test_maybe_convert_scalar(self): - - # pass thru - result = com._maybe_convert_scalar('x') - self.assertEqual(result, 'x') - result = com._maybe_convert_scalar(np.array([1])) - self.assertEqual(result, np.array([1])) - - # leave scalar dtype - result = com._maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.int64(1)) - result = com._maybe_convert_scalar(np.int32(1)) - self.assertEqual(result, np.int32(1)) - result = com._maybe_convert_scalar(np.float32(1)) - self.assertEqual(result, np.float32(1)) - result = com._maybe_convert_scalar(np.int64(1)) - self.assertEqual(result, np.float64(1)) - - # coerce - result = com._maybe_convert_scalar(1) - self.assertEqual(result, np.int64(1)) - result = com._maybe_convert_scalar(1.0) - self.assertEqual(result, np.float64(1)) - result = com._maybe_convert_scalar(pd.Timestamp('20130101')) - self.assertEqual(result, pd.Timestamp('20130101').value) - result = com._maybe_convert_scalar(datetime(2013, 1, 1)) - self.assertEqual(result, pd.Timestamp('20130101').value) - result = com._maybe_convert_scalar(pd.Timedelta('1 day 1 min')) - self.assertEqual(result, pd.Timedelta('1 day 1 min').value) - - -class TestConvert(tm.TestCase): - - def test_possibly_convert_objects_copy(self): - values = np.array([1, 2]) - - out = convert._possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = convert._possibly_convert_objects(values, copy=True) - self.assertTrue(values is not out) - - values = np.array(['apply', 'banana']) - out = convert._possibly_convert_objects(values, copy=False) - self.assertTrue(values is out) - - out = convert._possibly_convert_objects(values, copy=True) - self.assertTrue(values is not out) - - def test_dict_compat(): data_datetime64 = {np.datetime64('1990-03-15'): 1, np.datetime64('2015-03-15'): 2} @@ -817,39 +198,6 @@ def test_dict_compat(): assert (com._dict_compat(data_unchanged) == data_unchanged) -def test_is_timedelta(): - assert (com.is_timedelta64_dtype('timedelta64')) - assert (com.is_timedelta64_dtype('timedelta64[ns]')) - assert (not com.is_timedelta64_ns_dtype('timedelta64')) - assert (com.is_timedelta64_ns_dtype('timedelta64[ns]')) - - tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') - assert (com.is_timedelta64_dtype(tdi)) - assert (com.is_timedelta64_ns_dtype(tdi)) - assert (com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) - # Conversion to Int64Index: - assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) - assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) - - -def test_array_equivalent_compat(): - # see gh-13388 - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - assert (com.array_equivalent(m, n, strict_nan=True)) - assert (com.array_equivalent(m, n, strict_nan=False)) - - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (4, 3)], dtype=[('a', int), ('b', 
float)]) - assert (not com.array_equivalent(m, n, strict_nan=True)) - assert (not com.array_equivalent(m, n, strict_nan=False)) - - m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) - n = np.array([(1, 2), (3, 4)], dtype=[('b', int), ('a', float)]) - assert (not com.array_equivalent(m, n, strict_nan=True)) - assert (not com.array_equivalent(m, n, strict_nan=False)) - - if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 2f4c2b414cc30..a53e79439b017 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -7,12 +7,12 @@ from numpy import nan import pandas as pd +from pandas.types.common import is_scalar from pandas import (Index, Series, DataFrame, Panel, isnull, date_range, period_range, Panel4D) from pandas.core.index import MultiIndex import pandas.formats.printing as printing -import pandas.lib as lib from pandas.compat import range, zip, PY3 from pandas import compat @@ -53,7 +53,7 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): if isinstance(shape, int): shape = tuple([shape] * self._ndim) if value is not None: - if lib.isscalar(value): + if is_scalar(value): if value == 'empty': arr = None diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index bd19a83ce2b64..3a5b0117948b7 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -9,6 +9,7 @@ from datetime import datetime, date +from pandas.types.common import is_list_like import pandas as pd from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, bdate_range) @@ -16,7 +17,6 @@ iteritems, OrderedDict, PY3) from pandas.util.decorators import cache_readonly from pandas.formats.printing import pprint_thing -import pandas.core.common as com import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, assert_is_valid_plot_return_object, slow) @@ -157,7 +157,7 @@ def _check_visible(self, collections, visible=True): """ from matplotlib.collections import Collection if not isinstance(collections, - Collection) and not com.is_list_like(collections): + Collection) and not is_list_like(collections): collections = [collections] for patch in collections: @@ -242,7 +242,7 @@ def _check_text_labels(self, texts, expected): expected : str or list-like which has the same length as texts expected text label, or its list """ - if not com.is_list_like(texts): + if not is_list_like(texts): self.assertEqual(texts.get_text(), expected) else: labels = [t.get_text() for t in texts] diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a52f22fe2032a..57d43f22757ea 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -5,7 +5,8 @@ from datetime import datetime from numpy import nan -from pandas import date_range, bdate_range, Timestamp +from pandas.types.common import _ensure_platform_int +from pandas import date_range, bdate_range, Timestamp, isnull from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import Categorical, DataFrame from pandas.core.common import UnsupportedFunctionCall @@ -163,9 +164,9 @@ def test_first_last_nth(self): grouped['B'].nth(0) self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - self.assertTrue(com.isnull(grouped['B'].first()['foo'])) - self.assertTrue(com.isnull(grouped['B'].last()['foo'])) - self.assertTrue(com.isnull(grouped['B'].nth(0)['foo'])) + 
self.assertTrue(isnull(grouped['B'].first()['foo'])) + self.assertTrue(isnull(grouped['B'].last()['foo'])) + self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) @@ -1079,8 +1080,9 @@ def test_transform_fast(self): grp = df.groupby('id')['val'] values = np.repeat(grp.mean().values, - com._ensure_platform_int(grp.count().values)) + _ensure_platform_int(grp.count().values)) expected = pd.Series(values, index=df.index, name='val') + result = grp.transform(np.mean) assert_series_equal(result, expected) diff --git a/pandas/tests/test_infer_and_convert.py b/pandas/tests/test_infer_and_convert.py deleted file mode 100644 index 5f016322f101f..0000000000000 --- a/pandas/tests/test_infer_and_convert.py +++ /dev/null @@ -1,653 +0,0 @@ -# -*- coding: utf-8 -*- - -from datetime import datetime, timedelta, date, time - -import numpy as np -import pandas as pd -import pandas.lib as lib -import pandas.util.testing as tm -from pandas import Index - -from pandas.compat import long, u, PY2 - - -class TestInference(tm.TestCase): - - def test_infer_dtype_bytes(self): - compare = 'string' if PY2 else 'bytes' - - # string array of bytes - arr = np.array(list('abc'), dtype='S1') - self.assertEqual(pd.lib.infer_dtype(arr), compare) - - # object array of bytes - arr = arr.astype(object) - self.assertEqual(pd.lib.infer_dtype(arr), compare) - - def test_isinf_scalar(self): - # GH 11352 - self.assertTrue(lib.isposinf_scalar(float('inf'))) - self.assertTrue(lib.isposinf_scalar(np.inf)) - self.assertFalse(lib.isposinf_scalar(-np.inf)) - self.assertFalse(lib.isposinf_scalar(1)) - self.assertFalse(lib.isposinf_scalar('a')) - - self.assertTrue(lib.isneginf_scalar(float('-inf'))) - self.assertTrue(lib.isneginf_scalar(-np.inf)) - self.assertFalse(lib.isneginf_scalar(np.inf)) - self.assertFalse(lib.isneginf_scalar(1)) - self.assertFalse(lib.isneginf_scalar('a')) - - def test_maybe_convert_numeric_infinities(self): - # see gh-13274 - infinities = ['inf', 'inF', 'iNf', 'Inf', - 'iNF', 'InF', 'INf', 'INF'] - na_values = set(['', 'NULL', 'nan']) - - pos = np.array(['inf'], dtype=np.float64) - neg = np.array(['-inf'], dtype=np.float64) - - msg = "Unable to parse string" - - for infinity in infinities: - for maybe_int in (True, False): - out = lib.maybe_convert_numeric( - np.array([infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['-' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, neg) - - out = lib.maybe_convert_numeric( - np.array([u(infinity)], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - out = lib.maybe_convert_numeric( - np.array(['+' + infinity], dtype=object), - na_values, maybe_int) - tm.assert_numpy_array_equal(out, pos) - - # too many characters - with tm.assertRaisesRegexp(ValueError, msg): - lib.maybe_convert_numeric( - np.array(['foo_' + infinity], dtype=object), - na_values, maybe_int) - - def test_maybe_convert_numeric_post_floatify_nan(self): - # see gh-13314 - data = np.array(['1.200', '-999.000', '4.500'], dtype=object) - expected = np.array([1.2, np.nan, 4.5], dtype=np.float64) - nan_values = set([-999, -999.0]) - - for coerce_type in (True, False): - out = lib.maybe_convert_numeric(data, nan_values, coerce_type) - tm.assert_numpy_array_equal(out, expected) - - def test_convert_infs(self): - arr = np.array(['inf', 'inf', 'inf'], dtype='O') - result = 
lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - arr = np.array(['-inf', '-inf', '-inf'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False) - self.assertTrue(result.dtype == np.float64) - - def test_scientific_no_exponent(self): - # See PR 12215 - arr = np.array(['42E', '2E', '99e', '6e'], dtype='O') - result = lib.maybe_convert_numeric(arr, set(), False, True) - self.assertTrue(np.all(np.isnan(result))) - - def test_convert_non_hashable(self): - # GH13324 - # make sure that we are handing non-hashables - arr = np.array([[10.0, 2], 1.0, 'apple']) - result = lib.maybe_convert_numeric(arr, set(), False, True) - tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) - - -class TestTypeInference(tm.TestCase): - _multiprocess_can_split_ = True - - def test_length_zero(self): - result = lib.infer_dtype(np.array([], dtype='i4')) - self.assertEqual(result, 'integer') - - result = lib.infer_dtype([]) - self.assertEqual(result, 'empty') - - def test_integers(self): - arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='i4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'integer') - - def test_bools(self): - arr = np.array([True, False, True, True, True], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([np.bool_(True), np.bool_(False)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - arr = np.array([True, False, True, 'foo'], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - arr = np.array([True, False, True], dtype=bool) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'boolean') - - def test_floats(self): - arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'], - dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed-integer') - - arr = np.array([1, 2, 3, 4, 5], dtype='f4') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - arr = np.array([1, 2, 3, 4, 5], dtype='f8') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'floating') - - def test_string(self): - pass - - def test_unicode(self): - pass - - def test_datetime(self): - - dates = [datetime(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'datetime64') - - def test_infer_dtype_datetime(self): - - arr = np.array([pd.Timestamp('2011-01-01'), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.datetime64('2011-01-01'), - np.datetime64('2011-01-01')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1)]) - 
self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, pd.Timestamp('2011-01-02'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([n, np.datetime64('2011-01-02'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([n, datetime(2011, 1, 1), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # different type of nat - arr = np.array([np.timedelta64('nat'), - np.datetime64('2011-01-02')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.datetime64('2011-01-02'), - np.timedelta64('nat')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - # mixed datetime - arr = np.array([datetime(2011, 1, 1), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # should be datetime? - arr = np.array([np.datetime64('2011-01-01'), - pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([pd.Timestamp('2011-01-02'), - np.datetime64('2011-01-01')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed-integer') - - arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_timedelta(self): - - arr = np.array([pd.Timedelta('1 days'), - pd.Timedelta('2 days')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([np.timedelta64(1, 'D'), - np.timedelta64(2, 'D')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([timedelta(1), timedelta(2)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # starts with nan - for n in [pd.NaT, np.nan]: - arr = np.array([n, pd.Timedelta('1 days')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, timedelta(1)]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, pd.Timedelta('1 days'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, np.timedelta64(1, 'D'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([n, timedelta(1), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # different type of nat - arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_infer_dtype_all_nan_nat_like(self): - arr = np.array([np.nan, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'floating') - - # nan and None mix are result in mixed - arr = np.array([np.nan, np.nan, None]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([None, np.nan, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - # pd.NaT - arr = np.array([pd.NaT]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([pd.NaT, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([np.nan, pd.NaT]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = 
np.array([np.nan, pd.NaT, np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - arr = np.array([None, pd.NaT, None]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime') - - # np.datetime64(nat) - arr = np.array([np.datetime64('nat')]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.datetime64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([pd.NaT, n, np.datetime64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'datetime64') - - arr = np.array([np.timedelta64('nat')], dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - for n in [np.nan, pd.NaT, None]: - arr = np.array([n, np.timedelta64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - arr = np.array([pd.NaT, n, np.timedelta64('nat'), n]) - self.assertEqual(pd.lib.infer_dtype(arr), 'timedelta') - - # datetime / timedelta mixed - arr = np.array([pd.NaT, np.datetime64('nat'), - np.timedelta64('nat'), np.nan]) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], - dtype=object) - self.assertEqual(pd.lib.infer_dtype(arr), 'mixed') - - def test_is_datetimelike_array_all_nan_nat_like(self): - arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) - self.assertTrue(pd.lib.is_datetime_array(arr)) - self.assertTrue(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_array(arr)) - self.assertTrue(pd.lib.is_timedelta64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), - np.timedelta64('nat')]) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, pd.NaT]) - self.assertTrue(pd.lib.is_datetime_array(arr)) - self.assertTrue(pd.lib.is_datetime64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_array(arr)) - self.assertTrue(pd.lib.is_timedelta64_array(arr)) - self.assertTrue(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - arr = np.array([np.nan, np.nan], dtype=object) - self.assertFalse(pd.lib.is_datetime_array(arr)) - self.assertFalse(pd.lib.is_datetime64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_array(arr)) - self.assertFalse(pd.lib.is_timedelta64_array(arr)) - self.assertFalse(pd.lib.is_timedelta_or_timedelta64_array(arr)) - - def test_date(self): - - dates = [date(2012, 1, x) for x in range(1, 20)] - index = Index(dates) - self.assertEqual(index.inferred_type, 'date') - - def test_to_object_array_tuples(self): - r = (5, 6) - values = [r] - result = lib.to_object_array_tuples(values) - - try: - # make sure record array works - from collections import namedtuple - record = namedtuple('record', 'x y') - r = record(5, 6) - values = [r] - result = lib.to_object_array_tuples(values) # noqa - except ImportError: - pass - - def test_to_object_array_width(self): - # see gh-13320 - rows = [[1, 2, 3], [4, 5, 6]] - - expected = 
np.array(rows, dtype=object) - out = lib.to_object_array(rows) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array(rows, dtype=object) - out = lib.to_object_array(rows, min_width=1) - tm.assert_numpy_array_equal(out, expected) - - expected = np.array([[1, 2, 3, None, None], - [4, 5, 6, None, None]], dtype=object) - out = lib.to_object_array(rows, min_width=5) - tm.assert_numpy_array_equal(out, expected) - - def test_object(self): - - # GH 7431 - # cannot infer more than this as only a single element - arr = np.array([None], dtype='O') - result = lib.infer_dtype(arr) - self.assertEqual(result, 'mixed') - - def test_categorical(self): - - # GH 8974 - from pandas import Categorical, Series - arr = Categorical(list('abc')) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) - result = lib.infer_dtype(arr) - self.assertEqual(result, 'categorical') - - result = lib.infer_dtype(Series(arr)) - self.assertEqual(result, 'categorical') - - def test_is_period(self): - self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) - self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) - self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) - self.assertFalse(lib.is_period(1)) - self.assertFalse(lib.is_period(np.nan)) - - -class TestConvert(tm.TestCase): - - def test_convert_objects(self): - arr = np.array(['a', 'b', np.nan, np.nan, 'd', 'e', 'f'], dtype='O') - result = lib.maybe_convert_objects(arr) - self.assertTrue(result.dtype == np.object_) - - def test_convert_objects_ints(self): - # test that we can detect many kinds of integers - dtypes = ['i1', 'i2', 'i4', 'i8', 'u1', 'u2', 'u4', 'u8'] - - for dtype_str in dtypes: - arr = np.array(list(np.arange(20, dtype=dtype_str)), dtype='O') - self.assertTrue(arr[0].dtype == np.dtype(dtype_str)) - result = lib.maybe_convert_objects(arr) - self.assertTrue(issubclass(result.dtype.type, np.integer)) - - def test_convert_objects_complex_number(self): - for dtype in np.sctypes['complex']: - arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') - self.assertTrue(arr[0].dtype == np.dtype(dtype)) - result = lib.maybe_convert_objects(arr) - self.assertTrue(issubclass(result.dtype.type, np.complexfloating)) - - -class Testisscalar(tm.TestCase): - - def test_isscalar_builtin_scalars(self): - self.assertTrue(lib.isscalar(None)) - self.assertTrue(lib.isscalar(True)) - self.assertTrue(lib.isscalar(False)) - self.assertTrue(lib.isscalar(0.)) - self.assertTrue(lib.isscalar(np.nan)) - self.assertTrue(lib.isscalar('foobar')) - self.assertTrue(lib.isscalar(b'foobar')) - self.assertTrue(lib.isscalar(u('efoobar'))) - self.assertTrue(lib.isscalar(datetime(2014, 1, 1))) - self.assertTrue(lib.isscalar(date(2014, 1, 1))) - self.assertTrue(lib.isscalar(time(12, 0))) - self.assertTrue(lib.isscalar(timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.NaT)) - - def test_isscalar_builtin_nonscalars(self): - self.assertFalse(lib.isscalar({})) - self.assertFalse(lib.isscalar([])) - self.assertFalse(lib.isscalar([1])) - self.assertFalse(lib.isscalar(())) - self.assertFalse(lib.isscalar((1, ))) - self.assertFalse(lib.isscalar(slice(None))) - self.assertFalse(lib.isscalar(Ellipsis)) - - def test_isscalar_numpy_array_scalars(self): - self.assertTrue(lib.isscalar(np.int64(1))) - self.assertTrue(lib.isscalar(np.float64(1.))) - self.assertTrue(lib.isscalar(np.int32(1))) 
- self.assertTrue(lib.isscalar(np.object_('foobar'))) - self.assertTrue(lib.isscalar(np.str_('foobar'))) - self.assertTrue(lib.isscalar(np.unicode_(u('foobar')))) - self.assertTrue(lib.isscalar(np.bytes_(b'foobar'))) - self.assertTrue(lib.isscalar(np.datetime64('2014-01-01'))) - self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) - - def test_isscalar_numpy_zerodim_arrays(self): - for zerodim in [np.array(1), np.array('foobar'), - np.array(np.datetime64('2014-01-01')), - np.array(np.timedelta64(1, 'h')), - np.array(np.datetime64('NaT'))]: - self.assertFalse(lib.isscalar(zerodim)) - self.assertTrue(lib.isscalar(lib.item_from_zerodim(zerodim))) - - def test_isscalar_numpy_arrays(self): - self.assertFalse(lib.isscalar(np.array([]))) - self.assertFalse(lib.isscalar(np.array([[]]))) - self.assertFalse(lib.isscalar(np.matrix('1; 2'))) - - def test_isscalar_pandas_scalars(self): - self.assertTrue(lib.isscalar(pd.Timestamp('2014-01-01'))) - self.assertTrue(lib.isscalar(pd.Timedelta(hours=1))) - self.assertTrue(lib.isscalar(pd.Period('2014-01-01'))) - - def test_lisscalar_pandas_containers(self): - self.assertFalse(lib.isscalar(pd.Series())) - self.assertFalse(lib.isscalar(pd.Series([1]))) - self.assertFalse(lib.isscalar(pd.DataFrame())) - self.assertFalse(lib.isscalar(pd.DataFrame([[1]]))) - self.assertFalse(lib.isscalar(pd.Panel())) - self.assertFalse(lib.isscalar(pd.Panel([[[1]]]))) - self.assertFalse(lib.isscalar(pd.Index([]))) - self.assertFalse(lib.isscalar(pd.Index([1]))) - - -class TestParseSQL(tm.TestCase): - - def test_convert_sql_column_floats(self): - arr = np.array([1.5, None, 3, 4.2], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_strings(self): - arr = np.array(['1.5', None, '3', '4.2'], dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_unicode(self): - arr = np.array([u('1.5'), None, u('3'), u('4.2')], - dtype=object) - result = lib.convert_sql_column(arr) - expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], - dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_ints(self): - arr = np.array([1, 2, 3, 4], dtype='O') - arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') - result = lib.convert_sql_column(arr) - result2 = lib.convert_sql_column(arr2) - expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - self.assert_numpy_array_equal(result2, expected) - - arr = np.array([1, 2, 3, None, 4], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_longs(self): - arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, 4], dtype='i8') - self.assert_numpy_array_equal(result, expected) - - arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_bools(self): - arr = np.array([True, False, True, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, True, False], dtype=bool) 
- self.assert_numpy_array_equal(result, expected) - - arr = np.array([True, False, None, False], dtype='O') - result = lib.convert_sql_column(arr) - expected = np.array([True, False, np.nan, False], dtype=object) - self.assert_numpy_array_equal(result, expected) - - def test_convert_sql_column_decimals(self): - from decimal import Decimal - arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) - result = lib.convert_sql_column(arr) - expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') - self.assert_numpy_array_equal(result, expected) - - def test_convert_downcast_int64(self): - from pandas.parser import na_values - - arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) - - # default argument - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - result = lib.downcast_int64(arr, na_values, use_unsigned=False) - self.assert_numpy_array_equal(result, expected) - - expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - # still cast to int8 despite use_unsigned=True - # because of the negative number as an element - arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) - expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) - result = lib.downcast_int64(arr, na_values, use_unsigned=True) - self.assert_numpy_array_equal(result, expected) - - arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) - expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - int8_na = na_values[np.int8] - int64_na = na_values[np.int64] - arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) - expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) - result = lib.downcast_int64(arr, na_values) - self.assert_numpy_array_equal(result, expected) - - -if __name__ == '__main__': - import nose - - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index 10a6bb5c75b01..84d7226f1b2f5 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -222,6 +222,7 @@ def test_duplicated_with_nas(): expected = trues + trues assert (np.array_equal(result, expected)) + if __name__ == '__main__': import nose diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1b1db90ea713d..f3b0becccf596 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,6 +10,7 @@ from pandas.core.index import Index, MultiIndex from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp +from pandas.types.common import is_float_dtype, is_integer_dtype from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) import pandas.core.common as com @@ -787,8 +788,8 @@ def test_delevel_infer_dtype(self): df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() - self.assertTrue(com.is_integer_dtype(deleveled['prm1'])) - self.assertTrue(com.is_float_dtype(deleveled['prm2'])) + self.assertTrue(is_integer_dtype(deleveled['prm1'])) + self.assertTrue(is_float_dtype(deleveled['prm2'])) def test_reset_index_with_drop(self): deleveled = self.ymd.reset_index(drop=True) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 
904bedde03312..eeeddc278c714 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -5,8 +5,8 @@ import warnings import numpy as np -from pandas import Series -from pandas.core.common import isnull, is_integer_dtype +from pandas import Series, isnull +from pandas.types.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index b1f09ad2685e3..f2e13867d3bf0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -10,12 +10,13 @@ import numpy as np import pandas as pd +from pandas.types.common import is_float_dtype from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex from pandas.core.datetools import bday from pandas.core.nanops import nanall, nanany from pandas.core.panel import Panel from pandas.core.series import remove_na -import pandas.core.common as com + from pandas.formats.printing import pprint_thing from pandas import compat from pandas.compat import range, lrange, StringIO, OrderedDict, signature @@ -903,7 +904,7 @@ def test_set_value(self): self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5) - self.assertTrue(com.is_float_dtype(res3['ItemE'].values)) + self.assertTrue(is_float_dtype(res3['ItemE'].values)) with tm.assertRaisesRegexp(TypeError, "There must be an argument for each axis" " plus the value provided"): diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 607048df29faa..16a55c7ec4aeb 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -6,12 +6,12 @@ import numpy as np +from pandas.types.common import is_float_dtype from pandas import Series, Index, isnull, notnull from pandas.core.datetools import bday from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D from pandas.core.series import remove_na -import pandas.core.common as com from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, @@ -595,7 +595,7 @@ def test_set_value(self): self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5) - self.assertTrue(com.is_float_dtype(res3['l4'].values)) + self.assertTrue(is_float_dtype(res3['l4'].values)) class TestPanel4d(tm.TestCase, CheckIndexing, SafeForSparse, diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 67d171bb8efda..4d23bed620265 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -12,8 +12,7 @@ from pandas.compat import range, u import pandas.compat as compat -from pandas import (Index, Series, DataFrame, isnull, MultiIndex) -import pandas.core.common as com +from pandas import (Index, Series, DataFrame, isnull, MultiIndex, notnull) from pandas.util.testing import assert_series_equal import pandas.util.testing as tm @@ -1350,7 +1349,7 @@ def test_len(self): values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo']) result = values.str.len() - exp = values.map(lambda x: len(x) if com.notnull(x) else NA) + exp = values.map(lambda x: len(x) if notnull(x) else NA) tm.assert_series_equal(result, exp) # mixed @@ -1368,7 +1367,7 @@ def test_len(self): 'fooooooo')]) result = values.str.len() - exp = values.map(lambda x: len(x) if com.notnull(x) else NA) + exp = values.map(lambda x: len(x) if notnull(x) else NA) tm.assert_series_equal(result, exp) def test_findall(self): diff --git 
a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py new file mode 100644 index 0000000000000..dd3f07ea8157f --- /dev/null +++ b/pandas/tests/types/test_cast.py @@ -0,0 +1,193 @@ +# -*- coding: utf-8 -*- + +""" +These test the private routines in types/cast.py + +""" + + +import nose +from datetime import datetime +import numpy as np + +from pandas import Timedelta, Timestamp +from pandas.types.cast import (_possibly_downcast_to_dtype, + _possibly_convert_objects, + _infer_dtype_from_scalar, + _maybe_convert_string_to_object, + _maybe_convert_scalar) +from pandas.util import testing as tm + +_multiprocess_can_split_ = True + + +def test_downcast_conv(): + # test downcasting + + arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) + result = _possibly_downcast_to_dtype(arr, 'infer') + assert (np.array_equal(result, arr)) + + arr = np.array([8., 8., 8., 8., 8.9999999999995]) + result = _possibly_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9]) + assert (np.array_equal(result, expected)) + + arr = np.array([8., 8., 8., 8., 9.0000000000005]) + result = _possibly_downcast_to_dtype(arr, 'infer') + expected = np.array([8, 8, 8, 8, 9]) + assert (np.array_equal(result, expected)) + + # conversions + + expected = np.array([1, 2]) + for dtype in [np.float64, object, np.int64]: + arr = np.array([1.0, 2.0], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected, check_dtype=False) + + for dtype in [np.float64, object]: + expected = np.array([1.0, 2.0, np.nan], dtype=dtype) + arr = np.array([1.0, 2.0, np.nan], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'infer') + tm.assert_almost_equal(result, expected) + + # empties + for dtype in [np.int32, np.float64, np.float32, np.bool_, + np.int64, object]: + arr = np.array([], dtype=dtype) + result = _possibly_downcast_to_dtype(arr, 'int64') + tm.assert_almost_equal(result, np.array([], dtype=np.int64)) + assert result.dtype == np.int64 + + +class TestInferDtype(tm.TestCase): + + def test_infer_dtype_from_scalar(self): + # Test that _infer_dtype_from_scalar is returning correct dtype for int + # and float. 
+
+        for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32,
+                       np.int32, np.uint64, np.int64]:
+            data = dtypec(12)
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, type(data))
+
+        data = 12
+        dtype, val = _infer_dtype_from_scalar(data)
+        self.assertEqual(dtype, np.int64)
+
+        for dtypec in [np.float16, np.float32, np.float64]:
+            data = dtypec(12)
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, dtypec)
+
+        data = np.float(12)
+        dtype, val = _infer_dtype_from_scalar(data)
+        self.assertEqual(dtype, np.float64)
+
+        for data in [True, False]:
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, np.bool_)
+
+        for data in [np.complex64(1), np.complex128(1)]:
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, np.complex_)
+
+        import datetime
+        for data in [np.datetime64(1, 'ns'), Timestamp(1),
+                     datetime.datetime(2000, 1, 1, 0, 0)]:
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, 'M8[ns]')
+
+        for data in [np.timedelta64(1, 'ns'), Timedelta(1),
+                     datetime.timedelta(1)]:
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, 'm8[ns]')
+
+        for data in [datetime.date(2000, 1, 1),
+                     Timestamp(1, tz='US/Eastern'), 'foo']:
+            dtype, val = _infer_dtype_from_scalar(data)
+            self.assertEqual(dtype, np.object_)
+
+
+class TestMaybe(tm.TestCase):
+
+    def test_maybe_convert_string_to_array(self):
+        result = _maybe_convert_string_to_object('x')
+        tm.assert_numpy_array_equal(result, np.array(['x'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        result = _maybe_convert_string_to_object(1)
+        self.assertEqual(result, 1)
+
+        arr = np.array(['x', 'y'], dtype=str)
+        result = _maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        # unicode
+        arr = np.array(['x', 'y']).astype('U')
+        result = _maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 'y'], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+        # object
+        arr = np.array(['x', 2], dtype=object)
+        result = _maybe_convert_string_to_object(arr)
+        tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object))
+        self.assertTrue(result.dtype == object)
+
+    def test_maybe_convert_scalar(self):
+
+        # pass thru
+        result = _maybe_convert_scalar('x')
+        self.assertEqual(result, 'x')
+        result = _maybe_convert_scalar(np.array([1]))
+        self.assertEqual(result, np.array([1]))
+
+        # leave scalar dtype
+        result = _maybe_convert_scalar(np.int64(1))
+        self.assertEqual(result, np.int64(1))
+        result = _maybe_convert_scalar(np.int32(1))
+        self.assertEqual(result, np.int32(1))
+        result = _maybe_convert_scalar(np.float32(1))
+        self.assertEqual(result, np.float32(1))
+        result = _maybe_convert_scalar(np.float64(1))
+        self.assertEqual(result, np.float64(1))
+
+        # coerce
+        result = _maybe_convert_scalar(1)
+        self.assertEqual(result, np.int64(1))
+        result = _maybe_convert_scalar(1.0)
+        self.assertEqual(result, np.float64(1))
+        result = _maybe_convert_scalar(Timestamp('20130101'))
+        self.assertEqual(result, Timestamp('20130101').value)
+        result = _maybe_convert_scalar(datetime(2013, 1, 1))
+        self.assertEqual(result, Timestamp('20130101').value)
+        result = _maybe_convert_scalar(Timedelta('1 day 1 min'))
+        self.assertEqual(result, Timedelta('1 day 1 min').value)
+
+
+class TestConvert(tm.TestCase):
+
+    def test_possibly_convert_objects_copy(self):
+        values = np.array([1, 2])
+
+        out =
_possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) + + out = _possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) + + values = np.array(['apply', 'banana']) + out = _possibly_convert_objects(values, copy=False) + self.assertTrue(values is out) + + out = _possibly_convert_objects(values, copy=True) + self.assertTrue(values is not out) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py new file mode 100644 index 0000000000000..0a586410ad5a0 --- /dev/null +++ b/pandas/tests/types/test_common.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np + +from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype +from pandas.types.common import pandas_dtype + +_multiprocess_can_split_ = True + + +def test_pandas_dtype(): + + assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype( + 'datetime64[ns, US/Eastern]') + assert pandas_dtype('category') == CategoricalDtype() + for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: + assert pandas_dtype(dtype) == np.dtype(dtype) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index d48b9baf64777..1743e80ae01a9 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -4,13 +4,14 @@ import nose import numpy as np from pandas import Series, Categorical, date_range -import pandas.core.common as com -from pandas.types.api import CategoricalDtype -from pandas.core.common import (is_categorical_dtype, - is_categorical, DatetimeTZDtype, - is_datetime64tz_dtype, is_datetimetz, - is_dtype_equal, is_datetime64_ns_dtype, - is_datetime64_dtype) + +from pandas.types.dtypes import CategoricalDtype +from pandas.types.common import (is_categorical_dtype, + is_categorical, DatetimeTZDtype, + is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_datetime64_ns_dtype, + is_datetime64_dtype, + _coerce_to_dtype) import pandas.util.testing as tm _multiprocess_can_split_ = True @@ -124,9 +125,9 @@ def test_subclass(self): self.assertTrue(issubclass(type(a), type(b))) def test_coerce_to_dtype(self): - self.assertEqual(com._coerce_to_dtype('datetime64[ns, US/Eastern]'), + self.assertEqual(_coerce_to_dtype('datetime64[ns, US/Eastern]'), DatetimeTZDtype('ns', 'US/Eastern')) - self.assertEqual(com._coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), + self.assertEqual(_coerce_to_dtype('datetime64[ns, Asia/Tokyo]'), DatetimeTZDtype('ns', 'Asia/Tokyo')) def test_compat(self): diff --git a/pandas/tests/types/test_generic.py b/pandas/tests/types/test_generic.py index 5549a3a376992..89913de6f6069 100644 --- a/pandas/tests/types/test_generic.py +++ b/pandas/tests/types/test_generic.py @@ -3,8 +3,8 @@ import nose import numpy as np import pandas as pd -import pandas.core.common as com import pandas.util.testing as tm +from pandas.types import generic as gt _multiprocess_can_split_ = True @@ -22,24 +22,24 @@ class TestABCClasses(tm.TestCase): sparse_array = pd.SparseArray(np.random.randn(10)) def test_abc_types(self): - self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndex) - self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCInt64Index) - self.assertIsInstance(pd.Float64Index([1, 2, 3]), com.ABCFloat64Index) - 
self.assertIsInstance(self.multi_index, com.ABCMultiIndex)
-        self.assertIsInstance(self.datetime_index, com.ABCDatetimeIndex)
-        self.assertIsInstance(self.timedelta_index, com.ABCTimedeltaIndex)
-        self.assertIsInstance(self.period_index, com.ABCPeriodIndex)
+        self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndex)
+        self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCInt64Index)
+        self.assertIsInstance(pd.Float64Index([1, 2, 3]), gt.ABCFloat64Index)
+        self.assertIsInstance(self.multi_index, gt.ABCMultiIndex)
+        self.assertIsInstance(self.datetime_index, gt.ABCDatetimeIndex)
+        self.assertIsInstance(self.timedelta_index, gt.ABCTimedeltaIndex)
+        self.assertIsInstance(self.period_index, gt.ABCPeriodIndex)
         self.assertIsInstance(self.categorical_df.index,
-                              com.ABCCategoricalIndex)
-        self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndexClass)
-        self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCIndexClass)
-        self.assertIsInstance(pd.Series([1, 2, 3]), com.ABCSeries)
-        self.assertIsInstance(self.df, com.ABCDataFrame)
-        self.assertIsInstance(self.df.to_panel(), com.ABCPanel)
-        self.assertIsInstance(self.sparse_series, com.ABCSparseSeries)
-        self.assertIsInstance(self.sparse_array, com.ABCSparseArray)
-        self.assertIsInstance(self.categorical, com.ABCCategorical)
-        self.assertIsInstance(pd.Period('2012', freq='A-DEC'), com.ABCPeriod)
+                              gt.ABCCategoricalIndex)
+        self.assertIsInstance(pd.Index(['a', 'b', 'c']), gt.ABCIndexClass)
+        self.assertIsInstance(pd.Int64Index([1, 2, 3]), gt.ABCIndexClass)
+        self.assertIsInstance(pd.Series([1, 2, 3]), gt.ABCSeries)
+        self.assertIsInstance(self.df, gt.ABCDataFrame)
+        self.assertIsInstance(self.df.to_panel(), gt.ABCPanel)
+        self.assertIsInstance(self.sparse_series, gt.ABCSparseSeries)
+        self.assertIsInstance(self.sparse_array, gt.ABCSparseArray)
+        self.assertIsInstance(self.categorical, gt.ABCCategorical)
+        self.assertIsInstance(pd.Period('2012', freq='A-DEC'), gt.ABCPeriod)


 if __name__ == '__main__':
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
new file mode 100644
index 0000000000000..34d10ee9dfa42
--- /dev/null
+++ b/pandas/tests/types/test_inference.py
@@ -0,0 +1,820 @@
+# -*- coding: utf-8 -*-
+
+"""
+These test the public routines exposed in types/common.py
+related to inference and not otherwise tested in types/test_common.py
+
+"""
+
+import nose
+import collections
+import re
+from datetime import datetime, date, timedelta, time
+import numpy as np
+
+import pandas as pd
+from pandas import lib, tslib
+from pandas import (Series, Index, DataFrame, Timedelta,
+                    DatetimeIndex, TimedeltaIndex, Timestamp,
+                    Panel, Period)
+from pandas.compat import u, PY2, lrange
+from pandas.types import inference
+from pandas.types.common import (is_timedelta64_dtype,
+                                 is_timedelta64_ns_dtype,
+                                 is_number,
+                                 is_integer,
+                                 is_float,
+                                 is_bool,
+                                 is_scalar,
+                                 _ensure_int32)
+from pandas.types.missing import isnull
+from pandas.util import testing as tm
+
+_multiprocess_can_split_ = True
+
+
+def test_is_sequence():
+    is_seq = inference.is_sequence
+    assert (is_seq((1, 2)))
+    assert (is_seq([1, 2]))
+    assert (not is_seq("abcd"))
+    assert (not is_seq(u("abcd")))
+    assert (not is_seq(np.int64))
+
+    class A(object):
+
+        def __getitem__(self):
+            return 1
+
+    assert (not is_seq(A()))
+
+
+def test_is_list_like():
+    passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
+              Series([]), Series(['a']).str)
+    fails = (1, '2', object())
+
+    for p in passes:
+        assert inference.is_list_like(p)
+
+    for f in
fails:
+        assert not inference.is_list_like(f)
+
+
+def test_is_dict_like():
+    passes = [{}, {'A': 1}, Series([1])]
+    fails = ['1', 1, [1, 2], (1, 2), range(2), Index([1])]
+
+    for p in passes:
+        assert inference.is_dict_like(p)
+
+    for f in fails:
+        assert not inference.is_dict_like(f)
+
+
+def test_is_named_tuple():
+    passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), )
+    fails = ((1, 2, 3), 'a', Series({'pi': 3.14}))
+
+    for p in passes:
+        assert inference.is_named_tuple(p)
+
+    for f in fails:
+        assert not inference.is_named_tuple(f)
+
+
+def test_is_hashable():
+
+    # all new-style classes are hashable by default
+    class HashableClass(object):
+        pass
+
+    class UnhashableClass1(object):
+        __hash__ = None
+
+    class UnhashableClass2(object):
+
+        def __hash__(self):
+            raise TypeError("Not hashable")
+
+    hashable = (1,
+                3.14,
+                np.float64(3.14),
+                'a',
+                tuple(),
+                (1, ),
+                HashableClass(), )
+    not_hashable = ([], UnhashableClass1(), )
+    abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), )
+
+    for i in hashable:
+        assert inference.is_hashable(i)
+    for i in not_hashable:
+        assert not inference.is_hashable(i)
+    for i in abc_hashable_not_really_hashable:
+        assert not inference.is_hashable(i)
+
+    # numpy.array is no longer collections.Hashable as of
+    # https://github.com/numpy/numpy/pull/5326, just test
+    # is_hashable()
+    assert not inference.is_hashable(np.array([]))
+
+    # old-style classes in Python 2 don't appear hashable to
+    # collections.Hashable but also seem to support hash() by default
+    if PY2:
+
+        class OldStyleClass():
+            pass
+
+        c = OldStyleClass()
+        assert not isinstance(c, collections.Hashable)
+        assert inference.is_hashable(c)
+        hash(c)  # this will not raise
+
+
+def test_is_re():
+    passes = re.compile('ad'),
+    fails = 'x', 2, 3, object()
+
+    for p in passes:
+        assert inference.is_re(p)
+
+    for f in fails:
+        assert not inference.is_re(f)
+
+
+def test_is_recompilable():
+    passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'),
+              re.compile(r''))
+    fails = 1, [], object()
+
+    for p in passes:
+        assert inference.is_re_compilable(p)
+
+    for f in fails:
+        assert not inference.is_re_compilable(f)
+
+
+class TestInference(tm.TestCase):
+
+    def test_infer_dtype_bytes(self):
+        compare = 'string' if PY2 else 'bytes'
+
+        # string array of bytes
+        arr = np.array(list('abc'), dtype='S1')
+        self.assertEqual(lib.infer_dtype(arr), compare)
+
+        # object array of bytes
+        arr = arr.astype(object)
+        self.assertEqual(lib.infer_dtype(arr), compare)
+
+    def test_isinf_scalar(self):
+        # GH 11352
+        self.assertTrue(lib.isposinf_scalar(float('inf')))
+        self.assertTrue(lib.isposinf_scalar(np.inf))
+        self.assertFalse(lib.isposinf_scalar(-np.inf))
+        self.assertFalse(lib.isposinf_scalar(1))
+        self.assertFalse(lib.isposinf_scalar('a'))
+
+        self.assertTrue(lib.isneginf_scalar(float('-inf')))
+        self.assertTrue(lib.isneginf_scalar(-np.inf))
+        self.assertFalse(lib.isneginf_scalar(np.inf))
+        self.assertFalse(lib.isneginf_scalar(1))
+        self.assertFalse(lib.isneginf_scalar('a'))
+
+    def test_maybe_convert_numeric_infinities(self):
+        # see gh-13274
+        infinities = ['inf', 'inF', 'iNf', 'Inf',
+                      'iNF', 'InF', 'INf', 'INF']
+        na_values = set(['', 'NULL', 'nan'])
+
+        pos = np.array(['inf'], dtype=np.float64)
+        neg = np.array(['-inf'], dtype=np.float64)
+
+        msg = "Unable to parse string"
+
+        for infinity in infinities:
+            for maybe_int in (True, False):
+                out = lib.maybe_convert_numeric(
+                    np.array([infinity], dtype=object),
+                    na_values, maybe_int)
+                
tm.assert_numpy_array_equal(out, pos)
+
+                out = lib.maybe_convert_numeric(
+                    np.array(['-' + infinity], dtype=object),
+                    na_values, maybe_int)
+                tm.assert_numpy_array_equal(out, neg)
+
+                out = lib.maybe_convert_numeric(
+                    np.array([u(infinity)], dtype=object),
+                    na_values, maybe_int)
+                tm.assert_numpy_array_equal(out, pos)
+
+                out = lib.maybe_convert_numeric(
+                    np.array(['+' + infinity], dtype=object),
+                    na_values, maybe_int)
+                tm.assert_numpy_array_equal(out, pos)
+
+                # too many characters
+                with tm.assertRaisesRegexp(ValueError, msg):
+                    lib.maybe_convert_numeric(
+                        np.array(['foo_' + infinity], dtype=object),
+                        na_values, maybe_int)
+
+    def test_maybe_convert_numeric_post_floatify_nan(self):
+        # see gh-13314
+        data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
+        expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)
+        nan_values = set([-999, -999.0])
+
+        for coerce_type in (True, False):
+            out = lib.maybe_convert_numeric(data, nan_values, coerce_type)
+            tm.assert_numpy_array_equal(out, expected)
+
+    def test_convert_infs(self):
+        arr = np.array(['inf', 'inf', 'inf'], dtype='O')
+        result = lib.maybe_convert_numeric(arr, set(), False)
+        self.assertTrue(result.dtype == np.float64)
+
+        arr = np.array(['-inf', '-inf', '-inf'], dtype='O')
+        result = lib.maybe_convert_numeric(arr, set(), False)
+        self.assertTrue(result.dtype == np.float64)
+
+    def test_scientific_no_exponent(self):
+        # See PR 12215
+        arr = np.array(['42E', '2E', '99e', '6e'], dtype='O')
+        result = lib.maybe_convert_numeric(arr, set(), False, True)
+        self.assertTrue(np.all(np.isnan(result)))
+
+    def test_convert_non_hashable(self):
+        # GH13324
+        # make sure that we are handling non-hashables
+        arr = np.array([[10.0, 2], 1.0, 'apple'])
+        result = lib.maybe_convert_numeric(arr, set(), False, True)
+        tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
+
+
+class TestTypeInference(tm.TestCase):
+    _multiprocess_can_split_ = True
+
+    def test_length_zero(self):
+        result = lib.infer_dtype(np.array([], dtype='i4'))
+        self.assertEqual(result, 'integer')
+
+        result = lib.infer_dtype([])
+        self.assertEqual(result, 'empty')
+
+    def test_integers(self):
+        arr = np.array([1, 2, 3, np.int64(4), np.int32(5)], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'integer')
+
+        arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'mixed-integer')
+
+        arr = np.array([1, 2, 3, 4, 5], dtype='i4')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'integer')
+
+    def test_bools(self):
+        arr = np.array([True, False, True, True, True], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'boolean')
+
+        arr = np.array([np.bool_(True), np.bool_(False)], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'boolean')
+
+        arr = np.array([True, False, True, 'foo'], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'mixed')
+
+        arr = np.array([True, False, True], dtype=bool)
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'boolean')
+
+    def test_floats(self):
+        arr = np.array([1., 2., 3., np.float64(4), np.float32(5)], dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'floating')
+
+        arr = np.array([1, 2, 3, np.float64(4), np.float32(5), 'foo'],
+                       dtype='O')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 'mixed-integer')
+
+        arr = np.array([1, 2, 3, 4, 5], dtype='f4')
+        result = lib.infer_dtype(arr)
+        self.assertEqual(result, 
'floating') + + arr = np.array([1, 2, 3, 4, 5], dtype='f8') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'floating') + + def test_string(self): + pass + + def test_unicode(self): + pass + + def test_datetime(self): + + dates = [datetime(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'datetime64') + + def test_infer_dtype_datetime(self): + + arr = np.array([Timestamp('2011-01-01'), + Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([np.datetime64('2011-01-01'), + np.datetime64('2011-01-01')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([datetime(2011, 1, 1), datetime(2012, 2, 1)]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # starts with nan + for n in [pd.NaT, np.nan]: + arr = np.array([n, pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1)]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, pd.Timestamp('2011-01-02'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + arr = np.array([n, np.datetime64('2011-01-02'), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime64') + + arr = np.array([n, datetime(2011, 1, 1), n]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # different type of nat + arr = np.array([np.timedelta64('nat'), + np.datetime64('2011-01-02')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.datetime64('2011-01-02'), + np.timedelta64('nat')], dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + # mixed datetime + arr = np.array([datetime(2011, 1, 1), + pd.Timestamp('2011-01-02')]) + self.assertEqual(lib.infer_dtype(arr), 'datetime') + + # should be datetime? 
+        arr = np.array([np.datetime64('2011-01-01'),
+                        pd.Timestamp('2011-01-02')])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        arr = np.array([pd.Timestamp('2011-01-02'),
+                        np.datetime64('2011-01-01')])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed-integer')
+
+        arr = np.array([np.nan, pd.Timestamp('2011-01-02'), 1.1])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        arr = np.array([np.nan, '2011-01-01', pd.Timestamp('2011-01-02')])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+    def test_infer_dtype_timedelta(self):
+
+        arr = np.array([pd.Timedelta('1 days'),
+                        pd.Timedelta('2 days')])
+        self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        arr = np.array([np.timedelta64(1, 'D'),
+                        np.timedelta64(2, 'D')], dtype=object)
+        self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        arr = np.array([timedelta(1), timedelta(2)])
+        self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        # starts with nan
+        for n in [pd.NaT, np.nan]:
+            arr = np.array([n, Timedelta('1 days')])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([n, np.timedelta64(1, 'D')])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([n, timedelta(1)])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([n, pd.Timedelta('1 days'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([n, np.timedelta64(1, 'D'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([n, timedelta(1), n])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        # different type of nat
+        arr = np.array([np.datetime64('nat'), np.timedelta64(1, 'D')],
+                       dtype=object)
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        arr = np.array([np.timedelta64(1, 'D'), np.datetime64('nat')],
+                       dtype=object)
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+    def test_infer_dtype_all_nan_nat_like(self):
+        arr = np.array([np.nan, np.nan])
+        self.assertEqual(lib.infer_dtype(arr), 'floating')
+
+        # a mix of nan and None results in mixed
+        arr = np.array([np.nan, np.nan, None])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        arr = np.array([None, np.nan, np.nan])
+        self.assertEqual(lib.infer_dtype(arr), 'mixed')
+
+        # pd.NaT
+        arr = np.array([pd.NaT])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime')
+
+        arr = np.array([pd.NaT, np.nan])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime')
+
+        arr = np.array([np.nan, pd.NaT])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime')
+
+        arr = np.array([np.nan, pd.NaT, np.nan])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime')
+
+        arr = np.array([None, pd.NaT, None])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime')
+
+        # np.datetime64(nat)
+        arr = np.array([np.datetime64('nat')])
+        self.assertEqual(lib.infer_dtype(arr), 'datetime64')
+
+        for n in [np.nan, pd.NaT, None]:
+            arr = np.array([n, np.datetime64('nat'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'datetime64')
+
+            arr = np.array([pd.NaT, n, np.datetime64('nat'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'datetime64')
+
+        arr = np.array([np.timedelta64('nat')], dtype=object)
+        self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        for n in [np.nan, pd.NaT, None]:
+            arr = np.array([n, np.timedelta64('nat'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+            arr = np.array([pd.NaT, n, np.timedelta64('nat'), n])
+            self.assertEqual(lib.infer_dtype(arr), 'timedelta')
+
+        # datetime / timedelta mixed 
+ arr = np.array([pd.NaT, np.datetime64('nat'), + np.timedelta64('nat'), np.nan]) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + arr = np.array([np.timedelta64('nat'), np.datetime64('nat')], + dtype=object) + self.assertEqual(lib.infer_dtype(arr), 'mixed') + + def test_is_datetimelike_array_all_nan_nat_like(self): + arr = np.array([np.nan, pd.NaT, np.datetime64('nat')]) + self.assertTrue(lib.is_datetime_array(arr)) + self.assertTrue(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.timedelta64('nat')]) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertTrue(lib.is_timedelta_array(arr)) + self.assertTrue(lib.is_timedelta64_array(arr)) + self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT, np.datetime64('nat'), + np.timedelta64('nat')]) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, pd.NaT]) + self.assertTrue(lib.is_datetime_array(arr)) + self.assertTrue(lib.is_datetime64_array(arr)) + self.assertTrue(lib.is_timedelta_array(arr)) + self.assertTrue(lib.is_timedelta64_array(arr)) + self.assertTrue(lib.is_timedelta_or_timedelta64_array(arr)) + + arr = np.array([np.nan, np.nan], dtype=object) + self.assertFalse(lib.is_datetime_array(arr)) + self.assertFalse(lib.is_datetime64_array(arr)) + self.assertFalse(lib.is_timedelta_array(arr)) + self.assertFalse(lib.is_timedelta64_array(arr)) + self.assertFalse(lib.is_timedelta_or_timedelta64_array(arr)) + + def test_date(self): + + dates = [date(2012, 1, x) for x in range(1, 20)] + index = Index(dates) + self.assertEqual(index.inferred_type, 'date') + + def test_to_object_array_tuples(self): + r = (5, 6) + values = [r] + result = lib.to_object_array_tuples(values) + + try: + # make sure record array works + from collections import namedtuple + record = namedtuple('record', 'x y') + r = record(5, 6) + values = [r] + result = lib.to_object_array_tuples(values) # noqa + except ImportError: + pass + + def test_object(self): + + # GH 7431 + # cannot infer more than this as only a single element + arr = np.array([None], dtype='O') + result = lib.infer_dtype(arr) + self.assertEqual(result, 'mixed') + + def test_to_object_array_width(self): + # see gh-13320 + rows = [[1, 2, 3], [4, 5, 6]] + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array(rows, dtype=object) + out = lib.to_object_array(rows, min_width=1) + tm.assert_numpy_array_equal(out, expected) + + expected = np.array([[1, 2, 3, None, None], + [4, 5, 6, None, None]], dtype=object) + out = lib.to_object_array(rows, min_width=5) + tm.assert_numpy_array_equal(out, expected) + + def test_is_period(self): + self.assertTrue(lib.is_period(pd.Period('2011-01', freq='M'))) + self.assertFalse(lib.is_period(pd.PeriodIndex(['2011-01'], freq='M'))) + self.assertFalse(lib.is_period(pd.Timestamp('2011-01'))) + self.assertFalse(lib.is_period(1)) + self.assertFalse(lib.is_period(np.nan)) + + def test_categorical(self): + + # GH 8974 + from pandas import Categorical, Series + arr = Categorical(list('abc')) + result 
= lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) + result = lib.infer_dtype(arr) + self.assertEqual(result, 'categorical') + + result = lib.infer_dtype(Series(arr)) + self.assertEqual(result, 'categorical') + + +class TestNumberScalar(tm.TestCase): + + def test_is_number(self): + + self.assertTrue(is_number(True)) + self.assertTrue(is_number(1)) + self.assertTrue(is_number(1.1)) + self.assertTrue(is_number(1 + 3j)) + self.assertTrue(is_number(np.bool(False))) + self.assertTrue(is_number(np.int64(1))) + self.assertTrue(is_number(np.float64(1.1))) + self.assertTrue(is_number(np.complex128(1 + 3j))) + self.assertTrue(is_number(np.nan)) + + self.assertFalse(is_number(None)) + self.assertFalse(is_number('x')) + self.assertFalse(is_number(datetime(2011, 1, 1))) + self.assertFalse(is_number(np.datetime64('2011-01-01'))) + self.assertFalse(is_number(Timestamp('2011-01-01'))) + self.assertFalse(is_number(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_number(timedelta(1000))) + self.assertFalse(is_number(Timedelta('1 days'))) + + # questionable + self.assertFalse(is_number(np.bool_(False))) + self.assertTrue(is_number(np.timedelta64(1, 'D'))) + + def test_is_bool(self): + self.assertTrue(is_bool(True)) + self.assertTrue(is_bool(np.bool(False))) + self.assertTrue(is_bool(np.bool_(False))) + + self.assertFalse(is_bool(1)) + self.assertFalse(is_bool(1.1)) + self.assertFalse(is_bool(1 + 3j)) + self.assertFalse(is_bool(np.int64(1))) + self.assertFalse(is_bool(np.float64(1.1))) + self.assertFalse(is_bool(np.complex128(1 + 3j))) + self.assertFalse(is_bool(np.nan)) + self.assertFalse(is_bool(None)) + self.assertFalse(is_bool('x')) + self.assertFalse(is_bool(datetime(2011, 1, 1))) + self.assertFalse(is_bool(np.datetime64('2011-01-01'))) + self.assertFalse(is_bool(Timestamp('2011-01-01'))) + self.assertFalse(is_bool(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_bool(timedelta(1000))) + self.assertFalse(is_bool(np.timedelta64(1, 'D'))) + self.assertFalse(is_bool(Timedelta('1 days'))) + + def test_is_integer(self): + self.assertTrue(is_integer(1)) + self.assertTrue(is_integer(np.int64(1))) + + self.assertFalse(is_integer(True)) + self.assertFalse(is_integer(1.1)) + self.assertFalse(is_integer(1 + 3j)) + self.assertFalse(is_integer(np.bool(False))) + self.assertFalse(is_integer(np.bool_(False))) + self.assertFalse(is_integer(np.float64(1.1))) + self.assertFalse(is_integer(np.complex128(1 + 3j))) + self.assertFalse(is_integer(np.nan)) + self.assertFalse(is_integer(None)) + self.assertFalse(is_integer('x')) + self.assertFalse(is_integer(datetime(2011, 1, 1))) + self.assertFalse(is_integer(np.datetime64('2011-01-01'))) + self.assertFalse(is_integer(Timestamp('2011-01-01'))) + self.assertFalse(is_integer(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_integer(timedelta(1000))) + self.assertFalse(is_integer(Timedelta('1 days'))) + + # questionable + self.assertTrue(is_integer(np.timedelta64(1, 'D'))) + + def test_is_float(self): + self.assertTrue(is_float(1.1)) + self.assertTrue(is_float(np.float64(1.1))) + self.assertTrue(is_float(np.nan)) + + self.assertFalse(is_float(True)) + self.assertFalse(is_float(1)) + self.assertFalse(is_float(1 + 3j)) + self.assertFalse(is_float(np.bool(False))) + self.assertFalse(is_float(np.bool_(False))) + self.assertFalse(is_float(np.int64(1))) + 
self.assertFalse(is_float(np.complex128(1 + 3j))) + self.assertFalse(is_float(None)) + self.assertFalse(is_float('x')) + self.assertFalse(is_float(datetime(2011, 1, 1))) + self.assertFalse(is_float(np.datetime64('2011-01-01'))) + self.assertFalse(is_float(Timestamp('2011-01-01'))) + self.assertFalse(is_float(Timestamp('2011-01-01', + tz='US/Eastern'))) + self.assertFalse(is_float(timedelta(1000))) + self.assertFalse(is_float(np.timedelta64(1, 'D'))) + self.assertFalse(is_float(Timedelta('1 days'))) + + def test_is_timedelta(self): + self.assertTrue(is_timedelta64_dtype('timedelta64')) + self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) + self.assertFalse(is_timedelta64_ns_dtype('timedelta64')) + self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) + + tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') + self.assertTrue(is_timedelta64_dtype(tdi)) + self.assertTrue(is_timedelta64_ns_dtype(tdi)) + self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) + + # Conversion to Int64Index: + self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) + self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))) + + +class Testisscalar(tm.TestCase): + + def test_isscalar_builtin_scalars(self): + self.assertTrue(is_scalar(None)) + self.assertTrue(is_scalar(True)) + self.assertTrue(is_scalar(False)) + self.assertTrue(is_scalar(0.)) + self.assertTrue(is_scalar(np.nan)) + self.assertTrue(is_scalar('foobar')) + self.assertTrue(is_scalar(b'foobar')) + self.assertTrue(is_scalar(u('efoobar'))) + self.assertTrue(is_scalar(datetime(2014, 1, 1))) + self.assertTrue(is_scalar(date(2014, 1, 1))) + self.assertTrue(is_scalar(time(12, 0))) + self.assertTrue(is_scalar(timedelta(hours=1))) + self.assertTrue(is_scalar(pd.NaT)) + + def test_isscalar_builtin_nonscalars(self): + self.assertFalse(is_scalar({})) + self.assertFalse(is_scalar([])) + self.assertFalse(is_scalar([1])) + self.assertFalse(is_scalar(())) + self.assertFalse(is_scalar((1, ))) + self.assertFalse(is_scalar(slice(None))) + self.assertFalse(is_scalar(Ellipsis)) + + def test_isscalar_numpy_array_scalars(self): + self.assertTrue(is_scalar(np.int64(1))) + self.assertTrue(is_scalar(np.float64(1.))) + self.assertTrue(is_scalar(np.int32(1))) + self.assertTrue(is_scalar(np.object_('foobar'))) + self.assertTrue(is_scalar(np.str_('foobar'))) + self.assertTrue(is_scalar(np.unicode_(u('foobar')))) + self.assertTrue(is_scalar(np.bytes_(b'foobar'))) + self.assertTrue(is_scalar(np.datetime64('2014-01-01'))) + self.assertTrue(is_scalar(np.timedelta64(1, 'h'))) + + def test_isscalar_numpy_zerodim_arrays(self): + for zerodim in [np.array(1), np.array('foobar'), + np.array(np.datetime64('2014-01-01')), + np.array(np.timedelta64(1, 'h')), + np.array(np.datetime64('NaT'))]: + self.assertFalse(is_scalar(zerodim)) + self.assertTrue(is_scalar(lib.item_from_zerodim(zerodim))) + + def test_isscalar_numpy_arrays(self): + self.assertFalse(is_scalar(np.array([]))) + self.assertFalse(is_scalar(np.array([[]]))) + self.assertFalse(is_scalar(np.matrix('1; 2'))) + + def test_isscalar_pandas_scalars(self): + self.assertTrue(is_scalar(Timestamp('2014-01-01'))) + self.assertTrue(is_scalar(Timedelta(hours=1))) + self.assertTrue(is_scalar(Period('2014-01-01'))) + + def test_lisscalar_pandas_containers(self): + self.assertFalse(is_scalar(Series())) + self.assertFalse(is_scalar(Series([1]))) + self.assertFalse(is_scalar(DataFrame())) + self.assertFalse(is_scalar(DataFrame([[1]]))) + self.assertFalse(is_scalar(Panel())) + 
self.assertFalse(is_scalar(Panel([[[1]]]))) + self.assertFalse(is_scalar(Index([]))) + self.assertFalse(is_scalar(Index([1]))) + + +def test_datetimeindex_from_empty_datetime64_array(): + for unit in ['ms', 'us', 'ns']: + idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) + assert (len(idx) == 0) + + +def test_nan_to_nat_conversions(): + + df = DataFrame(dict({ + 'A': np.asarray( + lrange(10), dtype='float64'), + 'B': Timestamp('20010101') + })) + df.iloc[3:6, :] = np.nan + result = df.loc[4, 'B'].value + assert (result == tslib.iNaT) + + s = df['B'].copy() + s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + assert (isnull(s[8])) + + # numpy < 1.7.0 is wrong + from distutils.version import LooseVersion + if LooseVersion(np.__version__) >= '1.7.0': + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + + +def test_ensure_int32(): + values = np.arange(10, dtype=np.int32) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + values = np.arange(10, dtype=np.int64) + result = _ensure_int32(values) + assert (result.dtype == np.int32) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_io.py b/pandas/tests/types/test_io.py new file mode 100644 index 0000000000000..545edf8f1386c --- /dev/null +++ b/pandas/tests/types/test_io.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pandas.lib as lib +import pandas.util.testing as tm + +from pandas.compat import long, u + + +class TestParseSQL(tm.TestCase): + + def test_convert_sql_column_floats(self): + arr = np.array([1.5, None, 3, 4.2], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_strings(self): + arr = np.array(['1.5', None, '3', '4.2'], dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array(['1.5', np.nan, '3', '4.2'], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_unicode(self): + arr = np.array([u('1.5'), None, u('3'), u('4.2')], + dtype=object) + result = lib.convert_sql_column(arr) + expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], + dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_ints(self): + arr = np.array([1, 2, 3, 4], dtype='O') + arr2 = np.array([1, 2, 3, 4], dtype='i4').astype('O') + result = lib.convert_sql_column(arr) + result2 = lib.convert_sql_column(arr2) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + self.assert_numpy_array_equal(result2, expected) + + arr = np.array([1, 2, 3, None, 4], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_longs(self): + arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, 4], dtype='i8') + self.assert_numpy_array_equal(result, expected) + + arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_bools(self): + arr = np.array([True, False, True, False], dtype='O') + result = 
lib.convert_sql_column(arr) + expected = np.array([True, False, True, False], dtype=bool) + self.assert_numpy_array_equal(result, expected) + + arr = np.array([True, False, None, False], dtype='O') + result = lib.convert_sql_column(arr) + expected = np.array([True, False, np.nan, False], dtype=object) + self.assert_numpy_array_equal(result, expected) + + def test_convert_sql_column_decimals(self): + from decimal import Decimal + arr = np.array([Decimal('1.5'), None, Decimal('3'), Decimal('4.2')]) + result = lib.convert_sql_column(arr) + expected = np.array([1.5, np.nan, 3, 4.2], dtype='f8') + self.assert_numpy_array_equal(result, expected) + + def test_convert_downcast_int64(self): + from pandas.parser import na_values + + arr = np.array([1, 2, 7, 8, 10], dtype=np.int64) + expected = np.array([1, 2, 7, 8, 10], dtype=np.int8) + + # default argument + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + result = lib.downcast_int64(arr, na_values, use_unsigned=False) + self.assert_numpy_array_equal(result, expected) + + expected = np.array([1, 2, 7, 8, 10], dtype=np.uint8) + result = lib.downcast_int64(arr, na_values, use_unsigned=True) + self.assert_numpy_array_equal(result, expected) + + # still cast to int8 despite use_unsigned=True + # because of the negative number as an element + arr = np.array([1, 2, -7, 8, 10], dtype=np.int64) + expected = np.array([1, 2, -7, 8, 10], dtype=np.int8) + result = lib.downcast_int64(arr, na_values, use_unsigned=True) + self.assert_numpy_array_equal(result, expected) + + arr = np.array([1, 2, 7, 8, 300], dtype=np.int64) + expected = np.array([1, 2, 7, 8, 300], dtype=np.int16) + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + int8_na = na_values[np.int8] + int64_na = na_values[np.int64] + arr = np.array([int64_na, 2, 3, 10, 15], dtype=np.int64) + expected = np.array([int8_na, 2, 3, 10, 15], dtype=np.int8) + result = lib.downcast_int64(arr, na_values) + self.assert_numpy_array_equal(result, expected) + + +if __name__ == '__main__': + import nose + + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_missing.py b/pandas/tests/types/test_missing.py new file mode 100644 index 0000000000000..edcb69de7bfad --- /dev/null +++ b/pandas/tests/types/test_missing.py @@ -0,0 +1,243 @@ +# -*- coding: utf-8 -*- + +import nose +import numpy as np +from datetime import datetime +from pandas.util import testing as tm + +from pandas.core import config as cf +from pandas.compat import u +from pandas.tslib import iNaT +from pandas import (NaT, Float64Index, Series, + DatetimeIndex, TimedeltaIndex, date_range) +from pandas.types.dtypes import DatetimeTZDtype +from pandas.types.missing import (array_equivalent, isnull, notnull, + na_value_for_dtype) + +_multiprocess_can_split_ = True + + +def test_notnull(): + assert notnull(1.) 
+ assert not notnull(None) + assert not notnull(np.NaN) + + with cf.option_context("mode.use_inf_as_null", False): + assert notnull(np.inf) + assert notnull(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notnull(arr) + assert result.all() + + with cf.option_context("mode.use_inf_as_null", True): + assert not notnull(np.inf) + assert not notnull(-np.inf) + + arr = np.array([1.5, np.inf, 3.5, -np.inf]) + result = notnull(arr) + assert result.sum() == 2 + + with cf.option_context("mode.use_inf_as_null", False): + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) + + +def test_isnull(): + assert not isnull(1.) + assert isnull(None) + assert isnull(np.NaN) + assert not isnull(np.inf) + assert not isnull(-np.inf) + + # series + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) + + # frame + for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), + tm.makeMixedDataFrame()]: + result = isnull(df) + expected = df.apply(isnull) + tm.assert_frame_equal(result, expected) + + # panel + for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) + ]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel_equal(result, expected) + + # panel 4d + for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: + result = isnull(p) + expected = p.apply(isnull) + tm.assert_panel4d_equal(result, expected) + + +def test_isnull_lists(): + result = isnull([[False]]) + exp = np.array([[False]]) + assert (np.array_equal(result, exp)) + + result = isnull([[1], [2]]) + exp = np.array([[False], [False]]) + assert (np.array_equal(result, exp)) + + # list of strings / unicode + result = isnull(['foo', 'bar']) + assert (not result.any()) + + result = isnull([u('foo'), u('bar')]) + assert (not result.any()) + + +def test_isnull_nat(): + result = isnull([NaT]) + exp = np.array([True]) + assert (np.array_equal(result, exp)) + + result = isnull(np.array([NaT], dtype=object)) + exp = np.array([True]) + assert (np.array_equal(result, exp)) + + +def test_isnull_numpy_nat(): + arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), + np.datetime64('NaT', 's')]) + result = isnull(arr) + expected = np.array([True] * 4) + tm.assert_numpy_array_equal(result, expected) + + +def test_isnull_datetime(): + assert (not isnull(datetime.now())) + assert notnull(datetime.now()) + + idx = date_range('1/1/1990', periods=20) + assert (notnull(idx).all()) + + idx = np.asarray(idx) + idx[0] = iNaT + idx = DatetimeIndex(idx) + mask = isnull(idx) + assert (mask[0]) + assert (not mask[1:].any()) + + # GH 9129 + pidx = idx.to_period(freq='M') + mask = isnull(pidx) + assert (mask[0]) + assert (not mask[1:].any()) + + mask = isnull(pidx[1:]) + assert (not mask.any()) + + +class TestIsNull(tm.TestCase): + + def test_0d_array(self): + self.assertTrue(isnull(np.array(np.nan))) + self.assertFalse(isnull(np.array(0.0))) + self.assertFalse(isnull(np.array(0))) + # test object dtype + self.assertTrue(isnull(np.array(np.nan, dtype=object))) + self.assertFalse(isnull(np.array(0.0, dtype=object))) + self.assertFalse(isnull(np.array(0, dtype=object))) + + +def test_array_equivalent(): + assert array_equivalent(np.array([np.nan, np.nan]), + np.array([np.nan, np.nan])) + assert array_equivalent(np.array([np.nan, 1, np.nan]), + np.array([np.nan, 1, np.nan])) + assert 
array_equivalent(np.array([np.nan, None], dtype='object'), + np.array([np.nan, None], dtype='object')) + assert array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'), + np.array([np.nan, 1 + 1j], dtype='complex')) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype='complex'), np.array( + [np.nan, 1 + 2j], dtype='complex')) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan])) + assert not array_equivalent( + np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e'])) + assert array_equivalent(Float64Index([0, np.nan]), + Float64Index([0, np.nan])) + assert not array_equivalent( + Float64Index([0, np.nan]), Float64Index([1, np.nan])) + assert array_equivalent(DatetimeIndex([0, np.nan]), + DatetimeIndex([0, np.nan])) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan])) + assert array_equivalent(TimedeltaIndex([0, np.nan]), + TimedeltaIndex([0, np.nan])) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) + assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), + DatetimeIndex([0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( + [1, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) + + +def test_array_equivalent_compat(): + # see gh-13388 + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + assert (array_equivalent(m, n, strict_nan=True)) + assert (array_equivalent(m, n, strict_nan=False)) + + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (4, 3)], dtype=[('a', int), ('b', float)]) + assert (not array_equivalent(m, n, strict_nan=True)) + assert (not array_equivalent(m, n, strict_nan=False)) + + m = np.array([(1, 2), (3, 4)], dtype=[('a', int), ('b', float)]) + n = np.array([(1, 2), (3, 4)], dtype=[('b', int), ('a', float)]) + assert (not array_equivalent(m, n, strict_nan=True)) + assert (not array_equivalent(m, n, strict_nan=False)) + + +def test_array_equivalent_str(): + for dtype in ['O', 'S', 'U']: + assert array_equivalent(np.array(['A', 'B'], dtype=dtype), + np.array(['A', 'B'], dtype=dtype)) + assert not array_equivalent(np.array(['A', 'B'], dtype=dtype), + np.array(['A', 'X'], dtype=dtype)) + + +def test_na_value_for_dtype(): + for dtype in [np.dtype('M8[ns]'), np.dtype('m8[ns]'), + DatetimeTZDtype('datetime64[ns, US/Eastern]')]: + assert na_value_for_dtype(dtype) is NaT + + for dtype in ['u1', 'u2', 'u4', 'u8', + 'i1', 'i2', 'i4', 'i8']: + assert na_value_for_dtype(np.dtype(dtype)) == 0 + + for dtype in ['bool']: + assert na_value_for_dtype(np.dtype(dtype)) is False + + for dtype in ['f2', 'f4', 'f8']: + assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + for dtype in ['O']: + assert np.isnan(na_value_for_dtype(np.dtype(dtype))) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/types/test_types.py b/pandas/tests/types/test_types.py deleted file mode 100644 index b9f6006cab731..0000000000000 --- a/pandas/tests/types/test_types.py +++ /dev/null @@ -1,40 +0,0 @@ 
-# -*- coding: utf-8 -*- -import nose -import numpy as np - -from pandas import NaT -from pandas.types.api import (DatetimeTZDtype, CategoricalDtype, - na_value_for_dtype, pandas_dtype) - - -def test_pandas_dtype(): - - assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype( - 'datetime64[ns, US/Eastern]') - assert pandas_dtype('category') == CategoricalDtype() - for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - assert pandas_dtype(dtype) == np.dtype(dtype) - - -def test_na_value_for_dtype(): - for dtype in [np.dtype('M8[ns]'), np.dtype('m8[ns]'), - DatetimeTZDtype('datetime64[ns, US/Eastern]')]: - assert na_value_for_dtype(dtype) is NaT - - for dtype in ['u1', 'u2', 'u4', 'u8', - 'i1', 'i2', 'i4', 'i8']: - assert na_value_for_dtype(np.dtype(dtype)) == 0 - - for dtype in ['bool']: - assert na_value_for_dtype(np.dtype(dtype)) is False - - for dtype in ['f2', 'f4', 'f8']: - assert np.isnan(na_value_for_dtype(np.dtype(dtype))) - - for dtype in ['O']: - assert np.isnan(na_value_for_dtype(np.dtype(dtype))) - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 075dff9cf6c38..5b66e55eb60b6 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -12,6 +12,21 @@ from pandas import (Categorical, DataFrame, Series, Index, MultiIndex, Timedelta) from pandas.core.frame import _merge_doc +from pandas.types.generic import ABCSeries +from pandas.types.common import (is_datetime64tz_dtype, + is_datetime64_dtype, + needs_i8_conversion, + is_int64_dtype, + is_integer, + is_int_or_datetime_dtype, + is_dtype_equal, + is_bool, + is_list_like, + _ensure_int64, + _ensure_platform_int, + _ensure_object) +from pandas.types.missing import na_value_for_dtype + from pandas.core.generic import NDFrame from pandas.core.index import (_get_combined_index, _ensure_index, _get_consensus_names, @@ -19,18 +34,10 @@ from pandas.core.internals import (items_overlap_with_suffix, concatenate_block_managers) from pandas.util.decorators import Appender, Substitution -from pandas.core.common import (ABCSeries, is_dtype_equal, - is_datetime64_dtype, - is_int64_dtype, - is_integer, - is_bool, - is_list_like, - needs_i8_conversion) import pandas.core.algorithms as algos import pandas.core.common as com import pandas.types.concat as _concat -from pandas.types.api import na_value_for_dtype import pandas.algos as _algos import pandas.hashtable as _hash @@ -436,7 +443,7 @@ def _merger(x, y): # if we DO have duplicates, then # we cannot guarantee order - sorter = com._ensure_platform_int( + sorter = _ensure_platform_int( np.concatenate([groupby.indices[g] for g, _ in groupby])) if len(result) != len(sorter): if check_duplicates: @@ -1111,8 +1118,8 @@ def _get_single_indexer(join_key, index, sort=False): left_key, right_key, count = _factorize_keys(join_key, index, sort=sort) left_indexer, right_indexer = _algos.left_outer_join( - com._ensure_int64(left_key), - com._ensure_int64(right_key), + _ensure_int64(left_key), + _ensure_int64(right_key), count, sort=sort) return left_indexer, right_indexer @@ -1158,18 +1165,17 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): - if com.is_datetime64tz_dtype(lk) and com.is_datetime64tz_dtype(rk): + if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): lk = lk.values rk = rk.values - - if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk): + if is_int_or_datetime_dtype(lk) and 
is_int_or_datetime_dtype(rk): klass = _hash.Int64Factorizer - lk = com._ensure_int64(com._values_from_object(lk)) - rk = com._ensure_int64(com._values_from_object(rk)) + lk = _ensure_int64(com._values_from_object(lk)) + rk = _ensure_int64(com._values_from_object(rk)) else: klass = _hash.Factorizer - lk = com._ensure_object(lk) - rk = com._ensure_object(rk) + lk = _ensure_object(lk) + rk = _ensure_object(rk) rizer = klass(max(len(lk), len(rk))) @@ -1208,10 +1214,10 @@ def _sort_labels(uniques, left, right): reverse_indexer = np.empty(len(sorter), dtype=np.int64) reverse_indexer.put(sorter, np.arange(len(sorter))) - new_left = reverse_indexer.take(com._ensure_platform_int(left)) + new_left = reverse_indexer.take(_ensure_platform_int(left)) np.putmask(new_left, left == -1, -1) - new_right = reverse_indexer.take(com._ensure_platform_int(right)) + new_right = reverse_indexer.take(_ensure_platform_int(right)) np.putmask(new_right, right == -1, -1) return new_left, new_right diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index e1405bc9e6add..3e2b7c3af460e 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -1,6 +1,7 @@ # pylint: disable=E1103 +from pandas.types.common import is_list_like, is_scalar from pandas import Series, DataFrame from pandas.core.index import MultiIndex, Index from pandas.core.groupby import Grouper @@ -9,7 +10,6 @@ from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com -import pandas.lib as lib import numpy as np @@ -95,7 +95,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', values_passed = values is not None if values_passed: - if com.is_list_like(values): + if is_list_like(values): values_multi = True values = list(values) else: @@ -361,7 +361,7 @@ def _all_key(): def _convert_by(by): if by is None: by = [] - elif (lib.isscalar(by) or + elif (is_scalar(by) or isinstance(by, (np.ndarray, Index, Series, Grouper)) or hasattr(by, '__call__')): by = [by] diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index b6c1926c1e7fc..4cf3364a03056 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -11,10 +11,17 @@ import numpy as np +from pandas.types.common import (is_list_like, + is_integer, + is_number, + is_hashable, + is_iterator) +from pandas.types.missing import isnull, notnull + from pandas.util.decorators import cache_readonly, deprecate_kwarg from pandas.core.base import PandasObject -import pandas.core.common as com -from pandas.core.common import AbstractMethodError + +from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series, remove_na @@ -161,7 +168,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', if colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " "simultaneously. Using 'color'") - colors = list(color) if com.is_list_like(color) else color + colors = list(color) if is_list_like(color) else color else: if color_type == 'default': # need to call list() on the result to copy so we don't @@ -336,7 +343,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) - mask = com.notnull(df) + mask = notnull(df) marker = _get_marker_compat(marker) @@ -980,7 +987,7 @@ def _validate_color_args(self): "simultaneously. 
Using 'color'") if 'color' in self.kwds and self.style is not None: - if com.is_list_like(self.style): + if is_list_like(self.style): styles = self.style else: styles = [self.style] @@ -1001,7 +1008,7 @@ def _iter_data(self, data=None, keep_index=False, fillna=None): # TODO: unused? # if self.sort_columns: - # columns = com._try_sort(data.columns) + # columns = _try_sort(data.columns) # else: # columns = data.columns @@ -1099,13 +1106,13 @@ def result(self): Return result axes """ if self.subplots: - if self.layout is not None and not com.is_list_like(self.ax): + if self.layout is not None and not is_list_like(self.ax): return self.axes.reshape(*self.layout) else: return self.axes else: sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (com.is_list_like(self.secondary_y) and + all_sec = (is_list_like(self.secondary_y) and len(self.secondary_y) == self.nseries) if (sec_true or all_sec): # if all data is plotted on secondary, return right axes @@ -1322,7 +1329,7 @@ def _get_xticks(self, convert_period=False): @classmethod def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = com.isnull(y) + mask = isnull(y) if mask.any(): y = np.ma.array(y) y = np.ma.masked_where(mask, y) @@ -1463,8 +1470,8 @@ def match_labels(data, e): err = np.atleast_2d(evalues) err = np.tile(err, (self.nseries, 1)) - elif com.is_list_like(err): - if com.is_iterator(err): + elif is_list_like(err): + if is_iterator(err): err = np.atleast_2d(list(err)) else: # raw error values @@ -1486,7 +1493,7 @@ def match_labels(data, e): if len(err) == 1: err = np.tile(err, (self.nseries, 1)) - elif com.is_number(err): + elif is_number(err): err = np.tile([err], (self.nseries, len(self.data))) else: @@ -1543,9 +1550,9 @@ def __init__(self, data, x, y, **kwargs): MPLPlot.__init__(self, data, **kwargs) if x is None or y is None: raise ValueError(self._kind + ' requires and x and y column') - if com.is_integer(x) and not self.data.columns.holds_integer(): + if is_integer(x) and not self.data.columns.holds_integer(): x = self.data.columns[x] - if com.is_integer(y) and not self.data.columns.holds_integer(): + if is_integer(y) and not self.data.columns.holds_integer(): y = self.data.columns[y] self.x = x self.y = y @@ -1569,7 +1576,7 @@ def __init__(self, data, x, y, s=None, c=None, **kwargs): # the handling of this argument later s = 20 super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) - if com.is_integer(c) and not self.data.columns.holds_integer(): + if is_integer(c) and not self.data.columns.holds_integer(): c = self.data.columns[c] self.c = c @@ -1577,7 +1584,7 @@ def _make_plot(self): x, y, c, data = self.x, self.y, self.c, self.data ax = self.axes[0] - c_is_column = com.is_hashable(c) and c in self.data.columns + c_is_column = is_hashable(c) and c in self.data.columns # plot a colorbar only if a colormap is provided or necessary cb = self.kwds.pop('colorbar', self.colormap or c_is_column) @@ -1629,7 +1636,7 @@ class HexBinPlot(PlanePlot): def __init__(self, data, x, y, C=None, **kwargs): super(HexBinPlot, self).__init__(data, x, y, **kwargs) - if com.is_integer(C) and not self.data.columns.holds_integer(): + if is_integer(C) and not self.data.columns.holds_integer(): C = self.data.columns[C] self.C = C @@ -1912,9 +1919,9 @@ def __init__(self, data, **kwargs): self.ax_pos = self.tick_pos - self.tickoffset def _args_adjust(self): - if com.is_list_like(self.bottom): + if is_list_like(self.bottom): self.bottom = np.array(self.bottom) - if com.is_list_like(self.left): + if 
is_list_like(self.left): self.left = np.array(self.left) @classmethod @@ -2027,18 +2034,18 @@ def __init__(self, data, bins=10, bottom=0, **kwargs): MPLPlot.__init__(self, data, **kwargs) def _args_adjust(self): - if com.is_integer(self.bins): + if is_integer(self.bins): # create common bin edge values = (self.data._convert(datetime=True)._get_numeric_data()) values = np.ravel(values) - values = values[~com.isnull(values)] + values = values[~isnull(values)] hist, self.bins = np.histogram( values, bins=self.bins, range=self.kwds.get('range', None), weights=self.kwds.get('weights', None)) - if com.is_list_like(self.bottom): + if is_list_like(self.bottom): self.bottom = np.array(self.bottom) @classmethod @@ -2046,7 +2053,7 @@ def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, stacking_id=None, **kwds): if column_num == 0: cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~com.isnull(y)] + y = y[~isnull(y)] base = np.zeros(len(bins) - 1) bottom = bottom + \ @@ -2411,7 +2418,7 @@ def _plot(data, x=None, y=None, subplots=False, msg = "{0} requires either y column or 'subplots=True'" raise ValueError(msg.format(kind)) elif y is not None: - if com.is_integer(y) and not data.columns.holds_integer(): + if is_integer(y) and not data.columns.holds_integer(): y = data.columns[y] # converted to series actually. copy to not modify data = data[y].copy() @@ -2420,12 +2427,12 @@ def _plot(data, x=None, y=None, subplots=False, else: if isinstance(data, DataFrame): if x is not None: - if com.is_integer(x) and not data.columns.holds_integer(): + if is_integer(x) and not data.columns.holds_integer(): x = data.columns[x] data = data.set_index(x) if y is not None: - if com.is_integer(y) and not data.columns.holds_integer(): + if is_integer(y) and not data.columns.holds_integer(): y = data.columns[y] label = kwds['label'] if 'label' in kwds else y series = data[y].copy() # Don't modify @@ -2434,7 +2441,7 @@ def _plot(data, x=None, y=None, subplots=False, for kw in ['xerr', 'yerr']: if (kw in kwds) and \ (isinstance(kwds[kw], string_types) or - com.is_integer(kwds[kw])): + is_integer(kwds[kw])): try: kwds[kw] = data[kwds[kw]] except (IndexError, KeyError, TypeError): @@ -2897,7 +2904,7 @@ def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, layout=layout) _axes = _flatten(axes) - for i, col in enumerate(com._try_sort(data.columns)): + for i, col in enumerate(_try_sort(data.columns)): ax = _axes[i] ax.hist(data[col].dropna().values, bins=bins, **kwds) ax.set_title(col) @@ -3345,7 +3352,7 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, if ax is None: fig = plt.figure(**fig_kw) else: - if com.is_list_like(ax): + if is_list_like(ax): ax = _flatten(ax) if layout is not None: warnings.warn("When passing multiple axes, layout keyword is " @@ -3487,7 +3494,7 @@ def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): def _flatten(axes): - if not com.is_list_like(axes): + if not is_list_like(axes): return np.array([axes]) elif isinstance(axes, (np.ndarray, Index)): return axes.ravel() diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index b0bbf8ba70354..62bbfc2f630a5 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -2,12 +2,14 @@ Quantilization functions and related stuff """ +from pandas.types.missing import isnull +from pandas.types.common import (is_float, is_integer, + is_scalar) + from pandas.core.api import Series from pandas.core.categorical import Categorical import pandas.core.algorithms as algos 
-import pandas.core.common as com import pandas.core.nanops as nanops -import pandas.lib as lib from pandas.compat import zip import numpy as np @@ -80,7 +82,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, """ # NOTE: this binning code is changed a bit from histogram for var(x) == 0 if not np.iterable(bins): - if lib.isscalar(bins) and bins < 1: + if is_scalar(bins) and bins < 1: raise ValueError("`bins` should be a positive integer.") try: # for array-like sz = x.size @@ -164,7 +166,7 @@ def qcut(x, q, labels=None, retbins=False, precision=3): >>> pd.qcut(range(5), 4, labels=False) array([0, 0, 1, 2, 3], dtype=int64) """ - if com.is_integer(q): + if is_integer(q): quantiles = np.linspace(0, 1, q + 1) else: quantiles = q @@ -194,7 +196,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, if include_lowest: ids[x == bins[0]] = 1 - na_mask = com.isnull(x) | (ids == len(bins)) | (ids == 0) + na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: @@ -264,7 +266,7 @@ def _format_label(x, precision=3): fmt_str = '%%.%dg' % precision if np.isinf(x): return str(x) - elif com.is_float(x): + elif is_float(x): frac, whole = np.modf(x) sgn = '-' if x < 0 else '' whole = abs(whole) diff --git a/pandas/tools/util.py b/pandas/tools/util.py index d70904e1bf286..b8b28663387cc 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -1,6 +1,12 @@ import numpy as np import pandas.lib as lib +from pandas.types.common import (is_number, + is_numeric_dtype, + is_datetime_or_timedelta_dtype, + _ensure_object) +from pandas.types.cast import _possibly_downcast_to_dtype + import pandas as pd from pandas.compat import reduce from pandas.core.index import Index @@ -141,7 +147,7 @@ def to_numeric(arg, errors='raise', downcast=None): elif isinstance(arg, (list, tuple)): values = np.array(arg, dtype='O') elif np.isscalar(arg): - if com.is_number(arg): + if is_number(arg): return arg is_scalar = True values = np.array([arg], dtype='O') @@ -151,14 +157,13 @@ def to_numeric(arg, errors='raise', downcast=None): values = arg try: - if com.is_numeric_dtype(values): + if is_numeric_dtype(values): pass - elif com.is_datetime_or_timedelta_dtype(values): + elif is_datetime_or_timedelta_dtype(values): values = values.astype(np.int64) else: - values = com._ensure_object(values) + values = _ensure_object(values) coerce_numeric = False if errors in ('ignore', 'raise') else True - values = lib.maybe_convert_numeric(values, set(), coerce_numeric=coerce_numeric) @@ -168,7 +173,7 @@ def to_numeric(arg, errors='raise', downcast=None): # attempt downcast only if the data has been successfully converted # to a numerical dtype and if a downcast method has been specified - if downcast is not None and com.is_numeric_dtype(values): + if downcast is not None and is_numeric_dtype(values): typecodes = None if downcast in ('integer', 'signed'): @@ -189,7 +194,7 @@ def to_numeric(arg, errors='raise', downcast=None): # from smallest to largest for dtype in typecodes: if np.dtype(dtype).itemsize < values.dtype.itemsize: - values = com._possibly_downcast_to_dtype( + values = _possibly_downcast_to_dtype( values, dtype) # successful conversion diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 4bafac873ea09..fe0440170383b 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -9,10 +9,16 @@ from pandas.compat.numpy import function as nv import numpy as np - +from pandas.types.common import (is_integer, is_float, + 
is_bool_dtype, _ensure_int64, + is_scalar, + is_list_like) +from pandas.types.generic import (ABCIndex, ABCSeries, + ABCPeriodIndex, ABCIndexClass) +from pandas.types.missing import isnull from pandas.core import common as com, algorithms -from pandas.core.common import (is_integer, is_float, is_bool_dtype, - AbstractMethodError) +from pandas.core.common import AbstractMethodError + import pandas.formats.printing as printing import pandas.tslib as tslib import pandas._period as prlib @@ -111,9 +117,9 @@ def _join_i8_wrapper(joinf, dtype, with_indexers=True): @staticmethod def wrapper(left, right): - if isinstance(left, (np.ndarray, com.ABCIndex, com.ABCSeries)): + if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)): left = left.view('i8') - if isinstance(right, (np.ndarray, com.ABCIndex, com.ABCSeries)): + if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)): right = right.view('i8') results = joinf(left, right) if with_indexers: @@ -133,10 +139,10 @@ def _evaluate_compare(self, other, op): # coerce to a similar object if not isinstance(other, type(self)): - if not com.is_list_like(other): + if not is_list_like(other): # scalar other = [other] - elif lib.isscalar(lib.item_from_zerodim(other)): + elif is_scalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] other = type(self)(other) @@ -174,7 +180,7 @@ def _ensure_localized(self, result): # reconvert to local tz if getattr(self, 'tz', None) is not None: - if not isinstance(result, com.ABCIndexClass): + if not isinstance(result, ABCIndexClass): result = self._simple_new(result) result = result.tz_localize(self.tz) return result @@ -202,7 +208,7 @@ def _format_with_header(self, header, **kwargs): def __contains__(self, key): try: res = self.get_loc(key) - return lib.isscalar(res) or type(res) == slice or np.any(res) + return is_scalar(res) or type(res) == slice or np.any(res) except (KeyError, TypeError, ValueError): return False @@ -213,7 +219,7 @@ def __getitem__(self, key): """ is_int = is_integer(key) - if lib.isscalar(key) and not is_int: + if is_scalar(key) and not is_int: raise ValueError getitem = self._data.__getitem__ @@ -282,7 +288,7 @@ def _nat_new(self, box=True): return result attribs = self._get_attributes_dict() - if not isinstance(self, com.ABCPeriodIndex): + if not isinstance(self, ABCPeriodIndex): attribs['freq'] = None return self._simple_new(result, **attribs) @@ -312,7 +318,7 @@ def sort_values(self, return_indexer=False, ascending=True): attribs = self._get_attributes_dict() freq = attribs['freq'] - if freq is not None and not isinstance(self, com.ABCPeriodIndex): + if freq is not None and not isinstance(self, ABCPeriodIndex): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: @@ -328,7 +334,7 @@ def sort_values(self, return_indexer=False, ascending=True): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = com._ensure_int64(indices) + indices = _ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) if isinstance(maybe_slice, slice): @@ -340,7 +346,7 @@ def take(self, indices, axis=0, allow_fill=True, na_value=tslib.iNaT) # keep freq in PeriodIndex, reset otherwise - freq = self.freq if isinstance(self, com.ABCPeriodIndex) else None + freq = self.freq if isinstance(self, ABCPeriodIndex) else None return self._shallow_copy(taken, freq=freq) def get_duplicates(self): @@ -545,7 +551,7 @@ def _convert_scalar_indexer(self, key, kind=None): # we don't allow 
integer/float indexing for loc # we don't allow float indexing for ix/getitem - if lib.isscalar(key): + if is_scalar(key): is_int = is_integer(key) is_flt = is_float(key) if kind in ['loc'] and (is_int or is_flt): @@ -591,7 +597,7 @@ def __add__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(other) - elif com.is_integer(other): + elif is_integer(other): return self.shift(other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._add_datelike(other) @@ -619,7 +625,7 @@ def __sub__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(-other) - elif com.is_integer(other): + elif is_integer(other): return self.shift(-other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._sub_datelike(other) @@ -791,9 +797,9 @@ def summary(self, name=None): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ - if lib.isscalar(other) and com.isnull(other): + if lib.isscalar(other) and isnull(other): other = tslib.iNaT - elif isinstance(other, com.ABCIndexClass): + elif isinstance(other, ABCIndexClass): # convert tz if needed if getattr(other, 'tz', None) is not None: diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 8937e83c7009a..46e8bd43e8ff8 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -3,19 +3,21 @@ """ import numpy as np + +from pandas.types.common import (_NS_DTYPE, _TD_DTYPE, + is_period_arraylike, + is_datetime_arraylike, is_integer_dtype, + is_datetime64_dtype, is_datetime64tz_dtype, + is_timedelta64_dtype, is_categorical_dtype, + is_list_like) + from pandas.core.base import PandasDelegate, NoNewAttributesMixin -from pandas.core import common as com from pandas.tseries.index import DatetimeIndex from pandas._period import IncompatibleFrequency # flake8: noqa from pandas.tseries.period import PeriodIndex from pandas.tseries.tdi import TimedeltaIndex from pandas import tslib from pandas.core.algorithms import take_1d -from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike, - is_datetime_arraylike, is_integer_dtype, - is_list_like, - is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, is_categorical_dtype) def is_datetimelike(data): @@ -129,7 +131,7 @@ def _delegate_method(self, name, *args, **kwargs): method = getattr(self.values, name) result = method(*args, **kwargs) - if not com.is_list_like(result): + if not is_list_like(result): return result result = Series(result, index=self.index, name=self.name) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 78b185ae8cf31..fc23f4f99449b 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -10,6 +10,14 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular + +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + ) + from pandas.compat import lrange import pandas.compat as compat import pandas.lib as lib @@ -73,8 +81,8 @@ class TimeConverter(units.ConversionInterface): @staticmethod def convert(value, unit, axis): valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or com.is_integer(value) or - com.is_float(value)): + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): return time2num(value) if isinstance(value, Index): return 
value.map(time2num) @@ -129,14 +137,14 @@ def convert(values, units, axis): raise TypeError('Axis must have `freq` set to convert to Periods') valid_types = (compat.string_types, datetime, Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or com.is_integer(values) or - com.is_float(values)): + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): return values.asfreq(axis.freq).values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) - if com.is_period_arraylike(values): + if is_period_arraylike(values): return PeriodIndex(values, freq=axis.freq).values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] @@ -149,7 +157,7 @@ def get_datevalue(date, freq): elif isinstance(date, (compat.string_types, datetime, pydt.date, pydt.time)): return Period(date, freq).ordinal - elif (com.is_integer(date) or com.is_float(date) or + elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): return date elif date is None: @@ -163,8 +171,8 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. """ - if (isinstance(dt, (np.ndarray, Index, Series)) and - com.is_datetime64_ns_dtype(dt)): + if (isinstance(dt, (np.ndarray, Index, Series) + ) and is_datetime64_ns_dtype(dt)): base = dates.epoch2num(dt.asi8 / 1.0E9) else: base = dates.date2num(dt) @@ -188,7 +196,7 @@ def try_parse(values): return _dt_to_float_ordinal(lib.Timestamp(values)) elif isinstance(values, pydt.time): return dates.date2num(values) - elif (com.is_integer(values) or com.is_float(values)): + elif (is_integer(values) or is_float(values)): return values elif isinstance(values, compat.string_types): return try_parse(values) @@ -198,7 +206,7 @@ def try_parse(values): if not isinstance(values, np.ndarray): values = com._asarray_tuplesafe(values) - if com.is_integer_dtype(values) or com.is_float_dtype(values): + if is_integer_dtype(values) or is_float_dtype(values): return values try: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 3f1d0c6d969a6..e2132deb97d64 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -6,12 +6,17 @@ import numpy as np +from pandas.types.generic import ABCSeries +from pandas.types.common import (is_integer, + is_period_arraylike, + is_timedelta64_dtype, + is_datetime64_dtype) + import pandas.core.algorithms as algos from pandas.core.algorithms import unique from pandas.tseries.offsets import DateOffset from pandas.util.decorators import cache_readonly import pandas.tseries.offsets as offsets -import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib from pandas.tslib import Timedelta @@ -255,8 +260,8 @@ def get_freq_code(freqstr): freqstr = (freqstr.rule_code, freqstr.n) if isinstance(freqstr, tuple): - if (com.is_integer(freqstr[0]) and - com.is_integer(freqstr[1])): + if (is_integer(freqstr[0]) and + is_integer(freqstr[1])): # e.g., freqstr = (2000, 1) return freqstr else: @@ -265,13 +270,13 @@ def get_freq_code(freqstr): code = _period_str_to_code(freqstr[0]) stride = freqstr[1] except: - if com.is_integer(freqstr[1]): + if is_integer(freqstr[1]): raise code = _period_str_to_code(freqstr[1]) stride = freqstr[0] return code, stride - if com.is_integer(freqstr): + if is_integer(freqstr): return (freqstr, 1) 
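    # A rough sketch of the accepted `freqstr` forms at this point
    # (illustrative only -- the concrete base codes are assumptions,
    # not values taken from this patch):
    #   get_freq_code('D')      -> (<daily base code>, 1)
    #   get_freq_code('2D')     -> (<daily base code>, 2)
    #   get_freq_code(('D', 2)) -> (<daily base code>, 2)
    #   get_freq_code((2, 'D')) -> (<daily base code>, 2)  # swapped tuple form
    #   get_freq_code(6000)     -> (6000, 1)  # bare integers pass through above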
base, stride = _base_and_stride(freqstr) @@ -843,16 +848,16 @@ def infer_freq(index, warn=True): """ import pandas as pd - if isinstance(index, com.ABCSeries): + if isinstance(index, ABCSeries): values = index._values - if not (com.is_datetime64_dtype(values) or - com.is_timedelta64_dtype(values) or + if not (is_datetime64_dtype(values) or + is_timedelta64_dtype(values) or values.dtype == object): raise TypeError("cannot infer freq from a non-convertible " "dtype on a Series of {0}".format(index.dtype)) index = values - if com.is_period_arraylike(index): + if is_period_arraylike(index): raise TypeError("PeriodIndex given. Check the `freq` attribute " "instead of using infer_freq.") elif isinstance(index, pd.TimedeltaIndex): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9b36bc5907066..47bb69b8d7ad6 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -6,13 +6,25 @@ from datetime import timedelta import numpy as np from pandas.core.base import _shared_docs -from pandas.core.common import (_INT64_DTYPE, _NS_DTYPE, _maybe_box, - _values_from_object, ABCSeries, - DatetimeTZDtype, PerformanceWarning, - is_datetimetz, is_datetime64_dtype, - is_datetime64_ns_dtype, is_dtype_equal, - is_float, is_integer, is_integer_dtype, - is_object_dtype, is_string_dtype) + +from pandas.types.common import (_NS_DTYPE, _INT64_DTYPE, + is_object_dtype, is_datetime64_dtype, + is_datetimetz, is_dtype_equal, + is_integer, is_float, + is_integer_dtype, + is_datetime64_ns_dtype, + is_bool_dtype, + is_string_dtype, + is_list_like, + is_scalar, + _ensure_int64) +from pandas.types.generic import ABCSeries +from pandas.types.dtypes import DatetimeTZDtype +from pandas.types.missing import isnull + +import pandas.types.concat as _concat +from pandas.core.common import (_values_from_object, _maybe_box, + PerformanceWarning) from pandas.core.index import Index, Int64Index, Float64Index from pandas.indexes.base import _index_shared_docs @@ -27,7 +39,6 @@ from pandas.util.decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) import pandas.core.common as com -import pandas.types.concat as _concat import pandas.tseries.offsets as offsets import pandas.tseries.tools as tools @@ -87,7 +98,7 @@ def wrapper(self, other): isinstance(other, compat.string_types)): other = _to_m8(other, tz=self.tz) result = func(other) - if com.isnull(other): + if isnull(other): result.fill(nat_result) else: if isinstance(other, list): @@ -109,7 +120,7 @@ def wrapper(self, other): result[self._isnan] = nat_result # support of bool dtype indexers - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return Index(result) @@ -277,7 +288,7 @@ def __new__(cls, data=None, ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): - if lib.isscalar(data): + if is_scalar(data): raise ValueError('DatetimeIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -537,7 +548,7 @@ def _generate(cls, start, end, periods, name, offset, index = _generate_regular_range(start, end, periods, offset) if tz is not None and getattr(index, 'tz', None) is None: - index = tslib.tz_localize_to_utc(com._ensure_int64(index), tz, + index = tslib.tz_localize_to_utc(_ensure_int64(index), tz, ambiguous=ambiguous) index = index.view(_NS_DTYPE) @@ -601,7 +612,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, return cls(values, name=name, freq=freq, tz=tz, dtype=dtype, **kwargs).values elif not is_datetime64_dtype(values): - values = 
com._ensure_int64(values).view(_NS_DTYPE) + values = _ensure_int64(values).view(_NS_DTYPE) result = object.__new__(cls) result._data = values @@ -1683,7 +1694,7 @@ def inferred_type(self): def dtype(self): if self.tz is None: return _NS_DTYPE - return com.DatetimeTZDtype('ns', self.tz) + return DatetimeTZDtype('ns', self.tz) @property def is_all_dates(self): @@ -1787,9 +1798,9 @@ def delete(self, loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: - if com.is_list_like(loc): + if is_list_like(loc): loc = lib.maybe_indices_to_slice( - com._ensure_int64(np.array(loc)), len(self)) + _ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d0b1fd746d0d5..f12ba8083f545 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -3,9 +3,9 @@ from pandas import compat import numpy as np +from pandas.types.generic import ABCSeries, ABCDatetimeIndex, ABCPeriod from pandas.tseries.tools import to_datetime, normalize_date -from pandas.core.common import (ABCSeries, ABCDatetimeIndex, ABCPeriod, - AbstractMethodError) +from pandas.core.common import AbstractMethodError # import after tools, dateutil check from dateutil.relativedelta import relativedelta, weekday diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 750e7a5553ef6..45f634050a5d8 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -1,6 +1,24 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta import numpy as np + + +from pandas.core import common as com +from pandas.types.common import (is_integer, + is_float, + is_object_dtype, + is_integer_dtype, + is_float_dtype, + is_scalar, + is_timedelta64_dtype, + is_bool_dtype, + _ensure_int64, + _ensure_object) + +from pandas.types.generic import ABCSeries +from pandas.types.missing import isnull + + import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.index import DatetimeIndex, Int64Index, Index @@ -17,15 +35,10 @@ from pandas.core.base import _shared_docs from pandas.indexes.base import _index_shared_docs -import pandas.core.common as com -from pandas.core.common import ( - _maybe_box, _values_from_object, ABCSeries, is_float, is_integer, - is_integer_dtype, is_object_dtype, isnull) from pandas import compat from pandas.compat.numpy import function as nv from pandas.util.decorators import Appender, cache_readonly, Substitution from pandas.lib import Timedelta -import pandas.lib as lib import pandas.tslib as tslib import pandas.core.missing as missing from pandas.compat import zip, u @@ -209,7 +222,7 @@ def _generate_range(cls, start, end, periods, freq, fields): def _from_arraylike(cls, data, freq, tz): if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): - if lib.isscalar(data) or isinstance(data, Period): + if is_scalar(data) or isinstance(data, Period): raise ValueError('PeriodIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -219,13 +232,13 @@ def _from_arraylike(cls, data, freq, tz): data = list(data) try: - data = com._ensure_int64(data) + data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') data = np.array([Period(x, freq=freq).ordinal for x in data], dtype=np.int64) except (TypeError, ValueError): - data = com._ensure_object(data) 
+ data = _ensure_object(data) if freq is None: freq = period.extract_freq(data) @@ -242,7 +255,7 @@ def _from_arraylike(cls, data, freq, tz): base1, base2, 1) else: - if freq is None and com.is_object_dtype(data): + if freq is None and is_object_dtype(data): # must contain Period instance and thus extract ordinals freq = period.extract_freq(data) data = period.extract_ordinals(data, freq) @@ -256,9 +269,9 @@ def _from_arraylike(cls, data, freq, tz): data = dt64arr_to_periodarr(data, freq, tz) else: try: - data = com._ensure_int64(data) + data = _ensure_int64(data) except (TypeError, ValueError): - data = com._ensure_object(data) + data = _ensure_object(data) data = period.extract_ordinals(data, freq) return data, freq @@ -266,9 +279,9 @@ def _from_arraylike(cls, data, freq, tz): @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): - if not com.is_integer_dtype(values): + if not is_integer_dtype(values): values = np.array(values, copy=False) - if (len(values) > 0 and com.is_float_dtype(values)): + if (len(values) > 0 and is_float_dtype(values)): raise TypeError("PeriodIndex can't take floats") else: return PeriodIndex(values, name=name, freq=freq, **kwargs) @@ -339,7 +352,7 @@ def __array_wrap__(self, result, context=None): # from here because numpy catches. raise ValueError(msg.format(func.__name__)) - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return PeriodIndex(result, freq=self.freq, name=self.name) @@ -580,9 +593,9 @@ def _maybe_convert_timedelta(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) elif isinstance(other, np.ndarray): - if com.is_integer_dtype(other): + if is_integer_dtype(other): return other - elif com.is_timedelta64_dtype(other): + elif is_timedelta64_dtype(other): offset = frequencies.to_offset(self.freq) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -657,10 +670,11 @@ def get_value(self, series, key): Fast lookup of value from 1-dimensional ndarray. 
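        (A rough usage sketch: assuming ``s`` is a Series indexed by this
        PeriodIndex, ``self.get_value(s, '2005-01')`` first tries the plain
        engine lookup and only then falls back to parsing the string into a
        Period ordinal.)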
Only use this if you know what you're doing """ - s = _values_from_object(series) + s = com._values_from_object(series) try: - return _maybe_box(self, super(PeriodIndex, self).get_value(s, key), - series, key) + return com._maybe_box(self, + super(PeriodIndex, self).get_value(s, key), + series, key) except (KeyError, IndexError): try: asdt, parsed, reso = parse_time_string(key, self.freq) @@ -683,16 +697,16 @@ def get_value(self, series, key): return series[key] elif grp == freqn: key = Period(asdt, freq=self.freq).ordinal - return _maybe_box(self, self._engine.get_value(s, key), - series, key) + return com._maybe_box(self, self._engine.get_value(s, key), + series, key) else: raise KeyError(key) except TypeError: pass key = Period(key, self.freq).ordinal - return _maybe_box(self, self._engine.get_value(s, key), - series, key) + return com._maybe_box(self, self._engine.get_value(s, key), + series, key) def get_indexer(self, target, method=None, limit=None, tolerance=None): if hasattr(target, 'freq') and target.freq != self.freq: @@ -849,7 +863,7 @@ def _apply_meta(self, rawarr): def __getitem__(self, key): getitem = self._data.__getitem__ - if lib.isscalar(key): + if is_scalar(key): val = getitem(key) return Period(ordinal=val, freq=self.freq) else: diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index dbc0078b67ae7..f9fb51ebf710c 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -2,11 +2,20 @@ from datetime import timedelta import numpy as np -from pandas.core.common import (ABCSeries, _TD_DTYPE, _maybe_box, - _values_from_object, isnull, - is_integer, is_float, is_integer_dtype, - is_object_dtype, is_timedelta64_dtype, - is_timedelta64_ns_dtype) +from pandas.types.common import (_TD_DTYPE, + is_integer, is_float, + is_bool_dtype, + is_list_like, + is_scalar, + is_integer_dtype, + is_object_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + _ensure_int64) +from pandas.types.missing import isnull +from pandas.types.generic import ABCSeries +from pandas.core.common import _maybe_box, _values_from_object + from pandas.core.index import Index, Int64Index import pandas.compat as compat from pandas.compat import u @@ -44,10 +53,10 @@ def wrapper(self, other): # failed to parse as timedelta raise TypeError(msg.format(type(other))) result = func(other) - if com.isnull(other): + if isnull(other): result.fill(nat_result) else: - if not com.is_list_like(other): + if not is_list_like(other): raise TypeError(msg.format(type(other))) other = TimedeltaIndex(other).values @@ -66,7 +75,7 @@ def wrapper(self, other): result[self._isnan] = nat_result # support of bool dtype indexers - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result return Index(result) @@ -175,7 +184,7 @@ def __new__(cls, data=None, unit=None, data = to_timedelta(data, unit=unit, box=False) if not isinstance(data, (np.ndarray, Index, ABCSeries)): - if lib.isscalar(data): + if is_scalar(data): raise ValueError('TimedeltaIndex() must be called with a ' 'collection of some kind, %s was passed' % repr(data)) @@ -261,7 +270,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): if values.dtype == np.object_: values = tslib.array_to_timedelta64(values) if values.dtype != _TD_DTYPE: - values = com._ensure_int64(values).view(_TD_DTYPE) + values = _ensure_int64(values).view(_TD_DTYPE) result = object.__new__(cls) result._data = values @@ -905,9 +914,9 @@ def delete(self, loc): if loc in (0, -len(self), -1, len(self) - 1): freq = self.freq else: - if com.is_list_like(loc): + if 
is_list_like(loc): loc = lib.maybe_indices_to_slice( - com._ensure_int64(np.array(loc)), len(self)) + _ensure_int64(np.array(loc)), len(self)) if isinstance(loc, slice) and loc.step in (1, None): if (loc.start in (0, None) or loc.stop in (len(self), None)): freq = self.freq diff --git a/pandas/tseries/tests/test_bin_groupby.py b/pandas/tseries/tests/test_bin_groupby.py index 6b6c468b7c391..08c0833be0cd6 100644 --- a/pandas/tseries/tests/test_bin_groupby.py +++ b/pandas/tseries/tests/test_bin_groupby.py @@ -3,12 +3,12 @@ from numpy import nan import numpy as np +from pandas.types.common import _ensure_int64 from pandas import Index, isnull from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm import pandas.lib as lib import pandas.algos as algos -from pandas.core import common as com def test_series_grouper(): @@ -90,8 +90,8 @@ def _check(dtype): bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - labels = com._ensure_int64(np.repeat(np.arange(3), - np.diff(np.r_[0, bins]))) + labels = _ensure_int64(np.repeat(np.arange(3), + np.diff(np.r_[0, bins]))) func = getattr(algos, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 807fb86b1b4da..591fa19aad585 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -4326,10 +4326,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='period[D]') val = series[3] - self.assertTrue(com.isnull(val)) + self.assertTrue(isnull(val)) series[2] = val - self.assertTrue(com.isnull(series[2])) + self.assertTrue(isnull(series[2])) def test_NaT_cast(self): result = Series([np.nan]).astype('period[D]') diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 2236d20975eee..518f69485004c 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -11,10 +11,11 @@ import pandas.util.testing as tm from pandas import (Series, DataFrame, Panel, Index, isnull, notnull, Timestamp) + +from pandas.types.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict from pandas.core.base import SpecificationError -from pandas.core.common import (ABCSeries, ABCDataFrame, - UnsupportedFunctionCall) +from pandas.core.common import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS from pandas.tseries.frequencies import to_offset diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e594d31e57296..299ec374567e7 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -12,6 +12,7 @@ import pandas.lib as lib import pandas.tslib as tslib +from pandas.types.common import is_datetime64_ns_dtype import pandas as pd import pandas.compat as compat import pandas.core.common as com @@ -2282,7 +2283,7 @@ def test_to_datetime_tz_psycopg2(self): i = pd.DatetimeIndex([ '2000-01-01 08:00:00+00:00' ], tz=psycopg2.tz.FixedOffsetTimezone(offset=-300, name=None)) - self.assertFalse(com.is_datetime64_ns_dtype(i)) + self.assertFalse(is_datetime64_ns_dtype(i)) # tz coerceion result = pd.to_datetime(i, errors='coerce') diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 71a041d5139a2..470aafafec547 100644 --- 
a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -5,6 +5,7 @@ import numpy as np import pytz +from pandas.types.dtypes import DatetimeTZDtype from pandas import (Index, Series, DataFrame, isnull, Timestamp) from pandas import DatetimeIndex, to_datetime, NaT @@ -17,7 +18,6 @@ from pytz import NonExistentTimeError import pandas.util.testing as tm -from pandas.types.api import DatetimeTZDtype from pandas.util.testing import assert_frame_equal, set_timezone from pandas.compat import lrange, zip diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 5a28218500858..7f28ec86ec40d 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -4,9 +4,11 @@ import numpy as np import pandas.tslib as tslib -from pandas.core.common import (ABCSeries, is_integer_dtype, - is_timedelta64_dtype, is_list_like, - _ensure_object, ABCIndexClass) +from pandas.types.common import (_ensure_object, + is_integer_dtype, + is_timedelta64_dtype, + is_list_like) +from pandas.types.generic import ABCSeries, ABCIndexClass from pandas.util.decorators import deprecate_kwarg diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index efb8590dfccf4..067e8ec19f644 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -4,8 +4,17 @@ import pandas.lib as lib import pandas.tslib as tslib -import pandas.core.common as com -from pandas.core.common import ABCIndexClass, ABCSeries, ABCDataFrame + +from pandas.types.common import (_ensure_object, + is_datetime64_ns_dtype, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_integer_dtype, + is_list_like) +from pandas.types.generic import (ABCIndexClass, ABCSeries, + ABCDataFrame) +from pandas.types.missing import notnull + import pandas.compat as compat from pandas.util.decorators import deprecate_kwarg @@ -161,7 +170,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element - non_nan_elements = com.notnull(arr).nonzero()[0] + non_nan_elements = notnull(arr).nonzero()[0] if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) @@ -307,7 +316,7 @@ def _convert_listlike(arg, box, format, name=None): arg = np.array(arg, dtype='O') # these are shortcutable - if com.is_datetime64_ns_dtype(arg): + if is_datetime64_ns_dtype(arg): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz='utc' if utc else None, @@ -317,7 +326,7 @@ def _convert_listlike(arg, box, format, name=None): return arg - elif com.is_datetime64tz_dtype(arg): + elif is_datetime64tz_dtype(arg): if not isinstance(arg, DatetimeIndex): return DatetimeIndex(arg, tz='utc' if utc else None) if utc: @@ -342,7 +351,7 @@ def _convert_listlike(arg, box, format, name=None): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = com._ensure_object(arg) + arg = _ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: @@ -399,7 +408,7 @@ def _convert_listlike(arg, box, format, name=None): require_iso8601=require_iso8601 ) - if com.is_datetime64_dtype(result) and box: + if is_datetime64_dtype(result) and box: result = DatetimeIndex(result, tz='utc' if utc else None, name=name) @@ -424,7 +433,7 @@ def _convert_listlike(arg, box, format, name=None): return _assemble_from_unit_mappings(arg, errors=errors) elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, box, format, 
name=arg.name) - elif com.is_list_like(arg): + elif is_list_like(arg): return _convert_listlike(arg, box, format) return _convert_listlike(np.array([arg]), box, format)[0] @@ -511,7 +520,7 @@ def coerce(values): values = to_numeric(values, errors=errors) # prevent overflow in case of int8 or int16 - if com.is_integer_dtype(values): + if is_integer_dtype(values): values = values.astype('int64', copy=False) return values @@ -574,7 +583,7 @@ def calc_with_mask(carg, mask): # a float with actual np.nan try: carg = arg.astype(np.float64) - return calc_with_mask(carg, com.notnull(carg)) + return calc_with_mask(carg, notnull(carg)) except: pass @@ -654,7 +663,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): def _guess_time_format_for_array(arr): # Try to guess the format based on the first non-NaN element - non_nan_elements = com.notnull(arr).nonzero()[0] + non_nan_elements = notnull(arr).nonzero()[0] if len(non_nan_elements): element = arr[non_nan_elements[0]] for time_format in _time_formats: @@ -705,7 +714,7 @@ def _convert_listlike(arg, format): raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') - arg = com._ensure_object(arg) + arg = _ensure_object(arg) if infer_time_format and format is None: format = _guess_time_format_for_array(arg) @@ -762,7 +771,7 @@ def _convert_listlike(arg, format): return Series(values, index=arg.index, name=arg.name) elif isinstance(arg, ABCIndexClass): return _convert_listlike(arg, format) - elif com.is_list_like(arg): + elif is_list_like(arg): return _convert_listlike(arg, format) return _convert_listlike(np.array([arg]), format)[0] diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 7e314657cb25c..98a93d22b09a6 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,6 +1,6 @@ from pandas.compat import lrange import numpy as np -import pandas.core.common as com +from pandas.types.common import _ensure_platform_int from pandas.core.frame import DataFrame import pandas.core.nanops as nanops @@ -69,7 +69,7 @@ def pivot_annual(series, freq=None): raise NotImplementedError(freq) flat_index = (year - years.min()) * width + offset - flat_index = com._ensure_platform_int(flat_index) + flat_index = _ensure_platform_int(flat_index) values = np.empty((len(years), width)) values.fill(np.nan) diff --git a/pandas/types/api.py b/pandas/types/api.py index 721d8d29bba8b..2d68e041f632e 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -1,75 +1,54 @@ # flake8: noqa import numpy as np -from pandas.compat import string_types -from .dtypes import (CategoricalDtype, CategoricalDtypeType, - DatetimeTZDtype, DatetimeTZDtypeType) -from .generic import (ABCIndex, ABCInt64Index, ABCRangeIndex, - ABCFloat64Index, ABCMultiIndex, - ABCDatetimeIndex, - ABCTimedeltaIndex, ABCPeriodIndex, - ABCCategoricalIndex, - ABCIndexClass, - ABCSeries, ABCDataFrame, ABCPanel, - ABCSparseSeries, ABCSparseArray, - ABCCategorical, ABCPeriod, - ABCGeneric) - -def pandas_dtype(dtype): - """ - Converts input into a pandas only dtype object or a numpy dtype object. 
- - Parameters - ---------- - dtype : object to be converted - - Returns - ------- - np.dtype or a pandas dtype - """ - if isinstance(dtype, DatetimeTZDtype): - return dtype - elif isinstance(dtype, CategoricalDtype): - return dtype - elif isinstance(dtype, string_types): - try: - return DatetimeTZDtype.construct_from_string(dtype) - except TypeError: - pass - - try: - return CategoricalDtype.construct_from_string(dtype) - except TypeError: - pass - - return np.dtype(dtype) - -def na_value_for_dtype(dtype): - """ - Return a dtype compat na value - - Parameters - ---------- - dtype : string / dtype - - Returns - ------- - dtype compat na value - """ - - from pandas.core import common as com - from pandas import NaT - dtype = pandas_dtype(dtype) - - if (com.is_datetime64_dtype(dtype) or - com.is_datetime64tz_dtype(dtype) or - com.is_timedelta64_dtype(dtype)): - return NaT - elif com.is_float_dtype(dtype): - return np.nan - elif com.is_integer_dtype(dtype): - return 0 - elif com.is_bool_dtype(dtype): - return False - return np.nan +from .common import (pandas_dtype, + is_dtype_equal, + is_extension_type, + + # categorical + is_categorical, + is_categorical_dtype, + + # datetimelike + is_datetimetz, + is_datetime64_dtype, + is_datetime64tz_dtype, + is_datetime64_any_dtype, + is_datetime64_ns_dtype, + is_timedelta64_dtype, + is_timedelta64_ns_dtype, + + # string-like + is_string_dtype, + is_object_dtype, + + # sparse + is_sparse, + + # numeric types + is_scalar, + is_sparse, + is_bool, + is_integer, + is_float, + is_complex, + is_number, + is_any_int_dtype, + is_integer_dtype, + is_int64_dtype, + is_numeric_dtype, + is_float_dtype, + is_floating_dtype, + is_bool_dtype, + is_complex_dtype, + + # like + is_re, + is_re_compilable, + is_dict_like, + is_iterator, + is_list_like, + is_hashable, + is_named_tuple, + is_sequence) diff --git a/pandas/types/cast.py b/pandas/types/cast.py new file mode 100644 index 0000000000000..e55cb91d36430 --- /dev/null +++ b/pandas/types/cast.py @@ -0,0 +1,860 @@ +""" routings for casting """ + +from datetime import datetime, timedelta +import numpy as np +from pandas import lib, tslib +from pandas.tslib import iNaT +from pandas.compat import string_types, text_type, PY3 +from .common import (_ensure_object, is_bool, is_integer, is_float, + is_complex, is_datetimetz, is_categorical_dtype, + is_extension_type, is_object_dtype, + is_datetime64tz_dtype, is_datetime64_dtype, + is_timedelta64_dtype, is_dtype_equal, + is_float_dtype, is_complex_dtype, + is_integer_dtype, is_datetime_or_timedelta_dtype, + is_scalar, + _string_dtypes, + _coerce_to_dtype, + _ensure_int8, _ensure_int16, + _ensure_int32, _ensure_int64, + _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE, + _DATELIKE_DTYPES, _POSSIBLY_CAST_DTYPES) +from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries +from .missing import isnull, notnull +from .inference import is_list_like + +_int8_max = np.iinfo(np.int8).max +_int16_max = np.iinfo(np.int16).max +_int32_max = np.iinfo(np.int32).max +_int64_max = np.iinfo(np.int64).max + + +def _possibly_convert_platform(values): + """ try to do platform conversion, allow ndarray or list here """ + + if isinstance(values, (list, tuple)): + values = lib.list_to_object_array(values) + if getattr(values, 'dtype', None) == np.object_: + if hasattr(values, '_values'): + values = values._values + values = lib.maybe_convert_objects(values) + + return values + + +def _possibly_downcast_to_dtype(result, dtype): + """ try to cast to the specified dtype (e.g. 
convert back to bool/int + or could be an astype of float64->float32 + """ + + if is_scalar(result): + return result + + def trans(x): + return x + + if isinstance(dtype, string_types): + if dtype == 'infer': + inferred_type = lib.infer_dtype(_ensure_object(result.ravel())) + if inferred_type == 'boolean': + dtype = 'bool' + elif inferred_type == 'integer': + dtype = 'int64' + elif inferred_type == 'datetime64': + dtype = 'datetime64[ns]' + elif inferred_type == 'timedelta64': + dtype = 'timedelta64[ns]' + + # try to upcast here + elif inferred_type == 'floating': + dtype = 'int64' + if issubclass(result.dtype.type, np.number): + + def trans(x): # noqa + return x.round() + else: + dtype = 'object' + + if isinstance(dtype, string_types): + dtype = np.dtype(dtype) + + try: + + # don't allow upcasts here (except if empty) + if dtype.kind == result.dtype.kind: + if (result.dtype.itemsize <= dtype.itemsize and + np.prod(result.shape)): + return result + + if issubclass(dtype.type, np.floating): + return result.astype(dtype) + elif dtype == np.bool_ or issubclass(dtype.type, np.integer): + + # if we don't have any elements, just astype it + if not np.prod(result.shape): + return trans(result).astype(dtype) + + # do a test on the first element, if it fails then we are done + r = result.ravel() + arr = np.array([r[0]]) + + # if we have any nulls, then we are done + if isnull(arr).any() or not np.allclose(arr, + trans(arr).astype(dtype)): + return result + + # a comparable, e.g. a Decimal may slip in here + elif not isinstance(r[0], (np.integer, np.floating, np.bool, int, + float, bool)): + return result + + if (issubclass(result.dtype.type, (np.object_, np.number)) and + notnull(result).all()): + new_result = trans(result).astype(dtype) + try: + if np.allclose(new_result, result): + return new_result + except: + + # comparison of an object dtype with a number type could + # hit here + if (new_result == result).all(): + return new_result + + # a datetimelike + elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']: + try: + result = result.astype(dtype) + except: + if dtype.tz: + # convert to datetime and change timezone + from pandas import to_datetime + result = to_datetime(result).tz_localize(dtype.tz) + + except: + pass + + return result + + +def _maybe_upcast_putmask(result, mask, other): + """ + A safe version of putmask that potentially upcasts the result + + Parameters + ---------- + result : ndarray + The destination array. This will be mutated in-place if no upcasting is + necessary. 
+ mask : boolean ndarray + other : ndarray or scalar + The source array or value + + Returns + ------- + result : ndarray + changed : boolean + Set to true if the result array was upcasted + """ + + if mask.any(): + # Two conversions for date-like dtypes that can't be done automatically + # in np.place: + # NaN -> NaT + # integer or integer array -> date-like array + if result.dtype in _DATELIKE_DTYPES: + if is_scalar(other): + if isnull(other): + other = result.dtype.type('nat') + elif is_integer(other): + other = np.array(other, dtype=result.dtype) + elif is_integer_dtype(other): + other = np.array(other, dtype=result.dtype) + + def changeit(): + + # try to directly set by expanding our array to full + # length of the boolean + try: + om = other[mask] + om_at = om.astype(result.dtype) + if (om == om_at).all(): + new_result = result.values.copy() + new_result[mask] = om_at + result[:] = new_result + return result, False + except: + pass + + # we are forced to change the dtype of the result as the input + # isn't compatible + r, _ = _maybe_upcast(result, fill_value=other, copy=True) + np.place(r, mask, other) + + return r, True + + # we want to decide whether place will work + # if we have nans in the False portion of our mask then we need to + # upcast (possibly), otherwise we DON't want to upcast (e.g. if we + # have values, say integers, in the success portion then it's ok to not + # upcast) + new_dtype, _ = _maybe_promote(result.dtype, other) + if new_dtype != result.dtype: + + # we have a scalar or len 0 ndarray + # and its nan and we are changing some values + if (is_scalar(other) or + (isinstance(other, np.ndarray) and other.ndim < 1)): + if isnull(other): + return changeit() + + # we have an ndarray and the masking has nans in it + else: + + if isnull(other[mask]).any(): + return changeit() + + try: + np.place(result, mask, other) + except: + return changeit() + + return result, False + + +def _maybe_promote(dtype, fill_value=np.nan): + + # if we passed an array here, determine the fill value by dtype + if isinstance(fill_value, np.ndarray): + if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): + fill_value = iNaT + else: + + # we need to change to object type as our + # fill_value is of object type + if fill_value.dtype == np.object_: + dtype = np.dtype(np.object_) + fill_value = np.nan + + # returns tuple of (dtype, fill_value) + if issubclass(dtype.type, (np.datetime64, np.timedelta64)): + # for now: refuse to upcast datetime64 + # (this is because datetime64 will not implicitly upconvert + # to object correctly as of numpy 1.6.1) + if isnull(fill_value): + fill_value = iNaT + else: + if issubclass(dtype.type, np.datetime64): + try: + fill_value = lib.Timestamp(fill_value).value + except: + # the proper thing to do here would probably be to upcast + # to object (but numpy 1.6.1 doesn't do this properly) + fill_value = iNaT + elif issubclass(dtype.type, np.timedelta64): + try: + fill_value = lib.Timedelta(fill_value).value + except: + # as for datetimes, cannot upcast to object + fill_value = iNaT + else: + fill_value = iNaT + elif is_datetimetz(dtype): + if isnull(fill_value): + fill_value = iNaT + elif is_float(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, np.integer): + dtype = np.float64 + elif is_bool(fill_value): + if not issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif is_integer(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, 
np.integer): + # upcast to prevent overflow + arr = np.asarray(fill_value) + if arr != arr.astype(dtype): + dtype = arr.dtype + elif is_complex(fill_value): + if issubclass(dtype.type, np.bool_): + dtype = np.object_ + elif issubclass(dtype.type, (np.integer, np.floating)): + dtype = np.complex128 + elif fill_value is None: + if is_float_dtype(dtype) or is_complex_dtype(dtype): + fill_value = np.nan + elif is_integer_dtype(dtype): + dtype = np.float64 + fill_value = np.nan + elif is_datetime_or_timedelta_dtype(dtype): + fill_value = iNaT + else: + dtype = np.object_ + else: + dtype = np.object_ + + # in case we have a string that looked like a number + if is_categorical_dtype(dtype): + pass + elif is_datetimetz(dtype): + pass + elif issubclass(np.dtype(dtype).type, string_types): + dtype = np.object_ + + return dtype, fill_value + + +def _infer_dtype_from_scalar(val): + """ interpret the dtype from a scalar """ + + dtype = np.object_ + + # a 1-element ndarray + if isinstance(val, np.ndarray): + if val.ndim != 0: + raise ValueError( + "invalid ndarray passed to _infer_dtype_from_scalar") + + dtype = val.dtype + val = val.item() + + elif isinstance(val, string_types): + + # If we create an empty array using a string to infer + # the dtype, NumPy will only allocate one character per entry + # so this is kind of bad. Alternately we could use np.repeat + # instead of np.empty (but then you still don't want things + # coming out as np.str_! + + dtype = np.object_ + + elif isinstance(val, (np.datetime64, + datetime)) and getattr(val, 'tzinfo', None) is None: + val = lib.Timestamp(val).value + dtype = np.dtype('M8[ns]') + + elif isinstance(val, (np.timedelta64, timedelta)): + val = lib.Timedelta(val).value + dtype = np.dtype('m8[ns]') + + elif is_bool(val): + dtype = np.bool_ + + elif is_integer(val): + if isinstance(val, np.integer): + dtype = type(val) + else: + dtype = np.int64 + + elif is_float(val): + if isinstance(val, np.floating): + dtype = type(val) + else: + dtype = np.float64 + + elif is_complex(val): + dtype = np.complex_ + + return dtype, val + + +def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): + """ provide explict type promotion and coercion + + Parameters + ---------- + values : the ndarray that we want to maybe upcast + fill_value : what we want to fill with + dtype : if None, then use the dtype of the values, else coerce to this type + copy : if True always make a copy even if no upcast is required + """ + + if is_extension_type(values): + if copy: + values = values.copy() + else: + if dtype is None: + dtype = values.dtype + new_dtype, fill_value = _maybe_promote(dtype, fill_value) + if new_dtype != values.dtype: + values = values.astype(new_dtype) + elif copy: + values = values.copy() + + return values, fill_value + + +def _possibly_cast_item(obj, item, dtype): + chunk = obj[item] + + if chunk.values.dtype != dtype: + if dtype in (np.object_, np.bool_): + obj[item] = chunk.astype(np.object_) + elif not issubclass(dtype, (np.integer, np.bool_)): # pragma: no cover + raise ValueError("Unexpected dtype encountered: %s" % dtype) + + +def _invalidate_string_dtypes(dtype_set): + """Change string like dtypes to object for + ``DataFrame.select_dtypes()``. + """ + non_string_dtypes = dtype_set - _string_dtypes + if non_string_dtypes != dtype_set: + raise TypeError("string dtypes are not allowed, use 'object' instead") + + +def _maybe_convert_string_to_object(values): + """ + + Convert string-like and string-like array to convert object dtype. 
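+    For example (a sketch of the two branches below): a bare ``'foo'``
+    becomes ``np.array(['foo'], dtype=object)``, and an ndarray whose dtype
+    subclasses ``np.string_`` / ``np.unicode_`` is recast via
+    ``astype(object)``.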
+ This is to avoid numpy to handle the array as str dtype. + """ + if isinstance(values, string_types): + values = np.array([values], dtype=object) + elif (isinstance(values, np.ndarray) and + issubclass(values.dtype.type, (np.string_, np.unicode_))): + values = values.astype(object) + return values + + +def _maybe_convert_scalar(values): + """ + Convert a python scalar to the appropriate numpy dtype if possible + This avoids numpy directly converting according to platform preferences + """ + if is_scalar(values): + dtype, values = _infer_dtype_from_scalar(values) + try: + values = dtype(values) + except TypeError: + pass + return values + + +def _coerce_indexer_dtype(indexer, categories): + """ coerce the indexer input array to the smallest dtype possible """ + l = len(categories) + if l < _int8_max: + return _ensure_int8(indexer) + elif l < _int16_max: + return _ensure_int16(indexer) + elif l < _int32_max: + return _ensure_int32(indexer) + return _ensure_int64(indexer) + + +def _coerce_to_dtypes(result, dtypes): + """ + given a dtypes and a result set, coerce the result elements to the + dtypes + """ + if len(result) != len(dtypes): + raise AssertionError("_coerce_to_dtypes requires equal len arrays") + + from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type + + def conv(r, dtype): + try: + if isnull(r): + pass + elif dtype == _NS_DTYPE: + r = lib.Timestamp(r) + elif dtype == _TD_DTYPE: + r = _coerce_scalar_to_timedelta_type(r) + elif dtype == np.bool_: + # messy. non 0/1 integers do not get converted. + if is_integer(r) and r not in [0, 1]: + return int(r) + r = bool(r) + elif dtype.kind == 'f': + r = float(r) + elif dtype.kind == 'i': + r = int(r) + except: + pass + + return r + + return [conv(r, dtype) for r, dtype in zip(result, dtypes)] + + +def _astype_nansafe(arr, dtype, copy=True): + """ return a view if copy is False, but + need to be very careful as the result shape could change! 
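+    A sketch of the view behaviour on one concrete branch (names are
+    illustrative): for a timedelta64[ns] input, an ``np.int64`` target is
+    returned as a view regardless of ``copy``, so writes propagate back:
+
+        arr = np.array([1, 2], dtype='m8[ns]')
+        out = _astype_nansafe(arr, np.int64)  # arr.view('int64')
+        out[0] = 0                            # also mutates arr
+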
""" + if not isinstance(dtype, np.dtype): + dtype = _coerce_to_dtype(dtype) + + if issubclass(dtype.type, text_type): + # in Py3 that's str, in Py2 that's unicode + return lib.astype_unicode(arr.ravel()).reshape(arr.shape) + elif issubclass(dtype.type, string_types): + return lib.astype_str(arr.ravel()).reshape(arr.shape) + elif is_datetime64_dtype(arr): + if dtype == object: + return tslib.ints_to_pydatetime(arr.view(np.int64)) + elif dtype == np.int64: + return arr.view(dtype) + elif dtype != _NS_DTYPE: + raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % + (arr.dtype, dtype)) + return arr.astype(_NS_DTYPE) + elif is_timedelta64_dtype(arr): + if dtype == np.int64: + return arr.view(dtype) + elif dtype == object: + return tslib.ints_to_pytimedelta(arr.view(np.int64)) + + # in py3, timedelta64[ns] are int64 + elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or + (not PY3 and dtype != _TD_DTYPE)): + + # allow frequency conversions + if dtype.kind == 'm': + mask = isnull(arr) + result = arr.astype(dtype).astype(np.float64) + result[mask] = np.nan + return result + + raise TypeError("cannot astype a timedelta from [%s] to [%s]" % + (arr.dtype, dtype)) + + return arr.astype(_TD_DTYPE) + elif (np.issubdtype(arr.dtype, np.floating) and + np.issubdtype(dtype, np.integer)): + + if np.isnan(arr).any(): + raise ValueError('Cannot convert NA to integer') + elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer): + # work around NumPy brokenness, #1987 + return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + + if copy: + return arr.astype(dtype) + return arr.view(dtype) + + +def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, + convert_timedeltas=True, copy=True): + """ if we have an object dtype, try to coerce dates and/or numbers """ + + # if we have passed in a list or scalar + if isinstance(values, (list, tuple)): + values = np.array(values, dtype=np.object_) + if not hasattr(values, 'dtype'): + values = np.array([values], dtype=np.object_) + + # convert dates + if convert_dates and values.dtype == np.object_: + + # we take an aggressive stance and convert to datetime64[ns] + if convert_dates == 'coerce': + new_values = _possibly_cast_to_datetime(values, 'M8[ns]', + errors='coerce') + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + else: + values = lib.maybe_convert_objects(values, + convert_datetime=convert_dates) + + # convert timedeltas + if convert_timedeltas and values.dtype == np.object_: + + if convert_timedeltas == 'coerce': + from pandas.tseries.timedeltas import to_timedelta + new_values = to_timedelta(values, coerce=True) + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + else: + values = lib.maybe_convert_objects( + values, convert_timedelta=convert_timedeltas) + + # convert to numeric + if values.dtype == np.object_: + if convert_numeric: + try: + new_values = lib.maybe_convert_numeric(values, set(), + coerce_numeric=True) + + # if we are all nans then leave me alone + if not isnull(new_values).all(): + values = new_values + + except: + pass + else: + # soft-conversion + values = lib.maybe_convert_objects(values) + + values = values.copy() if copy else values + + return values + + +def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, + coerce=False, copy=True): + """ if we have an object dtype, try to coerce dates and/or numbers """ + + conversion_count = sum((datetime, 
numeric, timedelta)) + if conversion_count == 0: + raise ValueError('At least one of datetime, numeric or timedelta must ' + 'be True.') + elif conversion_count > 1 and coerce: + raise ValueError("Only one of 'datetime', 'numeric' or " + "'timedelta' can be True when when coerce=True.") + + if isinstance(values, (list, tuple)): + # List or scalar + values = np.array(values, dtype=np.object_) + elif not hasattr(values, 'dtype'): + values = np.array([values], dtype=np.object_) + elif not is_object_dtype(values.dtype): + # If not object, do not attempt conversion + values = values.copy() if copy else values + return values + + # If 1 flag is coerce, ensure 2 others are False + if coerce: + # Immediate return if coerce + if datetime: + from pandas import to_datetime + return to_datetime(values, errors='coerce', box=False) + elif timedelta: + from pandas import to_timedelta + return to_timedelta(values, errors='coerce', box=False) + elif numeric: + from pandas import to_numeric + return to_numeric(values, errors='coerce') + + # Soft conversions + if datetime: + values = lib.maybe_convert_objects(values, convert_datetime=datetime) + + if timedelta and is_object_dtype(values.dtype): + # Object check to ensure only run if previous did not convert + values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) + + if numeric and is_object_dtype(values.dtype): + try: + converted = lib.maybe_convert_numeric(values, set(), + coerce_numeric=True) + # If all NaNs, then do not-alter + values = converted if not isnull(converted).all() else values + values = values.copy() if copy else values + except: + pass + + return values + + +def _possibly_castable(arr): + # return False to force a non-fastpath + + # check datetime64[ns]/timedelta64[ns] are valid + # otherwise try to coerce + kind = arr.dtype.kind + if kind == 'M' or kind == 'm': + return arr.dtype in _DATELIKE_DTYPES + + return arr.dtype.name not in _POSSIBLY_CAST_DTYPES + + +def _possibly_infer_to_datetimelike(value, convert_dates=False): + """ + we might have a array (or single object) that is datetime like, + and no dtype is passed don't change the value unless we find a + datetime/timedelta set + + this is pretty strict in that a datetime/timedelta is REQUIRED + in addition to possible nulls/string likes + + ONLY strings are NOT datetimelike + + Parameters + ---------- + value : np.array / Series / Index / list-like + convert_dates : boolean, default False + if True try really hard to convert dates (such as datetime.date), other + leave inferred dtype 'date' alone + + """ + + if isinstance(value, (ABCDatetimeIndex, ABCPeriodIndex)): + return value + elif isinstance(value, ABCSeries): + if isinstance(value._values, ABCDatetimeIndex): + return value._values + + v = value + + if not is_list_like(v): + v = [v] + v = np.array(v, copy=False) + shape = v.shape + if not v.ndim == 1: + v = v.ravel() + + if len(v): + + def _try_datetime(v): + # safe coerce to datetime64 + try: + v = tslib.array_to_datetime(v, errors='raise') + except ValueError: + + # we might have a sequence of the same-datetimes with tz's + # if so coerce to a DatetimeIndex; if they are not the same, + # then these stay as object dtype + try: + from pandas import to_datetime + return to_datetime(v) + except: + pass + + except: + pass + + return v.reshape(shape) + + def _try_timedelta(v): + # safe coerce to timedelta64 + + # will try first with a string & object conversion + from pandas import to_timedelta + try: + return to_timedelta(v)._values.reshape(shape) + except: + return 
v + + # do a quick inference for perf + sample = v[:min(3, len(v))] + inferred_type = lib.infer_dtype(sample) + + if (inferred_type in ['datetime', 'datetime64'] or + (convert_dates and inferred_type in ['date'])): + value = _try_datetime(v) + elif inferred_type in ['timedelta', 'timedelta64']: + value = _try_timedelta(v) + + # It's possible to have nulls intermixed within the datetime or + # timedelta. These will in general have an inferred_type of 'mixed', + # so have to try both datetime and timedelta. + + # try timedelta first to avoid spurious datetime conversions + # e.g. '00:00:01' is a timedelta but technically is also a datetime + elif inferred_type in ['mixed']: + + if lib.is_possible_datetimelike_array(_ensure_object(v)): + value = _try_timedelta(v) + if lib.infer_dtype(value) in ['mixed']: + value = _try_datetime(v) + + return value + + +def _possibly_cast_to_datetime(value, dtype, errors='raise'): + """ try to cast the array/value to a datetimelike dtype, converting float + nan to iNaT + """ + from pandas.tseries.timedeltas import to_timedelta + from pandas.tseries.tools import to_datetime + + if dtype is not None: + if isinstance(dtype, string_types): + dtype = np.dtype(dtype) + + is_datetime64 = is_datetime64_dtype(dtype) + is_datetime64tz = is_datetime64tz_dtype(dtype) + is_timedelta64 = is_timedelta64_dtype(dtype) + + if is_datetime64 or is_datetime64tz or is_timedelta64: + + # force the dtype if needed + if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): + if dtype.name == 'datetime64[ns]': + dtype = _NS_DTYPE + else: + raise TypeError("cannot convert datetimelike to " + "dtype [%s]" % dtype) + elif is_datetime64tz: + + # our NaT doesn't support tz's + # this will coerce to DatetimeIndex with + # a matching dtype below + if is_scalar(value) and isnull(value): + value = [value] + + elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): + if dtype.name == 'timedelta64[ns]': + dtype = _TD_DTYPE + else: + raise TypeError("cannot convert timedeltalike to " + "dtype [%s]" % dtype) + + if is_scalar(value): + if value == tslib.iNaT or isnull(value): + value = tslib.iNaT + else: + value = np.array(value, copy=False) + + # have a scalar array-like (e.g. 
NaT) + if value.ndim == 0: + value = tslib.iNaT + + # we have an array of datetime or timedeltas & nulls + elif np.prod(value.shape) or not is_dtype_equal(value.dtype, + dtype): + try: + if is_datetime64: + value = to_datetime(value, errors=errors)._values + elif is_datetime64tz: + # input has to be UTC at this point, so just + # localize + value = to_datetime( + value, + errors=errors).tz_localize(dtype.tz) + elif is_timedelta64: + value = to_timedelta(value, errors=errors)._values + except (AttributeError, ValueError, TypeError): + pass + + # coerce datetimelike to object + elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype): + if is_object_dtype(dtype): + ints = np.asarray(value).view('i8') + return tslib.ints_to_pydatetime(ints) + + # we have a non-castable dtype that was passed + raise TypeError('Cannot cast datetime64 to %s' % dtype) + + else: + + is_array = isinstance(value, np.ndarray) + + # catch a datetime/timedelta that is not of ns variety + # and no coercion specified + if is_array and value.dtype.kind in ['M', 'm']: + dtype = value.dtype + + if dtype.kind == 'M' and dtype != _NS_DTYPE: + value = value.astype(_NS_DTYPE) + + elif dtype.kind == 'm' and dtype != _TD_DTYPE: + value = to_timedelta(value) + + # only do this if we have an array and the dtype of the array is not + # setup already we are not an integer/object, so don't bother with this + # conversion + elif not (is_array and not (issubclass(value.dtype.type, np.integer) or + value.dtype == np.object_)): + value = _possibly_infer_to_datetimelike(value) + + return value diff --git a/pandas/types/common.py b/pandas/types/common.py new file mode 100644 index 0000000000000..9d0ccaac843ef --- /dev/null +++ b/pandas/types/common.py @@ -0,0 +1,448 @@ +""" common type operations """ + +import numpy as np +from pandas.compat import string_types, text_type, binary_type +from pandas import lib, algos +from .dtypes import (CategoricalDtype, CategoricalDtypeType, + DatetimeTZDtype, DatetimeTZDtypeType, + ExtensionDtype) +from .generic import (ABCCategorical, ABCPeriodIndex, + ABCDatetimeIndex, ABCSeries, + ABCSparseArray, ABCSparseSeries) +from .inference import is_integer, is_string_like +from .inference import * # noqa + + +_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name + for t in ['O', 'int8', 'uint8', 'int16', 'uint16', + 'int32', 'uint32', 'int64', 'uint64']]) + +_NS_DTYPE = np.dtype('M8[ns]') +_TD_DTYPE = np.dtype('m8[ns]') +_INT64_DTYPE = np.dtype(np.int64) +_DATELIKE_DTYPES = set([np.dtype(t) + for t in ['M8[ns]', 'M8[ns]', + 'm8[ns]', 'm8[ns]']]) + +_ensure_float64 = algos.ensure_float64 +_ensure_float32 = algos.ensure_float32 + + +def _ensure_float(arr): + if issubclass(arr.dtype.type, (np.integer, np.bool_)): + arr = arr.astype(float) + return arr + +_ensure_int64 = algos.ensure_int64 +_ensure_int32 = algos.ensure_int32 +_ensure_int16 = algos.ensure_int16 +_ensure_int8 = algos.ensure_int8 +_ensure_platform_int = algos.ensure_platform_int +_ensure_object = algos.ensure_object + + +def is_object_dtype(arr_or_dtype): + tipo = _get_dtype_type(arr_or_dtype) + return issubclass(tipo, np.object_) + + +def is_sparse(array): + """ return if we are a sparse array """ + return isinstance(array, (ABCSparseArray, ABCSparseSeries)) + + +def is_categorical(array): + """ return if we are a categorical possibility """ + return isinstance(array, ABCCategorical) or is_categorical_dtype(array) + + +def is_datetimetz(array): + """ return if we are a datetime with tz array """ + return ((isinstance(array, ABCDatetimeIndex) and + 
diff --git a/pandas/types/common.py b/pandas/types/common.py
new file mode 100644
index 0000000000000..9d0ccaac843ef
--- /dev/null
+++ b/pandas/types/common.py
@@ -0,0 +1,448 @@
+""" common type operations """
+
+import numpy as np
+from pandas.compat import string_types, text_type, binary_type
+from pandas import lib, algos
+from .dtypes import (CategoricalDtype, CategoricalDtypeType,
+                     DatetimeTZDtype, DatetimeTZDtypeType,
+                     ExtensionDtype)
+from .generic import (ABCCategorical, ABCPeriodIndex,
+                      ABCDatetimeIndex, ABCSeries,
+                      ABCSparseArray, ABCSparseSeries)
+from .inference import is_integer, is_string_like
+from .inference import *  # noqa
+
+
+_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
+                             for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
+                                       'int32', 'uint32', 'int64', 'uint64']])
+
+_NS_DTYPE = np.dtype('M8[ns]')
+_TD_DTYPE = np.dtype('m8[ns]')
+_INT64_DTYPE = np.dtype(np.int64)
+_DATELIKE_DTYPES = set([np.dtype(t)
+                        for t in ['M8[ns]', '<M8[ns]', '>M8[ns]',
+                                  'm8[ns]', '<m8[ns]', '>m8[ns]']])
+
+_ensure_float64 = algos.ensure_float64
+_ensure_float32 = algos.ensure_float32
+
+
+def _ensure_float(arr):
+    if issubclass(arr.dtype.type, (np.integer, np.bool_)):
+        arr = arr.astype(float)
+    return arr
+
+_ensure_int64 = algos.ensure_int64
+_ensure_int32 = algos.ensure_int32
+_ensure_int16 = algos.ensure_int16
+_ensure_int8 = algos.ensure_int8
+_ensure_platform_int = algos.ensure_platform_int
+_ensure_object = algos.ensure_object
+
+
+def is_object_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.object_)
+
+
+def is_sparse(array):
+    """ return if we are a sparse array """
+    return isinstance(array, (ABCSparseArray, ABCSparseSeries))
+
+
+def is_categorical(array):
+    """ return if we are a categorical possibility """
+    return isinstance(array, ABCCategorical) or is_categorical_dtype(array)
+
+
+def is_datetimetz(array):
+    """ return if we are a datetime with tz array """
+    return ((isinstance(array, ABCDatetimeIndex) and
+             getattr(array, 'tz', None) is not None) or
+            is_datetime64tz_dtype(array))
+
+
+def is_datetime64_dtype(arr_or_dtype):
+    try:
+        tipo = _get_dtype_type(arr_or_dtype)
+    except TypeError:
+        return False
+    return issubclass(tipo, np.datetime64)
+
+
+def is_datetime64tz_dtype(arr_or_dtype):
+    return DatetimeTZDtype.is_dtype(arr_or_dtype)
+
+
+def is_timedelta64_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.timedelta64)
+
+
+def is_categorical_dtype(arr_or_dtype):
+    return CategoricalDtype.is_dtype(arr_or_dtype)
+
+
+def is_string_dtype(arr_or_dtype):
+    dtype = _get_dtype(arr_or_dtype)
+    return dtype.kind in ('O', 'S', 'U')
+
+
+def is_period_arraylike(arr):
+    """ return if we are period arraylike / PeriodIndex """
+    if isinstance(arr, ABCPeriodIndex):
+        return True
+    elif isinstance(arr, (np.ndarray, ABCSeries)):
+        return arr.dtype == object and lib.infer_dtype(arr) == 'period'
+    return getattr(arr, 'inferred_type', None) == 'period'
+
+
+def is_datetime_arraylike(arr):
+    """ return if we are datetime arraylike / DatetimeIndex """
+    if isinstance(arr, ABCDatetimeIndex):
+        return True
+    elif isinstance(arr, (np.ndarray, ABCSeries)):
+        return arr.dtype == object and lib.infer_dtype(arr) == 'datetime'
+    return getattr(arr, 'inferred_type', None) == 'datetime'
+
+
+def is_datetimelike(arr):
+    return (arr.dtype in _DATELIKE_DTYPES or
+            isinstance(arr, ABCPeriodIndex) or
+            is_datetimetz(arr))
+
+
+def is_dtype_equal(source, target):
+    """ return a boolean if the dtypes are equal """
+    try:
+        source = _get_dtype(source)
+        target = _get_dtype(target)
+        return source == target
+    except (TypeError, AttributeError):
+
+        # invalid comparison
+        # object == category will hit this
+        return False
+
+
+def is_any_int_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.integer)
+
+
+def is_integer_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return (issubclass(tipo, np.integer) and
+            not issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
+def is_int64_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.int64)
+
+
+def is_int_or_datetime_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return (issubclass(tipo, np.integer) or
+            issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
+def is_datetime64_any_dtype(arr_or_dtype):
+    return (is_datetime64_dtype(arr_or_dtype) or
+            is_datetime64tz_dtype(arr_or_dtype))
+
+
+def is_datetime64_ns_dtype(arr_or_dtype):
+    try:
+        tipo = _get_dtype(arr_or_dtype)
+    except TypeError:
+        return False
+    return tipo == _NS_DTYPE
+
+
+def is_timedelta64_ns_dtype(arr_or_dtype):
+    tipo = _get_dtype(arr_or_dtype)
+    return tipo == _TD_DTYPE
+
+
+def is_datetime_or_timedelta_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, (np.datetime64, np.timedelta64))
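A quick sanity check of the introspection helpers above; a sketch against the pandas.types.common module this patch introduces, with results shown as comments:

    import numpy as np
    from pandas.types.common import (is_datetime64_dtype,
                                     is_datetime64_ns_dtype, is_dtype_equal)

    # dtype objects, arrays, and strings are all accepted
    is_datetime64_dtype(np.array([], dtype='M8[ns]'))  # True
    is_datetime64_ns_dtype(np.dtype('M8[us]'))         # False: wrong unit
    is_dtype_equal('int64', np.int64)                  # True
    is_dtype_equal(np.dtype('O'), 'category')          # False, not an error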
+
+
+def is_numeric_v_string_like(a, b):
+    """
+    numpy doesn't like to compare numeric arrays vs scalar string-likes
+
+    return a boolean result if this is the case for a,b or b,a
+
+    """
+    is_a_array = isinstance(a, np.ndarray)
+    is_b_array = isinstance(b, np.ndarray)
+
+    is_a_numeric_array = is_a_array and is_numeric_dtype(a)
+    is_b_numeric_array = is_b_array and is_numeric_dtype(b)
+    is_a_string_array = is_a_array and is_string_like_dtype(a)
+    is_b_string_array = is_b_array and is_string_like_dtype(b)
+
+    is_a_scalar_string_like = not is_a_array and is_string_like(a)
+    is_b_scalar_string_like = not is_b_array and is_string_like(b)
+
+    return ((is_a_numeric_array and is_b_scalar_string_like) or
+            (is_b_numeric_array and is_a_scalar_string_like) or
+            (is_a_numeric_array and is_b_string_array) or
+            (is_b_numeric_array and is_a_string_array))
+
+
+def is_datetimelike_v_numeric(a, b):
+    # return if we have an i8 convertible and numeric comparison
+    if not hasattr(a, 'dtype'):
+        a = np.asarray(a)
+    if not hasattr(b, 'dtype'):
+        b = np.asarray(b)
+
+    def is_numeric(x):
+        return is_integer_dtype(x) or is_float_dtype(x)
+
+    is_datetimelike = needs_i8_conversion
+    return ((is_datetimelike(a) and is_numeric(b)) or
+            (is_datetimelike(b) and is_numeric(a)))
+
+
+def is_datetimelike_v_object(a, b):
+    # return if we have an i8 convertible and object comparison
+    if not hasattr(a, 'dtype'):
+        a = np.asarray(a)
+    if not hasattr(b, 'dtype'):
+        b = np.asarray(b)
+
+    def is_object(x):
+        return is_object_dtype(x)
+
+    is_datetimelike = needs_i8_conversion
+    return ((is_datetimelike(a) and is_object(b)) or
+            (is_datetimelike(b) and is_object(a)))
+
+
+def needs_i8_conversion(arr_or_dtype):
+    return (is_datetime_or_timedelta_dtype(arr_or_dtype) or
+            is_datetime64tz_dtype(arr_or_dtype))
+
+
+def is_numeric_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return (issubclass(tipo, (np.number, np.bool_)) and
+            not issubclass(tipo, (np.datetime64, np.timedelta64)))
+
+
+def is_string_like_dtype(arr_or_dtype):
+    # exclude object as its a mixed dtype
+    dtype = _get_dtype(arr_or_dtype)
+    return dtype.kind in ('S', 'U')
+
+
+def is_float_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.floating)
+
+
+def is_floating_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.floating)
+
+
+def is_bool_dtype(arr_or_dtype):
+    try:
+        tipo = _get_dtype_type(arr_or_dtype)
+    except ValueError:
+        # this isn't even a dtype
+        return False
+    return issubclass(tipo, np.bool_)
+
+
+def is_extension_type(value):
+    """
+    if we are a klass that is preserved by the internals
+    these are internal klasses that we represent (and don't use a np.array)
+    """
+    if is_categorical(value):
+        return True
+    elif is_sparse(value):
+        return True
+    elif is_datetimetz(value):
+        return True
+    return False
+
+
+def is_complex_dtype(arr_or_dtype):
+    tipo = _get_dtype_type(arr_or_dtype)
+    return issubclass(tipo, np.complexfloating)
+
+
+def _coerce_to_dtype(dtype):
+    """ coerce a string / np.dtype to a dtype """
+    if is_categorical_dtype(dtype):
+        dtype = CategoricalDtype()
+    elif is_datetime64tz_dtype(dtype):
+        dtype = DatetimeTZDtype(dtype)
+    else:
+        dtype = np.dtype(dtype)
+    return dtype
+
+
+def _get_dtype(arr_or_dtype):
+    if isinstance(arr_or_dtype, np.dtype):
+        return arr_or_dtype
+    elif isinstance(arr_or_dtype, type):
+        return np.dtype(arr_or_dtype)
+    elif isinstance(arr_or_dtype, CategoricalDtype):
+        return arr_or_dtype
+    elif isinstance(arr_or_dtype, DatetimeTZDtype):
+        return arr_or_dtype
+    elif isinstance(arr_or_dtype, string_types):
+        if is_categorical_dtype(arr_or_dtype):
+            return CategoricalDtype.construct_from_string(arr_or_dtype)
+        elif is_datetime64tz_dtype(arr_or_dtype):
+            return DatetimeTZDtype.construct_from_string(arr_or_dtype)
+
+    if hasattr(arr_or_dtype, 'dtype'):
+        arr_or_dtype = arr_or_dtype.dtype
+    return np.dtype(arr_or_dtype)
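These guards exist because NumPy either raises or returns surprising results when comparing across these kinds; a rough sketch of the cases they flag, assuming the module as added above:

    import numpy as np
    from pandas.types.common import (is_numeric_v_string_like,
                                     is_datetimelike_v_numeric)

    is_numeric_v_string_like(np.array([1, 2]), 'foo')          # True
    is_datetimelike_v_numeric(np.array([], dtype='M8[ns]'),
                              np.array([1.5]))                 # True
    is_numeric_v_string_like(np.array([1, 2]), np.array([3]))  # False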
+
+
+def _get_dtype_type(arr_or_dtype):
+    if isinstance(arr_or_dtype, np.dtype):
+        return arr_or_dtype.type
+    elif isinstance(arr_or_dtype, type):
+        return np.dtype(arr_or_dtype).type
+    elif isinstance(arr_or_dtype, CategoricalDtype):
+        return CategoricalDtypeType
+    elif isinstance(arr_or_dtype, DatetimeTZDtype):
+        return DatetimeTZDtypeType
+    elif isinstance(arr_or_dtype, string_types):
+        if is_categorical_dtype(arr_or_dtype):
+            return CategoricalDtypeType
+        elif is_datetime64tz_dtype(arr_or_dtype):
+            return DatetimeTZDtypeType
+        return _get_dtype_type(np.dtype(arr_or_dtype))
+    try:
+        return arr_or_dtype.dtype.type
+    except AttributeError:
+        return type(None)
+
+
+def _get_dtype_from_object(dtype):
+    """Get a numpy dtype.type-style object. This handles the datetime64[ns]
+    and datetime64[ns, TZ] compat
+
+    Notes
+    -----
+    If nothing can be found, returns ``object``.
+    """
+
+    # type object from a dtype
+    if isinstance(dtype, type) and issubclass(dtype, np.generic):
+        return dtype
+    elif is_categorical(dtype):
+        return CategoricalDtype().type
+    elif is_datetimetz(dtype):
+        return DatetimeTZDtype(dtype).type
+    elif isinstance(dtype, np.dtype):  # dtype object
+        try:
+            _validate_date_like_dtype(dtype)
+        except TypeError:
+            # should still pass if we don't have a datelike
+            pass
+        return dtype.type
+    elif isinstance(dtype, string_types):
+        if dtype == 'datetime' or dtype == 'timedelta':
+            dtype += '64'
+
+        try:
+            return _get_dtype_from_object(getattr(np, dtype))
+        except (AttributeError, TypeError):
+            # handles cases like _get_dtype(int)
+            # i.e., python objects that are valid dtypes (unlike user-defined
+            # types, in general)
+            # TypeError handles the float16 typecode of 'e'
+            # further handle internal types
+            pass
+
+    return _get_dtype_from_object(np.dtype(dtype))
+
+
+def _validate_date_like_dtype(dtype):
+    try:
+        typ = np.datetime_data(dtype)[0]
+    except ValueError as e:
+        raise TypeError('%s' % e)
+    if typ != 'generic' and typ != 'ns':
+        raise ValueError('%r is too specific of a frequency, try passing %r' %
+                         (dtype.name, dtype.type.__name__))
+
+
+def _lcd_dtypes(a_dtype, b_dtype):
+    """ return the lcd dtype to hold these types """
+
+    if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype):
+        return _NS_DTYPE
+    elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype):
+        return _TD_DTYPE
+    elif is_complex_dtype(a_dtype):
+        if is_complex_dtype(b_dtype):
+            return a_dtype
+        return np.float64
+    elif is_integer_dtype(a_dtype):
+        if is_integer_dtype(b_dtype):
+            if a_dtype.itemsize == b_dtype.itemsize:
+                return a_dtype
+            return np.int64
+        return np.float64
+    elif is_float_dtype(a_dtype):
+        if is_float_dtype(b_dtype):
+            if a_dtype.itemsize == b_dtype.itemsize:
+                return a_dtype
+            else:
+                return np.float64
+        elif is_integer_dtype(b_dtype):
+            return np.float64
+    return np.object
+
+_string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type,
+                                                        text_type)))
+
+
+def pandas_dtype(dtype):
+    """
+    Converts input into a pandas only dtype object or a numpy dtype object.
+
+    Parameters
+    ----------
+    dtype : object to be converted
+
+    Returns
+    -------
+    np.dtype or a pandas dtype
+    """
+    if isinstance(dtype, DatetimeTZDtype):
+        return dtype
+    elif isinstance(dtype, CategoricalDtype):
+        return dtype
+    elif isinstance(dtype, string_types):
+        try:
+            return DatetimeTZDtype.construct_from_string(dtype)
+        except TypeError:
+            pass
+
+        try:
+            return CategoricalDtype.construct_from_string(dtype)
+        except TypeError:
+            pass
+    elif isinstance(dtype, ExtensionDtype):
+        return dtype
+
+    return np.dtype(dtype)
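pandas_dtype is the entry point much of the refactor leans on; a short sketch of what it resolves, with results as comments (assumes the module layout added above):

    from pandas.types.common import pandas_dtype

    pandas_dtype('datetime64[ns, US/Eastern]')  # a DatetimeTZDtype
    pandas_dtype('category')                    # a CategoricalDtype
    pandas_dtype('int64')                       # falls through to np.dtype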
diff --git a/pandas/types/concat.py b/pandas/types/concat.py
index 44338f26eb2e8..3b30531fb30ac 100644
--- a/pandas/types/concat.py
+++ b/pandas/types/concat.py
@@ -3,10 +3,19 @@
 """
 
 import numpy as np
-import pandas.core.common as com
 import pandas.tslib as tslib
 from pandas import compat
 from pandas.compat import map
+from .common import (is_categorical_dtype,
+                     is_sparse,
+                     is_datetimetz,
+                     is_datetime64_dtype,
+                     is_timedelta64_dtype,
+                     is_object_dtype,
+                     is_bool_dtype,
+                     is_dtype_equal,
+                     _NS_DTYPE,
+                     _TD_DTYPE)
 
 
 def get_dtype_kinds(l):
@@ -24,19 +33,19 @@ def get_dtype_kinds(l):
     for arr in l:
 
         dtype = arr.dtype
-        if com.is_categorical_dtype(dtype):
+        if is_categorical_dtype(dtype):
             typ = 'category'
-        elif com.is_sparse(arr):
+        elif is_sparse(arr):
             typ = 'sparse'
-        elif com.is_datetimetz(arr):
+        elif is_datetimetz(arr):
             typ = 'datetimetz'
-        elif com.is_datetime64_dtype(dtype):
+        elif is_datetime64_dtype(dtype):
             typ = 'datetime'
-        elif com.is_timedelta64_dtype(dtype):
+        elif is_timedelta64_dtype(dtype):
             typ = 'timedelta'
-        elif com.is_object_dtype(dtype):
+        elif is_object_dtype(dtype):
             typ = 'object'
-        elif com.is_bool_dtype(dtype):
+        elif is_bool_dtype(dtype):
             typ = 'bool'
         else:
             typ = dtype.kind
@@ -51,14 +60,14 @@ def _get_series_result_type(result):
     """
     if isinstance(result, dict):
         # concat Series with axis 1
-        if all(com.is_sparse(c) for c in compat.itervalues(result)):
+        if all(is_sparse(c) for c in compat.itervalues(result)):
             from pandas.sparse.api import SparseDataFrame
             return SparseDataFrame
         else:
             from pandas.core.frame import DataFrame
             return DataFrame
 
-    elif com.is_sparse(result):
+    elif is_sparse(result):
         # concat Series with axis 1
         from pandas.sparse.api import SparseSeries
         return SparseSeries
@@ -165,7 +174,7 @@ def _concat_categorical(to_concat, axis=0):
 
     def convert_categorical(x):
         # coerce to object dtype
-        if com.is_categorical_dtype(x.dtype):
+        if is_categorical_dtype(x.dtype):
             return x.get_values()
         return x.ravel()
 
@@ -177,7 +186,7 @@ def convert_categorical(x):
     # we could have object blocks and categoricals here
     # if we only have a single categoricals then combine everything
    # else its a non-compat categorical
-    categoricals = [x for x in to_concat if com.is_categorical_dtype(x.dtype)]
+    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]
 
     # validate the categories
     categories = categoricals[0]
@@ -235,7 +244,7 @@ def union_categoricals(to_union):
     if any(c.ordered for c in to_union):
         raise TypeError("Can only combine unordered Categoricals")
 
-    if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
+    if not all(is_dtype_equal(c.categories.dtype, first.categories.dtype)
                for c in to_union):
         raise TypeError("dtype of categories must be the same")
 
@@ -272,7 +281,7 @@ def convert_to_pydatetime(x, axis):
         # coerce to an object dtype
 
         # if dtype is of datetimetz or timezone
-        if x.dtype.kind == com._NS_DTYPE.kind:
+        if x.dtype.kind == _NS_DTYPE.kind:
             if getattr(x, 'tz', None) is not None:
                 x = x.asobject.values
             else:
@@ -280,7 +289,7 @@ def convert_to_pydatetime(x, axis):
             x = tslib.ints_to_pydatetime(x.view(np.int64).ravel())
             x = x.reshape(shape)
 
-        elif x.dtype == com._TD_DTYPE:
+        elif x.dtype == _TD_DTYPE:
             shape = x.shape
             x = tslib.ints_to_pytimedelta(x.view(np.int64).ravel())
             x = x.reshape(shape)
@@ -310,12 +319,12 @@ def convert_to_pydatetime(x, axis):
     elif 'datetime' in typs:
         new_values = np.concatenate([x.view(np.int64) for x in to_concat],
                                     axis=axis)
-        return new_values.view(com._NS_DTYPE)
+        return new_values.view(_NS_DTYPE)
 
     elif 'timedelta' in typs:
         new_values = np.concatenate([x.view(np.int64) for x in to_concat],
                                     axis=axis)
-        return new_values.view(com._TD_DTYPE)
+        return new_values.view(_TD_DTYPE)
 
     # need to coerce to object
     to_concat = [convert_to_pydatetime(x, axis) for x in to_concat]
@@ -350,7 +359,7 @@ def convert_sparse(x, axis):
         return x
 
     if typs is None:
-        typs = com.get_dtype_kinds(to_concat)
+        typs = get_dtype_kinds(to_concat)
 
     if len(typs) == 1:
         # concat input as it is if all inputs are sparse
@@ -374,7 +383,7 @@ def convert_sparse(x, axis):
 
     # input may be sparse / dense mixed and may have different fill_value
     # input must contain sparse at least 1
-    sparses = [c for c in to_concat if com.is_sparse(c)]
+    sparses = [c for c in to_concat if is_sparse(c)]
     fill_values = [c.fill_value for c in sparses]
     sp_indexes = [c.sp_index for c in sparses]
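union_categoricals, touched in the hunk above, is easiest to understand by example; a sketch of the unordered, same-dtype case it supports (importing from the private location this patch uses; the result is shown as a comment):

    import pandas as pd
    from pandas.types.concat import union_categoricals

    a = pd.Categorical(['b', 'c'])
    b = pd.Categorical(['a', 'b'])
    union_categoricals([a, b])
    # ['b', 'c', 'a', 'b'] over the union of the two category sets;
    # ordered Categoricals or mismatched category dtypes raise TypeError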
diff --git a/pandas/types/inference.py b/pandas/types/inference.py
new file mode 100644
index 0000000000000..35a2dc2fb831b
--- /dev/null
+++ b/pandas/types/inference.py
@@ -0,0 +1,104 @@
+""" basic inference routines """
+
+import collections
+import re
+import numpy as np
+from numbers import Number
+from pandas.compat import (string_types, text_type,
+                           string_and_binary_types)
+from pandas import lib
+
+is_bool = lib.is_bool
+
+is_integer = lib.is_integer
+
+is_float = lib.is_float
+
+is_complex = lib.is_complex
+
+is_scalar = lib.isscalar
+
+
+def is_number(obj):
+    return isinstance(obj, (Number, np.number))
+
+
+def is_string_like(obj):
+    return isinstance(obj, (text_type, string_types))
+
+
+def _iterable_not_string(x):
+    return (isinstance(x, collections.Iterable) and
+            not isinstance(x, string_types))
+
+
+def is_iterator(obj):
+    # python 3 generators have __next__ instead of next
+    return hasattr(obj, 'next') or hasattr(obj, '__next__')
+
+
+def is_re(obj):
+    return isinstance(obj, re._pattern_type)
+
+
+def is_re_compilable(obj):
+    try:
+        re.compile(obj)
+    except TypeError:
+        return False
+    else:
+        return True
+
+
+def is_list_like(arg):
+    return (hasattr(arg, '__iter__') and
+            not isinstance(arg, string_and_binary_types))
+
+
+def is_dict_like(arg):
+    return hasattr(arg, '__getitem__') and hasattr(arg, 'keys')
+
+
+def is_named_tuple(arg):
+    return isinstance(arg, tuple) and hasattr(arg, '_fields')
+
+
+def is_hashable(arg):
+    """Return True if hash(arg) will succeed, False otherwise.
+
+    Some types will pass a test against collections.Hashable but fail when
+    they are actually hashed with hash().
+
+    Distinguish between these and other types by trying the call to hash()
+    and seeing if they raise TypeError.
+
+    Examples
+    --------
+    >>> a = ([],)
+    >>> isinstance(a, collections.Hashable)
+    True
+    >>> is_hashable(a)
+    False
+    """
+    # unfortunately, we can't use isinstance(arg, collections.Hashable),
+    # which can be faster than calling hash, because numpy scalars on
+    # Python 3 fail this test
+
+    # reconsider this decision once this numpy bug is fixed:
+    # https://github.com/numpy/numpy/issues/5562
+
+    try:
+        hash(arg)
+    except TypeError:
+        return False
+    else:
+        return True
+
+
+def is_sequence(x):
+    try:
+        iter(x)
+        len(x)  # it has a length
+        return not isinstance(x, string_and_binary_types)
+    except (TypeError, AttributeError):
+        return False
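The behaviour of these small predicates is worth pinning down, since the rest of the codebase now imports them from here; a few spot checks with results as comments (a sketch against the module as added above):

    from pandas.types.inference import (is_list_like, is_hashable,
                                        is_iterator)

    is_list_like([1, 2])   # True
    is_list_like('foo')    # False: strings are deliberately excluded
    is_hashable(([],))     # False: hash() raises TypeError for this tuple
    is_iterator(iter([]))  # True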
diff --git a/pandas/types/missing.py b/pandas/types/missing.py
new file mode 100644
index 0000000000000..8b4193d02beb7
--- /dev/null
+++ b/pandas/types/missing.py
@@ -0,0 +1,394 @@
+"""
+missing types & inference
+"""
+import numpy as np
+from pandas import lib
+from pandas.tslib import NaT, iNaT
+from .generic import (ABCMultiIndex, ABCSeries,
+                      ABCIndexClass, ABCGeneric)
+from .common import (is_string_dtype, is_datetimelike,
+                     is_datetimelike_v_numeric, is_float_dtype,
+                     is_datetime64_dtype, is_datetime64tz_dtype,
+                     is_timedelta64_dtype,
+                     is_complex_dtype, is_categorical_dtype,
+                     is_string_like_dtype, is_bool_dtype,
+                     is_integer_dtype, is_dtype_equal,
+                     needs_i8_conversion, _ensure_object,
+                     pandas_dtype,
+                     is_scalar,
+                     is_object_dtype,
+                     is_integer,
+                     _TD_DTYPE,
+                     _NS_DTYPE,
+                     _DATELIKE_DTYPES)
+from .inference import is_list_like
+
+
+def isnull(obj):
+    """Detect missing values (NaN in numeric arrays, None/NaN in object
+    arrays)
+
+    Parameters
+    ----------
+    obj : ndarray or object value
+        Object to check for null-ness
+
+    Returns
+    -------
+    isnulled : array-like of bool or bool
+        Array or bool indicating whether an object is null or, if an array
+        is given, which of the elements are null.
+
+    See also
+    --------
+    pandas.notnull: boolean inverse of pandas.isnull
+    """
+    return _isnull(obj)
+
+
+def _isnull_new(obj):
+    if is_scalar(obj):
+        return lib.checknull(obj)
+    # hack (for now) because MI registers as ndarray
+    elif isinstance(obj, ABCMultiIndex):
+        raise NotImplementedError("isnull is not defined for MultiIndex")
+    elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)):
+        return _isnull_ndarraylike(obj)
+    elif isinstance(obj, ABCGeneric):
+        return obj._constructor(obj._data.isnull(func=isnull))
+    elif isinstance(obj, list) or hasattr(obj, '__array__'):
+        return _isnull_ndarraylike(np.asarray(obj))
+    else:
+        return obj is None
+
+
+def _isnull_old(obj):
+    """Detect missing values. Treat None, NaN, INF, -INF as null.
+
+    Parameters
+    ----------
+    obj : ndarray or object value
+
+    Returns
+    -------
+    boolean ndarray or boolean
+    """
+    if is_scalar(obj):
+        return lib.checknull_old(obj)
+    # hack (for now) because MI registers as ndarray
+    elif isinstance(obj, ABCMultiIndex):
+        raise NotImplementedError("isnull is not defined for MultiIndex")
+    elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)):
+        return _isnull_ndarraylike_old(obj)
+    elif isinstance(obj, ABCGeneric):
+        return obj._constructor(obj._data.isnull(func=_isnull_old))
+    elif isinstance(obj, list) or hasattr(obj, '__array__'):
+        return _isnull_ndarraylike_old(np.asarray(obj))
+    else:
+        return obj is None
+
+
+_isnull = _isnull_new
+
+
+def _use_inf_as_null(key):
+    """Option change callback for null/inf behaviour.
+    Choose which replacement for numpy.isnan / -numpy.isfinite is used.
+
+    Parameters
+    ----------
+    flag: bool
+        True means treat None, NaN, INF, -INF as null (old way),
+        False means None and NaN are null, but INF, -INF are not null
+        (new way).
+
+    Notes
+    -----
+    This approach to setting global module values is discussed and
+    approved here:
+
+    * http://stackoverflow.com/questions/4859217/
+      programmatically-creating-variables-in-python/4859312#4859312
+    """
+    from pandas.core.config import get_option
+    flag = get_option(key)
+    if flag:
+        globals()['_isnull'] = _isnull_old
+    else:
+        globals()['_isnull'] = _isnull_new
+
+
+def _isnull_ndarraylike(obj):
+
+    values = getattr(obj, 'values', obj)
+    dtype = values.dtype
+
+    if is_string_dtype(dtype):
+        if is_categorical_dtype(values):
+            from pandas import Categorical
+            if not isinstance(values, Categorical):
+                values = values.values
+            result = values.isnull()
+        else:
+
+            # Working around NumPy ticket 1542
+            shape = values.shape
+
+            if is_string_like_dtype(dtype):
+                result = np.zeros(values.shape, dtype=bool)
+            else:
+                result = np.empty(shape, dtype=bool)
+                vec = lib.isnullobj(values.ravel())
+                result[...] = vec.reshape(shape)
+
+    elif is_datetimelike(obj):
+        # this is the NaT pattern
+        result = values.view('i8') == iNaT
+    else:
+        result = np.isnan(values)
+
+    # box
+    if isinstance(obj, ABCSeries):
+        from pandas import Series
+        result = Series(result, index=obj.index, name=obj.name, copy=False)
+
+    return result
+
+
+def _isnull_ndarraylike_old(obj):
+    values = getattr(obj, 'values', obj)
+    dtype = values.dtype
+
+    if is_string_dtype(dtype):
+        # Working around NumPy ticket 1542
+        shape = values.shape
+
+        if is_string_like_dtype(dtype):
+            result = np.zeros(values.shape, dtype=bool)
+        else:
+            result = np.empty(shape, dtype=bool)
+            vec = lib.isnullobj_old(values.ravel())
+            result[:] = vec.reshape(shape)
+
+    elif dtype in _DATELIKE_DTYPES:
+        # this is the NaT pattern
+        result = values.view('i8') == iNaT
+    else:
+        result = ~np.isfinite(values)
+
+    # box
+    if isinstance(obj, ABCSeries):
+        from pandas import Series
+        result = Series(result, index=obj.index, name=obj.name, copy=False)
+
+    return result
+
+
+def notnull(obj):
+    """Replacement for numpy.isfinite / -numpy.isnan which is suitable for
+    use on object arrays.
+
+    Parameters
+    ----------
+    obj : ndarray or object value
+        Object to check for *not*-null-ness
+
+    Returns
+    -------
+    isnulled : array-like of bool or bool
+        Array or bool indicating whether an object is *not* null or, if an
+        array is given, which of the elements are *not* null.
+
+    See also
+    --------
+    pandas.isnull : boolean inverse of pandas.notnull
+    """
+    res = isnull(obj)
+    if is_scalar(res):
+        return not res
+    return ~res
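The public wrappers keep their old behaviour; a quick check through the stable pd.isnull / pd.notnull entry points that this refactor now backs (results as comments):

    import numpy as np
    import pandas as pd

    pd.isnull(np.array([1.0, np.nan]))         # array([False,  True])
    pd.isnull(pd.Series(['a', None, pd.NaT]))  # None and NaT both count
    pd.notnull(np.nan)                         # False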
Nat + but guard against passing a non-scalar """ + if other is NaT or other is None: + return True + elif is_scalar(other): + + # a timedelta + if hasattr(other, 'dtype'): + return other.view('i8') == iNaT + elif is_integer(other) and other == iNaT: + return True + return isnull(other) + return False + + +def _is_na_compat(arr, fill_value=np.nan): + """ + Parameters + ---------- + arr: a numpy array + fill_value: fill value, default to np.nan + + Returns + ------- + True if we can fill using this fill_value + """ + dtype = arr.dtype + if isnull(fill_value): + return not (is_bool_dtype(dtype) or + is_integer_dtype(dtype)) + return True + + +def array_equivalent(left, right, strict_nan=False): + """ + True if two arrays, left and right, have equal non-NaN elements, and NaNs + in corresponding locations. False otherwise. It is assumed that left and + right are NumPy arrays of the same dtype. The behavior of this function + (particularly with respect to NaNs) is not defined if the dtypes are + different. + + Parameters + ---------- + left, right : ndarrays + strict_nan : bool, default False + If True, consider NaN and None to be different. + + Returns + ------- + b : bool + Returns True if the arrays are equivalent. + + Examples + -------- + >>> array_equivalent( + ... np.array([1, 2, np.nan]), + ... np.array([1, 2, np.nan])) + True + >>> array_equivalent( + ... np.array([1, np.nan, 2]), + ... np.array([1, 2, np.nan])) + False + """ + + left, right = np.asarray(left), np.asarray(right) + + # shape compat + if left.shape != right.shape: + return False + + # Object arrays can contain None, NaN and NaT. + # string dtypes must be come to this path for NumPy 1.7.1 compat + if is_string_dtype(left) or is_string_dtype(right): + + if not strict_nan: + # isnull considers NaN and None to be equivalent. + return lib.array_equivalent_object( + _ensure_object(left.ravel()), _ensure_object(right.ravel())) + + for left_value, right_value in zip(left, right): + if left_value is NaT and right_value is not NaT: + return False + + elif isinstance(left_value, float) and np.isnan(left_value): + if (not isinstance(right_value, float) or + not np.isnan(right_value)): + return False + else: + if left_value != right_value: + return False + return True + + # NaNs can occur in float and complex arrays. + if is_float_dtype(left) or is_complex_dtype(left): + return ((left == right) | (np.isnan(left) & np.isnan(right))).all() + + # numpy will will not allow this type of datetimelike vs integer comparison + elif is_datetimelike_v_numeric(left, right): + return False + + # M8/m8 + elif needs_i8_conversion(left) and needs_i8_conversion(right): + if not is_dtype_equal(left.dtype, right.dtype): + return False + + left = left.view('i8') + right = right.view('i8') + + # NaNs cannot occur otherwise. + try: + return np.array_equal(left, right) + except AttributeError: + # see gh-13388 + # + # NumPy v1.7.1 has a bug in its array_equal + # function that prevents it from correctly + # comparing two arrays with complex dtypes. 
+        # This bug is corrected in v1.8.0, so remove
+        # this try-except block as soon as we stop
+        # supporting NumPy versions < 1.8.0
+        if not is_dtype_equal(left.dtype, right.dtype):
+            return False
+
+        left = left.tolist()
+        right = right.tolist()
+
+        return left == right
+
+
+def _infer_fill_value(val):
+    """
+    infer the fill value for the nan/NaT from the provided
+    scalar/ndarray/list-like if we are a NaT, return the correct dtyped
+    element to provide proper block construction
+    """
+
+    if not is_list_like(val):
+        val = [val]
+    val = np.array(val, copy=False)
+    if is_datetimelike(val):
+        return np.array('NaT', dtype=val.dtype)
+    elif is_object_dtype(val.dtype):
+        dtype = lib.infer_dtype(_ensure_object(val))
+        if dtype in ['datetime', 'datetime64']:
+            return np.array('NaT', dtype=_NS_DTYPE)
+        elif dtype in ['timedelta', 'timedelta64']:
+            return np.array('NaT', dtype=_TD_DTYPE)
+    return np.nan
+
+
+def _maybe_fill(arr, fill_value=np.nan):
+    """
+    if we have a compatible fill_value and arr dtype, then fill
+    """
+    if _is_na_compat(arr, fill_value):
+        arr.fill(fill_value)
+    return arr
+
+
+def na_value_for_dtype(dtype):
+    """
+    Return a dtype compat na value
+
+    Parameters
+    ----------
+    dtype : string / dtype
+
+    Returns
+    -------
+    np.dtype or a pandas dtype
+    """
+    dtype = pandas_dtype(dtype)
+
+    if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or
+            is_timedelta64_dtype(dtype)):
+        return NaT
+    elif is_float_dtype(dtype):
+        return np.nan
+    elif is_integer_dtype(dtype):
+        return 0
+    elif is_bool_dtype(dtype):
+        return False
+    return np.nan
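na_value_for_dtype gives the refactor a single place to answer "what is NA for this dtype?"; a sketch of the mapping, importing from the private location above (results as comments):

    import numpy as np
    from pandas.types.missing import na_value_for_dtype

    na_value_for_dtype(np.dtype('datetime64[ns]'))  # NaT
    na_value_for_dtype(np.dtype('float64'))         # nan
    na_value_for_dtype(np.dtype('int64'))           # 0
    na_value_for_dtype(np.dtype('bool'))            # False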
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 2961b2fb2241f..4442eed898b60 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -23,11 +23,14 @@
 import numpy as np
 
 import pandas as pd
-from pandas.core.common import (is_sequence, array_equivalent,
-                                is_list_like, is_datetimelike_v_numeric,
-                                is_datetimelike_v_object,
-                                is_number, is_bool,
-                                needs_i8_conversion, is_categorical_dtype)
+from pandas.types.missing import array_equivalent
+from pandas.types.common import (is_datetimelike_v_numeric,
+                                 is_datetimelike_v_object,
+                                 is_number, is_bool,
+                                 needs_i8_conversion,
+                                 is_categorical_dtype,
+                                 is_sequence,
+                                 is_list_like)
 from pandas.formats.printing import pprint_thing
 from pandas.core.algorithms import take_1d
 
@@ -1001,17 +1004,20 @@ def assert_categorical_equal(left, right, check_dtype=True,
         assert_attr_equal('ordered', left, right, obj=obj)
 
 
-def raise_assert_detail(obj, message, left, right):
+def raise_assert_detail(obj, message, left, right, diff=None):
     if isinstance(left, np.ndarray):
         left = pprint_thing(left)
     if isinstance(right, np.ndarray):
         right = pprint_thing(right)
 
+    if diff is not None:
+        diff = "\n[diff]: {diff}".format(diff=diff)
+
     msg = """{0} are different
 
 {1}
 [left]:  {2}
-[right]: {3}""".format(obj, message, left, right)
+[right]: {3}{4}""".format(obj, message, left, right, diff)
 
     raise AssertionError(msg)
 
diff --git a/pandas/util/validators.py b/pandas/util/validators.py
index bbfd24df9c13e..964fa9d9b38d5 100644
--- a/pandas/util/validators.py
+++ b/pandas/util/validators.py
@@ -3,6 +3,8 @@
 for validating data or function arguments
 """
 
+from pandas.types.common import is_bool
+
 
 def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
     """
@@ -35,8 +37,6 @@ def _check_for_default_values(fname, arg_val_dict, compat_args):
     checked that arg_val_dict.keys() is a subset of compat_args
 
     """
-    from pandas.core.common import is_bool
-
     for key in arg_val_dict:
         # try checking equality directly with '=' operator,
        # as comparison may have been overridden for the left

From 20de2661c8eff66e465248cbe28062eae0e0e3bb Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Wed, 13 Jul 2016 10:38:09 -0400
Subject: [PATCH 26/44] BLD: included pandas.api.* in setup.py (#13640)

---
 setup.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.py b/setup.py
index 8f8865ecc3b7a..650357588570a 100755
--- a/setup.py
+++ b/setup.py
@@ -547,6 +547,9 @@ def pxd(name):
       maintainer=AUTHOR,
       version=versioneer.get_version(),
       packages=['pandas',
+                'pandas.api',
+                'pandas.api.tests',
+                'pandas.api.types',
                 'pandas.compat',
                 'pandas.compat.numpy',
                 'pandas.computation',

From 44f3229709d40241917267f4cfa7b28f9a92678b Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Thu, 14 Jul 2016 09:12:52 +0200
Subject: [PATCH 27/44] DOC/BLD: pin IPython version to 4.2.0 (#13639) (#13647)

---
 ci/requirements-2.7_DOC_BUILD.run | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/requirements-2.7_DOC_BUILD.run b/ci/requirements-2.7_DOC_BUILD.run
index a07721c75cf34..cde0719aa027e 100644
--- a/ci/requirements-2.7_DOC_BUILD.run
+++ b/ci/requirements-2.7_DOC_BUILD.run
@@ -1,4 +1,4 @@
-ipython=4
+ipython=4.2.0
 ipykernel
 sphinx
 nbconvert

From 6f0a020e0929d53b2341f58f970806c85facef91 Mon Sep 17 00:00:00 2001
From: Sinhrks
Date: Thu, 14 Jul 2016 17:15:23 +0900
Subject: [PATCH 28/44] TST: reorganize tools.tests (#13619)

---
 pandas/tools/tests/test_concat.py | 432 +++++++++-----
 pandas/tools/tests/test_join.py   | 787 ++++++++++++++++++++++++++
 pandas/tools/tests/test_merge.py  | 900 +-----------------------------
 3 files changed, 1082 insertions(+), 1037 deletions(-)
 create mode 100644 pandas/tools/tests/test_join.py

diff --git a/pandas/tools/tests/test_concat.py b/pandas/tools/tests/test_concat.py
index a8c86657a48cc..568cf63c02e30 100644
--- a/pandas/tools/tests/test_concat.py
+++ b/pandas/tools/tests/test_concat.py
@@ -17,7 +17,7 @@
                                  assert_almost_equal)
 
 
-class TestConcatenate(tm.TestCase):
+class ConcatenateBase(tm.TestCase):
 
     _multiprocess_can_split_ = True
 
@@ -26,6 +26,9 @@ def setUp(self):
         self.mixed_frame = self.frame.copy()
         self.mixed_frame['foo'] = 'bar'
 
+
+class TestAppend(ConcatenateBase):
+
     def test_append(self):
         begin_index = self.frame.index[:5]
         end_index = self.frame.index[5:]
@@ -142,42 +145,32 @@ def test_append_preserve_index_name(self):
         result = df1.append(df2)
         self.assertEqual(result.index.name, 'A')
 
-    def test_join_many(self):
-        df = DataFrame(np.random.randn(10, 6), columns=list('abcdef'))
-        df_list = [df[['a', 'b']], df[['c', 'd']], df[['e', 'f']]]
-
-        joined = df_list[0].join(df_list[1:])
-        tm.assert_frame_equal(joined, df)
-
-        df_list = [df[['a', 'b']][:-2],
-                   df[['c', 'd']][2:], df[['e', 'f']][1:9]]
-
-        def _check_diff_index(df_list, result, exp_index):
-            reindexed = [x.reindex(exp_index) for x in df_list]
-            expected = reindexed[0].join(reindexed[1:])
-            tm.assert_frame_equal(result, expected)
-
-        # different join types
-        joined = df_list[0].join(df_list[1:], how='outer')
-        _check_diff_index(df_list, joined, df.index)
-
-        joined = df_list[0].join(df_list[1:])
-        _check_diff_index(df_list, joined, df_list[0].index)
-
-        joined = df_list[0].join(df_list[1:], how='inner')
-        _check_diff_index(df_list, joined, df.index[2:8])
-
-        self.assertRaises(ValueError, df_list[0].join, df_list[1:], on='a')
-
-    def test_join_many_mixed(self):
-        df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
-        df['key'] = ['foo', 'bar'] * 4
-        df1 = df.ix[:, ['A', 'B']]
-        df2 = df.ix[:, ['C', 'D']]
-        df3 = df.ix[:, ['key']]
-
-        result = df1.join([df2, df3])
-        assert_frame_equal(result, df)
+    def test_append_dtype_coerce(self):
+
+        # GH 4993
+        # appending with datetime will incorrectly convert datetime64
+        import datetime as dt
+        from pandas import NaT
+
+        df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0),
+                                            dt.datetime(2013, 1, 2, 0, 0)],
+                        columns=['start_time'])
+        df2 = DataFrame(index=[4, 5], data=[[dt.datetime(2013, 1, 3, 0, 0),
+                                             dt.datetime(2013, 1, 3, 6, 10)],
+                                            [dt.datetime(2013, 1, 4, 0, 0),
+                                             dt.datetime(2013, 1, 4, 7, 10)]],
+                        columns=['start_time', 'end_time'])
+
+        expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10),
+                                   dt.datetime(2013, 1, 4, 7, 10)],
+                                  name='end_time'),
+                           Series([dt.datetime(2013, 1, 1, 0, 0),
+                                   dt.datetime(2013, 1, 2, 0, 0),
+                                   dt.datetime(2013, 1, 3, 0, 0),
+                                   dt.datetime(2013, 1, 4, 0, 0)],
+                                  name='start_time')], axis=1)
+        result = df1.append(df2, ignore_index=True)
+        assert_frame_equal(result, expected)
 
     def test_append_missing_column_proper_upcast(self):
         df1 = DataFrame({'A': np.array([1, 2, 3, 4], dtype='i8')})
@@ -188,6 +181,9 @@ def test_append_missing_column_proper_upcast(self):
         self.assertEqual(appended['A'].dtype, 'f8')
         self.assertEqual(appended['B'].dtype, 'O')
 
+
+class TestConcatenate(ConcatenateBase):
+
     def test_concat_copy(self):
 
         df = DataFrame(np.random.randn(4, 3))
@@ -524,35 +520,6 @@ def test_with_mixed_tuples(self):
         # it works
         concat([df1, df2])
 
-    def test_join_dups(self):
-
-        # joining dups
-        df = concat([DataFrame(np.random.randn(10, 4),
-                               columns=['A', 'A', 'B', 'B']),
-                     DataFrame(np.random.randint(0, 10, size=20)
-                               .reshape(10, 2),
-                               columns=['A', 'C'])],
-                    axis=1)
-
-        expected = concat([df, df], axis=1)
-        result = df.join(df, rsuffix='_2')
-        result.columns = expected.columns
-        assert_frame_equal(result, expected)
-
-        # GH 4975, invalid join on dups
-        w = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
-        x = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
-        y = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
-        z = DataFrame(np.random.randn(4, 2), columns=["x", "y"])
-
-        dta = x.merge(y, left_index=True, right_index=True).merge(
-            z, left_index=True, right_index=True, how="outer")
-        dta = dta.merge(w, left_index=True, right_index=True)
-        expected = concat([x, y, z, w], axis=1)
-        expected.columns = ['x_x', 'y_x', 'x_y',
-                            'y_y', 'x_x', 'y_x', 'x_y', 'y_y']
-        assert_frame_equal(dta, expected)
-
     def test_handle_empty_objects(self):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
 
@@ -649,86 +616,40 @@ def test_concat_mixed_objs(self):
         panel = tm.makePanel()
         self.assertRaises(ValueError, lambda: concat([panel, s1], axis=1))
 
-    def test_panel_join(self):
-        panel = tm.makePanel()
-        tm.add_nans(panel)
-
-        p1 = panel.ix[:2, :10, :3]
-        p2 = panel.ix[2:, 5:, 2:]
-
-        # left join
-        result = p1.join(p2)
-        expected = p1.copy()
-        expected['ItemC'] = p2['ItemC']
-        tm.assert_panel_equal(result, expected)
-
-        # right join
-        result = p1.join(p2, how='right')
-        expected = p2.copy()
-        expected['ItemA'] = p1['ItemA']
-        expected['ItemB'] = p1['ItemB']
-        expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC'])
-        tm.assert_panel_equal(result, expected)
-
-        # inner join
-        result = p1.join(p2, how='inner')
-        expected = panel.ix[:, 5:10, 2:3]
-        tm.assert_panel_equal(result, expected)
-
-        # outer join
-        result = p1.join(p2, how='outer')
-        expected = p1.reindex(major=panel.major_axis,
-                              minor=panel.minor_axis)
-        expected = expected.join(p2.reindex(major=panel.major_axis,
-                                            minor=panel.minor_axis))
-        tm.assert_panel_equal(result, expected)
-
-    def test_panel_join_overlap(self):
-        panel = tm.makePanel()
-        tm.add_nans(panel)
-
-        p1 = panel.ix[['ItemA', 'ItemB', 'ItemC']]
-        p2 = panel.ix[['ItemB', 'ItemC']]
-
-        # Expected index is
-        #
-        # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2
-        joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2')
-        p1_suf = p1.ix[['ItemB', 'ItemC']].add_suffix('_p1')
-        p2_suf = p2.ix[['ItemB', 'ItemC']].add_suffix('_p2')
-        no_overlap = panel.ix[['ItemA']]
-        expected = no_overlap.join(p1_suf.join(p2_suf))
-        tm.assert_panel_equal(joined, expected)
-
-    def test_panel_join_many(self):
-        tm.K = 10
-        panel = tm.makePanel()
-        tm.K = 4
+    def test_empty_dtype_coerce(self):
-        panels = [panel.ix[:2], panel.ix[2:6], panel.ix[6:]]
+        # xref to #12411
+        # xref to #12045
+        # xref to #11594
+        # see below
-        joined = panels[0].join(panels[1:])
-        tm.assert_panel_equal(joined, panel)
+        # 10571
+        df1 = DataFrame(data=[[1, None], [2, None]], columns=['a', 'b'])
+        df2 = DataFrame(data=[[3, None], [4, None]], columns=['a', 'b'])
+        result = concat([df1, df2])
+        expected = df1.dtypes
+        tm.assert_series_equal(result.dtypes, expected)
-        panels = [panel.ix[:2, :-5], panel.ix[2:6, 2:], panel.ix[6:, 5:-7]]
+    def test_dtype_coercion(self):
-        data_dict = {}
-        for p in panels:
-            data_dict.update(p.iteritems())
+        # 12411
+        df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'),
+                                 pd.NaT]})
-        joined = panels[0].join(panels[1:], how='inner')
-        expected = Panel.from_dict(data_dict, intersect=True)
-        tm.assert_panel_equal(joined, expected)
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        tm.assert_series_equal(result.dtypes, df.dtypes)
-        joined = panels[0].join(panels[1:], how='outer')
-        expected = Panel.from_dict(data_dict, intersect=False)
-        tm.assert_panel_equal(joined, expected)
+        # 12045
+        import datetime
+        df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
+                                 datetime.datetime(1012, 1, 2)]})
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        tm.assert_series_equal(result.dtypes, df.dtypes)
-        # edge cases
-        self.assertRaises(ValueError, panels[0].join, panels[1:],
-                          how='outer', lsuffix='foo', rsuffix='bar')
-        self.assertRaises(ValueError, panels[0].join, panels[1:],
-                          how='right')
+        # 11594
+        df = DataFrame({'text': ['some words'] + [None] * 9})
+        result = concat([df.iloc[[0]], df.iloc[[1]]])
+        tm.assert_series_equal(result.dtypes, df.dtypes)
 
     def test_panel_concat_other_axes(self):
         panel = tm.makePanel()
@@ -1080,6 +1001,239 @@ def test_concat_invalid_first_argument(self):
         expected = read_csv(StringIO(data))
         assert_frame_equal(result, expected)
 
+    def test_concat_NaT_series(self):
+        # GH 11693
+        # test for merging NaT series with datetime series.
+        x = Series(date_range('20151124 08:00', '20151124 09:00',
+                              freq='1h', tz='US/Eastern'))
+        y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]')
+        expected = Series([x[0], x[1], pd.NaT, pd.NaT])
+
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # all NaT with tz
+        expected = Series(pd.NaT, index=range(4),
+                          dtype='datetime64[ns, US/Eastern]')
+        result = pd.concat([y, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # without tz
+        x = pd.Series(pd.date_range('20151124 08:00',
+                                    '20151124 09:00', freq='1h'))
+        y = pd.Series(pd.date_range('20151124 10:00',
+                                    '20151124 11:00', freq='1h'))
+        y[:] = pd.NaT
+        expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT])
+        result = pd.concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # all NaT without tz
+        x[:] = pd.NaT
+        expected = pd.Series(pd.NaT, index=range(4),
+                             dtype='datetime64[ns]')
+        result = pd.concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+    def test_concat_tz_frame(self):
+        df2 = DataFrame(dict(A=pd.Timestamp('20130102', tz='US/Eastern'),
+                             B=pd.Timestamp('20130603', tz='CET')),
+                        index=range(5))
+
+        # concat
+        df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
+        assert_frame_equal(df2, df3)
+
+    def test_concat_tz_series(self):
+        # GH 11755
+        # tz and no tz
+        x = Series(date_range('20151124 08:00',
+                              '20151124 09:00',
+                              freq='1h', tz='UTC'))
+        y = Series(date_range('2012-01-01', '2012-01-02'))
+        expected = Series([x[0], x[1], y[0], y[1]],
+                          dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # GH 11887
+        # concat tz and object
+        x = Series(date_range('20151124 08:00',
+                              '20151124 09:00',
+                              freq='1h', tz='UTC'))
+        y = Series(['a', 'b'])
+        expected = Series([x[0], x[1], y[0], y[1]],
+                          dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+
+        # 12217
+        # 12306 fixed I think
+
+        # Concat'ing two UTC times
+        first = pd.DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize('UTC')
+
+        second = pd.DataFrame([[datetime(2016, 1, 2)]])
+        second[0] = second[0].dt.tz_localize('UTC')
+
+        result = pd.concat([first, second])
+        self.assertEqual(result[0].dtype, 'datetime64[ns, UTC]')
+
+        # Concat'ing two London times
+        first = pd.DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize('Europe/London')
+
+        second = pd.DataFrame([[datetime(2016, 1, 2)]])
+        second[0] = second[0].dt.tz_localize('Europe/London')
+
+        result = pd.concat([first, second])
+        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
+
+        # Concat'ing 2+1 London times
+        first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
+        first[0] = first[0].dt.tz_localize('Europe/London')
+
+        second = pd.DataFrame([[datetime(2016, 1, 3)]])
+        second[0] = second[0].dt.tz_localize('Europe/London')
+
+        result = pd.concat([first, second])
+        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
+
+        # Concat'ing 1+2 London times
+        first = pd.DataFrame([[datetime(2016, 1, 1)]])
+        first[0] = first[0].dt.tz_localize('Europe/London')
+
+        second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
+        second[0] = second[0].dt.tz_localize('Europe/London')
+
+        result = pd.concat([first, second])
+        self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]')
+
+    def test_concat_tz_series_with_datetimelike(self):
+        # GH 12620
+        # tz and timedelta
+        x = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
+             pd.Timestamp('2011-02-01', tz='US/Eastern')]
+        y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')]
+        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
+
+        # tz and period
+        y = [pd.Period('2011-03', freq='M'), pd.Period('2011-04', freq='M')]
+        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, pd.Series(x + y, dtype='object'))
+
+    def test_concat_tz_series_tzlocal(self):
+        # GH 13583
+        tm._skip_if_no_dateutil()
+        import dateutil
+        x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()),
+             pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())]
+        y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()),
+             pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())]
+        result = concat([pd.Series(x), pd.Series(y)], ignore_index=True)
+        tm.assert_series_equal(result, pd.Series(x + y))
+        self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]')
+
+    def test_concat_period_series(self):
+        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+        y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D'))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        self.assertEqual(result.dtype, 'object')
+
+        # different freq
+        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+        y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M'))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        self.assertEqual(result.dtype, 'object')
+
+        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+        y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M'))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        self.assertEqual(result.dtype, 'object')
+
+        # non-period
+        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+        y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01']))
+        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        self.assertEqual(result.dtype, 'object')
+
+        x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D'))
+        y = Series(['A', 'B'])
+        expected = Series([x[0], x[1], y[0], y[1]], dtype='object')
+        result = concat([x, y], ignore_index=True)
+        tm.assert_series_equal(result, expected)
+        self.assertEqual(result.dtype, 'object')
+
+    def test_concat_empty_series(self):
+        # GH 11082
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name='y')
+        res = pd.concat([s1, s2], axis=1)
+        exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]})
+        tm.assert_frame_equal(res, exp)
+
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name='y')
+        res = pd.concat([s1, s2], axis=0)
+        # name will be reset
+        exp = pd.Series([1, 2, 3])
+        tm.assert_series_equal(res, exp)
+
+        # empty Series with no name
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series(name=None)
+        res = pd.concat([s1, s2], axis=1)
+        exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
+                           columns=['x', 0])
+        tm.assert_frame_equal(res, exp)
+
+    def test_default_index(self):
+        # is_series and ignore_index
+        s1 = pd.Series([1, 2, 3], name='x')
+        s2 = pd.Series([4, 5, 6], name='y')
+        res = pd.concat([s1, s2], axis=1, ignore_index=True)
+        self.assertIsInstance(res.columns, pd.RangeIndex)
+        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
+        # use check_index_type=True to check the result has
+        # RangeIndex (default index)
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        # is_series and all inputs have no names
+        s1 = pd.Series([1, 2, 3])
+        s2 = pd.Series([4, 5, 6])
+        res = pd.concat([s1, s2], axis=1, ignore_index=False)
+        self.assertIsInstance(res.columns, pd.RangeIndex)
+        exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]])
+        exp.columns = pd.RangeIndex(2)
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        # is_dataframe and ignore_index
+        df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]})
+        df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]})
+
+        res = pd.concat([df1, df2], axis=0, ignore_index=True)
+        exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]],
+                           columns=['A', 'B'])
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
+        res = pd.concat([df1, df2], axis=1, ignore_index=True)
+        exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
+        tm.assert_frame_equal(res, exp, check_index_type=True,
+                              check_column_type=True)
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/tools/tests/test_join.py b/pandas/tools/tests/test_join.py
new file mode 100644
index 0000000000000..86aee0b4a01c9
--- /dev/null
+++ b/pandas/tools/tests/test_join.py
@@ -0,0 +1,787 @@
+# pylint: disable=E1103
+
+import nose
+
+from numpy.random import randn
+import numpy as np
+
+import pandas as pd
+from pandas.compat import lrange
+import pandas.compat as compat
+from pandas.tools.merge import merge, concat
+from pandas.util.testing import assert_frame_equal
+from pandas import DataFrame, MultiIndex, Series
+
+import pandas.algos as algos
+import pandas.util.testing as tm
+from pandas.tools.tests.test_merge import get_test_data, N, NGROUPS
+
+
+a_ = np.array
+
+
+class TestJoin(tm.TestCase):
+
+    _multiprocess_can_split_ = True
+
+    def setUp(self):
+        # aggregate multiple columns
+        self.df = DataFrame({'key1': get_test_data(),
+                             'key2': get_test_data(),
+                             'data1': np.random.randn(N),
+                             'data2': np.random.randn(N)})
+
+        # exclude a couple keys for fun
+        self.df = self.df[self.df['key2'] > 1]
+
+        self.df2 = DataFrame({'key1': get_test_data(n=N // 5),
+                              'key2': get_test_data(ngroups=NGROUPS // 2,
+                                                    n=N // 5),
+                              'value': np.random.randn(N // 5)})
+
+        index, data = tm.getMixedTypeDict()
+        self.target = DataFrame(data, index=index)
+
+        # Join on string value
+        self.source = DataFrame({'MergedA': data['A'], 'MergedD': data['D']},
+                                index=data['C'])
+
+    def test_cython_left_outer_join(self):
+        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
+        max_group = 5
+
+        ls, rs = algos.left_outer_join(left, right, max_group)
+
+        exp_ls = left.argsort(kind='mergesort')
+        exp_rs = right.argsort(kind='mergesort')
+
+        exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+                     6, 6, 7, 7, 8, 8, 9, 10])
+        exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3,
+                     4, 5, 4, 5, 4, 5, -1, -1])
+
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
+    def test_cython_right_outer_join(self):
+        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
+        max_group = 5
+
+        rs, ls = algos.left_outer_join(right, left, max_group)
+
+        exp_ls = left.argsort(kind='mergesort')
+        exp_rs = right.argsort(kind='mergesort')
+
+        #            0        1    1    1
+        exp_li = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5,
+                     #            2    2    4
+                     6, 7, 8, 6, 7, 8, -1])
+        exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3,
+                     4, 4, 4, 5, 5, 5, 6])
+
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
+    def test_cython_inner_join(self):
+        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64)
+        max_group = 5
+
+        ls, rs = algos.inner_join(left, right, max_group)
+
+        exp_ls = left.argsort(kind='mergesort')
+        exp_rs = right.argsort(kind='mergesort')
+
+        exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5,
+                     6, 6, 7, 7, 8, 8])
+        exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3,
+                     4, 5, 4, 5, 4, 5])
+
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
+    def test_left_outer_join(self):
+        joined_key2 = merge(self.df, self.df2, on='key2')
+        _check_join(self.df, self.df2, joined_key2, ['key2'], how='left')
+
+        joined_both = merge(self.df, self.df2)
+        _check_join(self.df, self.df2, joined_both, ['key1', 'key2'],
+                    how='left')
+
+    def test_right_outer_join(self):
+        joined_key2 = merge(self.df, self.df2, on='key2', how='right')
+        _check_join(self.df, self.df2, joined_key2, ['key2'], how='right')
+
+        joined_both = merge(self.df, self.df2, how='right')
+        _check_join(self.df, self.df2, joined_both, ['key1', 'key2'],
+                    how='right')
+
+    def test_full_outer_join(self):
+        joined_key2 = merge(self.df, self.df2, on='key2', how='outer')
+        _check_join(self.df, self.df2, joined_key2, ['key2'], how='outer')
+
+        joined_both = merge(self.df, self.df2, how='outer')
+        _check_join(self.df, self.df2, joined_both, ['key1', 'key2'],
+                    how='outer')
+
+    def test_inner_join(self):
+        joined_key2 = merge(self.df, self.df2, on='key2', how='inner')
+        _check_join(self.df, self.df2, joined_key2, ['key2'], how='inner')
+
+        joined_both = merge(self.df, self.df2, how='inner')
+        _check_join(self.df, self.df2, joined_both, ['key1', 'key2'],
+                    how='inner')
+
+    def test_handle_overlap(self):
+        joined = merge(self.df, self.df2, on='key2',
+                       suffixes=['.foo', '.bar'])
+
+        self.assertIn('key1.foo', joined)
+        self.assertIn('key1.bar', joined)
+
+    def test_handle_overlap_arbitrary_key(self):
+        joined = merge(self.df, self.df2,
+                       left_on='key2', right_on='key1',
+                       suffixes=['.foo', '.bar'])
+        self.assertIn('key1.foo', joined)
+        self.assertIn('key2.bar', joined)
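The expected-indexer arithmetic in these tests is easier to see on a tiny input; a sketch against the same private pandas.algos signature the tests use (ls/rs are aligned positional indexers into left/right, with -1 marking a missing match; results as comments):

    import numpy as np
    import pandas.algos as algos

    left = np.array([0, 1, 1], dtype=np.int64)  # group id per left row
    right = np.array([1, 2], dtype=np.int64)    # group id per right row
    ls, rs = algos.left_outer_join(left, right, 3)
    # ls -> [0, 1, 2], rs -> [-1, 0, 0]: group 0 has no right match,
    # and both left rows in group 1 pair with the one right row there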
+
+    def test_join_on(self):
+        target = self.target
+        source = self.source
+
+        merged = target.join(source, on='C')
+        self.assert_series_equal(merged['MergedA'], target['A'],
+                                 check_names=False)
+        self.assert_series_equal(merged['MergedD'], target['D'],
+                                 check_names=False)
+
+        # join with duplicates (fix regression from DataFrame/Matrix merge)
+        df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
+        df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])
+        joined = df.join(df2, on='key')
+        expected = DataFrame({'key': ['a', 'a', 'b', 'b', 'c'],
+                              'value': [0, 0, 1, 1, 2]})
+        assert_frame_equal(joined, expected)
+
+        # Test when some are missing
+        df_a = DataFrame([[1], [2], [3]], index=['a', 'b', 'c'],
+                         columns=['one'])
+        df_b = DataFrame([['foo'], ['bar']], index=[1, 2],
+                         columns=['two'])
+        df_c = DataFrame([[1], [2]], index=[1, 2],
+                         columns=['three'])
+        joined = df_a.join(df_b, on='one')
+        joined = joined.join(df_c, on='one')
+        self.assertTrue(np.isnan(joined['two']['c']))
+        self.assertTrue(np.isnan(joined['three']['c']))
+
+        # merge column not present
+        self.assertRaises(KeyError, target.join, source, on='E')
+
+        # overlap
+        source_copy = source.copy()
+        source_copy['A'] = 0
+        self.assertRaises(ValueError, target.join, source_copy, on='A')
+
+    def test_join_on_fails_with_different_right_index(self):
+        with tm.assertRaises(ValueError):
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                            'b': np.random.randn(3)})
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                             'b': np.random.randn(10)},
+                            index=tm.makeCustomIndex(10, 2))
+            merge(df, df2, left_on='a', right_index=True)
+
+    def test_join_on_fails_with_different_left_index(self):
+        with tm.assertRaises(ValueError):
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                            'b': np.random.randn(3)},
+                           index=tm.makeCustomIndex(10, 2))
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                             'b': np.random.randn(10)})
+            merge(df, df2, right_on='b', left_index=True)
+
+    def test_join_on_fails_with_different_column_counts(self):
+        with tm.assertRaises(ValueError):
+            df = DataFrame({'a': np.random.choice(['m', 'f'], size=3),
+                            'b': np.random.randn(3)})
+            df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10),
+                             'b': np.random.randn(10)},
+                            index=tm.makeCustomIndex(10, 2))
+            merge(df, df2, right_on='a', left_on=['a', 'b'])
+
+    def test_join_on_fails_with_wrong_object_type(self):
+        # GH12081
+        wrongly_typed = [Series([0, 1]), 2, 'str', None, np.array([0, 1])]
+        df = DataFrame({'a': [1, 1]})
+
+        for obj in wrongly_typed:
+            with tm.assertRaisesRegexp(ValueError, str(type(obj))):
+                merge(obj, df, left_on='a', right_on='a')
+            with tm.assertRaisesRegexp(ValueError, str(type(obj))):
+                merge(df, obj, left_on='a', right_on='a')
+
+    def test_join_on_pass_vector(self):
+        expected = self.target.join(self.source, on='C')
+        del expected['C']
+
+        join_col = self.target.pop('C')
+        result = self.target.join(self.source, on=join_col)
+        assert_frame_equal(result, expected)
+
+    def test_join_with_len0(self):
+        # nothing to merge
+        merged = self.target.join(self.source.reindex([]), on='C')
+        for col in self.source:
+            self.assertIn(col, merged)
+            self.assertTrue(merged[col].isnull().all())
+
+        merged2 = self.target.join(self.source.reindex([]), on='C',
+                                   how='inner')
+        self.assert_index_equal(merged2.columns, merged.columns)
+        self.assertEqual(len(merged2), 0)
+
+    def test_join_on_inner(self):
+        df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']})
+        df2 = DataFrame({'value': [0, 1]}, index=['a', 'b'])
+
+        joined = df.join(df2, on='key', how='inner')
+
+        expected = df.join(df2, on='key')
+        expected = expected[expected['value'].notnull()]
+        self.assert_series_equal(joined['key'], expected['key'],
+                                 check_dtype=False)
+        self.assert_series_equal(joined['value'], expected['value'],
+                                 check_dtype=False)
+        self.assert_index_equal(joined.index, expected.index)
+
+    def test_join_on_singlekey_list(self):
+        df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']})
+        df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c'])
+
+        # corner cases
+        joined = df.join(df2, on=['key'])
on='key') + + assert_frame_equal(joined, expected) + + def test_join_on_series(self): + result = self.target.join(self.source['MergedA'], on='C') + expected = self.target.join(self.source[['MergedA']], on='C') + assert_frame_equal(result, expected) + + def test_join_on_series_buglet(self): + # GH #638 + df = DataFrame({'a': [1, 1]}) + ds = Series([2], index=[1], name='b') + result = df.join(ds, on='a') + expected = DataFrame({'a': [1, 1], + 'b': [2, 2]}, index=df.index) + tm.assert_frame_equal(result, expected) + + def test_join_index_mixed(self): + df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, + index=np.arange(10), + columns=['A', 'B', 'C', 'D']) + self.assertEqual(df1['B'].dtype, np.int64) + self.assertEqual(df1['D'].dtype, np.bool_) + + df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, + index=np.arange(0, 10, 2), + columns=['A', 'B', 'C', 'D']) + + # overlap + joined = df1.join(df2, lsuffix='_one', rsuffix='_two') + expected_columns = ['A_one', 'B_one', 'C_one', 'D_one', + 'A_two', 'B_two', 'C_two', 'D_two'] + df1.columns = expected_columns[:4] + df2.columns = expected_columns[4:] + expected = _join_by_hand(df1, df2) + assert_frame_equal(joined, expected) + + # no overlapping blocks + df1 = DataFrame(index=np.arange(10)) + df1['bool'] = True + df1['string'] = 'foo' + + df2 = DataFrame(index=np.arange(5, 15)) + df2['int'] = 1 + df2['float'] = 1. + + for kind in ['inner', 'outer', 'left', 'right']: + + joined = df1.join(df2, how=kind) + expected = _join_by_hand(df1, df2, how=kind) + assert_frame_equal(joined, expected) + + joined = df2.join(df1, how=kind) + expected = _join_by_hand(df2, df1, how=kind) + assert_frame_equal(joined, expected) + + def test_join_empty_bug(self): + # generated an exception in 0.4.3 + x = DataFrame() + x.join(DataFrame([3], index=[0], columns=['A']), how='outer') + + def test_join_unconsolidated(self): + # GH #331 + a = DataFrame(randn(30, 2), columns=['a', 'b']) + c = Series(randn(30)) + a['c'] = c + d = DataFrame(randn(30, 1), columns=['q']) + + # it works! 
+ a.join(d) + d.join(a) + + def test_join_multiindex(self): + index1 = MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b'], + [1, 2, 3, 1, 2, 3]], + names=['first', 'second']) + + index2 = MultiIndex.from_arrays([['b', 'b', 'b', 'c', 'c', 'c'], + [1, 2, 3, 1, 2, 3]], + names=['first', 'second']) + + df1 = DataFrame(data=np.random.randn(6), index=index1, + columns=['var X']) + df2 = DataFrame(data=np.random.randn(6), index=index2, + columns=['var Y']) + + df1 = df1.sortlevel(0) + df2 = df2.sortlevel(0) + + joined = df1.join(df2, how='outer') + ex_index = index1._tuple_index.union(index2._tuple_index) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + assert_frame_equal(joined, expected) + self.assertEqual(joined.index.names, index1.names) + + df1 = df1.sortlevel(1) + df2 = df2.sortlevel(1) + + joined = df1.join(df2, how='outer').sortlevel(0) + ex_index = index1._tuple_index.union(index2._tuple_index) + expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) + expected.index.names = index1.names + + assert_frame_equal(joined, expected) + self.assertEqual(joined.index.names, index1.names) + + def test_join_inner_multiindex(self): + key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux', + 'qux', 'snap'] + key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two', + 'three', 'one'] + + data = np.random.randn(len(key1)) + data = DataFrame({'key1': key1, 'key2': key2, + 'data': data}) + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], + ['one', 'two', 'three']], + labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], + [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], + names=['first', 'second']) + to_join = DataFrame(np.random.randn(10, 3), index=index, + columns=['j_one', 'j_two', 'j_three']) + + joined = data.join(to_join, on=['key1', 'key2'], how='inner') + expected = merge(data, to_join.reset_index(), + left_on=['key1', 'key2'], + right_on=['first', 'second'], how='inner', + sort=False) + + expected2 = merge(to_join, data, + right_on=['key1', 'key2'], left_index=True, + how='inner', sort=False) + assert_frame_equal(joined, expected2.reindex_like(joined)) + + expected2 = merge(to_join, data, right_on=['key1', 'key2'], + left_index=True, how='inner', sort=False) + + expected = expected.drop(['first', 'second'], axis=1) + expected.index = joined.index + + self.assertTrue(joined.index.is_monotonic) + assert_frame_equal(joined, expected) + + # _assert_same_contents(expected, expected2.ix[:, expected.columns]) + + def test_join_hierarchical_mixed(self): + # GH 2024 + df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=['a', 'b', 'c']) + new_df = df.groupby(['a']).agg({'b': [np.mean, np.sum]}) + other_df = DataFrame( + [(1, 2, 3), (7, 10, 6)], columns=['a', 'b', 'd']) + other_df.set_index('a', inplace=True) + # GH 9455, 12219 + with tm.assert_produces_warning(UserWarning): + result = merge(new_df, other_df, left_index=True, right_index=True) + self.assertTrue(('b', 'mean') in result) + self.assertTrue('b' in result) + + def test_join_float64_float32(self): + + a = DataFrame(randn(10, 2), columns=['a', 'b'], dtype=np.float64) + b = DataFrame(randn(10, 1), columns=['c'], dtype=np.float32) + joined = a.join(b) + self.assertEqual(joined.dtypes['a'], 'float64') + self.assertEqual(joined.dtypes['b'], 'float64') + self.assertEqual(joined.dtypes['c'], 'float32') + + a = np.random.randint(0, 5, 100).astype('int64') + b = np.random.random(100).astype('float64') + c = np.random.random(100).astype('float32') + df = DataFrame({'a': a, 'b': b, 'c': c}) + xpdf = 
DataFrame({'a': a, 'b': b, 'c': c}) + s = DataFrame(np.random.random(5).astype('float32'), columns=['md']) + rs = df.merge(s, left_on='a', right_index=True) + self.assertEqual(rs.dtypes['a'], 'int64') + self.assertEqual(rs.dtypes['b'], 'float64') + self.assertEqual(rs.dtypes['c'], 'float32') + self.assertEqual(rs.dtypes['md'], 'float32') + + xp = xpdf.merge(s, left_on='a', right_index=True) + assert_frame_equal(rs, xp) + + def test_join_many_non_unique_index(self): + df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) + df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]}) + df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + + result = idf1.join([idf2, idf3], how='outer') + + df_partially_merged = merge(df1, df2, on=['a', 'b'], how='outer') + expected = merge(df_partially_merged, df3, on=['a', 'b'], how='outer') + + result = result.reset_index() + expected = expected[result.columns] + expected['a'] = expected.a.astype('int64') + expected['b'] = expected.b.astype('int64') + assert_frame_equal(result, expected) + + df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) + df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]}) + df3 = DataFrame( + {"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]}) + idf1 = df1.set_index(["a", "b"]) + idf2 = df2.set_index(["a", "b"]) + idf3 = df3.set_index(["a", "b"]) + result = idf1.join([idf2, idf3], how='inner') + + df_partially_merged = merge(df1, df2, on=['a', 'b'], how='inner') + expected = merge(df_partially_merged, df3, on=['a', 'b'], how='inner') + + result = result.reset_index() + + assert_frame_equal(result, expected.ix[:, result.columns]) + + # GH 11519 + df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + s = Series(np.repeat(np.arange(8), 2), + index=np.repeat(np.arange(8), 2), name='TEST') + inner = df.join(s, how='inner') + outer = df.join(s, how='outer') + left = df.join(s, how='left') + right = df.join(s, how='right') + assert_frame_equal(inner, outer) + assert_frame_equal(inner, left) + assert_frame_equal(inner, right) + + def test_join_sort(self): + left = DataFrame({'key': ['foo', 'bar', 'baz', 'foo'], + 'value': [1, 2, 3, 4]}) + right = DataFrame({'value2': ['a', 'b', 'c']}, + index=['bar', 'baz', 'foo']) + + joined = left.join(right, on='key', sort=True) + expected = DataFrame({'key': ['bar', 'baz', 'foo', 'foo'], + 'value': [2, 3, 1, 4], + 'value2': ['a', 'b', 'c', 'c']}, + index=[1, 2, 0, 3]) + assert_frame_equal(joined, expected) + + # smoke test + joined = left.join(right, on='key', sort=False) + self.assert_index_equal(joined.index, pd.Index(lrange(4))) + + def test_mixed_type_join_with_suffix(self): + # GH #916 + df = DataFrame(np.random.randn(20, 6), + columns=['a', 'b', 'c', 'd', 'e', 'f']) + df.insert(0, 'id', 0) + df.insert(5, 'dt', 'foo') + + grouped = df.groupby('id') + mn = grouped.mean() + cn = grouped.count() + + # it works! 
+ mn.join(cn, rsuffix='_right') + + def test_join_many(self): + df = DataFrame(np.random.randn(10, 6), columns=list('abcdef')) + df_list = [df[['a', 'b']], df[['c', 'd']], df[['e', 'f']]] + + joined = df_list[0].join(df_list[1:]) + tm.assert_frame_equal(joined, df) + + df_list = [df[['a', 'b']][:-2], + df[['c', 'd']][2:], df[['e', 'f']][1:9]] + + def _check_diff_index(df_list, result, exp_index): + reindexed = [x.reindex(exp_index) for x in df_list] + expected = reindexed[0].join(reindexed[1:]) + tm.assert_frame_equal(result, expected) + + # different join types + joined = df_list[0].join(df_list[1:], how='outer') + _check_diff_index(df_list, joined, df.index) + + joined = df_list[0].join(df_list[1:]) + _check_diff_index(df_list, joined, df_list[0].index) + + joined = df_list[0].join(df_list[1:], how='inner') + _check_diff_index(df_list, joined, df.index[2:8]) + + self.assertRaises(ValueError, df_list[0].join, df_list[1:], on='a') + + def test_join_many_mixed(self): + df = DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D']) + df['key'] = ['foo', 'bar'] * 4 + df1 = df.ix[:, ['A', 'B']] + df2 = df.ix[:, ['C', 'D']] + df3 = df.ix[:, ['key']] + + result = df1.join([df2, df3]) + assert_frame_equal(result, df) + + def test_join_dups(self): + + # joining dups + df = concat([DataFrame(np.random.randn(10, 4), + columns=['A', 'A', 'B', 'B']), + DataFrame(np.random.randint(0, 10, size=20) + .reshape(10, 2), + columns=['A', 'C'])], + axis=1) + + expected = concat([df, df], axis=1) + result = df.join(df, rsuffix='_2') + result.columns = expected.columns + assert_frame_equal(result, expected) + + # GH 4975, invalid join on dups + w = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + x = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + y = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + z = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) + + dta = x.merge(y, left_index=True, right_index=True).merge( + z, left_index=True, right_index=True, how="outer") + dta = dta.merge(w, left_index=True, right_index=True) + expected = concat([x, y, z, w], axis=1) + expected.columns = ['x_x', 'y_x', 'x_y', + 'y_y', 'x_x', 'y_x', 'x_y', 'y_y'] + assert_frame_equal(dta, expected) + + def test_panel_join(self): + panel = tm.makePanel() + tm.add_nans(panel) + + p1 = panel.ix[:2, :10, :3] + p2 = panel.ix[2:, 5:, 2:] + + # left join + result = p1.join(p2) + expected = p1.copy() + expected['ItemC'] = p2['ItemC'] + tm.assert_panel_equal(result, expected) + + # right join + result = p1.join(p2, how='right') + expected = p2.copy() + expected['ItemA'] = p1['ItemA'] + expected['ItemB'] = p1['ItemB'] + expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) + tm.assert_panel_equal(result, expected) + + # inner join + result = p1.join(p2, how='inner') + expected = panel.ix[:, 5:10, 2:3] + tm.assert_panel_equal(result, expected) + + # outer join + result = p1.join(p2, how='outer') + expected = p1.reindex(major=panel.major_axis, + minor=panel.minor_axis) + expected = expected.join(p2.reindex(major=panel.major_axis, + minor=panel.minor_axis)) + tm.assert_panel_equal(result, expected) + + def test_panel_join_overlap(self): + panel = tm.makePanel() + tm.add_nans(panel) + + p1 = panel.ix[['ItemA', 'ItemB', 'ItemC']] + p2 = panel.ix[['ItemB', 'ItemC']] + + # Expected index is + # + # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 + joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') + p1_suf = p1.ix[['ItemB', 'ItemC']].add_suffix('_p1') + p2_suf = p2.ix[['ItemB', 'ItemC']].add_suffix('_p2') + no_overlap 
= panel.ix[['ItemA']] + expected = no_overlap.join(p1_suf.join(p2_suf)) + tm.assert_panel_equal(joined, expected) + + def test_panel_join_many(self): + tm.K = 10 + panel = tm.makePanel() + tm.K = 4 + + panels = [panel.ix[:2], panel.ix[2:6], panel.ix[6:]] + + joined = panels[0].join(panels[1:]) + tm.assert_panel_equal(joined, panel) + + panels = [panel.ix[:2, :-5], panel.ix[2:6, 2:], panel.ix[6:, 5:-7]] + + data_dict = {} + for p in panels: + data_dict.update(p.iteritems()) + + joined = panels[0].join(panels[1:], how='inner') + expected = pd.Panel.from_dict(data_dict, intersect=True) + tm.assert_panel_equal(joined, expected) + + joined = panels[0].join(panels[1:], how='outer') + expected = pd.Panel.from_dict(data_dict, intersect=False) + tm.assert_panel_equal(joined, expected) + + # edge cases + self.assertRaises(ValueError, panels[0].join, panels[1:], + how='outer', lsuffix='foo', rsuffix='bar') + self.assertRaises(ValueError, panels[0].join, panels[1:], + how='right') + + +def _check_join(left, right, result, join_col, how='left', + lsuffix='_x', rsuffix='_y'): + + # some smoke tests + for c in join_col: + assert(result[c].notnull().all()) + + left_grouped = left.groupby(join_col) + right_grouped = right.groupby(join_col) + + for group_key, group in result.groupby(join_col): + l_joined = _restrict_to_columns(group, left.columns, lsuffix) + r_joined = _restrict_to_columns(group, right.columns, rsuffix) + + try: + lgroup = left_grouped.get_group(group_key) + except KeyError: + if how in ('left', 'inner'): + raise AssertionError('key %s should not have been in the join' + % str(group_key)) + + _assert_all_na(l_joined, left.columns, join_col) + else: + _assert_same_contents(l_joined, lgroup) + + try: + rgroup = right_grouped.get_group(group_key) + except KeyError: + if how in ('right', 'inner'): + raise AssertionError('key %s should not have been in the join' + % str(group_key)) + + _assert_all_na(r_joined, right.columns, join_col) + else: + _assert_same_contents(r_joined, rgroup) + + +def _restrict_to_columns(group, columns, suffix): + found = [c for c in group.columns + if c in columns or c.replace(suffix, '') in columns] + + # filter + group = group.ix[:, found] + + # get rid of suffixes, if any + group = group.rename(columns=lambda x: x.replace(suffix, '')) + + # put in the right order... + group = group.ix[:, columns] + + return group + + +def _assert_same_contents(join_chunk, source): + NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... 
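An editorial aside on the sentinel just above (a sketch, not part of the patch itself): NaN never compares equal to itself, so the set-of-tuples comparison that follows would silently miss rows still containing missing values; filling with an out-of-band sentinel first makes row equality well defined. In plain Python:

    x, y = float('nan'), float('nan')
    print(x == y)                  # False: NaN is unequal even to another NaN
    print(x in {y})                # False: so set membership can miss NaN rows
    print(-1234567 in {-1234567})  # True: the sentinel compares normally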
+ + jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values + svalues = source.fillna(NA_SENTINEL).drop_duplicates().values + + rows = set(tuple(row) for row in jvalues) + assert(len(rows) == len(source)) + assert(all(tuple(row) in rows for row in svalues)) + + +def _assert_all_na(join_chunk, source_columns, join_col): + for c in source_columns: + if c in join_col: + continue + assert(join_chunk[c].isnull().all()) + + +def _join_by_hand(a, b, how='left'): + join_index = a.index.join(b.index, how=how) + + a_re = a.reindex(join_index) + b_re = b.reindex(join_index) + + result_columns = a.columns.append(b.columns) + + for col, s in compat.iteritems(b_re): + a_re[col] = s + return a_re.reindex(columns=result_columns) + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 6c448de741e0c..396b095fabbd6 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,23 +9,17 @@ import random import pandas as pd -from pandas.compat import range, lrange, lzip +from pandas.compat import lrange, lzip from pandas.tools.merge import merge, concat, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, slow) -from pandas import (DataFrame, Index, MultiIndex, - Series, date_range, Categorical, - compat) -import pandas.algos as algos +from pandas import DataFrame, Index, MultiIndex, Series, Categorical import pandas.util.testing as tm -a_ = np.array - N = 50 NGROUPS = 8 -JOIN_TYPES = ['inner', 'outer', 'left', 'right'] def get_test_data(ngroups=NGROUPS, n=N): @@ -58,496 +52,16 @@ def setUp(self): n=N // 5), 'value': np.random.randn(N // 5)}) - index, data = tm.getMixedTypeDict() - self.target = DataFrame(data, index=index) - - # Join on string value - self.source = DataFrame({'MergedA': data['A'], 'MergedD': data['D']}, - index=data['C']) - self.left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) self.right = DataFrame({'v2': np.random.randn(4)}, index=['d', 'b', 'c', 'a']) - def test_cython_left_outer_join(self): - left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) - max_group = 5 - - ls, rs = algos.left_outer_join(left, right, max_group) - - exp_ls = left.argsort(kind='mergesort') - exp_rs = right.argsort(kind='mergesort') - - exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, - 6, 6, 7, 7, 8, 8, 9, 10]) - exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 4, 5, 4, 5, 4, 5, -1, -1]) - - exp_ls = exp_ls.take(exp_li) - exp_ls[exp_li == -1] = -1 - - exp_rs = exp_rs.take(exp_ri) - exp_rs[exp_ri == -1] = -1 - - self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) - - def test_cython_right_outer_join(self): - left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64) - max_group = 5 - - rs, ls = algos.left_outer_join(right, left, max_group) - - exp_ls = left.argsort(kind='mergesort') - exp_rs = right.argsort(kind='mergesort') - - # 0 1 1 1 - exp_li = a_([0, 1, 2, 3, 4, 5, 3, 4, 5, 3, 4, 5, - # 2 2 4 - 6, 7, 8, 6, 7, 8, -1]) - exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, - 4, 4, 4, 5, 5, 5, 6]) - - exp_ls = exp_ls.take(exp_li) - exp_ls[exp_li == -1] = -1 - - exp_rs = exp_rs.take(exp_ri) - exp_rs[exp_ri == -1] = -1 - - self.assert_numpy_array_equal(ls, exp_ls, 
check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) - - def test_cython_inner_join(self): - left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64) - right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64) - max_group = 5 - - ls, rs = algos.inner_join(left, right, max_group) - - exp_ls = left.argsort(kind='mergesort') - exp_rs = right.argsort(kind='mergesort') - - exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, - 6, 6, 7, 7, 8, 8]) - exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, - 4, 5, 4, 5, 4, 5]) - - exp_ls = exp_ls.take(exp_li) - exp_ls[exp_li == -1] = -1 - - exp_rs = exp_rs.take(exp_ri) - exp_rs[exp_ri == -1] = -1 - - self.assert_numpy_array_equal(ls, exp_ls, check_dtype=False) - self.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) - - def test_left_outer_join(self): - joined_key2 = merge(self.df, self.df2, on='key2') - _check_join(self.df, self.df2, joined_key2, ['key2'], how='left') - - joined_both = merge(self.df, self.df2) - _check_join(self.df, self.df2, joined_both, ['key1', 'key2'], - how='left') - - def test_right_outer_join(self): - joined_key2 = merge(self.df, self.df2, on='key2', how='right') - _check_join(self.df, self.df2, joined_key2, ['key2'], how='right') - - joined_both = merge(self.df, self.df2, how='right') - _check_join(self.df, self.df2, joined_both, ['key1', 'key2'], - how='right') - - def test_full_outer_join(self): - joined_key2 = merge(self.df, self.df2, on='key2', how='outer') - _check_join(self.df, self.df2, joined_key2, ['key2'], how='outer') - - joined_both = merge(self.df, self.df2, how='outer') - _check_join(self.df, self.df2, joined_both, ['key1', 'key2'], - how='outer') - - def test_inner_join(self): - joined_key2 = merge(self.df, self.df2, on='key2', how='inner') - _check_join(self.df, self.df2, joined_key2, ['key2'], how='inner') - - joined_both = merge(self.df, self.df2, how='inner') - _check_join(self.df, self.df2, joined_both, ['key1', 'key2'], - how='inner') - - def test_handle_overlap(self): - joined = merge(self.df, self.df2, on='key2', - suffixes=['.foo', '.bar']) - - self.assertIn('key1.foo', joined) - self.assertIn('key1.bar', joined) - - def test_handle_overlap_arbitrary_key(self): - joined = merge(self.df, self.df2, - left_on='key2', right_on='key1', - suffixes=['.foo', '.bar']) - self.assertIn('key1.foo', joined) - self.assertIn('key2.bar', joined) - def test_merge_common(self): joined = merge(self.df, self.df2) exp = merge(self.df, self.df2, on=['key1', 'key2']) tm.assert_frame_equal(joined, exp) - def test_join_on(self): - target = self.target - source = self.source - - merged = target.join(source, on='C') - self.assert_series_equal(merged['MergedA'], target['A'], - check_names=False) - self.assert_series_equal(merged['MergedD'], target['D'], - check_names=False) - - # join with duplicates (fix regression from DataFrame/Matrix merge) - df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']}) - df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c']) - joined = df.join(df2, on='key') - expected = DataFrame({'key': ['a', 'a', 'b', 'b', 'c'], - 'value': [0, 0, 1, 1, 2]}) - assert_frame_equal(joined, expected) - - # Test when some are missing - df_a = DataFrame([[1], [2], [3]], index=['a', 'b', 'c'], - columns=['one']) - df_b = DataFrame([['foo'], ['bar']], index=[1, 2], - columns=['two']) - df_c = DataFrame([[1], [2]], index=[1, 2], - columns=['three']) - joined = df_a.join(df_b, on='one') - joined = joined.join(df_c, on='one') - self.assertTrue(np.isnan(joined['two']['c'])) - 
self.assertTrue(np.isnan(joined['three']['c'])) - - # merge column not p resent - self.assertRaises(KeyError, target.join, source, on='E') - - # overlap - source_copy = source.copy() - source_copy['A'] = 0 - self.assertRaises(ValueError, target.join, source_copy, on='A') - - def test_join_on_fails_with_different_right_index(self): - with tm.assertRaises(ValueError): - df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), - 'b': np.random.randn(3)}) - df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), - 'b': np.random.randn(10)}, - index=tm.makeCustomIndex(10, 2)) - merge(df, df2, left_on='a', right_index=True) - - def test_join_on_fails_with_different_left_index(self): - with tm.assertRaises(ValueError): - df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), - 'b': np.random.randn(3)}, - index=tm.makeCustomIndex(10, 2)) - df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), - 'b': np.random.randn(10)}) - merge(df, df2, right_on='b', left_index=True) - - def test_join_on_fails_with_different_column_counts(self): - with tm.assertRaises(ValueError): - df = DataFrame({'a': np.random.choice(['m', 'f'], size=3), - 'b': np.random.randn(3)}) - df2 = DataFrame({'a': np.random.choice(['m', 'f'], size=10), - 'b': np.random.randn(10)}, - index=tm.makeCustomIndex(10, 2)) - merge(df, df2, right_on='a', left_on=['a', 'b']) - - def test_join_on_fails_with_wrong_object_type(self): - # GH12081 - wrongly_typed = [Series([0, 1]), 2, 'str', None, np.array([0, 1])] - df = DataFrame({'a': [1, 1]}) - - for obj in wrongly_typed: - with tm.assertRaisesRegexp(ValueError, str(type(obj))): - merge(obj, df, left_on='a', right_on='a') - with tm.assertRaisesRegexp(ValueError, str(type(obj))): - merge(df, obj, left_on='a', right_on='a') - - def test_join_on_pass_vector(self): - expected = self.target.join(self.source, on='C') - del expected['C'] - - join_col = self.target.pop('C') - result = self.target.join(self.source, on=join_col) - assert_frame_equal(result, expected) - - def test_join_with_len0(self): - # nothing to merge - merged = self.target.join(self.source.reindex([]), on='C') - for col in self.source: - self.assertIn(col, merged) - self.assertTrue(merged[col].isnull().all()) - - merged2 = self.target.join(self.source.reindex([]), on='C', - how='inner') - self.assert_index_equal(merged2.columns, merged.columns) - self.assertEqual(len(merged2), 0) - - def test_join_on_inner(self): - df = DataFrame({'key': ['a', 'a', 'd', 'b', 'b', 'c']}) - df2 = DataFrame({'value': [0, 1]}, index=['a', 'b']) - - joined = df.join(df2, on='key', how='inner') - - expected = df.join(df2, on='key') - expected = expected[expected['value'].notnull()] - self.assert_series_equal(joined['key'], expected['key'], - check_dtype=False) - self.assert_series_equal(joined['value'], expected['value'], - check_dtype=False) - self.assert_index_equal(joined.index, expected.index) - - def test_join_on_singlekey_list(self): - df = DataFrame({'key': ['a', 'a', 'b', 'b', 'c']}) - df2 = DataFrame({'value': [0, 1, 2]}, index=['a', 'b', 'c']) - - # corner cases - joined = df.join(df2, on=['key']) - expected = df.join(df2, on='key') - - assert_frame_equal(joined, expected) - - def test_join_on_series(self): - result = self.target.join(self.source['MergedA'], on='C') - expected = self.target.join(self.source[['MergedA']], on='C') - assert_frame_equal(result, expected) - - def test_join_on_series_buglet(self): - # GH #638 - df = DataFrame({'a': [1, 1]}) - ds = Series([2], index=[1], name='b') - result = df.join(ds, on='a') - 
expected = DataFrame({'a': [1, 1], - 'b': [2, 2]}, index=df.index) - tm.assert_frame_equal(result, expected) - - def test_join_index_mixed(self): - df1 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, - index=np.arange(10), - columns=['A', 'B', 'C', 'D']) - self.assertEqual(df1['B'].dtype, np.int64) - self.assertEqual(df1['D'].dtype, np.bool_) - - df2 = DataFrame({'A': 1., 'B': 2, 'C': 'foo', 'D': True}, - index=np.arange(0, 10, 2), - columns=['A', 'B', 'C', 'D']) - - # overlap - joined = df1.join(df2, lsuffix='_one', rsuffix='_two') - expected_columns = ['A_one', 'B_one', 'C_one', 'D_one', - 'A_two', 'B_two', 'C_two', 'D_two'] - df1.columns = expected_columns[:4] - df2.columns = expected_columns[4:] - expected = _join_by_hand(df1, df2) - assert_frame_equal(joined, expected) - - # no overlapping blocks - df1 = DataFrame(index=np.arange(10)) - df1['bool'] = True - df1['string'] = 'foo' - - df2 = DataFrame(index=np.arange(5, 15)) - df2['int'] = 1 - df2['float'] = 1. - - for kind in JOIN_TYPES: - - joined = df1.join(df2, how=kind) - expected = _join_by_hand(df1, df2, how=kind) - assert_frame_equal(joined, expected) - - joined = df2.join(df1, how=kind) - expected = _join_by_hand(df2, df1, how=kind) - assert_frame_equal(joined, expected) - - def test_join_empty_bug(self): - # generated an exception in 0.4.3 - x = DataFrame() - x.join(DataFrame([3], index=[0], columns=['A']), how='outer') - - def test_join_unconsolidated(self): - # GH #331 - a = DataFrame(randn(30, 2), columns=['a', 'b']) - c = Series(randn(30)) - a['c'] = c - d = DataFrame(randn(30, 1), columns=['q']) - - # it works! - a.join(d) - d.join(a) - - def test_join_multiindex(self): - index1 = MultiIndex.from_arrays([['a', 'a', 'a', 'b', 'b', 'b'], - [1, 2, 3, 1, 2, 3]], - names=['first', 'second']) - - index2 = MultiIndex.from_arrays([['b', 'b', 'b', 'c', 'c', 'c'], - [1, 2, 3, 1, 2, 3]], - names=['first', 'second']) - - df1 = DataFrame(data=np.random.randn(6), index=index1, - columns=['var X']) - df2 = DataFrame(data=np.random.randn(6), index=index2, - columns=['var Y']) - - df1 = df1.sortlevel(0) - df2 = df2.sortlevel(0) - - joined = df1.join(df2, how='outer') - ex_index = index1._tuple_index.union(index2._tuple_index) - expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) - expected.index.names = index1.names - assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) - - df1 = df1.sortlevel(1) - df2 = df2.sortlevel(1) - - joined = df1.join(df2, how='outer').sortlevel(0) - ex_index = index1._tuple_index.union(index2._tuple_index) - expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) - expected.index.names = index1.names - - assert_frame_equal(joined, expected) - self.assertEqual(joined.index.names, index1.names) - - def test_join_inner_multiindex(self): - key1 = ['bar', 'bar', 'bar', 'foo', 'foo', 'baz', 'baz', 'qux', - 'qux', 'snap'] - key2 = ['two', 'one', 'three', 'one', 'two', 'one', 'two', 'two', - 'three', 'one'] - - data = np.random.randn(len(key1)) - data = DataFrame({'key1': key1, 'key2': key2, - 'data': data}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], - labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], - [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], - names=['first', 'second']) - to_join = DataFrame(np.random.randn(10, 3), index=index, - columns=['j_one', 'j_two', 'j_three']) - - joined = data.join(to_join, on=['key1', 'key2'], how='inner') - expected = merge(data, to_join.reset_index(), - left_on=['key1', 'key2'], - right_on=['first', 
'second'], how='inner', - sort=False) - - expected2 = merge(to_join, data, - right_on=['key1', 'key2'], left_index=True, - how='inner', sort=False) - assert_frame_equal(joined, expected2.reindex_like(joined)) - - expected2 = merge(to_join, data, right_on=['key1', 'key2'], - left_index=True, how='inner', sort=False) - - expected = expected.drop(['first', 'second'], axis=1) - expected.index = joined.index - - self.assertTrue(joined.index.is_monotonic) - assert_frame_equal(joined, expected) - - # _assert_same_contents(expected, expected2.ix[:, expected.columns]) - - def test_join_hierarchical_mixed(self): - # GH 2024 - df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=['a', 'b', 'c']) - new_df = df.groupby(['a']).agg({'b': [np.mean, np.sum]}) - other_df = DataFrame( - [(1, 2, 3), (7, 10, 6)], columns=['a', 'b', 'd']) - other_df.set_index('a', inplace=True) - # GH 9455, 12219 - with tm.assert_produces_warning(UserWarning): - result = merge(new_df, other_df, left_index=True, right_index=True) - self.assertTrue(('b', 'mean') in result) - self.assertTrue('b' in result) - - def test_join_float64_float32(self): - - a = DataFrame(randn(10, 2), columns=['a', 'b'], dtype=np.float64) - b = DataFrame(randn(10, 1), columns=['c'], dtype=np.float32) - joined = a.join(b) - self.assertEqual(joined.dtypes['a'], 'float64') - self.assertEqual(joined.dtypes['b'], 'float64') - self.assertEqual(joined.dtypes['c'], 'float32') - - a = np.random.randint(0, 5, 100).astype('int64') - b = np.random.random(100).astype('float64') - c = np.random.random(100).astype('float32') - df = DataFrame({'a': a, 'b': b, 'c': c}) - xpdf = DataFrame({'a': a, 'b': b, 'c': c}) - s = DataFrame(np.random.random(5).astype('float32'), columns=['md']) - rs = df.merge(s, left_on='a', right_index=True) - self.assertEqual(rs.dtypes['a'], 'int64') - self.assertEqual(rs.dtypes['b'], 'float64') - self.assertEqual(rs.dtypes['c'], 'float32') - self.assertEqual(rs.dtypes['md'], 'float32') - - xp = xpdf.merge(s, left_on='a', right_index=True) - assert_frame_equal(rs, xp) - - def test_join_many_non_unique_index(self): - df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) - df2 = DataFrame({"a": [1, 1], "b": [1, 2], "d": [100, 200]}) - df3 = DataFrame({"a": [1, 1], "b": [1, 2], "e": [1000, 2000]}) - idf1 = df1.set_index(["a", "b"]) - idf2 = df2.set_index(["a", "b"]) - idf3 = df3.set_index(["a", "b"]) - - result = idf1.join([idf2, idf3], how='outer') - - df_partially_merged = merge(df1, df2, on=['a', 'b'], how='outer') - expected = merge(df_partially_merged, df3, on=['a', 'b'], how='outer') - - result = result.reset_index() - expected = expected[result.columns] - expected['a'] = expected.a.astype('int64') - expected['b'] = expected.b.astype('int64') - assert_frame_equal(result, expected) - - df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) - df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]}) - df3 = DataFrame( - {"a": [1, 1, 1], "b": [1, 1, 2], "e": [1000, 2000, 3000]}) - idf1 = df1.set_index(["a", "b"]) - idf2 = df2.set_index(["a", "b"]) - idf3 = df3.set_index(["a", "b"]) - result = idf1.join([idf2, idf3], how='inner') - - df_partially_merged = merge(df1, df2, on=['a', 'b'], how='inner') - expected = merge(df_partially_merged, df3, on=['a', 'b'], how='inner') - - result = result.reset_index() - - assert_frame_equal(result, expected.ix[:, result.columns]) - - # GH 11519 - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 
'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - s = Series(np.repeat(np.arange(8), 2), - index=np.repeat(np.arange(8), 2), name='TEST') - inner = df.join(s, how='inner') - outer = df.join(s, how='outer') - left = df.join(s, how='left') - right = df.join(s, how='right') - assert_frame_equal(inner, outer) - assert_frame_equal(inner, left) - assert_frame_equal(inner, right) - def test_merge_index_singlekey_right_vs_left(self): left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'], 'v1': np.random.randn(7)}) @@ -651,23 +165,6 @@ def test_merge_nocopy(self): merged['d'] = 'peekaboo' self.assertTrue((right['d'] == 'peekaboo').all()) - def test_join_sort(self): - left = DataFrame({'key': ['foo', 'bar', 'baz', 'foo'], - 'value': [1, 2, 3, 4]}) - right = DataFrame({'value2': ['a', 'b', 'c']}, - index=['bar', 'baz', 'foo']) - - joined = left.join(right, on='key', sort=True) - expected = DataFrame({'key': ['bar', 'baz', 'foo', 'foo'], - 'value': [2, 3, 1, 4], - 'value2': ['a', 'b', 'c', 'c']}, - index=[1, 2, 0, 3]) - assert_frame_equal(joined, expected) - - # smoke test - joined = left.join(right, on='key', sort=False) - self.assert_index_equal(joined.index, pd.Index(lrange(4))) - def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame @@ -737,20 +234,6 @@ def test_handle_join_key_pass_array(self): merged = merge(left, right, left_index=True, right_on=key, how='outer') self.assert_series_equal(merged['key_0'], Series(key, name='key_0')) - def test_mixed_type_join_with_suffix(self): - # GH #916 - df = DataFrame(np.random.randn(20, 6), - columns=['a', 'b', 'c', 'd', 'e', 'f']) - df.insert(0, 'id', 0) - df.insert(5, 'dt', 'foo') - - grouped = df.groupby('id') - mn = grouped.mean() - cn = grouped.count() - - # it works! 
- mn.join(cn, rsuffix='_right') - def test_no_overlap_more_informative_error(self): dt = datetime.now() df1 = DataFrame({'x': ['a']}, index=[dt]) @@ -963,68 +446,6 @@ def _constructor(self): tm.assertIsInstance(result, NotADataFrame) - def test_empty_dtype_coerce(self): - - # xref to #12411 - # xref to #12045 - # xref to #11594 - # see below - - # 10571 - df1 = DataFrame(data=[[1, None], [2, None]], columns=['a', 'b']) - df2 = DataFrame(data=[[3, None], [4, None]], columns=['a', 'b']) - result = concat([df1, df2]) - expected = df1.dtypes - assert_series_equal(result.dtypes, expected) - - def test_dtype_coerceion(self): - - # 12411 - df = DataFrame({'date': [pd.Timestamp('20130101').tz_localize('UTC'), - pd.NaT]}) - - result = concat([df.iloc[[0]], df.iloc[[1]]]) - assert_series_equal(result.dtypes, df.dtypes) - - # 12045 - import datetime - df = DataFrame({'date': [datetime.datetime(2012, 1, 1), - datetime.datetime(1012, 1, 2)]}) - result = concat([df.iloc[[0]], df.iloc[[1]]]) - assert_series_equal(result.dtypes, df.dtypes) - - # 11594 - df = DataFrame({'text': ['some words'] + [None] * 9}) - result = concat([df.iloc[[0]], df.iloc[[1]]]) - assert_series_equal(result.dtypes, df.dtypes) - - def test_append_dtype_coerce(self): - - # GH 4993 - # appending with datetime will incorrectly convert datetime64 - import datetime as dt - from pandas import NaT - - df1 = DataFrame(index=[1, 2], data=[dt.datetime(2013, 1, 1, 0, 0), - dt.datetime(2013, 1, 2, 0, 0)], - columns=['start_time']) - df2 = DataFrame(index=[4, 5], data=[[dt.datetime(2013, 1, 3, 0, 0), - dt.datetime(2013, 1, 3, 6, 10)], - [dt.datetime(2013, 1, 4, 0, 0), - dt.datetime(2013, 1, 4, 7, 10)]], - columns=['start_time', 'end_time']) - - expected = concat([Series([NaT, NaT, dt.datetime(2013, 1, 3, 6, 10), - dt.datetime(2013, 1, 4, 7, 10)], - name='end_time'), - Series([dt.datetime(2013, 1, 1, 0, 0), - dt.datetime(2013, 1, 2, 0, 0), - dt.datetime(2013, 1, 3, 0, 0), - dt.datetime(2013, 1, 4, 0, 0)], - name='start_time')], axis=1) - result = df1.append(df2, ignore_index=True) - assert_frame_equal(result, expected) - def test_join_append_timedeltas(self): import datetime as dt @@ -1140,239 +561,6 @@ def test_merge_on_periods(self): self.assertEqual(result['value_x'].dtype, 'object') self.assertEqual(result['value_y'].dtype, 'object') - def test_concat_NaT_series(self): - # GH 11693 - # test for merging NaT series with datetime series. 
- x = Series(date_range('20151124 08:00', '20151124 09:00', - freq='1h', tz='US/Eastern')) - y = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]') - expected = Series([x[0], x[1], pd.NaT, pd.NaT]) - - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - # all NaT with tz - expected = Series(pd.NaT, index=range(4), - dtype='datetime64[ns, US/Eastern]') - result = pd.concat([y, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - # without tz - x = pd.Series(pd.date_range('20151124 08:00', - '20151124 09:00', freq='1h')) - y = pd.Series(pd.date_range('20151124 10:00', - '20151124 11:00', freq='1h')) - y[:] = pd.NaT - expected = pd.Series([x[0], x[1], pd.NaT, pd.NaT]) - result = pd.concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - # all NaT without tz - x[:] = pd.NaT - expected = pd.Series(pd.NaT, index=range(4), - dtype='datetime64[ns]') - result = pd.concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - def test_concat_tz_frame(self): - df2 = DataFrame(dict(A=pd.Timestamp('20130102', tz='US/Eastern'), - B=pd.Timestamp('20130603', tz='CET')), - index=range(5)) - - # concat - df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) - assert_frame_equal(df2, df3) - - def test_concat_tz_series(self): - # GH 11755 - # tz and no tz - x = Series(date_range('20151124 08:00', - '20151124 09:00', - freq='1h', tz='UTC')) - y = Series(date_range('2012-01-01', '2012-01-02')) - expected = Series([x[0], x[1], y[0], y[1]], - dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - # GH 11887 - # concat tz and object - x = Series(date_range('20151124 08:00', - '20151124 09:00', - freq='1h', tz='UTC')) - y = Series(['a', 'b']) - expected = Series([x[0], x[1], y[0], y[1]], - dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - - # 12217 - # 12306 fixed I think - - # Concat'ing two UTC times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) - first[0] = first[0].dt.tz_localize('UTC') - - second = pd.DataFrame([[datetime(2016, 1, 2)]]) - second[0] = second[0].dt.tz_localize('UTC') - - result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, UTC]') - - # Concat'ing two London times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) - first[0] = first[0].dt.tz_localize('Europe/London') - - second = pd.DataFrame([[datetime(2016, 1, 2)]]) - second[0] = second[0].dt.tz_localize('Europe/London') - - result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') - - # Concat'ing 2+1 London times - first = pd.DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]]) - first[0] = first[0].dt.tz_localize('Europe/London') - - second = pd.DataFrame([[datetime(2016, 1, 3)]]) - second[0] = second[0].dt.tz_localize('Europe/London') - - result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') - - # Concat'ing 1+2 London times - first = pd.DataFrame([[datetime(2016, 1, 1)]]) - first[0] = first[0].dt.tz_localize('Europe/London') - - second = pd.DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]]) - second[0] = second[0].dt.tz_localize('Europe/London') - - result = pd.concat([first, second]) - self.assertEqual(result[0].dtype, 'datetime64[ns, Europe/London]') - - def test_concat_tz_series_with_datetimelike(self): - # GH 12620 - # tz and timedelta - x = 
[pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-02-01', tz='US/Eastern')] - y = [pd.Timedelta('1 day'), pd.Timedelta('2 day')] - result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) - tm.assert_series_equal(result, pd.Series(x + y, dtype='object')) - - # tz and period - y = [pd.Period('2011-03', freq='M'), pd.Period('2011-04', freq='M')] - result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) - tm.assert_series_equal(result, pd.Series(x + y, dtype='object')) - - def test_concat_tz_series_tzlocal(self): - # GH 13583 - tm._skip_if_no_dateutil() - import dateutil - x = [pd.Timestamp('2011-01-01', tz=dateutil.tz.tzlocal()), - pd.Timestamp('2011-02-01', tz=dateutil.tz.tzlocal())] - y = [pd.Timestamp('2012-01-01', tz=dateutil.tz.tzlocal()), - pd.Timestamp('2012-02-01', tz=dateutil.tz.tzlocal())] - result = concat([pd.Series(x), pd.Series(y)], ignore_index=True) - tm.assert_series_equal(result, pd.Series(x + y)) - self.assertEqual(result.dtype, 'datetime64[ns, tzlocal()]') - - def test_concat_period_series(self): - x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) - y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D')) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') - - # different freq - x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) - y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') - - x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) - y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') - - # non-period - x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) - y = Series(pd.DatetimeIndex(['2015-11-01', '2015-12-01'])) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') - - x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) - y = Series(['A', 'B']) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') - result = concat([x, y], ignore_index=True) - tm.assert_series_equal(result, expected) - self.assertEqual(result.dtype, 'object') - - def test_concat_empty_series(self): - # GH 11082 - s1 = pd.Series([1, 2, 3], name='x') - s2 = pd.Series(name='y') - res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame({'x': [1, 2, 3], 'y': [np.nan, np.nan, np.nan]}) - tm.assert_frame_equal(res, exp) - - s1 = pd.Series([1, 2, 3], name='x') - s2 = pd.Series(name='y') - res = pd.concat([s1, s2], axis=0) - # name will be reset - exp = pd.Series([1, 2, 3]) - tm.assert_series_equal(res, exp) - - # empty Series with no name - s1 = pd.Series([1, 2, 3], name='x') - s2 = pd.Series(name=None) - res = pd.concat([s1, s2], axis=1) - exp = pd.DataFrame({'x': [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, - columns=['x', 0]) - tm.assert_frame_equal(res, exp) - - def test_default_index(self): - # is_series and ignore_index - s1 = pd.Series([1, 2, 3], name='x') - s2 = pd.Series([4, 5, 6], name='y') 
- res = pd.concat([s1, s2], axis=1, ignore_index=True) - self.assertIsInstance(res.columns, pd.RangeIndex) - exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) - # use check_index_type=True to check the result have - # RangeIndex (default index) - tm.assert_frame_equal(res, exp, check_index_type=True, - check_column_type=True) - - # is_series and all inputs have no names - s1 = pd.Series([1, 2, 3]) - s2 = pd.Series([4, 5, 6]) - res = pd.concat([s1, s2], axis=1, ignore_index=False) - self.assertIsInstance(res.columns, pd.RangeIndex) - exp = pd.DataFrame([[1, 4], [2, 5], [3, 6]]) - exp.columns = pd.RangeIndex(2) - tm.assert_frame_equal(res, exp, check_index_type=True, - check_column_type=True) - - # is_dataframe and ignore_index - df1 = pd.DataFrame({'A': [1, 2], 'B': [5, 6]}) - df2 = pd.DataFrame({'A': [3, 4], 'B': [7, 8]}) - - res = pd.concat([df1, df2], axis=0, ignore_index=True) - exp = pd.DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], - columns=['A', 'B']) - tm.assert_frame_equal(res, exp, check_index_type=True, - check_column_type=True) - - res = pd.concat([df1, df2], axis=1, ignore_index=True) - exp = pd.DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]]) - tm.assert_frame_equal(res, exp, check_index_type=True, - check_column_type=True) - def test_indicator(self): # PR #10054. xref #7412 and closes #8790. df1 = DataFrame({'col1': [0, 1], 'col_left': [ @@ -2134,90 +1322,6 @@ def f(): self.assertRaises(NotImplementedError, f) -def _check_join(left, right, result, join_col, how='left', - lsuffix='_x', rsuffix='_y'): - - # some smoke tests - for c in join_col: - assert(result[c].notnull().all()) - - left_grouped = left.groupby(join_col) - right_grouped = right.groupby(join_col) - - for group_key, group in result.groupby(join_col): - l_joined = _restrict_to_columns(group, left.columns, lsuffix) - r_joined = _restrict_to_columns(group, right.columns, rsuffix) - - try: - lgroup = left_grouped.get_group(group_key) - except KeyError: - if how in ('left', 'inner'): - raise AssertionError('key %s should not have been in the join' - % str(group_key)) - - _assert_all_na(l_joined, left.columns, join_col) - else: - _assert_same_contents(l_joined, lgroup) - - try: - rgroup = right_grouped.get_group(group_key) - except KeyError: - if how in ('right', 'inner'): - raise AssertionError('key %s should not have been in the join' - % str(group_key)) - - _assert_all_na(r_joined, right.columns, join_col) - else: - _assert_same_contents(r_joined, rgroup) - - -def _restrict_to_columns(group, columns, suffix): - found = [c for c in group.columns - if c in columns or c.replace(suffix, '') in columns] - - # filter - group = group.ix[:, found] - - # get rid of suffixes, if any - group = group.rename(columns=lambda x: x.replace(suffix, '')) - - # put in the right order... - group = group.ix[:, columns] - - return group - - -def _assert_same_contents(join_chunk, source): - NA_SENTINEL = -1234567 # drop_duplicates not so NA-friendly... 
-
- jvalues = join_chunk.fillna(NA_SENTINEL).drop_duplicates().values
- svalues = source.fillna(NA_SENTINEL).drop_duplicates().values
-
- rows = set(tuple(row) for row in jvalues)
- assert(len(rows) == len(source))
- assert(all(tuple(row) in rows for row in svalues))
-
-
-def _assert_all_na(join_chunk, source_columns, join_col):
- for c in source_columns:
- if c in join_col:
- continue
- assert(join_chunk[c].isnull().all())
-
-
-def _join_by_hand(a, b, how='left'):
- join_index = a.index.join(b.index, how=how)
-
- a_re = a.reindex(join_index)
- b_re = b.reindex(join_index)
-
- result_columns = a.columns.append(b.columns)
-
- for col, s in compat.iteritems(b_re):
- a_re[col] = s
- return a_re.reindex(columns=result_columns)
-
-
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)

From a711b4251c765c0c4b9d1c8deb985162dfaf09ae Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 14 Jul 2016 04:44:18 -0400
Subject: [PATCH 29/44] BF(TST): allow AttributeError being raised (in addition to TypeError) from mpl (#13641)

Closes #13570
---
pandas/tests/test_graphics.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index 3a5b0117948b7..5493eb37c358b 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -1330,7 +1330,8 @@ def test_plot(self):
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
df = DataFrame({'x': [1, 2], 'y': [3, 4]})
- with tm.assertRaises(TypeError):
+ # mpl >= 1.5.2 (or slightly below) throws AttributeError
+ with tm.assertRaises((TypeError, AttributeError)):
df.plot.line(blarg=True)
df = DataFrame(np.random.rand(10, 3),

From 084ceaee135627680f4dd00115c3d6c7d930a22d Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Thu, 14 Jul 2016 06:20:50 -0400
Subject: [PATCH 30/44] API, DEPR: Raise and Deprecate Reshape for Pandas Objects

Author: gfyoung

Closes #13012 from gfyoung/categorical-reshape-validate and squashes the following commits:

3ad161d [gfyoung] API: Prevent invalid arguments to Categorical.reshape
---
doc/source/whatsnew/v0.19.0.txt | 3 ++
pandas/core/categorical.py | 23 +++++++--
pandas/core/internals.py | 26 +++++++++-
pandas/core/series.py | 14 ++++--
pandas/indexes/base.py | 10 ++++
pandas/io/packers.py | 7 +--
pandas/tests/indexes/test_base.py | 6 +++
pandas/tests/series/test_analytics.py | 68 ++++++++++++++++-----------
pandas/tests/test_categorical.py | 37 +++++++++++++--
9 files changed, 151 insertions(+), 43 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index bef02a06135de..688f3b7ff6ada 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -256,6 +256,7 @@ API changes
~~~~~~~~~~~
+- ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`)
- Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception (:issue:`10001`)
- ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules. New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64.
(:issue:`12388`) - An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`) @@ -449,6 +450,8 @@ Furthermore: Deprecations ^^^^^^^^^^^^ +- ``Categorical.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`) +- ``Series.reshape`` has been deprecated and will be removed in a subsequent release (:issue:`12882`) - ``compact_ints`` and ``use_unsigned`` have been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13320`) - ``buffer_lines`` has been deprecated in ``pd.read_csv()`` and will be removed in a future version (:issue:`13360`) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 79d8bfbf57f12..1d1a9f990e61a 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -383,11 +383,28 @@ def itemsize(self): def reshape(self, new_shape, *args, **kwargs): """ - An ndarray-compatible method that returns - `self` because categorical instances cannot - actually be reshaped. + DEPRECATED: calling this method will raise an error in a + future release. + + An ndarray-compatible method that returns `self` because + `Categorical` instances cannot actually be reshaped. + + Parameters + ---------- + new_shape : int or tuple of ints + A 1-D array of integers that correspond to the new + shape of the `Categorical`. For more information on + the parameter, please refer to `np.reshape`. """ + warn("reshape is deprecated and will raise " + "in a subsequent release", FutureWarning, stacklevel=2) + nv.validate_reshape(args, kwargs) + + # while the 'new_shape' parameter has no effect, + # we should still enforce valid shape parameters + np.reshape(self.codes, new_shape) + return self @property diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 363ac8249eb06..ff12cfddbe9cd 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1839,7 +1839,7 @@ def convert(self, *args, **kwargs): try: values = values.reshape(shape) values = _block_shape(values, ndim=self.ndim) - except AttributeError: + except (AttributeError, NotImplementedError): pass newb = make_block(values, ndim=self.ndim, placement=[rl]) blocks.append(newb) @@ -3616,7 +3616,7 @@ def value_getitem(placement): return value else: if value.ndim == self.ndim - 1: - value = value.reshape((1,) + value.shape) + value = _safe_reshape(value, (1,) + value.shape) def value_getitem(placement): return value @@ -4686,6 +4686,28 @@ def rrenamer(x): _transform_index(right, rrenamer)) +def _safe_reshape(arr, new_shape): + """ + If possible, reshape `arr` to have shape `new_shape`, + with a couple of exceptions (see gh-13012): + + 1) If `arr` is a Categorical or Index, `arr` will be + returned as is. + 2) If `arr` is a Series, the `_values` attribute will + be reshaped and returned. + + Parameters + ---------- + arr : array-like, object to be reshaped + new_shape : int or tuple of ints, the new shape + """ + if isinstance(arr, ABCSeries): + arr = arr._values + if not isinstance(arr, Categorical): + arr = arr.reshape(new_shape) + return arr + + def _transform_index(index, func): """ Apply function to all values found in index. diff --git a/pandas/core/series.py b/pandas/core/series.py index 2c7f298dde2ec..b933f68cfad62 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -843,14 +843,22 @@ def repeat(self, reps, *args, **kwargs): def reshape(self, *args, **kwargs): """ - Return the values attribute of `self` with shape `args`. 
- However, if the specified shape matches exactly the current - shape, `self` is returned for compatibility reasons. + DEPRECATED: calling this method will raise an error in a + future release. Please call ``.values.reshape(...)`` instead. + + return an ndarray with the values shape + if the specified shape matches exactly the current shape, then + return self (for compat) See also -------- numpy.ndarray.reshape """ + warnings.warn("reshape is deprecated and will raise " + "in a subsequent release. Please use " + ".values.reshape(...) instead", FutureWarning, + stacklevel=2) + if len(args) == 1 and hasattr(args[0], '__iter__'): shape = args[0] else: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index 5c9938c932da2..b013d6ccb0b8e 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -957,6 +957,16 @@ def rename(self, name, inplace=False): """ return self.set_names([name], inplace=inplace) + def reshape(self, *args, **kwargs): + """ + NOT IMPLEMENTED: do not call this method, as reshaping is not + supported for Index objects and will raise an error. + + Reshape an Index. + """ + raise NotImplementedError("reshaping is not supported " + "for Index objects") + @property def _has_complex_internals(self): # to disable groupby tricks in MultiIndex diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 14e2c9b371296..94f390955dddd 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -61,7 +61,7 @@ from pandas.core.generic import NDFrame from pandas.core.common import PerformanceWarning from pandas.io.common import get_filepath_or_buffer -from pandas.core.internals import BlockManager, make_block +from pandas.core.internals import BlockManager, make_block, _safe_reshape import pandas.core.internals as internals from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer, ExtType @@ -622,8 +622,9 @@ def decode(obj): axes = obj[u'axes'] def create_block(b): - values = unconvert(b[u'values'], dtype_for(b[u'dtype']), - b[u'compress']).reshape(b[u'shape']) + values = _safe_reshape(unconvert( + b[u'values'], dtype_for(b[u'dtype']), + b[u'compress']), b[u'shape']) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 67869901b068e..06662e52e3a6f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1413,6 +1413,12 @@ def test_take_fill_value(self): with tm.assertRaises(IndexError): idx.take(np.array([1, -5])) + def test_reshape_raise(self): + msg = "reshaping is not supported" + idx = pd.Index([0, 1, 2]) + tm.assertRaisesRegexp(NotImplementedError, msg, + idx.reshape, idx.shape) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): # GH6552 idx = pd.Index([0, 1, 2]) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index d9e2d8096c8d7..34cfb2f0c1529 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1554,49 +1554,63 @@ def test_shift_categorical(self): assert_index_equal(s.values.categories, sp1.values.categories) assert_index_equal(s.values.categories, sn2.values.categories) - def test_reshape_non_2d(self): - # GH 4554 - x = Series(np.random.random(201), name='x') - self.assertTrue(x.reshape(x.shape, ) is x) + def test_reshape_deprecate(self): + x = Series(np.random.random(10), name='x') + tm.assert_produces_warning(FutureWarning, x.reshape, x.shape) - # GH 2719 - a = Series([1, 2, 3, 
4]) - result = a.reshape(2, 2) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) + def test_reshape_non_2d(self): + # see gh-4554 + with tm.assert_produces_warning(FutureWarning): + x = Series(np.random.random(201), name='x') + self.assertTrue(x.reshape(x.shape, ) is x) + + # see gh-2719 + with tm.assert_produces_warning(FutureWarning): + a = Series([1, 2, 3, 4]) + result = a.reshape(2, 2) + expected = a.values.reshape(2, 2) + tm.assert_numpy_array_equal(result, expected) + self.assertIsInstance(result, type(expected)) def test_reshape_2d_return_array(self): x = Series(np.random.random(201), name='x') - result = x.reshape((-1, 1)) - self.assertNotIsInstance(result, Series) - result2 = np.reshape(x, (-1, 1)) - self.assertNotIsInstance(result2, Series) + with tm.assert_produces_warning(FutureWarning): + result = x.reshape((-1, 1)) + self.assertNotIsInstance(result, Series) - result = x[:, None] - expected = x.reshape((-1, 1)) - assert_almost_equal(result, expected) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result2 = np.reshape(x, (-1, 1)) + self.assertNotIsInstance(result2, Series) + + with tm.assert_produces_warning(FutureWarning): + result = x[:, None] + expected = x.reshape((-1, 1)) + assert_almost_equal(result, expected) def test_reshape_bad_kwarg(self): a = Series([1, 2, 3, 4]) - msg = "'foo' is an invalid keyword argument for this function" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "'foo' is an invalid keyword argument for this function" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, (2, 2), foo=2) - msg = "reshape\(\) got an unexpected keyword argument 'foo'" - tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "reshape\(\) got an unexpected keyword argument 'foo'" + tm.assertRaisesRegexp(TypeError, msg, a.reshape, a.shape, foo=2) def test_numpy_reshape(self): a = Series([1, 2, 3, 4]) - result = np.reshape(a, (2, 2)) - expected = a.values.reshape(2, 2) - tm.assert_numpy_array_equal(result, expected) - self.assertIsInstance(result, type(expected)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = np.reshape(a, (2, 2)) + expected = a.values.reshape(2, 2) + tm.assert_numpy_array_equal(result, expected) + self.assertIsInstance(result, type(expected)) - result = np.reshape(a, a.shape) - tm.assert_series_equal(result, a) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = np.reshape(a, a.shape) + tm.assert_series_equal(result, a) def test_unstack(self): from numpy import nan diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2ca1fc71df20a..dd39861ac3114 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -4058,13 +4058,40 @@ def test_numpy_repeat(self): msg = "the 'axis' parameter is not supported" tm.assertRaisesRegexp(ValueError, msg, np.repeat, cat, 2, axis=1) + def test_reshape(self): + cat = pd.Categorical([], categories=["a", "b"]) + tm.assert_produces_warning(FutureWarning, cat.reshape, 0) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical([], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(0), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical([], 
categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape((5, -1)), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(cat.shape), cat) + + with tm.assert_produces_warning(FutureWarning): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(cat.reshape(cat.size), cat) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "can only specify one unknown dimension" + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + tm.assertRaisesRegexp(ValueError, msg, cat.reshape, (-2, -1)) + def test_numpy_reshape(self): - cat = pd.Categorical(["a", "b"], categories=["a", "b"]) - self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + self.assert_categorical_equal(np.reshape(cat, cat.shape), cat) - msg = "the 'order' parameter is not supported" - tm.assertRaisesRegexp(ValueError, msg, np.reshape, - cat, cat.shape, order='F') + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + msg = "the 'order' parameter is not supported" + tm.assertRaisesRegexp(ValueError, msg, np.reshape, + cat, cat.shape, order='F') def test_na_actions(self): From 3f6d4bdd63d9a1ae27e587bd033e507f7a5e1109 Mon Sep 17 00:00:00 2001 From: yui-knk Date: Thu, 14 Jul 2016 06:47:32 -0400 Subject: [PATCH 31/44] CLN: Fix compile time warnings Author: yui-knk Closes #13643 from yui-knk/warning2 and squashes the following commits: ee3a4fb [yui-knk] CLN: Fix compile time warnings --- pandas/src/datetime/np_datetime.c | 2 +- pandas/src/ujson/python/objToJSON.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/src/datetime/np_datetime.c b/pandas/src/datetime/np_datetime.c index c30b404d2b8b2..80703c8b08de6 100644 --- a/pandas/src/datetime/np_datetime.c +++ b/pandas/src/datetime/np_datetime.c @@ -576,7 +576,7 @@ void pandas_datetime_to_datetimestruct(npy_datetime val, PANDAS_DATETIMEUNIT fr, } PANDAS_DATETIMEUNIT get_datetime64_unit(PyObject *obj) { - return ((PyDatetimeScalarObject *) obj)->obmeta.base; + return (PANDAS_DATETIMEUNIT)((PyDatetimeScalarObject *) obj)->obmeta.base; } diff --git a/pandas/src/ujson/python/objToJSON.c b/pandas/src/ujson/python/objToJSON.c index 1080e9548ba56..75de63acbd7d6 100644 --- a/pandas/src/ujson/python/objToJSON.c +++ b/pandas/src/ujson/python/objToJSON.c @@ -493,7 +493,7 @@ static void *NpyDateTimeScalarToJSON(JSOBJ _obj, JSONTypeContext *tc, void *outV PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *) _obj; PRINTMARK(); - pandas_datetime_to_datetimestruct(obj->obval, obj->obmeta.base, &dts); + pandas_datetime_to_datetimestruct(obj->obval, (PANDAS_DATETIMEUNIT)obj->obmeta.base, &dts); return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); } From c9a27ede0925ddbaa8d3ec9efd3c332a636505cf Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 14 Jul 2016 16:26:07 +0200 Subject: [PATCH 32/44] CLN: fix some issues in asv benchmark suite (#13630) * CLN: fix params list * Fix issue in asv.conf.json for win32+other environment Fix mistaken exclusion of virtualenv or existing:same on win32 in the config. 
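The corrected rules rely on asv's regular-expression matching: ``(?!conda).*`` is a negative lookahead that matches any ``environment_type`` except ``conda``, so a single entry now covers virtualenv and ``existing:same`` environments alike. A minimal sketch of that matching semantics, assuming asv applies ``re.match`` to the matrix values (the helper name ``excluded`` is illustrative only, not part of asv):

.. code-block:: python

    import re

    def excluded(pattern, value):
        # asv-style matching: the pattern is tried from the start of the value
        return re.match(pattern, value) is not None

    assert excluded('(?!conda).*', 'virtualenv')      # non-conda: rule applies
    assert excluded('(?!conda).*', 'existing:same')   # non-conda: rule applies
    assert not excluded('(?!conda).*', 'conda')       # conda envs are untouched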
Credits: @pv * CLN: remove DataMatrix * ASV: fix exclusion of tables package for non-conda environments --- asv_bench/asv.conf.json | 6 +++--- asv_bench/benchmarks/indexing.py | 20 -------------------- asv_bench/benchmarks/inference.py | 10 +++++----- asv_bench/benchmarks/join_merge.py | 16 ---------------- 4 files changed, 8 insertions(+), 44 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 7b9fe353df2e3..f5fa849464881 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -77,11 +77,11 @@ // On conda install pytables, otherwise tables {"environment_type": "conda", "tables": ""}, {"environment_type": "conda", "pytables": null}, - {"environment_type": "virtualenv", "tables": null}, - {"environment_type": "virtualenv", "pytables": ""}, + {"environment_type": "(?!conda).*", "tables": null}, + {"environment_type": "(?!conda).*", "pytables": ""}, // On conda&win32, install libpython {"sys_platform": "(?!win32).*", "libpython": ""}, - {"sys_platform": "win32", "libpython": null}, + {"environment_type": "conda", "sys_platform": "win32", "libpython": null}, {"environment_type": "(?!conda).*", "libpython": ""} ], "include": [], diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 53d37a8161f43..094ae23a92fad 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -19,24 +19,6 @@ def time_dataframe_getitem_scalar(self): self.df[self.col][self.idx] -class datamatrix_getitem_scalar(object): - goal_time = 0.2 - - def setup(self): - try: - self.klass = DataMatrix - except: - self.klass = DataFrame - self.index = tm.makeStringIndex(1000) - self.columns = tm.makeStringIndex(30) - self.df = self.klass(np.random.rand(1000, 30), index=self.index, columns=self.columns) - self.idx = self.index[100] - self.col = self.columns[10] - - def time_datamatrix_getitem_scalar(self): - self.df[self.col][self.idx] - - class series_get_value(object): goal_time = 0.2 @@ -498,5 +480,3 @@ def setup(self): def time_float_loc(self): self.ind.get_loc(0) - - diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 6809c351beade..ee9d3104be4b1 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -143,12 +143,12 @@ class to_numeric(object): param_names = ['data', 'downcast'] params = [ - [(['1'] * N / 2) + ([2] * N / 2), - (['-1'] * N / 2) + ([2] * N / 2), - np.repeat(np.array('1970-01-01', '1970-01-02', + [(['1'] * (N / 2)) + ([2] * (N / 2)), + (['-1'] * (N / 2)) + ([2] * (N / 2)), + np.repeat(np.array(['1970-01-01', '1970-01-02'], dtype='datetime64[D]'), N), - (['1.1'] * N / 2) + ([2] * N / 2), - ([1] * N / 2) + ([2] * N / 2), + (['1.1'] * (N / 2)) + ([2] * (N / 2)), + ([1] * (N / 2)) + ([2] * (N / 2)), np.repeat(np.int32(1), N)], [None, 'integer', 'signed', 'unsigned', 'float'], ] diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 39ebd9cb1cb73..dcd07911f2ff0 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -179,10 +179,6 @@ def setup(self): self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D']) except: pass - try: - self.DataFrame = DataMatrix - except: - pass self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, }) self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D'])
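The ``inference.py`` hunk above is an operator-precedence fix: ``['1'] * N / 2`` parses as ``(['1'] * N) / 2``, which attempts to divide a list and raises ``TypeError``; the intended expression is ``['1'] * (N / 2)``. A quick sketch of the difference (written with ``//`` so the repeat count is an integer on Python 3 as well, whereas the benchmark itself targets Python 2 division):

.. code-block:: python

    N = 8

    try:
        ['1'] * N / 2                 # parsed as (['1'] * N) / 2
    except TypeError as exc:
        print(exc)                    # unsupported operand type(s) for /

    data = ['1'] * (N // 2) + [2] * (N // 2)
    print(data)                       # ['1', '1', '1', '1', 2, 2, 2, 2]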
self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D']) @@ -210,10 +206,6 @@ def setup(self): self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D']) except: pass - try: - self.DataFrame = DataMatrix - except: - pass self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, }) self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D']) self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D']) @@ -241,10 +233,6 @@ def setup(self): self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D']) except: pass - try: - self.DataFrame = DataMatrix - except: - pass self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, }) self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D']) self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D']) @@ -272,10 +260,6 @@ def setup(self): self.df_multi = DataFrame(np.random.randn(len(self.index2), 4), index=self.index2, columns=['A', 'B', 'C', 'D']) except: pass - try: - self.DataFrame = DataMatrix - except: - pass self.df = pd.DataFrame({'data1': np.random.randn(100000), 'data2': np.random.randn(100000), 'key1': self.key1, 'key2': self.key2, }) self.df_key1 = pd.DataFrame(np.random.randn(len(self.level1), 4), index=self.level1, columns=['A', 'B', 'C', 'D']) self.df_key2 = pd.DataFrame(np.random.randn(len(self.level2), 4), index=self.level2, columns=['A', 'B', 'C', 'D']) From 05b976c9339bad84f488c8d6813ed19232c9255c Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 14 Jul 2016 20:06:52 -0400 Subject: [PATCH 33/44] TST: add tests for Timestamp.toordinal/fromordinal follow-up for #13593 Author: sinhrks Closes #13610 from sinhrks/depr_timestamp_offset2 and squashes the following commits: 28f8d41 [sinhrks] TST: add tests for Timestamp.toordinal --- pandas/tseries/tests/test_tslib.py | 27 +++++++++++++++++++++++++++ pandas/tslib.pyx | 21 ++++++++++++++++++--- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index ce88edcf4249b..31d6393c1c26e 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -255,6 +255,18 @@ def test_constructor_keyword(self): hour=1, minute=2, second=3, microsecond=999999)), repr(Timestamp('2015-11-12 01:02:03.999999'))) + def test_constructor_fromordinal(self): + base = datetime.datetime(2000, 1, 1) + + ts = Timestamp.fromordinal(base.toordinal(), freq='D') + self.assertEqual(base, ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + ts = Timestamp.fromordinal(base.toordinal(), tz='US/Eastern') + self.assertEqual(pd.Timestamp('2000-01-01', tz='US/Eastern'), ts) + self.assertEqual(base.toordinal(), ts.toordinal()) + def test_constructor_offset_depr(self): # GH 12160 with tm.assert_produces_warning(FutureWarning, @@ -270,6 +282,21 @@ def test_constructor_offset_depr(self): with tm.assertRaisesRegexp(TypeError, msg): Timestamp('2011-01-01', offset='D', freq='D') + def test_constructor_offset_depr_fromordinal(self): + # GH 12160 + base = 
datetime.datetime(2000, 1, 1) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + ts = Timestamp.fromordinal(base.toordinal(), offset='D') + self.assertEqual(pd.Timestamp('2000-01-01'), ts) + self.assertEqual(ts.freq, 'D') + self.assertEqual(base.toordinal(), ts.toordinal()) + + msg = "Can only specify freq or offset, not both" + with tm.assertRaisesRegexp(TypeError, msg): + Timestamp.fromordinal(base.toordinal(), offset='D', freq='D') + def test_conversion(self): # GH 9255 ts = Timestamp('2000-01-01') diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 650b4c7979d8d..2af08f2713262 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -235,12 +235,14 @@ class Timestamp(_Timestamp): ---------- ts_input : datetime-like, str, int, float Value to be converted to Timestamp - offset : str, DateOffset + freq : str, DateOffset Offset which Timestamp will have tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will have. unit : string numpy unit used for conversion, if ts_input is int or float + offset : str, DateOffset + Deprecated, use freq The other two forms mimic the parameters from ``datetime.datetime``. They can be passed by either position or keyword, but not both mixed together. @@ -262,8 +264,21 @@ class Timestamp(_Timestamp): @classmethod def fromordinal(cls, ordinal, freq=None, tz=None, offset=None): - """ passed an ordinal, translate and convert to a ts - note: by definition there cannot be any tz info on the ordinal itself """ + """ + passed an ordinal, translate and convert to a ts + note: by definition there cannot be any tz info on the ordinal itself + + Parameters + ---------- + ordinal : int + date corresponding to a proleptic Gregorian ordinal + freq : str, DateOffset + Offset which Timestamp will have + tz : string, pytz.timezone, dateutil.tz.tzfile or None + Time zone for time which Timestamp will have. + offset : str, DateOffset + Deprecated, use freq + """ return cls(datetime.fromordinal(ordinal), freq=freq, tz=tz, offset=offset) @classmethod From 71a06752a7040a75402f3e30a82b96e10816b492 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 14 Jul 2016 20:12:33 -0400 Subject: [PATCH 34/44] CLN: Initialization coincides with mapping, hence with uniqueness check - [x] tests added / passed - [x] passes ``git diff upstream/master | flake8 --diff`` Rebased version of https://github.com/pydata/pandas/pull/10229 which was [actually not](https://github.com/pydata/pandas/pull/10229#issuecomment-131470116) fixed by https://github.com/pydata/pandas/pull/10199. Nothing particularly relevant, just wanted to delete this branch locally and noticed it still applies: you'll judge what to do of it.
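In user-visible terms the cleanup below is behaviour-preserving: ``Index.is_unique`` answers the same question as before, it just populates the engine's hash table on first access instead of tracking a separate ``unique_check`` flag. A small sanity sketch of that unchanged behaviour:

.. code-block:: python

    import pandas as pd

    assert pd.Index(['a', 'b', 'c']).is_unique
    assert not pd.Index([1, 2, 2, 3]).is_unique   # 2 is duplicated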
Author: Pietro Battiston Closes #13594 from toobaz/fix_checkunique and squashes the following commits: a63bd12 [Pietro Battiston] CLN: Initialization coincides with mapping, hence with uniqueness check --- pandas/index.pyx | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/index.pyx b/pandas/index.pyx index 71717dd2d771b..bc985100692fc 100644 --- a/pandas/index.pyx +++ b/pandas/index.pyx @@ -80,7 +80,7 @@ cdef class IndexEngine: cdef: bint unique, monotonic_inc, monotonic_dec - bint initialized, monotonic_check, unique_check + bint initialized, monotonic_check def __init__(self, vgetter, n): self.vgetter = vgetter @@ -91,7 +91,6 @@ cdef class IndexEngine: self.monotonic_check = 0 self.unique = 0 - self.unique_check = 0 self.monotonic_inc = 0 self.monotonic_dec = 0 @@ -211,8 +210,8 @@ cdef class IndexEngine: property is_unique: def __get__(self): - if not self.unique_check: - self._do_unique_check() + if not self.initialized: + self.initialize() return self.unique == 1 @@ -246,9 +245,6 @@ cdef class IndexEngine: cdef _get_index_values(self): return self.vgetter() - cdef inline _do_unique_check(self): - self._ensure_mapping_populated() - def _call_monotonic(self, values): raise NotImplementedError @@ -270,7 +266,6 @@ cdef class IndexEngine: if len(self.mapping) == len(values): self.unique = 1 - self.unique_check = 1 self.initialized = 1 From 0a70b5fef3ae2363fea040ea47dd52247811c8c8 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Thu, 14 Jul 2016 20:26:01 -0400 Subject: [PATCH 35/44] API: Change Period('NAT') to return NaT closes #12759 closes #13582 Author: sinhrks Closes #13609 from sinhrks/period_nat and squashes the following commits: 9305c36 [sinhrks] COMPAT: Period(NaT) now returns pd.NaT --- doc/source/whatsnew/v0.19.0.txt | 39 +++ pandas/src/period.pyx | 269 ++++++++++--------- pandas/tests/indexes/test_datetimelike.py | 9 +- pandas/tseries/period.py | 49 ++-- pandas/tseries/tests/test_base.py | 26 +- pandas/tseries/tests/test_period.py | 305 +++++++++++++--------- pandas/tseries/tests/test_tslib.py | 7 + pandas/tslib.pyx | 5 +- 8 files changed, 407 insertions(+), 302 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 688f3b7ff6ada..c9f501c682a18 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -446,6 +446,45 @@ Furthermore: - Passing duplicated ``percentiles`` will now raise a ``ValueError``. - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) +.. _whatsnew_0190.api.periodnat: + +``Period('NaT')`` now returns ``pd.NaT`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, ``Period`` has its own ``Period('NaT')`` representation different from ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`, :issue:`13582`) + +Previous Behavior: + +.. code-block:: ipython + + In [5]: pd.Period('NaT', freq='D') + Out[5]: Period('NaT', 'D') + +New Behavior: + +.. ipython:: python + + pd.Period('NaT') + + +To be compat with ``Period`` addition and subtraction, ``pd.NaT`` now supports addition and subtraction with ``int``. Previously it raises ``ValueError``. + +Previous Behavior: + +.. code-block:: ipython + + In [5]: pd.NaT + 1 + ... + ValueError: Cannot add integral value to Timestamp without freq. + +New Behavior: + +.. ipython:: python + + pd.NaT + 1 + pd.NaT - 1 + + .. 
_whatsnew_0190.deprecations: Deprecations diff --git a/pandas/src/period.pyx b/pandas/src/period.pyx index af2e295ae0cfc..37f265ede07e7 100644 --- a/pandas/src/period.pyx +++ b/pandas/src/period.pyx @@ -472,7 +472,11 @@ def extract_ordinals(ndarray[object] values, freq): except AttributeError: p = Period(p, freq=freq) - ordinals[i] = p.ordinal + if p is tslib.NaT: + # input may contain NaT-like string + ordinals[i] = tslib.iNaT + else: + ordinals[i] = p.ordinal return ordinals @@ -665,24 +669,8 @@ class IncompatibleFrequency(ValueError): pass -cdef class Period(object): - """ - Represents an period of time +cdef class _Period(object): - Parameters - ---------- - value : Period or compat.string_types, default None - The time period represented (e.g., '4Q2005') - freq : str, default None - One of pandas period strings or corresponding objects - year : int, default None - month : int, default 1 - quarter : int, default None - day : int, default 1 - hour : int, default 0 - minute : int, default 0 - second : int, default 0 - """ cdef public: int64_t ordinal object freq @@ -711,97 +699,22 @@ cdef class Period(object): @classmethod def _from_ordinal(cls, ordinal, freq): """ fast creation from an ordinal and freq that are already validated! """ - self = Period.__new__(cls) - self.ordinal = ordinal - self.freq = cls._maybe_convert_freq(freq) - return self - - def __init__(self, value=None, freq=None, ordinal=None, - year=None, month=1, quarter=None, day=1, - hour=0, minute=0, second=0): - # freq points to a tuple (base, mult); base is one of the defined - # periods such as A, Q, etc. Every five minutes would be, e.g., - # ('T', 5) but may be passed in as a string like '5T' - - # ordinal is the period offset from the gregorian proleptic epoch - - if ordinal is not None and value is not None: - raise ValueError(("Only value or ordinal but not both should be " - "given but not both")) - elif ordinal is not None: - if not lib.is_integer(ordinal): - raise ValueError("Ordinal must be an integer") - if freq is None: - raise ValueError('Must supply freq for ordinal value') - - elif value is None: - if freq is None: - raise ValueError("If value is None, freq cannot be None") - ordinal = _ordinal_from_fields(year, month, quarter, day, - hour, minute, second, freq) - - elif isinstance(value, Period): - other = value - if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): - ordinal = other.ordinal - freq = other.freq - else: - converted = other.asfreq(freq) - ordinal = converted.ordinal - - elif is_null_datetimelike(value) or value in tslib._nat_strings: - ordinal = tslib.iNaT - if freq is None: - raise ValueError("If value is NaT, freq cannot be None " - "because it cannot be inferred") - - elif isinstance(value, compat.string_types) or lib.is_integer(value): - if lib.is_integer(value): - value = str(value) - value = value.upper() - dt, _, reso = parse_time_string(value, freq) - - if freq is None: - try: - freq = frequencies.Resolution.get_freq(reso) - except KeyError: - raise ValueError("Invalid frequency or could not infer: %s" % reso) - - elif isinstance(value, datetime): - dt = value - if freq is None: - raise ValueError('Must supply freq for datetime value') - elif isinstance(value, np.datetime64): - dt = Timestamp(value) - if freq is None: - raise ValueError('Must supply freq for datetime value') - elif isinstance(value, date): - dt = datetime(year=value.year, month=value.month, day=value.day) - if freq is None: - raise ValueError('Must supply freq for datetime 
value') - else: - msg = "Value must be Period, string, integer, or datetime" - raise ValueError(msg) - - base, mult = frequencies.get_freq_code(freq) - - if ordinal is None: - self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, - dt.microsecond, 0, base) + if ordinal == tslib.iNaT: + return tslib.NaT else: + self = _Period.__new__(cls) self.ordinal = ordinal - - self.freq = self._maybe_convert_freq(freq) + self.freq = cls._maybe_convert_freq(freq) + return self def __richcmp__(self, other, op): if isinstance(other, Period): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return _nat_scalar_rules[op] return PyObject_RichCompareBool(self.ordinal, other.ordinal, op) + elif other is tslib.NaT: + return _nat_scalar_rules[op] # index/series like elif hasattr(other, '_typ'): return NotImplemented @@ -824,10 +737,7 @@ cdef class Period(object): offset_nanos = tslib._delta_to_nanoseconds(offset) if nanos % offset_nanos == 0: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + (nanos // offset_nanos) + ordinal = self.ordinal + (nanos // offset_nanos) return Period(ordinal=ordinal, freq=self.freq) msg = 'Input cannot be converted to Period(freq={0})' raise IncompatibleFrequency(msg.format(self.freqstr)) @@ -835,10 +745,7 @@ cdef class Period(object): freqstr = frequencies.get_standard_freq(other) base = frequencies.get_base_alias(freqstr) if base == self.freq.rule_code: - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other.n + ordinal = self.ordinal + other.n return Period(ordinal=ordinal, freq=self.freq) msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -853,10 +760,7 @@ cdef class Period(object): elif other is tslib.NaT: return tslib.NaT elif lib.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal + other * self.freq.n + ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover return NotImplemented @@ -872,17 +776,12 @@ cdef class Period(object): neg_other = -other return self + neg_other elif lib.is_integer(other): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - ordinal = self.ordinal - other * self.freq.n + ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) elif isinstance(other, Period): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT: - return Period(ordinal=tslib.iNaT, freq=self.freq) return self.ordinal - other.ordinal elif getattr(other, '_typ', None) == 'periodindex': return -other.__sub__(self) @@ -914,16 +813,13 @@ cdef class Period(object): base1, mult1 = frequencies.get_freq_code(self.freq) base2, mult2 = frequencies.get_freq_code(freq) - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal + # mult1 can't be negative or 0 + end = how == 'E' + if end: + ordinal = self.ordinal + mult1 - 1 else: - # mult1 can't be negative or 0 - end = how == 'E' - if end: - ordinal = self.ordinal + mult1 - 1 - else: - ordinal = self.ordinal - ordinal = period_asfreq(ordinal, base1, base2, end) + ordinal = self.ordinal + ordinal = period_asfreq(ordinal, base1, base2, end) return 
Period(ordinal=ordinal, freq=freq) @@ -933,12 +829,9 @@ cdef class Period(object): @property def end_time(self): - if self.ordinal == tslib.iNaT: - ordinal = self.ordinal - else: - # freq.n can't be negative or 0 - # ordinal = (self + self.freq.n).start_time.value - 1 - ordinal = (self + 1).start_time.value - 1 + # freq.n can't be negative or 0 + # ordinal = (self + self.freq.n).start_time.value - 1 + ordinal = (self + 1).start_time.value - 1 return Timestamp(ordinal) def to_timestamp(self, freq=None, how='start', tz=None): @@ -1199,8 +1092,114 @@ cdef class Period(object): return period_format(self.ordinal, base, fmt) -def _ordinal_from_fields(year, month, quarter, day, hour, minute, - second, freq): +class Period(_Period): + """ + Represents an period of time + + Parameters + ---------- + value : Period or compat.string_types, default None + The time period represented (e.g., '4Q2005') + freq : str, default None + One of pandas period strings or corresponding objects + year : int, default None + month : int, default 1 + quarter : int, default None + day : int, default 1 + hour : int, default 0 + minute : int, default 0 + second : int, default 0 + """ + + def __new__(cls, value=None, freq=None, ordinal=None, + year=None, month=None, quarter=None, day=None, + hour=None, minute=None, second=None): + # freq points to a tuple (base, mult); base is one of the defined + # periods such as A, Q, etc. Every five minutes would be, e.g., + # ('T', 5) but may be passed in as a string like '5T' + + # ordinal is the period offset from the gregorian proleptic epoch + + cdef _Period self + + if ordinal is not None and value is not None: + raise ValueError(("Only value or ordinal but not both should be " + "given but not both")) + elif ordinal is not None: + if not lib.is_integer(ordinal): + raise ValueError("Ordinal must be an integer") + if freq is None: + raise ValueError('Must supply freq for ordinal value') + + elif value is None: + if (year is None and month is None and quarter is None and + day is None and hour is None and minute is None and second is None): + ordinal = tslib.iNaT + else: + if freq is None: + raise ValueError("If value is None, freq cannot be None") + + # set defaults + month = 1 if month is None else month + day = 1 if day is None else day + hour = 0 if hour is None else hour + minute = 0 if minute is None else minute + second = 0 if second is None else second + + ordinal = _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq) + + elif isinstance(value, Period): + other = value + if freq is None or frequencies.get_freq_code(freq) == frequencies.get_freq_code(other.freq): + ordinal = other.ordinal + freq = other.freq + else: + converted = other.asfreq(freq) + ordinal = converted.ordinal + + elif is_null_datetimelike(value) or value in tslib._nat_strings: + ordinal = tslib.iNaT + + elif isinstance(value, compat.string_types) or lib.is_integer(value): + if lib.is_integer(value): + value = str(value) + value = value.upper() + dt, _, reso = parse_time_string(value, freq) + + if freq is None: + try: + freq = frequencies.Resolution.get_freq(reso) + except KeyError: + raise ValueError("Invalid frequency or could not infer: %s" % reso) + + elif isinstance(value, datetime): + dt = value + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif isinstance(value, np.datetime64): + dt = Timestamp(value) + if freq is None: + raise ValueError('Must supply freq for datetime value') + elif isinstance(value, date): + dt = datetime(year=value.year, 
month=value.month, day=value.day) + if freq is None: + raise ValueError('Must supply freq for datetime value') + else: + msg = "Value must be Period, string, integer, or datetime" + raise ValueError(msg) + + if ordinal is None: + base, mult = frequencies.get_freq_code(freq) + ordinal = get_period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 0, base) + + return cls._from_ordinal(ordinal, freq) + + +def _ordinal_from_fields(year, month, quarter, day, + hour, minute, second, freq): base, mult = frequencies.get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 9eba481a66685..5c21f71d64660 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -741,14 +741,7 @@ def test_astype(self): result = idx.astype(object) expected = Index([Period('2016-05-16', freq='D')] + [Period(NaT, freq='D')] * 3, dtype='object') - # Hack because of lack of support for Period null checking (GH12759) - tm.assert_index_equal(result[:1], expected[:1]) - result_arr = np.asarray([p.ordinal for p in result], dtype=np.int64) - expected_arr = np.asarray([p.ordinal for p in expected], - dtype=np.int64) - tm.assert_numpy_array_equal(result_arr, expected_arr) - # TODO: When GH12759 is resolved, change the above hack to: - # tm.assert_index_equal(result, expected) # now, it raises. + tm.assert_index_equal(result, expected) result = idx.astype(int) expected = Int64Index([16937] + [-9223372036854775808] * 3, diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 45f634050a5d8..dffb71cff526a 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -92,13 +92,14 @@ def wrapper(self, other): result[mask] = nat_result return result + elif other is tslib.NaT: + result = np.empty(len(self.values), dtype=bool) + result.fill(nat_result) else: other = Period(other, freq=self.freq) func = getattr(self.values, opname) result = func(other.ordinal) - if other.ordinal == tslib.iNaT: - result.fill(nat_result) mask = self.values == tslib.iNaT if mask.any(): result[mask] = nat_result @@ -235,7 +236,7 @@ def _from_arraylike(cls, data, freq, tz): data = _ensure_int64(data) if freq is None: raise ValueError('freq not specified') - data = np.array([Period(x, freq=freq).ordinal for x in data], + data = np.array([Period(x, freq=freq) for x in data], dtype=np.int64) except (TypeError, ValueError): data = _ensure_object(data) @@ -322,15 +323,18 @@ def _na_value(self): return self._box_func(tslib.iNaT) def __contains__(self, key): - if not isinstance(key, Period) or key.freq != self.freq: - if isinstance(key, compat.string_types): - try: - self.get_loc(key) - return True - except Exception: - return False + if isinstance(key, Period): + if key.freq != self.freq: + return False + else: + return key.ordinal in self._engine + else: + try: + self.get_loc(key) + return True + except Exception: + return False return False - return key.ordinal in self._engine def __array_wrap__(self, result, context=None): """ @@ -622,17 +626,13 @@ def _sub_period(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - if other.ordinal == tslib.iNaT: - new_data = np.empty(len(self)) - new_data.fill(np.nan) - else: - asi8 = self.asi8 - new_data = asi8 - other.ordinal + asi8 = self.asi8 + new_data = asi8 - other.ordinal - if self.hasnans: - mask = asi8 == 
tslib.iNaT - new_data = new_data.astype(np.float64) - new_data[mask] = np.nan + if self.hasnans: + mask = asi8 == tslib.iNaT + new_data = new_data.astype(np.float64) + new_data[mask] = np.nan # result must be Int64Index or Float64Index return Index(new_data, name=self.name) @@ -740,8 +740,10 @@ def get_loc(self, key, method=None, tolerance=None): # we cannot construct the Period # as we have an invalid type raise KeyError(key) + try: - return Index.get_loc(self, key.ordinal, method, tolerance) + ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal + return Index.get_loc(self, ordinal, method, tolerance) except KeyError: raise KeyError(key) @@ -1044,8 +1046,7 @@ def _get_ordinal_range(start, end, periods, freq, mult=1): if is_start_per and is_end_per and start.freq != end.freq: raise ValueError('Start and end must have same freq') - if ((is_start_per and start.ordinal == tslib.iNaT) or - (is_end_per and end.ordinal == tslib.iNaT)): + if (start is tslib.NaT or end is tslib.NaT): raise ValueError('Start and end must not be NaT') if freq is None: diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 68cea17ba3fc9..958a10c329a46 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1587,17 +1587,16 @@ def test_asobject_tolist(self): result = idx.asobject self.assertTrue(isinstance(result, Index)) self.assertEqual(result.dtype, object) + tm.assert_index_equal(result, expected) for i in [0, 1, 3]: - self.assertTrue(result[i], expected[i]) - self.assertTrue(result[2].ordinal, pd.tslib.iNaT) - self.assertTrue(result[2].freq, 'D') + self.assertEqual(result[i], expected[i]) + self.assertIs(result[2], pd.NaT) self.assertEqual(result.name, expected.name) result_list = idx.tolist() for i in [0, 1, 3]: - self.assertTrue(result_list[i], expected_list[i]) - self.assertTrue(result_list[2].ordinal, pd.tslib.iNaT) - self.assertTrue(result_list[2].freq, 'D') + self.assertEqual(result_list[i], expected_list[i]) + self.assertIs(result_list[2], pd.NaT) def test_minmax(self): @@ -1623,18 +1622,15 @@ def test_minmax(self): # Return NaT obj = PeriodIndex([], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) obj = PeriodIndex([pd.NaT], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) obj = PeriodIndex([pd.NaT, pd.NaT, pd.NaT], freq='M') result = getattr(obj, op)() - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') + self.assertIs(result, tslib.NaT) def test_numpy_minmax(self): pr = pd.period_range(start='2016-01-15', end='2016-01-20') @@ -1735,9 +1731,9 @@ def test_representation_to_series(self): 2 2013 dtype: object""" - exp6 = """0 2011-01-01 09:00 -1 2012-02-01 10:00 -2 NaT + exp6 = """0 2011-01-01 09:00 +1 2012-02-01 10:00 +2 NaT dtype: object""" exp7 = """0 2013Q1 diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 591fa19aad585..8d217ff0753a6 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -36,14 +36,17 @@ def test_quarterly_negative_ordinals(self): p = Period(ordinal=-1, freq='Q-DEC') self.assertEqual(p.year, 1969) self.assertEqual(p.quarter, 4) + self.assertIsInstance(p, Period) p = Period(ordinal=-2, freq='Q-DEC') self.assertEqual(p.year, 1969) self.assertEqual(p.quarter, 3) + self.assertIsInstance(p, 
Period) p = Period(ordinal=-2, freq='M') self.assertEqual(p.year, 1969) self.assertEqual(p.month, 11) + self.assertIsInstance(p, Period) def test_period_cons_quarterly(self): # bugs in scikits.timeseries @@ -67,6 +70,7 @@ def test_period_cons_annual(self): stamp = exp.to_timestamp('D', how='end') + timedelta(days=30) p = Period(stamp, freq=freq) self.assertEqual(p, exp + 1) + self.assertIsInstance(p, Period) def test_period_cons_weekly(self): for num in range(10, 17): @@ -77,34 +81,46 @@ def test_period_cons_weekly(self): result = Period(daystr, freq=freq) expected = Period(daystr, freq='D').asfreq(freq) self.assertEqual(result, expected) + self.assertIsInstance(result, Period) + + def test_period_from_ordinal(self): + p = pd.Period('2011-01', freq='M') + res = pd.Period._from_ordinal(p.ordinal, freq='M') + self.assertEqual(p, res) + self.assertIsInstance(res, Period) def test_period_cons_nat(self): p = Period('NaT', freq='M') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'M') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period('nat', freq='W-SUN') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'W-SUN') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period(tslib.iNaT, freq='D') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, 'D') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) p = Period(tslib.iNaT, freq='3D') - self.assertEqual(p.ordinal, tslib.iNaT) - self.assertEqual(p.freq, offsets.Day(3)) - self.assertEqual(p.freqstr, '3D') - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) + self.assertIs(p, pd.NaT) + + p = Period('NaT') + self.assertIs(p, pd.NaT) + + p = Period(tslib.iNaT) + self.assertIs(p, pd.NaT) + + def test_cons_null_like(self): + # check Timestamp compat + self.assertIs(Timestamp('NaT'), pd.NaT) + self.assertIs(Period('NaT'), pd.NaT) + + self.assertIs(Timestamp(None), pd.NaT) + self.assertIs(Period(None), pd.NaT) - self.assertRaises(ValueError, Period, 'NaT') + self.assertIs(Timestamp(float('nan')), pd.NaT) + self.assertIs(Period(float('nan')), pd.NaT) + + self.assertIs(Timestamp(np.nan), pd.NaT) + self.assertIs(Period(np.nan), pd.NaT) def test_period_cons_mult(self): p1 = Period('2011-01', freq='3M') @@ -197,13 +213,6 @@ def test_timestamp_tz_arg_dateutil_from_string(self): freq='M').to_timestamp(tz='dateutil/Europe/Brussels') self.assertEqual(p.tz, gettz('Europe/Brussels')) - def test_timestamp_nat_tz(self): - t = Period('NaT', freq='M').to_timestamp() - self.assertTrue(t is tslib.NaT) - - t = Period('NaT', freq='M').to_timestamp(tz='Asia/Tokyo') - self.assertTrue(t is tslib.NaT) - def test_timestamp_mult(self): p = pd.Period('2011-01', freq='M') self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) @@ -213,12 +222,6 @@ def test_timestamp_mult(self): self.assertEqual(p.to_timestamp(how='S'), pd.Timestamp('2011-01-01')) self.assertEqual(p.to_timestamp(how='E'), pd.Timestamp('2011-03-31')) - def test_timestamp_nat_mult(self): - for freq in ['M', '3M']: - p = pd.Period('NaT', freq=freq) - self.assertTrue(p.to_timestamp(how='S') is pd.NaT) - self.assertTrue(p.to_timestamp(how='E') is pd.NaT) - def test_period_constructor(self): i1 = Period('1/1/2005', freq='M') i2 = Period('Jan 2005') @@ -552,9 +555,6 @@ def _ex(p): 
result = p.to_timestamp('5S', how='start') self.assertEqual(result, expected) - p = Period('NaT', freq='W') - self.assertTrue(p.to_timestamp() is tslib.NaT) - def test_start_time(self): freq_lst = ['A', 'Q', 'M', 'D', 'H', 'T', 'S'] xp = datetime(2012, 1, 1) @@ -566,9 +566,6 @@ def test_start_time(self): self.assertEqual(Period('2012', freq='W').start_time, datetime(2011, 12, 26)) - p = Period('NaT', freq='W') - self.assertTrue(p.start_time is tslib.NaT) - def test_end_time(self): p = Period('2012', freq='A') @@ -607,9 +604,6 @@ def _ex(*args): xp = _ex(2012, 1, 16) self.assertEqual(xp, p.end_time) - p = Period('NaT', freq='W') - self.assertTrue(p.end_time is tslib.NaT) - def test_anchor_week_end_time(self): def _ex(*args): return Timestamp(Timestamp(datetime(*args)).value - 1) @@ -758,15 +752,14 @@ def test_properties_secondly(self): def test_properties_nat(self): p_nat = Period('NaT', freq='M') t_nat = pd.Timestamp('NaT') + self.assertIs(p_nat, t_nat) + # confirm Period('NaT') work identical with Timestamp('NaT') for f in ['year', 'month', 'day', 'hour', 'minute', 'second', 'week', 'dayofyear', 'quarter', 'days_in_month']: self.assertTrue(np.isnan(getattr(p_nat, f))) self.assertTrue(np.isnan(getattr(t_nat, f))) - for f in ['weekofyear', 'dayofweek', 'weekday', 'qyear']: - self.assertTrue(np.isnan(getattr(p_nat, f))) - def test_pnow(self): dt = datetime.now() @@ -789,7 +782,7 @@ def test_constructor_corner(self): self.assertRaises(ValueError, Period, 1.6, freq='D') self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') - self.assertRaises(ValueError, Period) + self.assertIs(Period(None), pd.NaT) self.assertRaises(ValueError, Period, month=1) p = Period('2007-01-01', freq='D') @@ -1526,12 +1519,6 @@ def test_conv_secondly(self): self.assertEqual(ival_S.asfreq('S'), ival_S) - def test_asfreq_nat(self): - p = Period('NaT', freq='A') - result = p.asfreq('M') - self.assertEqual(result.ordinal, tslib.iNaT) - self.assertEqual(result.freq, 'M') - def test_asfreq_mult(self): # normal freq to mult freq p = Period(freq='A', year=2007) @@ -1603,21 +1590,6 @@ def test_asfreq_mult(self): self.assertEqual(result.ordinal, expected.ordinal) self.assertEqual(result.freq, expected.freq) - def test_asfreq_mult_nat(self): - # normal freq to mult freq - for p in [Period('NaT', freq='A'), Period('NaT', freq='3A'), - Period('NaT', freq='2M'), Period('NaT', freq='3D')]: - for freq in ['3A', offsets.YearEnd(3)]: - result = p.asfreq(freq) - expected = Period('NaT', freq='3A') - self.assertEqual(result.ordinal, pd.tslib.iNaT) - self.assertEqual(result.freq, expected.freq) - - result = p.asfreq(freq, how='S') - expected = Period('NaT', freq='3A') - self.assertEqual(result.ordinal, pd.tslib.iNaT) - self.assertEqual(result.freq, expected.freq) - class TestPeriodIndex(tm.TestCase): def setUp(self): @@ -1995,6 +1967,19 @@ def test_getitem_datetime(self): rs = ts[dt1:dt4] tm.assert_series_equal(rs, ts) + def test_getitem_nat(self): + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertEqual(idx[0], pd.Period('2011-01', freq='M')) + self.assertIs(idx[1], tslib.NaT) + + s = pd.Series([0, 1, 2], index=idx) + self.assertEqual(s[pd.NaT], 1) + + s = pd.Series(idx, index=idx) + self.assertEqual(s[pd.Period('2011-01', freq='M')], + pd.Period('2011-01', freq='M')) + self.assertIs(s[pd.NaT], tslib.NaT) + def test_slice_with_negative_step(self): ts = Series(np.arange(20), period_range('2014-01', periods=20, freq='M')) @@ -2038,6 +2023,20 
@@ def test_contains(self): self.assertFalse(Period('2007-01', freq='D') in rng) self.assertFalse(Period('2007-01', freq='2M') in rng) + def test_contains_nat(self): + # GH13582 + idx = period_range('2007-01', freq='M', periods=10) + self.assertFalse(pd.NaT in idx) + self.assertFalse(None in idx) + self.assertFalse(float('nan') in idx) + self.assertFalse(np.nan in idx) + + idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M') + self.assertTrue(pd.NaT in idx) + self.assertTrue(None in idx) + self.assertTrue(float('nan') in idx) + self.assertTrue(np.nan in idx) + def test_sub(self): rng = period_range('2007-01', periods=50) @@ -3292,6 +3291,17 @@ def test_get_loc_msg(self): except KeyError as inst: self.assertEqual(inst.args[0], bad_period) + def test_get_loc_nat(self): + didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03']) + pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M') + + # check DatetimeIndex compat + for idx in [didx, pidx]: + self.assertEqual(idx.get_loc(pd.NaT), 1) + self.assertEqual(idx.get_loc(None), 1) + self.assertEqual(idx.get_loc(float('nan')), 1) + self.assertEqual(idx.get_loc(np.nan), 1) + def test_append_concat(self): # #1815 d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC') @@ -3576,95 +3586,87 @@ def test_add_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) + # freq is Tick for freq in ['D', '2D', '3D']: p = Period('NaT', freq=freq) for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) for freq in 
['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertEqual((p + o).ordinal, tslib.iNaT) + self.assertIs(p + o, tslib.NaT) if not isinstance(o, np.timedelta64): - self.assertEqual((o + p).ordinal, tslib.iNaT) + self.assertIs(o + p, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p + o + self.assertIs(p + o, tslib.NaT) if isinstance(o, np.timedelta64): with tm.assertRaises(TypeError): o + p else: - with tm.assertRaises(period.IncompatibleFrequency): - o + p + self.assertIs(o + p, tslib.NaT) def test_sub_pdnat(self): # GH 13071 @@ -3749,24 +3751,22 @@ def test_sub_offset_nat(self): for freq in ['A', '2A', '3A']: p = Period('NaT', freq=freq) for o in [offsets.YearEnd(2)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) for o in [offsets.MonthEnd(2), offsets.MonthEnd(12)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, 'D'), timedelta(365)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) # freq is Tick for freq in ['D', '2D', '3D']: @@ -3774,37 +3774,33 @@ def test_sub_offset_nat(self): for o in [offsets.Day(5), offsets.Hour(24), np.timedelta64(2, 'D'), np.timedelta64(3600 * 24, 's'), timedelta(-2), timedelta(hours=48)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, 'h'), timedelta(hours=23)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) for freq in ['H', '2H', '3H']: p = Period('NaT', freq=freq) for o in [offsets.Day(2), offsets.Hour(3), np.timedelta64(3, 'h'), np.timedelta64(3600, 's'), timedelta(minutes=120), timedelta(days=4, minutes=180)]: - self.assertEqual((p - o).ordinal, tslib.iNaT) + self.assertIs(p - o, tslib.NaT) for o in [offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, 's'), timedelta(hours=23, minutes=30)]: - with tm.assertRaises(period.IncompatibleFrequency): - p - o + self.assertIs(p - o, tslib.NaT) def test_nat_ops(self): for freq in ['M', '2M', '3M']: p = Period('NaT', freq=freq) - self.assertEqual((p + 1).ordinal, tslib.iNaT) - self.assertEqual((1 + p).ordinal, tslib.iNaT) - self.assertEqual((p - 1).ordinal, tslib.iNaT) - self.assertEqual((p - Period('2011-01', freq=freq)).ordinal, - tslib.iNaT) - self.assertEqual((Period('2011-01', freq=freq) - p).ordinal, - tslib.iNaT) + self.assertIs(p + 1, tslib.NaT) + self.assertIs(1 + p, tslib.NaT) + self.assertIs(p - 1, tslib.NaT) + self.assertIs(p - Period('2011-01', freq=freq), tslib.NaT) + self.assertIs(Period('2011-01', freq=freq) - p, tslib.NaT) def test_period_ops_offset(self): p = Period('2011-04-01', freq='D') @@ -3830,18 +3826,17 @@ class TestPeriodIndexSeriesMethods(tm.TestCase): def _check(self, values, func, expected): idx = 
pd.PeriodIndex(values) result = func(idx) - tm.assert_index_equal(result, pd.PeriodIndex(expected)) + if isinstance(expected, pd.Index): + tm.assert_index_equal(result, expected) + else: + # comp op results in bool + tm.assert_numpy_array_equal(result, expected) s = pd.Series(values) result = func(s) - exp = pd.Series(expected) - # Period(NaT) != Period(NaT) - - lmask = result.map(lambda x: x.ordinal != tslib.iNaT) - rmask = exp.map(lambda x: x.ordinal != tslib.iNaT) - tm.assert_series_equal(lmask, rmask) - tm.assert_series_equal(result[lmask], exp[rmask]) + exp = pd.Series(expected, name=values.name) + tm.assert_series_equal(result, exp) def test_pi_ops(self): idx = PeriodIndex(['2011-01', '2011-02', '2011-03', @@ -3962,7 +3957,7 @@ def test_pi_sub_period(self): exp = pd.Index([12, 11, 10, 9], name='idx') tm.assert_index_equal(result, exp) - exp = pd.Index([np.nan, np.nan, np.nan, np.nan], name='idx') + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) @@ -3987,10 +3982,82 @@ def test_pi_sub_period_nat(self): exp = pd.Index([12, np.nan, 10, 9], name='idx') tm.assert_index_equal(result, exp) - exp = pd.Index([np.nan, np.nan, np.nan, np.nan], name='idx') + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) + def test_pi_comp_period(self): + idx = PeriodIndex(['2011-01', '2011-02', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > pd.Period('2011-03', freq='M') + exp = np.array([False, False, False, True], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, True, True, False], dtype=np.bool) + self._check(idx, f, exp) + + def test_pi_comp_period_nat(self): + idx = PeriodIndex(['2011-01', 'NaT', '2011-03', + '2011-04'], freq='M', name='idx') + + f = lambda x: x == pd.Period('2011-03', freq='M') + exp = np.array([False, False, True, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') == x + self._check(idx, f, exp) + + f = lambda x: x == tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT == x + self._check(idx, f, exp) + + f = lambda x: x != pd.Period('2011-03', freq='M') + exp = np.array([True, True, False, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: pd.Period('2011-03', freq='M') != x + self._check(idx, f, exp) + + f = lambda x: x != tslib.NaT + exp = np.array([True, True, True, True], dtype=np.bool) + self._check(idx, f, exp) + f = lambda x: tslib.NaT != x + self._check(idx, f, exp) + + f = lambda x: pd.Period('2011-03', freq='M') >= x + exp = np.array([True, False, True, False], dtype=np.bool) + self._check(idx, f, exp) 
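The checks above and below pin down NaN-like comparison semantics for ``pd.NaT``: every equality or ordering comparison evaluates to ``False``, and only ``!=`` is ``True``. Combined with the whatsnew entry, the scalar behaviour introduced by this patch can be sketched as follows (0.19-era behaviour; later pandas versions may differ, e.g. for integer arithmetic on ``NaT``):

.. code-block:: python

    import pandas as pd

    assert pd.Period('NaT', freq='M') is pd.NaT   # Period('NaT') now returns pd.NaT

    assert not (pd.NaT == pd.NaT)                 # equality is always False
    assert pd.NaT != pd.NaT                       # inequality is always True
    assert not (pd.NaT < pd.NaT)                  # orderings are always False

    assert pd.NaT + 1 is pd.NaT                   # int arithmetic now propagates NaT
    assert pd.NaT - 1 is pd.NaT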
+ + f = lambda x: x < pd.Period('2011-03', freq='M') + exp = np.array([True, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: x > tslib.NaT + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + + f = lambda x: tslib.NaT >= x + exp = np.array([False, False, False, False], dtype=np.bool) + self._check(idx, f, exp) + class TestPeriodRepresentation(tm.TestCase): """ diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py index 31d6393c1c26e..6696c03a070f7 100644 --- a/pandas/tseries/tests/test_tslib.py +++ b/pandas/tseries/tests/test_tslib.py @@ -1224,6 +1224,13 @@ def test_nat_arithmetic(self): self.assertIs(left - right, pd.NaT) self.assertIs(right - left, pd.NaT) + # int addition / subtraction + for (left, right) in [(pd.NaT, 2), (pd.NaT, 0), (pd.NaT, -3)]: + self.assertIs(right + left, pd.NaT) + self.assertIs(left + right, pd.NaT) + self.assertIs(left - right, pd.NaT) + self.assertIs(right - left, pd.NaT) + def test_nat_arithmetic_index(self): # GH 11718 diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 2af08f2713262..c681cebd84836 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -1097,7 +1097,10 @@ cdef class _Timestamp(datetime): return Timestamp(self.value + other_int, tz=self.tzinfo, freq=self.freq) elif is_integer_object(other): - if self.freq is None: + if self is NaT: + # to be compat with Period + return NaT + elif self.freq is None: raise ValueError("Cannot add integral value to Timestamp " "without freq.") return Timestamp((self.freq * other).apply(self), freq=self.freq) From 1bee56ed9aa96ffe99aa62d5e8c0212d6dc947ee Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 15 Jul 2016 06:20:39 -0400 Subject: [PATCH 36/44] BUG: construction of Series with integers on windows not default to int64 closes #13646 Author: Jeff Reback Closes #13661 from jreback/foo and squashes the following commits: e26f9bf [Jeff Reback] BUG: construction of Series with integers on windows not defaulting to int64 --- doc/source/whatsnew/v0.19.0.txt | 2 +- pandas/core/series.py | 2 +- pandas/tests/frame/test_operators.py | 2 +- pandas/tests/series/test_constructors.py | 11 +++++++++++ pandas/types/cast.py | 2 +- 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index c9f501c682a18..747fc70f858b4 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -534,7 +534,7 @@ Bug Fixes - Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) - Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) - Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) - +- Bug in ``Series`` construction from a tuple of integers on windows not returning default dtype (int64) (:issue:`13646`) - Bug in ``.groupby(..).resample(..)`` when the same object is called multiple times (:issue:`13174`) - Bug in ``.to_records()`` when index name is a unicode string (:issue:`13172`) diff --git a/pandas/core/series.py b/pandas/core/series.py index b933f68cfad62..3c1f834c3d479 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2820,7 +2820,7 @@ def _try_cast(arr, take_fast_path): subarr = data.copy() return subarr - elif isinstance(data, list) and len(data) > 0: + elif isinstance(data, 
(list, tuple)) and len(data) > 0:
             if dtype is not None:
                 try:
                     subarr = _try_cast(data, False)

diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index e2e0f568e4098..c91585a28d867 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -1196,7 +1196,7 @@ def test_alignment_non_pandas(self):

         align = pd.core.ops._align_method_FRAME

-        for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3])]:
+        for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.intp)]:

             tm.assert_series_equal(align(df, val, 'index'),
                                    Series([1, 2, 3], index=df.index))

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index b7ec4d570f18b..c8e04f1ffd75f 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -109,6 +109,17 @@ def test_constructor_iterator(self):
         result = Series(range(10), dtype='int64')
         assert_series_equal(result, expected)

+    def test_constructor_list_like(self):
+
+        # make sure that we are coercing different
+        # list-likes to standard dtypes and not
+        # platform-specific ones
+        expected = Series([1, 2, 3], dtype='int64')
+        for obj in [[1, 2, 3], (1, 2, 3),
+                    np.array([1, 2, 3], dtype='int64')]:
+            result = Series(obj, index=[0, 1, 2])
+            assert_series_equal(result, expected)
+
     def test_constructor_generator(self):
         gen = (i for i in range(10))

diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index e55cb91d36430..ca23d8d26a426 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -33,7 +33,7 @@ def _possibly_convert_platform(values):
     """ try to do platform conversion, allow ndarray or list here """

     if isinstance(values, (list, tuple)):
-        values = lib.list_to_object_array(values)
+        values = lib.list_to_object_array(list(values))
     if getattr(values, 'dtype', None) == np.object_:
         if hasattr(values, '_values'):
             values = values._values

From d7c028d4965932160fa3b69f56c716b1454c42a5 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Fri, 15 Jul 2016 06:25:54 -0400
Subject: [PATCH 37/44] CLN: Removed levels attribute from Categorical

Deprecated back in `0.15.0` and therefore long overdue.

Closes #8376.

Author: gfyoung

Closes #13612 from gfyoung/categorical-levels-remove and squashes the following commits:

f1254df [gfyoung] MAINT: Relocated backwards compat categorical pickle tests
f3321cb [gfyoung] CLN: Removed levels attribute from Categorical
---
 doc/source/whatsnew/v0.19.0.txt           |  1 +
 pandas/core/categorical.py                | 30 +----------
 .../tests/data/categorical_0_14_1.pickle  |  0
 .../tests/data/categorical_0_15_2.pickle  | Bin
 pandas/io/tests/test_pickle.py            | 38 +++++++++++++
 pandas/tests/test_categorical.py          | 50 ------------------
 setup.py                                  |  4 +-
 7 files changed, 43 insertions(+), 80 deletions(-)
 rename pandas/{ => io}/tests/data/categorical_0_14_1.pickle (100%)
 rename pandas/{ => io}/tests/data/categorical_0_15_2.pickle (100%)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 747fc70f858b4..0b9695125c0a9 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -506,6 +506,7 @@ Removal of prior version deprecations/changes

 - ``DataFrame.to_csv()`` has dropped the ``engine`` parameter, as was deprecated in 0.17.1 (:issue:`11274`, :issue:`13419`)
 - ``DataFrame.to_dict()`` has dropped the ``outtype`` parameter in favor of ``orient`` (:issue:`13627`, :issue:`8486`)
+- ``pd.Categorical`` has dropped the ``levels`` attribute in favor of ``categories`` (:issue:`8376`)

 .. 
_whatsnew_0190.performance: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 1d1a9f990e61a..a26cc5125db78 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -228,8 +228,8 @@ class Categorical(PandasObject): __array_priority__ = 1000 _typ = 'categorical' - def __init__(self, values, categories=None, ordered=False, name=None, - fastpath=False, levels=None): + def __init__(self, values, categories=None, ordered=False, + name=None, fastpath=False): if fastpath: # fast path @@ -245,17 +245,6 @@ def __init__(self, values, categories=None, ordered=False, name=None, "name=\"something\")'") warn(msg, UserWarning, stacklevel=2) - # TODO: Remove after deprecation period in 2017/ after 0.18 - if levels is not None: - warn("Creating a 'Categorical' with 'levels' is deprecated, use " - "'categories' instead", FutureWarning, stacklevel=2) - if categories is None: - categories = levels - else: - raise ValueError("Cannot pass in both 'categories' and " - "(deprecated) 'levels', use only " - "'categories'", stacklevel=2) - # sanitize input if is_categorical_dtype(values): @@ -580,21 +569,6 @@ def _get_categories(self): categories = property(fget=_get_categories, fset=_set_categories, doc=_categories_doc) - def _set_levels(self, levels): - """ set new levels (deprecated, use "categories") """ - warn("Assigning to 'levels' is deprecated, use 'categories'", - FutureWarning, stacklevel=2) - self.categories = levels - - def _get_levels(self): - """ Gets the levels (deprecated, use "categories") """ - warn("Accessing 'levels' is deprecated, use 'categories'", - FutureWarning, stacklevel=2) - return self.categories - - # TODO: Remove after deprecation period in 2017/ after 0.18 - levels = property(fget=_get_levels, fset=_set_levels) - _ordered = None def _set_ordered(self, value): diff --git a/pandas/tests/data/categorical_0_14_1.pickle b/pandas/io/tests/data/categorical_0_14_1.pickle similarity index 100% rename from pandas/tests/data/categorical_0_14_1.pickle rename to pandas/io/tests/data/categorical_0_14_1.pickle diff --git a/pandas/tests/data/categorical_0_15_2.pickle b/pandas/io/tests/data/categorical_0_15_2.pickle similarity index 100% rename from pandas/tests/data/categorical_0_15_2.pickle rename to pandas/io/tests/data/categorical_0_15_2.pickle diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 55c14fee9e3ed..6019144d59698 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -231,6 +231,44 @@ def python_unpickler(path): result = python_unpickler(path) self.compare_element(result, expected, typ) + def test_pickle_v0_14_1(self): + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = os.path.join(tm.get_data_path(), + 'categorical_0_14_1.pickle') + # This code was executed once on v0.14.1 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + + def test_pickle_v0_15_2(self): + # ordered -> _ordered + # GH 9347 + + # we have the name warning + # 10482 + with tm.assert_produces_warning(UserWarning): + cat = pd.Categorical(values=['a', 'b', 'c'], + categories=['a', 'b', 'c', 'd'], + name='foobar', ordered=False) + pickle_path = 
os.path.join(tm.get_data_path(), + 'categorical_0_15_2.pickle') + # This code was executed once on v0.15.2 to generate the pickle: + # + # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], + # name='foobar') + # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) + # + tm.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index dd39861ac3114..1edd9443fe356 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1559,18 +1559,6 @@ def test_deprecated_labels(self): res = cat.labels self.assert_numpy_array_equal(res, exp) - def test_deprecated_levels(self): - # TODO: levels is deprecated and should be removed in 0.18 or 2017, - # whatever is earlier - cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) - exp = cat.categories - with tm.assert_produces_warning(FutureWarning): - res = cat.levels - self.assert_index_equal(res, exp) - with tm.assert_produces_warning(FutureWarning): - res = pd.Categorical([1, 2, 3, np.nan], levels=[1, 2, 3]) - self.assert_index_equal(res.categories, exp) - def test_removed_names_produces_warning(self): # 10482 @@ -4431,44 +4419,6 @@ def test_dt_accessor_api_for_categorical(self): invalid.dt self.assertFalse(hasattr(invalid, 'str')) - def test_pickle_v0_14_1(self): - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_14_1.pickle') - # This code was executed once on v0.14.1 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - self.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - - def test_pickle_v0_15_2(self): - # ordered -> _ordered - # GH 9347 - - # we have the name warning - # 10482 - with tm.assert_produces_warning(UserWarning): - cat = pd.Categorical(values=['a', 'b', 'c'], - categories=['a', 'b', 'c', 'd'], - name='foobar', ordered=False) - pickle_path = os.path.join(tm.get_data_path(), - 'categorical_0_15_2.pickle') - # This code was executed once on v0.15.2 to generate the pickle: - # - # cat = Categorical(labels=np.arange(3), levels=['a', 'b', 'c', 'd'], - # name='foobar') - # with open(pickle_path, 'wb') as f: pickle.dump(cat, f) - # - self.assert_categorical_equal(cat, pd.read_pickle(pickle_path)) - def test_concat_categorical(self): # See GH 10177 df1 = pd.DataFrame( diff --git a/setup.py b/setup.py index 650357588570a..c77ca4d9e60fe 100755 --- a/setup.py +++ b/setup.py @@ -589,6 +589,7 @@ def pxd(name): 'tests/data/legacy_msgpack/*/*.msgpack', 'tests/data/*.csv*', 'tests/data/*.dta', + 'tests/data/*.pickle', 'tests/data/*.txt', 'tests/data/*.xls', 'tests/data/*.xlsx', @@ -605,8 +606,7 @@ def pxd(name): 'tests/data/html_encoding/*.html', 'tests/json/data/*.json'], 'pandas.tools': ['tests/data/*.csv'], - 'pandas.tests': ['data/*.pickle', - 'data/*.csv'], + 'pandas.tests': ['data/*.csv'], 'pandas.tests.formats': ['data/*.csv'], 'pandas.tests.indexes': ['data/*.pickle'], 'pandas.tseries.tests': ['data/*.pickle', From 401b0ed720a4b756fec01de9311c1e01dc5af35d Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Wed, 6 Jul 2016 22:14:41 +0100 Subject: [PATCH 38/44] 
Fix bug in contains when looking up a string in a non-monotonic datetime index and the object in question is first in the index. --- pandas/tests/indexes/test_datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 804a0f4ed5cc1..77f64a89f63be 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time, date, datetime +from datetime import timedelta, time import numpy as np From 1a86b3ac958f553be2147cb19857927c67766868 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Sat, 9 Jul 2016 22:49:58 +0100 Subject: [PATCH 39/44] Added more exhaustive tests for __contains__. --- pandas/tests/indexes/test_datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 77f64a89f63be..804a0f4ed5cc1 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time +from datetime import timedelta, time, date, datetime import numpy as np From 3bf7cceb7f98316ab80eb3a4f00692a4e4f75ccb Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Wed, 6 Jul 2016 22:14:41 +0100 Subject: [PATCH 40/44] Fix bug in contains when looking up a string in a non-monotonic datetime index and the object in question is first in the index. --- pandas/tests/indexes/test_datetimelike.py | 11 ++++++ pandas/tseries/base.py | 48 ++++++++++------------- 2 files changed, 32 insertions(+), 27 deletions(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 5c21f71d64660..1714a5694dac8 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -722,6 +722,17 @@ def test_fillna_datetime64(self): dtype=object) self.assert_index_equal(idx.fillna('x'), exp) + def test_contains(self): + #GH13572 + dates = ['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02'] + monotonic = pd.to_datetime(sorted(dates)) + non_monotonic = pd.to_datetime(['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02']) + for idx in [non_monotonic, monotonic]: + self.assertNotIn('2015-01-06', idx) + self.assertNotIn(pd.Timestamp('2015-01-06'), idx) + for dt in reversed(dates): + self.assertIn(dt, idx) + self.assertIn(pd.Timestamp(dt), idx) class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index fe0440170383b..42631d442a990 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -9,16 +9,10 @@ from pandas.compat.numpy import function as nv import numpy as np -from pandas.types.common import (is_integer, is_float, - is_bool_dtype, _ensure_int64, - is_scalar, - is_list_like) -from pandas.types.generic import (ABCIndex, ABCSeries, - ABCPeriodIndex, ABCIndexClass) -from pandas.types.missing import isnull -from pandas.core import common as com, algorithms -from pandas.core.common import AbstractMethodError +from pandas.core import common as com, algorithms +from pandas.core.common import (is_integer, is_float, is_bool_dtype, + AbstractMethodError) import pandas.formats.printing as printing import pandas.tslib as tslib import pandas._period as prlib @@ -117,9 +111,9 @@ def _join_i8_wrapper(joinf, dtype, 
with_indexers=True): @staticmethod def wrapper(left, right): - if isinstance(left, (np.ndarray, ABCIndex, ABCSeries)): + if isinstance(left, (np.ndarray, com.ABCIndex, com.ABCSeries)): left = left.view('i8') - if isinstance(right, (np.ndarray, ABCIndex, ABCSeries)): + if isinstance(right, (np.ndarray, com.ABCIndex, com.ABCSeries)): right = right.view('i8') results = joinf(left, right) if with_indexers: @@ -139,16 +133,16 @@ def _evaluate_compare(self, other, op): # coerce to a similar object if not isinstance(other, type(self)): - if not is_list_like(other): + if not com.is_list_like(other): # scalar other = [other] - elif is_scalar(lib.item_from_zerodim(other)): + elif lib.isscalar(lib.item_from_zerodim(other)): # ndarray scalar other = [other.item()] other = type(self)(other) # compare - result = op(self.asi8, other.asi8) + result = getattr(self.asi8, op)(other.asi8) # technically we could support bool dtyped Index # for now just return the indexing array directly @@ -180,7 +174,7 @@ def _ensure_localized(self, result): # reconvert to local tz if getattr(self, 'tz', None) is not None: - if not isinstance(result, ABCIndexClass): + if not isinstance(result, com.ABCIndexClass): result = self._simple_new(result) result = result.tz_localize(self.tz) return result @@ -208,7 +202,7 @@ def _format_with_header(self, header, **kwargs): def __contains__(self, key): try: res = self.get_loc(key) - return is_scalar(res) or type(res) == slice or np.any(res) + return lib.isscalar(res) or type(res) == slice or np.any(res) except (KeyError, TypeError, ValueError): return False @@ -219,7 +213,7 @@ def __getitem__(self, key): """ is_int = is_integer(key) - if is_scalar(key) and not is_int: + if lib.isscalar(key) and not is_int: raise ValueError getitem = self._data.__getitem__ @@ -288,7 +282,7 @@ def _nat_new(self, box=True): return result attribs = self._get_attributes_dict() - if not isinstance(self, ABCPeriodIndex): + if not isinstance(self, com.ABCPeriodIndex): attribs['freq'] = None return self._simple_new(result, **attribs) @@ -318,7 +312,7 @@ def sort_values(self, return_indexer=False, ascending=True): attribs = self._get_attributes_dict() freq = attribs['freq'] - if freq is not None and not isinstance(self, ABCPeriodIndex): + if freq is not None and not isinstance(self, com.ABCPeriodIndex): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: @@ -334,7 +328,7 @@ def sort_values(self, return_indexer=False, ascending=True): def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): nv.validate_take(tuple(), kwargs) - indices = _ensure_int64(indices) + indices = com._ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) if isinstance(maybe_slice, slice): @@ -346,7 +340,7 @@ def take(self, indices, axis=0, allow_fill=True, na_value=tslib.iNaT) # keep freq in PeriodIndex, reset otherwise - freq = self.freq if isinstance(self, ABCPeriodIndex) else None + freq = self.freq if isinstance(self, com.ABCPeriodIndex) else None return self._shallow_copy(taken, freq=freq) def get_duplicates(self): @@ -551,7 +545,7 @@ def _convert_scalar_indexer(self, key, kind=None): # we don't allow integer/float indexing for loc # we don't allow float indexing for ix/getitem - if is_scalar(key): + if lib.isscalar(key): is_int = is_integer(key) is_flt = is_float(key) if kind in ['loc'] and (is_int or is_flt): @@ -597,7 +591,7 @@ def __add__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return 
self._add_delta(other) - elif is_integer(other): + elif com.is_integer(other): return self.shift(other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._add_datelike(other) @@ -625,7 +619,7 @@ def __sub__(self, other): elif isinstance(other, (DateOffset, timedelta, np.timedelta64, tslib.Timedelta)): return self._add_delta(-other) - elif is_integer(other): + elif com.is_integer(other): return self.shift(-other) elif isinstance(other, (tslib.Timestamp, datetime)): return self._sub_datelike(other) @@ -753,7 +747,7 @@ def repeat(self, repeats, *args, **kwargs): def where(self, cond, other=None): """ - .. versionadded:: 0.19.0 + .. versionadded:: 0.18.2 Return an Index of same shape as self and whose corresponding entries are from self where cond is True and otherwise are from @@ -797,9 +791,9 @@ def summary(self, name=None): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ - if lib.isscalar(other) and isnull(other): + if lib.isscalar(other) and com.isnull(other): other = tslib.iNaT - elif isinstance(other, ABCIndexClass): + elif isinstance(other, com.ABCIndexClass): # convert tz if needed if getattr(other, 'tz', None) is not None: From 0f5a4e0e6dbfed36255d13d66836e10d44b975a1 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Sat, 9 Jul 2016 22:49:58 +0100 Subject: [PATCH 41/44] Added more exhaustive tests for __contains__. --- pandas/tests/indexes/test_datetimelike.py | 105 +++++++++++++++++++--- 1 file changed, 94 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 1714a5694dac8..52cb2964c4ea2 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time +from datetime import timedelta, time, date, datetime import numpy as np @@ -19,6 +19,9 @@ class DatetimeLike(Base): + def create_nonmonotonic_index(self): + return self.create_index()[[2, 0, 3, 4, 1]] + def test_shift_identity(self): idx = self.create_index() @@ -64,6 +67,9 @@ def setUp(self): def create_index(self): return date_range('20130101', periods=5) + def create_elem_outside_index(self): + return pd.Timestamp('20130106') + def test_shift(self): # test shift for datetimeIndex and non datetimeIndex @@ -723,16 +729,36 @@ def test_fillna_datetime64(self): self.assert_index_equal(idx.fillna('x'), exp) def test_contains(self): - #GH13572 - dates = ['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02'] - monotonic = pd.to_datetime(sorted(dates)) - non_monotonic = pd.to_datetime(['2015-01-03', '2015-01-01', '2015-01-04', '2015-01-05', '2015-01-02']) - for idx in [non_monotonic, monotonic]: - self.assertNotIn('2015-01-06', idx) - self.assertNotIn(pd.Timestamp('2015-01-06'), idx) - for dt in reversed(dates): - self.assertIn(dt, idx) - self.assertIn(pd.Timestamp(dt), idx) + # GH13572 + monotonic = self.create_index() + ascending_nat_first = monotonic.insert( + 0, pd.NaT) # Not monotonic after inserting NaT + ascending_nat_last = monotonic.insert(5, pd.NaT) + non_monotonic = self.create_nonmonotonic_index() + non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT) + non_monotonic_nat_last = non_monotonic.insert(5, pd.NaT) + idx_with_nat = [ascending_nat_first, ascending_nat_last, + non_monotonic_nat_first, non_monotonic_nat_last] + idx_no_nat = [monotonic, non_monotonic] + for idx in idx_no_nat + idx_with_nat: + elem = self.create_elem_outside_index() + elem_str = 
str(elem) + elem_date_str = str(elem.date()) + for e in [elem, elem_str, elem_date_str, elem.date(), elem.to_datetime()]: + self.assertNotIn(e, idx) + for elem in monotonic: + elem_str = str(elem) + elem_date_str = str(elem.date()) + for e in [elem, elem_str, elem_date_str, elem.date(), elem.to_datetime()]: + self.assertIn(e, idx) + nat_elems = [pd.NaT, None, float('nan'), np.nan] + for idx in idx_no_nat: + for nn in nat_elems: + self.assertNotIn(nn, idx) + for idx in idx_with_nat: + for nn in nat_elems: + self.assertIn(nn, idx) + class TestPeriodIndex(DatetimeLike, tm.TestCase): _holder = PeriodIndex @@ -745,6 +771,9 @@ def setUp(self): def create_index(self): return period_range('20130101', periods=5, freq='D') + def create_elem_outside_index(self): + return pd.Period('20130106') + def test_astype(self): # GH 13149, GH 13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') @@ -922,6 +951,35 @@ def test_no_millisecond_field(self): with self.assertRaises(AttributeError): DatetimeIndex([]).millisecond + def test_contains(self): + # GH13572 + monotonic = self.create_index() + ascending_nat_first = monotonic.insert( + 0, pd.NaT) # Not monotonic after inserting NaT + ascending_nat_last = monotonic.insert(5, pd.NaT) + non_monotonic = self.create_nonmonotonic_index() + non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT) + non_monotonic_nat_last = non_monotonic.insert(5, pd.NaT) + idx_with_nat = [ascending_nat_first, ascending_nat_last, + non_monotonic_nat_first, non_monotonic_nat_last] + idx_no_nat = [monotonic, non_monotonic] + for idx in idx_no_nat + idx_with_nat: + elem = self.create_elem_outside_index() + elem_str = str(elem) + for e in [elem, elem_str]: + self.assertNotIn(e, idx) + for elem in monotonic: + elem_str = str(elem) + for e in [elem, elem_str]: + self.assertIn(e, idx) + nat_elems = [pd.Period('NaT', freq='D')] + for idx in idx_no_nat: + for nn in nat_elems: + self.assertNotIn(nn, idx) + for idx in idx_with_nat: + for nn in nat_elems: + self.assertIn(nn, idx) + class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex @@ -934,6 +992,9 @@ def setUp(self): def create_index(self): return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1) + def create_elem_outside_index(self): + return pd.Timedelta(days=5, hours=1) + def test_shift(self): # test shift for TimedeltaIndex # err8083 @@ -1122,3 +1183,25 @@ def test_fillna_timedelta(self): exp = pd.Index( [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) self.assert_index_equal(idx.fillna('x'), exp) + + def test_contains(self): + # GH13572 + monotonic = self.create_index() + ascending_nat_first = monotonic.insert( + 0, pd.NaT) # Not monotonic after inserting NaT + ascending_nat_last = monotonic.insert(5, pd.NaT) + non_monotonic = self.create_nonmonotonic_index() + non_monotonic_nat_first = non_monotonic.insert(0, pd.NaT) + non_monotonic_nat_last = non_monotonic.insert(5, pd.NaT) + idx_with_nat = [ascending_nat_first, ascending_nat_last, + non_monotonic_nat_first, non_monotonic_nat_last] + idx_no_nat = [monotonic, non_monotonic] + for idx in idx_no_nat + idx_with_nat: + elem = self.create_elem_outside_index() + elem_str = str(elem) + for e in [elem, elem_str]: + self.assertNotIn(e, idx) + for elem in monotonic: + elem_str = str(elem) + for e in [elem, elem_str]: + self.assertIn(e, idx) From 783ea6d0c21cb255f8edac1657e1c50c12a74f59 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Wed, 6 Jul 2016 22:14:41 +0100 Subject: [PATCH 42/44] Fix bug in contains when looking up a string in 
a non-monotonic datetime index and the object in question is first in the index. --- pandas/tests/indexes/test_datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 52cb2964c4ea2..79c2ea284de38 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time, date, datetime +from datetime import timedelta, time import numpy as np From 592a09ddbb1c684e67e9c9ae0ee6be88e5c1d612 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Sat, 9 Jul 2016 22:49:58 +0100 Subject: [PATCH 43/44] Added more exhaustive tests for __contains__. --- pandas/tests/indexes/test_datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 79c2ea284de38..52cb2964c4ea2 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time +from datetime import timedelta, time, date, datetime import numpy as np From 690e03492d5ef423206ba7c100cb8a45e87e12d3 Mon Sep 17 00:00:00 2001 From: ilmarinen Date: Wed, 6 Jul 2016 22:14:41 +0100 Subject: [PATCH 44/44] Fix bug in contains when looking up a string in a non-monotonic datetime index and the object in question is first in the index. --- pandas/tests/indexes/test_datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 52cb2964c4ea2..79c2ea284de38 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from datetime import timedelta, time, date, datetime +from datetime import timedelta, time import numpy as np
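
A note on the ``tslib.pyx`` change in the patch preceding 36/44: the new ``if self is NaT`` branch in ``_Timestamp.__add__`` makes integer addition/subtraction propagate ``NaT`` (for compatibility with ``Period``) instead of falling through to the no-freq ``ValueError``. A sketch of the behaviour the new ``test_nat_arithmetic`` loop pins down, assuming a build with that patch applied:

    >>> import pandas as pd
    >>> pd.NaT + 2     # previously hit the "Cannot add integral value" branch
    NaT
    >>> 2 + pd.NaT     # the reflected operation is covered by the same tests
    NaT
    >>> pd.NaT - 3     # integer subtraction likewise propagates NaT
    NaT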
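
Patch 36/44 widens the list-only branch of the ``Series`` constructor to tuples and makes ``_possibly_convert_platform`` coerce its input via ``list(values)``, so tuples take the same conversion path as lists. A sketch of the intended result, mirroring ``test_constructor_list_like`` (assuming a patched build; before the fix a tuple of ints could come out as the platform default, e.g. ``int32`` on Windows):

    >>> import pandas as pd
    >>> pd.Series((1, 2, 3)).dtype   # int64 even where the C long is 32-bit
    dtype('int64')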
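
With patch 37/44 the long-deprecated ``levels`` alias on ``Categorical`` is gone and ``categories`` is the only spelling. An illustrative sketch of the post-patch surface, not taken from the series itself:

    >>> import pandas as pd
    >>> cat = pd.Categorical(['a', 'b', 'a'])
    >>> cat.categories                # the supported accessor
    Index(['a', 'b'], dtype='object')
    >>> cat.levels                    # removed; formerly a FutureWarning alias
    Traceback (most recent call last):
        ...
    AttributeError: 'Categorical' object has no attribute 'levels'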
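
Finally, the bug named in the subject lines of patches 38-44: ``'2015-01-03' in idx`` returned False on a non-monotonic ``DatetimeIndex`` when the date sat at position 0. On a non-monotonic index, ``get_loc`` can hand back an array of matching integer positions, and ``__contains__`` (the ``np.any(res)`` line visible in the ``pandas/tseries/base.py`` hunk of patch 40) truth-tests that array's *values*, so a lone match at position 0 reads as no match; testing the result's size instead counts any non-empty hit. A minimal sketch of the arithmetic, not part of the patches:

    >>> import numpy as np
    >>> res = np.array([0])    # get_loc result: the key matched, at position 0
    >>> bool(np.any(res))      # any() inspects the values, and 0 is falsy
    False
    >>> bool(np.size(res))     # the element count correctly reports the hit
    True

This is why ``test_contains`` deliberately feeds the dates in unsorted order and asserts membership of every element, including the one at position 0.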