diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 352acee23df2d..cf604822d6eea 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -278,7 +278,7 @@ Please try to maintain backward compatibility. *pandas* has lots of users with l
 
 Adding tests is one of the most common requests after code is pushed to *pandas*. Therefore, it is worth getting in the habit of writing tests ahead of time so this is never an issue.
 
-Like many packages, *pandas* uses the [Nose testing system](http://nose.readthedocs.org/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html).
+Like many packages, *pandas* uses the [Nose testing system](https://nose.readthedocs.io/en/latest/index.html) and the convenient extensions in [numpy.testing](http://docs.scipy.org/doc/numpy/reference/routines.testing.html).
 
 #### Writing tests
 
@@ -323,7 +323,7 @@ Performance matters and it is worth considering whether your code has introduced
 >
 > The asv benchmark suite was translated from the previous framework, vbench, so many stylistic issues are likely a result of automated transformation of the code.
 
-To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](http://asv.readthedocs.org/en/latest/installing.html).
+To use asv you will need either `conda` or `virtualenv`. For more details please check the [asv installation webpage](https://asv.readthedocs.io/en/latest/installing.html).
 
 To install asv:
 
@@ -360,7 +360,7 @@ This command is equivalent to:
 
 This will launch every test only once, display stderr from the benchmarks, and use your local `python` that comes from your `$PATH`.
 
-Information on how to write a benchmark can be found in the [asv documentation](http://asv.readthedocs.org/en/latest/writing_benchmarks.html).
+Information on how to write a benchmark can be found in the [asv documentation](https://asv.readthedocs.io/en/latest/writing_benchmarks.html).
 
 #### Running the vbench performance test suite (phasing out)
 
diff --git a/ci/prep_cython_cache.sh b/ci/prep_cython_cache.sh
index 6f16dce2fb431..cadc356b641f9 100755
--- a/ci/prep_cython_cache.sh
+++ b/ci/prep_cython_cache.sh
@@ -3,8 +3,8 @@
 ls "$HOME/.cache/"
 
 PYX_CACHE_DIR="$HOME/.cache/pyxfiles"
-pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"`
-pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx"`
+pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"`
+pyx_cache_file_list=`find ${PYX_CACHE_DIR} -name "*.pyx" -o -name "*.pxd"`
 
 CACHE_File="$HOME/.cache/cython_files.tar"
 
diff --git a/ci/submit_cython_cache.sh b/ci/submit_cython_cache.sh
index 4f60df0ccb2d8..5c98c3df61736 100755
--- a/ci/submit_cython_cache.sh
+++ b/ci/submit_cython_cache.sh
@@ -2,7 +2,7 @@
 
 CACHE_File="$HOME/.cache/cython_files.tar"
 PYX_CACHE_DIR="$HOME/.cache/pyxfiles"
-pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx"`
+pyx_file_list=`find ${TRAVIS_BUILD_DIR} -name "*.pyx" -o -name "*.pxd"`
 
 rm -rf $CACHE_File
 rm -rf $PYX_CACHE_DIR
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 19318aad3d53d..e5aa6b577270a 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1794,18 +1794,18 @@ The following functions are available for one dimensional object arrays or scal
 
 - :meth:`~pandas.to_datetime` (conversion to datetime objects)
 
-  .. ipython:: python
+   .. ipython:: python
 
-   import datetime
-   m = ['2016-07-09', datetime.datetime(2016, 3, 2)]
-   pd.to_datetime(m)
+      import datetime
+      m = ['2016-07-09', datetime.datetime(2016, 3, 2)]
+      pd.to_datetime(m)
 
 - :meth:`~pandas.to_timedelta` (conversion to timedelta objects)
 
-  .. ipython:: python
+   .. ipython:: python
 
-   m = ['5us', pd.Timedelta('1day')]
-   pd.to_timedelta(m)
+      m = ['5us', pd.Timedelta('1day')]
+      pd.to_timedelta(m)
 
 To force a conversion, we can pass in an ``errors`` argument, which specifies how pandas
 should deal with elements that cannot be converted to desired dtype or object. By default,
 ``errors='raise'``, meaning that any errors encountered
diff --git a/doc/source/conf.py b/doc/source/conf.py
index fd3a2493a53e8..4f916c6ba5290 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -295,7 +295,7 @@
     'python': ('http://docs.python.org/3', None),
     'numpy': ('http://docs.scipy.org/doc/numpy', None),
     'scipy': ('http://docs.scipy.org/doc/scipy/reference', None),
-    'py': ('http://pylib.readthedocs.org/en/latest/', None)
+    'py': ('https://pylib.readthedocs.io/en/latest/', None)
 }
 import glob
 autosummary_generate = glob.glob("*.rst")
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 7f336abcaa6d7..446a40a7ec4b4 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -360,7 +360,7 @@ follow the Numpy Docstring Standard (see above), but you don't need to install
 this because a local copy of numpydoc is included in the *pandas* source
 code.
 
 `nbconvert <https://nbconvert.readthedocs.io/en/latest/>`_ and
-`nbformat <http://nbformat.readthedocs.org/en/latest/>`_ are required to build
+`nbformat <https://nbformat.readthedocs.io/en/latest/>`_ are required to build
 the Jupyter notebooks included in the documentation.
 
 If you have a conda environment named ``pandas_dev``, you can install the extra
@@ -490,7 +490,7 @@ Adding tests is one of the most common requests after code is pushed to *pandas
 it is worth getting in the habit of writing tests ahead of time so this is never an issue.
 
 Like many packages, *pandas* uses the `Nose testing system
-<http://nose.readthedocs.org/en/latest/index.html>`_ and the convenient
+<https://nose.readthedocs.io/en/latest/index.html>`_ and the convenient
 extensions in `numpy.testing
 <http://docs.scipy.org/doc/numpy/reference/routines.testing.html>`_.
@@ -569,7 +569,7 @@ supports both python2 and python3.
 
 To use all features of asv, you will need either ``conda`` or
 ``virtualenv``. For more details please check the `asv installation
-webpage <http://asv.readthedocs.org/en/latest/installing.html>`_.
+webpage <https://asv.readthedocs.io/en/latest/installing.html>`_.
 
 To install asv::
@@ -624,7 +624,7 @@ This will display stderr from the benchmarks, and use your local
 ``python`` that comes from your ``$PATH``.
 
 Information on how to write a benchmark and how to use asv can be found in the
-`asv documentation <http://asv.readthedocs.org/en/latest/writing_benchmarks.html>`_.
+`asv documentation <https://asv.readthedocs.io/en/latest/writing_benchmarks.html>`_.
 
 .. _contributing.gbq_integration_tests:
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index 38a816060e1bc..27462a08b0011 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -877,7 +877,7 @@ The :ref:`Plotting <visualization>` docs.
 `__
 
 `Generate Embedded plots in excel files using Pandas, Vincent and xlsxwriter
-<http://pandas-xlsxwriter-charts.readthedocs.org/>`__
+<https://pandas-xlsxwriter-charts.readthedocs.io/>`__
 
 `Boxplot for each quartile of a stratifying variable
 `__
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
index 6063e3e8bce45..cc69367017aed 100644
--- a/doc/source/dsintro.rst
+++ b/doc/source/dsintro.rst
@@ -41,12 +41,6 @@ categories of functionality and methods in separate sections.
 Series
 ------
 
-.. warning::
-
-   In 0.13.0 ``Series`` has internally been refactored to no longer sub-class ``ndarray``
-   but instead subclass ``NDFrame``, similarly to the rest of the pandas containers. This should be
-   a transparent change with only very limited API implications (See the :ref:`Internal Refactoring <whatsnew_0130.refactoring>`)
-
 :class:`Series` is a one-dimensional labeled array capable of holding any data
 type (integers, strings, floating point numbers, Python objects, etc.). The axis
 labels are collectively referred to as the **index**. The basic method to create a Series is to call:
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index 17ebd1f163f4f..087b265ee83f2 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -145,7 +145,7 @@ API
 `pandas-datareader <https://github.com/pydata/pandas-datareader>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-``pandas-datareader`` is a remote data access library for pandas. ``pandas.io`` from pandas < 0.17.0 is now refactored/split-off to and importable from ``pandas_datareader`` (PyPI:``pandas-datareader``). Many/most of the supported APIs have at least a documentation paragraph in the `pandas-datareader docs <http://pandas-datareader.readthedocs.org/en/latest/>`_:
+``pandas-datareader`` is a remote data access library for pandas. ``pandas.io`` from pandas < 0.17.0 is now refactored/split-off to and importable from ``pandas_datareader`` (PyPI:``pandas-datareader``). Many/most of the supported APIs have at least a documentation paragraph in the `pandas-datareader docs <https://pandas-datareader.readthedocs.io/en/latest/>`_:
 
 The following data feeds are available:
@@ -170,7 +170,7 @@ PyDatastream is a Python interface to the
 SOAP API to return indexed Pandas DataFrames or Panels with financial data.
 This package requires valid credentials for this API (non free).
 
-`pandaSDMX <http://pandasdmx.readthedocs.org>`__
+`pandaSDMX <https://pandasdmx.readthedocs.io>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 pandaSDMX is an extensible library to retrieve and acquire statistical data
 and metadata disseminated in
@@ -215,7 +215,7 @@ dimensional arrays, rather than the tabular data for which pandas excels.
 Out-of-core
 -------------
 
-`Dask <https://dask.readthedocs.org/en/latest/>`__
+`Dask <https://dask.readthedocs.io/en/latest/>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Dask is a flexible parallel computing library for analytics. Dask
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 6295e6f6cbb68..73685e0be8e7e 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -189,7 +189,7 @@ pandas is equipped with an exhaustive set of unit tests covering about 97% of
 the codebase as of this writing. To run it on your machine to verify that
 everything is working (and you have all of the dependencies, soft and hard,
 installed), make sure you have `nose
-<http://nose.readthedocs.org/en/latest/>`__ and run:
+<https://nose.readthedocs.io/en/latest/>`__ and run:
 
 ::
diff --git a/doc/source/io.rst b/doc/source/io.rst
index d436fa52918d3..811fca4344121 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1481,7 +1481,7 @@ function takes a number of arguments. Only the first is required.
 - ``encoding``: a string representing the encoding to use if the contents are
   non-ASCII, for python versions prior to 3
 - ``line_terminator``: Character sequence denoting line end (default '\\n')
-- ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL)
+- ``quoting``: Set quoting rules as in csv module (default csv.QUOTE_MINIMAL). Note that if you have set a `float_format` then floats are converted to strings and csv.QUOTE_NONNUMERIC will treat them as non-numeric
 - ``quotechar``: Character used to quote fields (default '"')
 - ``doublequote``: Control quoting of ``quotechar`` in fields (default True)
 - ``escapechar``: Character used to escape ``sep`` and ``quotechar`` when
@@ -2639,8 +2639,8 @@ config options <options>` ``io.excel.xlsx.writer`` and
 ``io.excel.xls.writer``.  pandas will fall back on `openpyxl`_ for ``.xlsx``
 files if `Xlsxwriter`_ is not available.
 
-.. _XlsxWriter: http://xlsxwriter.readthedocs.org
-.. _openpyxl: http://openpyxl.readthedocs.org/
+.. _XlsxWriter: https://xlsxwriter.readthedocs.io
+.. _openpyxl: https://openpyxl.readthedocs.io/
 .. _xlwt: http://www.python-excel.org
 
 To specify which writer you want to use, you can pass an engine keyword
@@ -2775,6 +2775,7 @@ both on the writing (serialization), and reading (deserialization).
 
    as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.
 
    As a result of writing format changes and other issues:
+
    +----------------------+------------------------+
    | Packed with          | Can be unpacked with   |
    +======================+========================+
diff --git a/doc/source/r_interface.rst b/doc/source/r_interface.rst
index f3df1ebdf25cb..bde97d88a0ee7 100644
--- a/doc/source/r_interface.rst
+++ b/doc/source/r_interface.rst
@@ -17,7 +17,7 @@ rpy2 / R interface
 
    In v0.16.0, the ``pandas.rpy`` interface has been **deprecated and will be
    removed in a future version**. Similar functionality can be accessed
-   through the `rpy2 <http://rpy2.readthedocs.org/>`__ project.
+   through the `rpy2 <https://rpy2.readthedocs.io/>`__ project.
   See the :ref:`updating <rpy.updating>` section for a guide to port your
   code from the ``pandas.rpy`` to ``rpy2`` functions.
diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst
index e92798ea17448..c25e734a046b2 100644
--- a/doc/source/tutorials.rst
+++ b/doc/source/tutorials.rst
@@ -138,7 +138,7 @@ Modern Pandas
 Excel charts with pandas, vincent and xlsxwriter
 ------------------------------------------------
 
-- `Using Pandas and XlsxWriter to create Excel charts <http://pandas-xlsxwriter-charts.readthedocs.org/>`_
+- `Using Pandas and XlsxWriter to create Excel charts <https://pandas-xlsxwriter-charts.readthedocs.io/>`_
 
 Various Tutorials
 -----------------
diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index 77dc249aeb788..2a1f2cc47d48e 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -18,6 +18,8 @@ What's New
 
 These are new features and improvements of note in each release.
 
+.. include:: whatsnew/v0.19.1.txt
+
 .. include:: whatsnew/v0.19.0.txt
 
 .. include:: whatsnew/v0.18.1.txt
diff --git a/doc/source/whatsnew/v0.14.0.txt b/doc/source/whatsnew/v0.14.0.txt
index a91e0ab9e4961..181cd401c85d6 100644
--- a/doc/source/whatsnew/v0.14.0.txt
+++ b/doc/source/whatsnew/v0.14.0.txt
@@ -401,7 +401,7 @@ through SQLAlchemy (:issue:`2717`, :issue:`4163`, :issue:`5950`, :issue:`6292`)
 All databases supported by SQLAlchemy can be used, such
 as PostgreSQL, MySQL, Oracle, Microsoft SQL server (see documentation of
 SQLAlchemy on `included dialects
-<http://sqlalchemy.readthedocs.org/en/latest/dialects/index.html>`_).
+<https://sqlalchemy.readthedocs.io/en/latest/dialects/index.html>`_).
 
 The functionality of providing DBAPI connection objects will only be supported
 for sqlite3 in the future. The ``'mysql'`` flavor is deprecated.
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index fc13224d3fe6e..9cb299593076d 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -141,7 +141,7 @@ as well as the ``.sum()`` operation.
 Releasing of the GIL could benefit an application that uses threads for user interactions (e.g. QT_), or performing
 multi-threaded computations. A nice example of a library that can handle these types of computation-in-parallel is the dask_ library.
 
-.. _dask: https://dask.readthedocs.org/en/latest/
+.. _dask: https://dask.readthedocs.io/en/latest/
 .. _QT: https://wiki.python.org/moin/PyQt
 
 .. _whatsnew_0170.plot:
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 60847469aa02c..8e7e95c071ea4 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -1560,6 +1560,6 @@ Bug Fixes
 - Bug in ``.to_string()`` when called with an integer ``line_width`` and ``index=False`` raises an UnboundLocalError exception because ``idx`` referenced before assignment.
 - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
 - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
-- ``PeridIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
+- ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
 - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`)
 - Bug in ``Index.copy()`` where ``name`` parameter was ignored (:issue:`14302`)
diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
new file mode 100644
index 0000000000000..daceb18a53457
--- /dev/null
+++ b/doc/source/whatsnew/v0.19.1.txt
@@ -0,0 +1,48 @@
+.. _whatsnew_0191:
+
+v0.19.1 (????, 2016)
+---------------------
+
+This is a minor bug-fix release from 0.19.0 and includes a large number of
+bug fixes along with several new features, enhancements, and performance improvements.
+We recommend that all users upgrade to this version.
+
+Highlights include:
+
+
+.. contents:: What's new in v0.19.1
+    :local:
+    :backlinks: none
+
+
+.. _whatsnew_0191.performance:
+
+Performance Improvements
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+
+
+
+
+
+.. _whatsnew_0191.bug_fixes:
+
+Bug Fixes
+~~~~~~~~~
+
+
+
+
+- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
+
+
+
+
+
+
+
+
+- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
+- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
+- Bug in ``asfreq``, where frequency wasn't set for empty Series (:issue:`14320`)
\ No newline at end of file
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6fb0090dea114..1798a35168265 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1345,7 +1345,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
             The newline character or character sequence to use in the output
             file
         quoting : optional constant from csv module
-            defaults to csv.QUOTE_MINIMAL
+            defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
+            then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
+            will treat them as non-numeric
         quotechar : string (length 1), default '\"'
             character used to quote fields
         doublequote : boolean, default True
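Reviewer note on the `to_csv` docstring change above: the `quoting`/`float_format` interaction is easiest to see with a tiny round trip. A minimal sketch, not part of the patch (the frame and format string are made up):

```python
# Once float_format is applied, floats leave to_csv as strings, so
# csv.QUOTE_NONNUMERIC quotes them like any other non-numeric field.
import csv

import pandas as pd

df = pd.DataFrame({'a': [1.5, 2.25]})
out = df.to_csv(float_format='%.1f', quoting=csv.QUOTE_NONNUMERIC)
print(out)  # the formatted floats come back quoted, e.g. "1.5",
            # while the integer index values stay unquoted
```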
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 1ab5dbb737739..0c465da24a17e 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -116,12 +116,27 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
 
         return result
 
-    def _verify_integrity(self):
-        """Raises ValueError if length of levels and labels don't match or any
-        label would exceed level bounds"""
+    def _verify_integrity(self, labels=None, levels=None):
+        """
+
+        Parameters
+        ----------
+        labels : optional list
+            Labels to check for validity. Defaults to current labels.
+        levels : optional list
+            Levels to check for validity. Defaults to current levels.
+
+        Raises
+        ------
+        ValueError
+            * if length of levels and labels don't match or any label would
+              exceed level bounds
+        """
         # NOTE: Currently does not check, among other things, that cached
         # nlevels matches nor that sortorder matches actually sortorder.
-        labels, levels = self.labels, self.levels
+        labels = labels or self.labels
+        levels = levels or self.levels
+
         if len(levels) != len(labels):
             raise ValueError("Length of levels and labels must match. NOTE:"
                              " this index is in an inconsistent state.")
@@ -162,6 +177,9 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
                 new_levels[l] = _ensure_index(v, copy=copy)._shallow_copy()
             new_levels = FrozenList(new_levels)
 
+        if verify_integrity:
+            self._verify_integrity(levels=new_levels)
+
         names = self.names
         self._levels = new_levels
         if any(names):
@@ -170,9 +188,6 @@ def _set_levels(self, levels, level=None, copy=False, validate=True,
         self._tuples = None
         self._reset_cache()
 
-        if verify_integrity:
-            self._verify_integrity()
-
     def set_levels(self, levels, level=None, inplace=False,
                    verify_integrity=True):
         """
@@ -268,13 +283,13 @@ def _set_labels(self, labels, level=None, copy=False, validate=True,
                     lab, lev, copy=copy)._shallow_copy()
             new_labels = FrozenList(new_labels)
 
+        if verify_integrity:
+            self._verify_integrity(labels=new_labels)
+
         self._labels = new_labels
         self._tuples = None
         self._reset_cache()
 
-        if verify_integrity:
-            self._verify_integrity()
-
     def set_labels(self, labels, level=None, inplace=False,
                    verify_integrity=True):
         """
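The reordering of `_verify_integrity` in `_set_levels`/`_set_labels` is the substance of gh-13754: validation now happens before any state is assigned. A sketch of the invariant this buys, assuming a build with this patch applied:

```python
import pandas as pd

mi = pd.MultiIndex.from_tuples([('a', 1), ('b', 2)])
try:
    # One level value for two distinct labels -> ValueError, now raised
    # before self._levels is touched, even with inplace=True.
    mi.set_levels(['c'], level=0, inplace=True)
except ValueError:
    pass
print(mi.levels[0])  # expected unchanged: Index(['a', 'b'], dtype='object')
```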
diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index e5aaba26135e7..b7cd8a1c01224 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -4,21 +4,21 @@
 
 from datetime import datetime
 
-from numpy import nan
 import numpy as np
+from numpy import nan
 
-from pandas.compat import lrange
-from pandas import DataFrame, Series, Index, Timestamp
 import pandas as pd
-from pandas.util.testing import (assert_series_equal,
-                                 assert_frame_equal,
-                                 assertRaisesRegexp)
-
-import pandas.util.testing as tm
+from pandas import DataFrame, Index, Series, Timestamp
+from pandas.compat import lrange
 
 from pandas.tests.frame.common import TestData
 
+import pandas.util.testing as tm
+from pandas.util.testing import (assertRaisesRegexp,
+                                 assert_frame_equal,
+                                 assert_series_equal)
+
 
 class TestDataFrameConcatCommon(tm.TestCase, TestData):
@@ -324,6 +324,29 @@ def test_join_multiindex_leftright(self):
         assert_frame_equal(df2.join(df1, how='left'),
                            exp[['value2', 'value1']])
 
+    def test_concat_named_keys(self):
+        # GH 14252
+        df = pd.DataFrame({'foo': [1, 2], 'bar': [0.1, 0.2]})
+        index = Index(['a', 'b'], name='baz')
+        concatted_named_from_keys = pd.concat([df, df], keys=index)
+        expected_named = pd.DataFrame(
+            {'foo': [1, 2, 1, 2], 'bar': [0.1, 0.2, 0.1, 0.2]},
+            index=pd.MultiIndex.from_product((['a', 'b'], [0, 1]),
+                                             names=['baz', None]))
+        assert_frame_equal(concatted_named_from_keys, expected_named)
+
+        index_no_name = Index(['a', 'b'], name=None)
+        concatted_named_from_names = pd.concat(
+            [df, df], keys=index_no_name, names=['baz'])
+        assert_frame_equal(concatted_named_from_names, expected_named)
+
+        concatted_unnamed = pd.concat([df, df], keys=index_no_name)
+        expected_unnamed = pd.DataFrame(
+            {'foo': [1, 2, 1, 2], 'bar': [0.1, 0.2, 0.1, 0.2]},
+            index=pd.MultiIndex.from_product((['a', 'b'], [0, 1]),
+                                             names=[None, None]))
+        assert_frame_equal(concatted_unnamed, expected_unnamed)
+
 
 class TestDataFrameCombineFirst(tm.TestCase, TestData):
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 9758c2b9c805e..55fd169e26eb7 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -323,6 +323,13 @@ def test_asfreq_datetimeindex(self):
         ts = df['A'].asfreq('B')
         tm.assertIsInstance(ts.index, DatetimeIndex)
 
+    def test_asfreq_datetimeindex_empty_series(self):
+        # GH 14340
+        empty = Series(index=pd.DatetimeIndex([])).asfreq('H')
+        normal = Series(index=pd.DatetimeIndex(["2016-09-29 11:00"]),
+                        data=[3]).asfreq('H')
+        self.assertEqual(empty.index.freq, normal.index.freq)
+
     def test_first_last_valid(self):
         N = len(self.frame.index)
         mat = randn(N)
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index cd9ce0102ca1e..fdc5a2eaec812 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -149,14 +149,14 @@ def test_set_levels(self):
         levels = self.index.levels
         new_levels = [[lev + 'a' for lev in level] for level in levels]
 
-        def assert_matching(actual, expected):
+        def assert_matching(actual, expected, check_dtype=False):
             # avoid specifying internal representation
             # as much as possible
             self.assertEqual(len(actual), len(expected))
             for act, exp in zip(actual, expected):
                 act = np.asarray(act)
-                exp = np.asarray(exp, dtype=np.object_)
-                tm.assert_numpy_array_equal(act, exp)
+                exp = np.asarray(exp)
+                tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
 
         # level changing [w/o mutation]
         ind2 = self.index.set_levels(new_levels)
@@ -204,6 +204,31 @@ def assert_matching(actual, expected):
         assert_matching(ind2.levels, new_levels)
         assert_matching(self.index.levels, levels)
 
+        # illegal level changing should not change levels
+        # GH 13754
+        original_index = self.index.copy()
+        for inplace in [True, False]:
+            with assertRaisesRegexp(ValueError, "^On"):
+                self.index.set_levels(['c'], level=0, inplace=inplace)
+            assert_matching(self.index.levels, original_index.levels,
+                            check_dtype=True)
+
+            with assertRaisesRegexp(ValueError, "^On"):
+                self.index.set_labels([0, 1, 2, 3, 4, 5], level=0,
+                                      inplace=inplace)
+            assert_matching(self.index.labels, original_index.labels,
+                            check_dtype=True)
+
+            with assertRaisesRegexp(TypeError, "^Levels"):
+                self.index.set_levels('c', level=0, inplace=inplace)
+            assert_matching(self.index.levels, original_index.levels,
+                            check_dtype=True)
+
+            with assertRaisesRegexp(TypeError, "^Labels"):
+                self.index.set_labels(1, level=0, inplace=inplace)
+            assert_matching(self.index.labels, original_index.labels,
+                            check_dtype=True)
+
     def test_set_labels(self):
         # side note - you probably wouldn't want to use levels and labels
         # directly like this - but it is possible.
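For readers skipping ahead: the user-visible behavior these tests pin down comes from the `merge.py` change below. A minimal sketch, assuming pandas with this patch applied (the frame and the level name 'baz' are illustrative):

```python
import pandas as pd

df = pd.DataFrame({'foo': [1, 2], 'bar': [0.1, 0.2]})

# A named Index passed as `keys` now names the outer MultiIndex level.
result = pd.concat([df, df], keys=pd.Index(['a', 'b'], name='baz'))
print(result.index.names)  # expected: FrozenList(['baz', None])

# Passing names= explicitly keeps working and gives the same result.
result = pd.concat([df, df], keys=['a', 'b'], names=['baz'])
print(result.index.names)  # expected: FrozenList(['baz', None])
```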
diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py
index 8cdde8d92b28f..a8c43195f5552 100644
--- a/pandas/tools/merge.py
+++ b/pandas/tools/merge.py
@@ -1369,7 +1369,8 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
                     clean_keys.append(k)
                     clean_objs.append(v)
             objs = clean_objs
-            keys = clean_keys
+            name = getattr(keys, 'name', None)
+            keys = Index(clean_keys, name=name)
 
         if len(objs) == 0:
             raise ValueError('All objects passed were None')
@@ -1454,7 +1455,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
         self.axis = axis
         self.join_axes = join_axes
         self.keys = keys
-        self.names = names
+        self.names = names or getattr(keys, 'names', None)
         self.levels = levels
 
         self.ignore_index = ignore_index
diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py
index f1a209053445a..7ce97f94ea5ef 100644
--- a/pandas/tseries/resample.py
+++ b/pandas/tseries/resample.py
@@ -1344,7 +1344,10 @@ def asfreq(obj, freq, method=None, how=None, normalize=False):
         return new_obj
     else:
         if len(obj.index) == 0:
-            return obj.copy()
+            new_index = obj.index._shallow_copy(freq=to_offset(freq))
+            new_obj = obj.copy()
+            new_obj.index = new_index
+            return new_obj
         dti = date_range(obj.index[0], obj.index[-1], freq=freq)
         dti.name = obj.index.name
         rs = obj.reindex(dti, method=method)
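The `resample.py` branch above is the whole of the gh-14320 fix: the empty-index fast path previously returned a bare copy, dropping the requested frequency. A sketch of the resulting behavior, assuming this patch is applied:

```python
import pandas as pd

# asfreq on an empty Series should still stamp the frequency on the
# returned (empty) index instead of leaving it as None.
empty = pd.Series(index=pd.DatetimeIndex([])).asfreq('H')
print(empty.index.freq)  # expected: <Hour>; before the patch this was None
```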
diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py
index 8a86fcba32ecb..a6d58fa3e7ef3 100644
--- a/pandas/tseries/tests/test_base.py
+++ b/pandas/tseries/tests/test_base.py
@@ -867,7 +867,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
         self.assertFalse(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([], dtype=np.int64))
+                                    np.array([], dtype=np.intp))
 
         idx = pd.DatetimeIndex(['2011-01-01', 'NaT'], tz=tz)
         self.assertTrue(idx._can_hold_na)
@@ -875,7 +875,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
         self.assertTrue(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([1], dtype=np.int64))
+                                    np.array([1], dtype=np.intp))
 
     def test_equals(self):
         # GH 13107
@@ -1717,7 +1717,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
         self.assertFalse(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([], dtype=np.int64))
+                                    np.array([], dtype=np.intp))
 
         idx = pd.TimedeltaIndex(['1 days', 'NaT'])
         self.assertTrue(idx._can_hold_na)
@@ -1725,7 +1725,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
         self.assertTrue(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([1], dtype=np.int64))
+                                    np.array([1], dtype=np.intp))
 
     def test_equals(self):
         # GH 13107
@@ -2714,7 +2714,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
         self.assertFalse(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([], dtype=np.int64))
+                                    np.array([], dtype=np.intp))
 
         idx = pd.PeriodIndex(['2011-01-01', 'NaT'], freq='D')
         self.assertTrue(idx._can_hold_na)
@@ -2722,7 +2722,7 @@ def test_nat(self):
         tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
         self.assertTrue(idx.hasnans)
         tm.assert_numpy_array_equal(idx._nan_idxs,
-                                    np.array([1], dtype=np.int64))
+                                    np.array([1], dtype=np.intp))
 
     def test_equals(self):
         # GH 13107
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index a85a606075911..c7e4f03fcd792 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -17,7 +17,8 @@
 from pytz import NonExistentTimeError
 
 import pandas.util.testing as tm
-from pandas.util.testing import assert_frame_equal, set_timezone
+from pandas.util.testing import (assert_frame_equal, assert_series_equal,
+                                 set_timezone)
 from pandas.compat import lrange, zip
 
 try:
@@ -535,6 +536,44 @@ def test_ambiguous_nat(self):
         # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]
         self.assert_numpy_array_equal(di_test.values, localized.values)
 
+    def test_ambiguous_bool(self):
+        # make sure that we are correctly accepting bool values as ambiguous
+
+        # gh-14402
+        t = Timestamp('2015-11-01 01:00:03')
+        expected0 = Timestamp('2015-11-01 01:00:03-0500', tz='US/Central')
+        expected1 = Timestamp('2015-11-01 01:00:03-0600', tz='US/Central')
+
+        def f():
+            t.tz_localize('US/Central')
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+
+        result = t.tz_localize('US/Central', ambiguous=True)
+        self.assertEqual(result, expected0)
+
+        result = t.tz_localize('US/Central', ambiguous=False)
+        self.assertEqual(result, expected1)
+
+        s = Series([t])
+        expected0 = Series([expected0])
+        expected1 = Series([expected1])
+
+        def f():
+            s.dt.tz_localize('US/Central')
+        self.assertRaises(pytz.AmbiguousTimeError, f)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=True)
+        assert_series_equal(result, expected0)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=[True])
+        assert_series_equal(result, expected0)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=False)
+        assert_series_equal(result, expected1)
+
+        result = s.dt.tz_localize('US/Central', ambiguous=[False])
+        assert_series_equal(result, expected1)
+
     def test_nonexistent_raise_coerce(self):
         # See issue 13057
         from pytz.exceptions import NonExistentTimeError
@@ -629,14 +668,14 @@ def test_localized_at_time_between_time(self):
 
         result = ts_local.at_time(time(10, 0))
         expected = ts.at_time(time(10, 0)).tz_localize(self.tzstr(
             'US/Eastern'))
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
         self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern')))
 
         t1, t2 = time(10, 0), time(11, 0)
         result = ts_local.between_time(t1, t2)
         expected = ts.between_time(t1,
                                    t2).tz_localize(self.tzstr('US/Eastern'))
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
         self.assertTrue(self.cmptz(result.index.tz, self.tz('US/Eastern')))
 
     def test_string_index_alias_tz_aware(self):
@@ -723,7 +762,7 @@ def test_frame_no_datetime64_dtype(self):
 
         result = df.get_dtype_counts().sort_index()
         expected = Series({'datetime64[ns]': 2,
                            str(tz_expected): 2}).sort_index()
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
 
     def test_hongkong_tz_convert(self):
         # #1673
@@ -1324,7 +1363,7 @@ def test_append_aware(self):
         exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                   tz='US/Eastern')
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         self.assertEqual(ts_result.index.tz, rng1.tz)
 
         rng1 = date_range('1/1/2011 01:00', periods=1, freq='H', tz='UTC')
@@ -1336,7 +1375,7 @@ def test_append_aware(self):
         exp_index = DatetimeIndex(['2011-01-01 01:00', '2011-01-01 02:00'],
                                   tz='UTC')
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         utc = rng1.tz
         self.assertEqual(utc, ts_result.index.tz)
@@ -1352,7 +1391,7 @@ def test_append_aware(self):
         exp_index = Index([Timestamp('1/1/2011 01:00', tz='US/Eastern'),
                            Timestamp('1/1/2011 02:00', tz='US/Central')])
         exp = Series([1, 2], index=exp_index)
-        self.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
 
     def test_append_dst(self):
         rng1 = date_range('1/1/2016 01:00', periods=3, freq='H',
@@ -1368,7 +1407,7 @@ def test_append_dst(self):
                                    '2016-08-01 02:00', '2016-08-01 03:00'],
                                   tz='US/Eastern')
         exp = Series([1, 2, 3, 10, 11, 12], index=exp_index)
-        tm.assert_series_equal(ts_result, exp)
+        assert_series_equal(ts_result, exp)
         self.assertEqual(ts_result.index.tz, rng1.tz)
 
     def test_append_aware_naive(self):
@@ -1429,7 +1468,7 @@ def test_arith_utc_convert(self):
 
         expected = uts1 + uts2
 
         self.assertEqual(result.index.tz, pytz.UTC)
-        tm.assert_series_equal(result, expected)
+        assert_series_equal(result, expected)
 
     def test_intersection(self):
         rng = date_range('1/1/2011', periods=100, freq='H', tz='utc')
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index 9073ad0abd535..bab45595cd60f 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -4155,6 +4155,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
     """
     cdef:
         ndarray[int64_t] trans, deltas, idx_shifted
+        ndarray ambiguous_array
        Py_ssize_t i, idx, pos, ntrans, n = len(vals)
        int64_t *tdata
        int64_t v, left, right
@@ -4190,11 +4191,18 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
             infer_dst = True
         elif ambiguous == 'NaT':
             fill = True
+    elif isinstance(ambiguous, bool):
+        is_dst = True
+        if ambiguous:
+            ambiguous_array = np.ones(len(vals), dtype=bool)
+        else:
+            ambiguous_array = np.zeros(len(vals), dtype=bool)
     elif hasattr(ambiguous, '__iter__'):
         is_dst = True
         if len(ambiguous) != len(vals):
             raise ValueError(
                 "Length of ambiguous bool-array must be the same size as vals")
+        ambiguous_array = np.asarray(ambiguous)
 
     trans, deltas, typ = _get_dst_info(tz)
 
@@ -4286,7 +4294,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
             if infer_dst and dst_hours[i] != NPY_NAT:
                 result[i] = dst_hours[i]
             elif is_dst:
-                if ambiguous[i]:
+                if ambiguous_array[i]:
                     result[i] = left
                 else:
                     result[i] = right
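Closing note: the `tslib.pyx` change above is what makes scalar booleans valid for `ambiguous`, broadcasting them over the values like the existing list path (gh-14402). A quick demonstration mirroring `test_ambiguous_bool`, assuming a build with this patch; US/Central is used because the wall time 2015-11-01 01:00 occurs twice there:

```python
import pandas as pd

t = pd.Timestamp('2015-11-01 01:00:03')

# Without a hint, localizing an ambiguous wall time still raises
# pytz.AmbiguousTimeError.
try:
    t.tz_localize('US/Central')
except Exception as err:
    print(type(err).__name__)  # AmbiguousTimeError

# ambiguous=True selects the DST reading (-05:00),
# ambiguous=False the standard-time reading (-06:00).
print(t.tz_localize('US/Central', ambiguous=True))   # 2015-11-01 01:00:03-05:00
print(t.tz_localize('US/Central', ambiguous=False))  # 2015-11-01 01:00:03-06:00

# The same scalars now work through Series.dt.tz_localize as well.
s = pd.Series([t])
print(s.dt.tz_localize('US/Central', ambiguous=True))
```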