diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
index 6f91eba1ad239..c7d731249f9cf 100644
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@@ -10,6 +10,6 @@
#### Output of ``pd.show_versions()``
-# Paste the output here
+# Paste the output of ``pd.show_versions()`` here
diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py
index 0f9689dadcbb0..2e394ed4268f3 100644
--- a/asv_bench/benchmarks/inference.py
+++ b/asv_bench/benchmarks/inference.py
@@ -148,12 +148,12 @@ class to_numeric(object):
N = 500000
data_dict = {
- 'string-int': (['1'] * (N / 2)) + ([2] * (N / 2)),
- 'string-nint': (['-1'] * (N / 2)) + ([2] * (N / 2)),
+ 'string-int': (['1'] * (N // 2)) + ([2] * (N // 2)),
+ 'string-nint': (['-1'] * (N // 2)) + ([2] * (N // 2)),
'datetime64': np.repeat(np.array(['1970-01-01', '1970-01-02'],
dtype='datetime64[D]'), N),
- 'string-float': (['1.1'] * (N / 2)) + ([2] * (N / 2)),
- 'int-list': ([1] * (N / 2)) + ([2] * (N / 2)),
+ 'string-float': (['1.1'] * (N // 2)) + ([2] * (N // 2)),
+ 'int-list': ([1] * (N // 2)) + ([2] * (N // 2)),
'int32': np.repeat(np.int32(1), N)
}
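
The ``//`` change above matters because, on Python 3, ``/`` is true division
and returns a float, which cannot be used as a sequence-repetition count. A
minimal sketch of the failure the benchmark fix avoids (plain Python, not part
of the patch; assumes Python 3)::

    N = 500000
    try:
        ['1'] * (N / 2)        # Python 3: TypeError, N / 2 is a float
    except TypeError as exc:
        print(exc)             # "can't multiply sequence by non-int ..."
    print(len(['1'] * (N // 2)))   # 250000 -- floor division stays an int
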
diff --git a/ci/lint.sh b/ci/lint.sh
index a866b04445f96..115a2cdaf7899 100755
--- a/ci/lint.sh
+++ b/ci/lint.sh
@@ -7,16 +7,10 @@ source activate pandas
RET=0
if [ "$LINT" ]; then
- echo "Linting"
- for path in 'api' 'core' 'indexes' 'types' 'formats' 'io' 'stats' 'compat' 'sparse' 'tools' 'tseries' 'tests' 'computation' 'util'
- do
- echo "linting -> pandas/$path"
- flake8 pandas/$path --filename '*.py'
- if [ $? -ne "0" ]; then
- RET=1
- fi
-
- done
+ # pandas/rpy is deprecated and will be removed.
+ # pandas/src is C code, so no need to search there.
+ echo "Linting *.py"
+ flake8 pandas --filename '*.py' --exclude pandas/rpy,pandas/src
echo "Linting *.py DONE"
echo "Linting *.pyx"
diff --git a/ci/requirements-3.4.build b/ci/requirements-3.4.build
index 4a4bd9d433428..e6e59dcba63fe 100644
--- a/ci/requirements-3.4.build
+++ b/ci/requirements-3.4.build
@@ -1,3 +1,3 @@
numpy=1.8.1
-cython
+cython=0.24.1
libgfortran=1.0
diff --git a/ci/travis_encrypt_gbq.sh b/ci/travis_encrypt_gbq.sh
index 719db67f384e0..e404ca73a405e 100755
--- a/ci/travis_encrypt_gbq.sh
+++ b/ci/travis_encrypt_gbq.sh
@@ -1,11 +1,10 @@
#!/bin/bash
GBQ_JSON_FILE=$1
-GBQ_PROJECT_ID=$2
-if [[ $# -ne 2 ]]; then
+if [[ $# -ne 1 ]]; then
echo -e "Too few arguments.\nUsage: ./travis_encrypt_gbq.sh "\
- " "
+ ""
exit 1
fi
@@ -23,9 +22,9 @@ echo "Encrypting $GBQ_JSON_FILE..."
read -d "\n" TRAVIS_KEY TRAVIS_IV <<<$(travis encrypt-file $GBQ_JSON_FILE \
travis_gbq.json.enc -f | grep -o "\w*_iv\|\w*_key");
-echo "Adding your secure key and project id to travis_gbq_config.txt ..."
-echo -e "TRAVIS_IV_ENV=$TRAVIS_IV\nTRAVIS_KEY_ENV=$TRAVIS_KEY\n"\
-"GBQ_PROJECT_ID='$GBQ_PROJECT_ID'" > travis_gbq_config.txt
+echo "Adding your secure key to travis_gbq_config.txt ..."
+echo -e "TRAVIS_IV_ENV=$TRAVIS_IV\nTRAVIS_KEY_ENV=$TRAVIS_KEY"\
+> travis_gbq_config.txt
echo "Done. Removing file $GBQ_JSON_FILE"
rm $GBQ_JSON_FILE
diff --git a/ci/travis_gbq_config.txt b/ci/travis_gbq_config.txt
index 3b68d62f177cc..0b28cdedbd0d7 100644
--- a/ci/travis_gbq_config.txt
+++ b/ci/travis_gbq_config.txt
@@ -1,3 +1,2 @@
TRAVIS_IV_ENV=encrypted_1d9d7b1f171b_iv
TRAVIS_KEY_ENV=encrypted_1d9d7b1f171b_key
-GBQ_PROJECT_ID='pandas-travis'
diff --git a/ci/travis_process_gbq_encryption.sh b/ci/travis_process_gbq_encryption.sh
index 7ff4c08f78e37..9967d40e49f0a 100755
--- a/ci/travis_process_gbq_encryption.sh
+++ b/ci/travis_process_gbq_encryption.sh
@@ -2,10 +2,12 @@
source ci/travis_gbq_config.txt
-if [[ -n ${!TRAVIS_IV_ENV} ]]; then
+if [[ -n ${SERVICE_ACCOUNT_KEY} ]]; then
+ echo "${SERVICE_ACCOUNT_KEY}" > ci/travis_gbq.json;
+elif [[ -n ${!TRAVIS_IV_ENV} ]]; then
openssl aes-256-cbc -K ${!TRAVIS_KEY_ENV} -iv ${!TRAVIS_IV_ENV} \
-in ci/travis_gbq.json.enc -out ci/travis_gbq.json -d;
- export GBQ_PROJECT_ID=$GBQ_PROJECT_ID;
+ export GBQ_PROJECT_ID='pandas-travis';
echo 'Successfully decrypted gbq credentials'
fi
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index a8a47a9d979c0..44ee6223d5ee1 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -643,20 +643,22 @@ on Travis-CI and are only accessible from the pandas-dev/pandas repository. The
credentials won't be available on forks of pandas. Here are the steps to run
gbq integration tests on a forked repository:
-#. First, complete all the steps in the `Encrypting Files Prerequisites
-   <https://docs.travis-ci.com/user/encrypting-files/>`__ section.
-#. Sign into `Travis <https://travis-ci.org/>`__ using your GitHub account.
-#. Enable your forked repository of pandas for testing in `Travis
-   <https://travis-ci.org/profile>`__.
-#. Run the following command from terminal where the current working directory
- is the ``ci`` folder::
-
-       ./travis_encrypt_gbq.sh <gbq-json-file> <gbq-project-id>
-
-#. Create a new branch from the branch used in your pull request. Commit the
- encrypted file called ``travis_gbq.json.enc`` as well as the file
- ``travis_gbq_config.txt``, in an otherwise empty commit. DO NOT commit the
- ``*.json`` file which contains your unencrypted private key.
+#. Go to `Travis CI <https://travis-ci.org/>`__ and sign in with your GitHub
+ account.
+#. Click on the ``+`` icon next to the ``My Repositories`` list and enable
+ Travis builds for your fork.
+#. Click on the gear icon to edit your travis build, and add two environment
+ variables:
+
+ - ``GBQ_PROJECT_ID`` with the value being the ID of your BigQuery project.
+
+ - ``SERVICE_ACCOUNT_KEY`` with the value being the contents of the JSON key
+ that you downloaded for your service account. Use single quotes around
+ your JSON key to ensure that it is treated as a string.
+
+ For both environment variables, keep the "Display value in build log" option
+ DISABLED. These variables contain sensitive data and you do not want their
+ contents being exposed in build logs.
#. Your branch should be tested automatically once it is pushed. You can check
the status by visiting your Travis branches page which exists at the
following location: https://travis-ci.org/your-user-name/pandas/branches .
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index a37b1e89c7cc3..087b265ee83f2 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -143,7 +143,7 @@ both "column wise min/max and global min/max coloring."
API
-----
-`pandas-datareader `__
+`pandas-datareader `__
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``pandas-datareader`` is a remote data access library for pandas. ``pandas.io`` from pandas < 0.17.0 is now refactored/split-off to and importable from ``pandas_datareader`` (PyPI:``pandas-datareader``). Many/most of the supported APIs have at least a documentation paragraph in the `pandas-datareader docs <https://pandas-datareader.readthedocs.io/en/latest/>`_:
diff --git a/doc/source/io.rst b/doc/source/io.rst
index ae71587c8b46b..ba1bd328d2991 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -2789,7 +2789,7 @@ both on the writing (serialization), and reading (deserialization).
| 0.17 / Python 3 | >=0.18 / any Python |
+----------------------+------------------------+
| 0.18 | >= 0.18 |
- +======================+========================+
+ +----------------------+------------------------+
Reading (files packed by older versions) is backward-compatible, except for files packed with 0.17 in Python 2, in which case they can only be unpacked in Python 2.
diff --git a/doc/source/release.rst b/doc/source/release.rst
index d210065f04459..622e9a53ff8f0 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -37,6 +37,50 @@ analysis / manipulation tool available in any language.
* Binary installers on PyPI: http://pypi.python.org/pypi/pandas
* Documentation: http://pandas.pydata.org
+
+pandas 0.19.1
+-------------
+
+**Release date:** November 3, 2016
+
+This is a minor bug-fix release from 0.19.0 and includes some small regression fixes,
+bug fixes and performance improvements.
+
+See the :ref:`v0.19.1 Whatsnew <whatsnew_0191>` page for an overview of all
+bugs that have been fixed in 0.19.1.
+
+Thanks
+~~~~~~
+
+- Adam Chainz
+- Anthonios Partheniou
+- Arash Rouhani
+- Ben Kandel
+- Brandon M. Burroughs
+- Chris
+- chris-b1
+- Chris Warth
+- David Krych
+- dubourg
+- gfyoung
+- Iván Vallés Pérez
+- Jeff Reback
+- Joe Jevnik
+- Jon M. Mease
+- Joris Van den Bossche
+- Josh Owen
+- Keshav Ramaswamy
+- Larry Ren
+- mattrijk
+- Michael Felt
+- paul-mannino
+- Piotr Chromiec
+- Robert Bradshaw
+- Sinhrks
+- Thiago Serafim
+- Tom Bird
+
+
pandas 0.19.0
-------------
diff --git a/doc/source/whatsnew/v0.13.0.txt b/doc/source/whatsnew/v0.13.0.txt
index 0944d849cfafd..6ecd4b487c798 100644
--- a/doc/source/whatsnew/v0.13.0.txt
+++ b/doc/source/whatsnew/v0.13.0.txt
@@ -600,7 +600,7 @@ Enhancements
.. ipython:: python
t = Timestamp('20130101 09:01:02')
- t + pd.datetools.Nano(123)
+ t + pd.tseries.offsets.Nano(123)
- A new method, ``isin`` for DataFrames, which plays nicely with boolean indexing. The argument to ``isin``, what we're comparing the DataFrame to, can be a DataFrame, Series, dict, or array of values. See :ref:`the docs` for more.
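
For context, a short sketch of what the corrected call evaluates to: ``Nano``
is the nanosecond offset living under ``pd.tseries.offsets``, while the old
``pd.datetools`` namespace was a deprecated alias (assumes a pandas of this
era)::

    import pandas as pd

    t = pd.Timestamp('20130101 09:01:02')
    # adds 123 nanoseconds to the timestamp
    print(t + pd.tseries.offsets.Nano(123))
    # Timestamp('2013-01-01 09:01:02.000000123')
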
diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt
index 3ee4cc1dde92d..5022d9c12dd90 100644
--- a/doc/source/whatsnew/v0.19.1.txt
+++ b/doc/source/whatsnew/v0.19.1.txt
@@ -1,15 +1,12 @@
.. _whatsnew_0191:
-v0.19.1 (????, 2016)
----------------------
+v0.19.1 (November 3, 2016)
+--------------------------
-This is a minor bug-fix release from 0.19.0 and includes a large number of
-bug fixes along with several new features, enhancements, and performance improvements.
+This is a minor bug-fix release from 0.19.0 and includes some small regression fixes,
+bug fixes and performance improvements.
We recommend that all users upgrade to this version.
-Highlights include:
-
-
.. contents:: What's new in v0.19.1
:local:
:backlinks: none
@@ -21,11 +18,10 @@ Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~
- Fixed performance regression in factorization of ``Period`` data (:issue:`14338`)
+- Fixed performance regression in ``Series.asof(where)`` when ``where`` is a scalar (:issue:`14461`)
+- Improved performance in ``DataFrame.asof(where)`` when ``where`` is a scalar (:issue:`14461`)
- Improved performance in ``.to_json()`` when ``lines=True`` (:issue:`14408`)
-- Improved performance in ``Series.asof(where)`` when ``where`` is a scalar (:issue:`14461)
-- Improved performance in ``DataFrame.asof(where)`` when ``where`` is a scalar (:issue:`14461)
-
-
+- Improved performance in certain types of ``loc`` indexing with a MultiIndex (:issue:`14551`).
.. _whatsnew_0191.bug_fixes:
@@ -33,35 +29,27 @@ Performance Improvements
Bug Fixes
~~~~~~~~~
-
-
-
+- Source installs from PyPI will now again work without ``cython`` installed, as in previous versions (:issue:`14204`)
+- Compat with Cython 0.25 for building (:issue:`14496`)
+- Fixed regression where user-provided file handles were closed in ``read_csv`` (c engine) (:issue:`14418`).
+- Fixed regression in ``DataFrame.quantile`` when missing values were present in some columns (:issue:`14357`).
+- Fixed regression in ``Index.difference`` where the ``freq`` of a ``DatetimeIndex`` was incorrectly set (:issue:`14323`)
+- Added back ``pandas.core.common.array_equivalent`` with a deprecation warning (:issue:`14555`).
+- Bug in ``pd.read_csv`` for the C engine in which quotation marks were improperly parsed in skipped rows (:issue:`14459`)
+- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
+- Fixed regression in ``Index.append`` when categorical indices were appended (:issue:`14545`).
+- Fixed regression in ``pd.DataFrame`` where constructor fails when given dict with ``None`` value (:issue:`14381`)
+- Fixed regression in ``DatetimeIndex._maybe_cast_slice_bound`` when index is empty (:issue:`14354`).
- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)
-
-
-
- Bug in string indexing against data with ``object`` ``Index`` may raise ``AttributeError`` (:issue:`14424`)
- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`)
-
-
- Bug in ``RangeIndex.intersection`` when result is a empty set (:issue:`14364`).
-- Bug in union of differences from a ``DatetimeIndex``; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)
-
- Bug in groupby-transform broadcasting that could cause incorrect dtype coercion (:issue:`14457`)
-
-
- Bug in ``Series.__setitem__`` which allowed mutating read-only arrays (:issue:`14359`).
-
-
-- Source installs from PyPI will now work without ``cython`` installed, as in previous versions (:issue:`14204`)
- Bug in ``DataFrame.insert`` where multiple calls with duplicate columns can fail (:issue:`14291`)
-
- ``pd.merge()`` will raise ``ValueError`` with non-boolean parameters in passed boolean type arguments (:issue:`14434`)
-
-
- Bug in ``Timestamp`` where dates very near the minimum (1677-09) could underflow on creation (:issue:`14415`)
-
- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
- Bug in ``pd.concat`` where ``axis`` cannot take string parameters ``'rows'`` or ``'columns'`` (:issue:`14369`)
- Bug in ``pd.concat`` with dataframes heterogeneous in length and tuple ``keys`` (:issue:`14438`)
@@ -69,12 +57,6 @@ Bug Fixes
- Bug in ``DataFrame.to_json`` where ``lines=True`` and a value contained a ``}`` character (:issue:`14391`)
- Bug in ``df.groupby`` causing an ``AttributeError`` when grouping a single index frame by a column and the index level (:issue:`14327`)
- Bug in ``df.groupby`` where ``TypeError`` raised when ``pd.Grouper(key=...)`` is passed in a list (:issue:`14334`)
-
-
-
-
-
-
-
-
-- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns`` is not scalar and ``values`` is not specified (:issue:`14380`)
+- Bug in ``pd.pivot_table`` may raise ``TypeError`` or ``ValueError`` when ``index`` or ``columns``
+ is not scalar and ``values`` is not specified (:issue:`14380`)
+- Enhancement in ``pandas.io.json.json_normalize``: added ``errors={'raise', 'ignore'}`` for keys not found in ``meta`` (:issue:`14505`)
\ No newline at end of file
diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
index d4d8b7e4e9747..49aa31c375e25 100644
--- a/pandas/api/tests/test_api.py
+++ b/pandas/api/tests/test_api.py
@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-
+import numpy as np
+
import pandas as pd
from pandas.core import common as com
from pandas import api
@@ -184,6 +186,11 @@ def test_deprecation_core_common(self):
for t in self.allowed:
self.check_deprecation(getattr(com, t), getattr(types, t))
+ def test_deprecation_core_common_array_equivalent(self):
+
+ with tm.assert_produces_warning(DeprecationWarning):
+ com.array_equivalent(np.array([1, 2]), np.array([1, 2]))
+
def test_deprecation_core_common_moved(self):
# these are in pandas.types.common
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 341bd3b4cc845..295947bbc1166 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -64,6 +64,15 @@ def wrapper(*args, **kwargs):
setattr(m, t, outer(t))
+# deprecate array_equivalent
+
+def array_equivalent(*args, **kwargs):
+ warnings.warn("'pandas.core.common.array_equivalent' is deprecated and "
+ "is no longer public API", DeprecationWarning, stacklevel=2)
+ from pandas.types import missing
+ return missing.array_equivalent(*args, **kwargs)
+
+
class PandasError(Exception):
pass
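
The shim above keeps the old public name importable while steering users to
the new location. A minimal sketch of how the deprecation fires (mirrors the
test added in ``pandas/api/tests/test_api.py``)::

    import warnings

    import numpy as np
    from pandas.core import common as com

    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter('always')
        com.array_equivalent(np.array([1, 2]), np.array([1, 2]))
        # the call still works, but a DeprecationWarning is recorded
        assert any(issubclass(x.category, DeprecationWarning) for x in w)
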
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 037ab900e6150..8e18b65e80385 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4008,6 +4008,8 @@ def asfreq(self, freq, method=None, how=None, normalize=False):
-------
converted : type of caller
+ Notes
+ -----
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.
"""
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 2a7f896e1b871..afddb86988970 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -6,7 +6,7 @@
import warnings
import copy
-from pandas.compat import(
+from pandas.compat import (
zip, range, long, lzip,
callable, map
)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 11721a5bdac29..43beefffd448e 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -6,7 +6,6 @@
from collections import defaultdict
import numpy as np
-from numpy import percentile as _quantile
from pandas.core.base import PandasObject
@@ -1147,8 +1146,9 @@ def get_result(other):
def handle_error():
if raise_on_error:
+ # The 'detail' variable is defined in outer scope.
raise TypeError('Could not operate %s with block values %s' %
- (repr(other), str(detail)))
+ (repr(other), str(detail))) # noqa
else:
# return the values
result = np.empty(values.shape, dtype='O')
@@ -1315,16 +1315,38 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
values = self.get_values()
values, _, _, _ = self._try_coerce_args(values, values)
- mask = isnull(self.values)
- if not lib.isscalar(mask) and mask.any():
- # even though this could be a 2-d mask it appears
- # as a 1-d result
- mask = mask.reshape(values.shape)
- result_shape = tuple([values.shape[0]] + [-1] * (self.ndim - 1))
- values = _block_shape(values[~mask], ndim=self.ndim)
- if self.ndim > 1:
- values = values.reshape(result_shape)
+ def _nanpercentile1D(values, mask, q, **kw):
+ values = values[~mask]
+
+ if len(values) == 0:
+ if is_scalar(q):
+ return self._na_value
+ else:
+ return np.array([self._na_value] * len(q),
+ dtype=values.dtype)
+
+ return np.percentile(values, q, **kw)
+
+ def _nanpercentile(values, q, axis, **kw):
+
+ mask = isnull(self.values)
+ if not is_scalar(mask) and mask.any():
+ if self.ndim == 1:
+ return _nanpercentile1D(values, mask, q, **kw)
+ else:
+ # for nonconsolidatable blocks mask is 1D, but values 2D
+ if mask.ndim < values.ndim:
+ mask = mask.reshape(values.shape)
+ if axis == 0:
+ values = values.T
+ mask = mask.T
+ result = [_nanpercentile1D(val, m, q, **kw) for (val, m)
+ in zip(list(values), list(mask))]
+ result = np.array(result, dtype=values.dtype, copy=False).T
+ return result
+ else:
+ return np.percentile(values, q, axis=axis, **kw)
from pandas import Float64Index
is_empty = values.shape[axis] == 0
@@ -1343,13 +1365,13 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
else:
try:
- result = _quantile(values, np.array(qs) * 100,
- axis=axis, **kw)
+ result = _nanpercentile(values, np.array(qs) * 100,
+ axis=axis, **kw)
except ValueError:
# older numpies don't handle an array for q
- result = [_quantile(values, q * 100,
- axis=axis, **kw) for q in qs]
+ result = [_nanpercentile(values, q * 100,
+ axis=axis, **kw) for q in qs]
result = np.array(result, copy=False)
if self.ndim > 1:
@@ -1368,7 +1390,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None):
else:
result = np.array([self._na_value] * len(self))
else:
- result = _quantile(values, qs * 100, axis=axis, **kw)
+ result = _nanpercentile(values, qs * 100, axis=axis, **kw)
ndim = getattr(result, 'ndim', None) or 0
result = self._try_coerce_result(result)
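
The ``_nanpercentile`` helpers above drop missing values per column before
deferring to ``np.percentile``, and return the NA value for all-missing
columns. A self-contained sketch of the same idea (NumPy only; ``na_value``
stands in for the block's ``_na_value``)::

    import numpy as np

    def nanpercentile_1d(values, q, na_value=np.nan):
        # drop missing entries, then defer to np.percentile;
        # an all-missing column yields the NA value for every requested q
        values = values[~np.isnan(values)]
        if len(values) == 0:
            if np.isscalar(q):
                return na_value
            return np.array([na_value] * len(q))
        return np.percentile(values, q)

    col = np.array([1.0, 2.0, np.nan, 4.0])
    print(nanpercentile_1d(col, 50))        # 2.0 -- the NaN is ignored
    print(nanpercentile_1d(col, [50, 75]))  # [ 2.  3.]
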
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 1c6b13885dd01..188204d83d985 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2915,8 +2915,8 @@ def create_from_value(value, index, dtype):
return subarr
- # scalar like
- if subarr.ndim == 0:
+ # scalar like, GH
+ if getattr(subarr, 'ndim', 0) == 0:
if isinstance(data, list): # pragma: no cover
subarr = np.array(data, dtype=object)
elif index is not None:
diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py
index 4d2dcd259e623..54eaf86315a88 100644
--- a/pandas/indexes/base.py
+++ b/pandas/indexes/base.py
@@ -1464,13 +1464,13 @@ def append(self, other):
names = set([obj.name for obj in to_concat])
name = None if len(names) > 1 else self.name
- typs = _concat.get_dtype_kinds(to_concat)
-
- if 'category' in typs:
- # if any of the to_concat is category
+ if self.is_categorical():
+ # if calling index is category, don't check dtype of others
from pandas.indexes.category import CategoricalIndex
return CategoricalIndex._append_same_dtype(self, to_concat, name)
+ typs = _concat.get_dtype_kinds(to_concat)
+
if len(typs) == 1:
return self._append_same_dtype(to_concat, name=name)
return _concat._concat_index_asobject(to_concat, name=name)
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index a9f452db69659..f9576d92d8a49 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -1907,6 +1907,13 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
return np.array(labels == loc, dtype=bool)
else:
# sorted, so can return slice object -> view
+ try:
+ loc = labels.dtype.type(loc)
+ except TypeError:
+ # this occurs when loc is a slice (partial string indexing)
+ # but the TypeError raised by searchsorted in this case
+                # is caught in Index._has_valid_type()
+ pass
i = labels.searchsorted(loc, side='left')
j = labels.searchsorted(loc, side='right')
return slice(i, j)
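
Casting ``loc`` to the label array's own dtype before ``searchsorted`` keeps
the lookup on NumPy's fast path, which is the motivation given in the whatsnew
entry for :issue:`14551`. A hedged sketch of the cast itself::

    import numpy as np

    labels = np.arange(1000000, dtype='int64')
    key = 500000                        # plain Python int
    key_cast = labels.dtype.type(key)   # np.int64 scalar, same value

    # both forms find the same position; the cast form matches the
    # array's dtype exactly, avoiding per-element comparison fallbacks
    print(np.searchsorted(labels, key), np.searchsorted(labels, key_cast))
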
diff --git a/pandas/io/json.py b/pandas/io/json.py
index 1e258101a5d86..3b0930a36e199 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -22,10 +22,9 @@
def to_json(path_or_buf, obj, orient=None, date_format='epoch',
double_precision=10, force_ascii=True, date_unit='ms',
default_handler=None, lines=False):
-
if lines and orient != 'records':
- raise ValueError(
- "'lines' keyword only valid when 'orient' is records")
+ raise ValueError(
+ "'lines' keyword only valid when 'orient' is records")
if isinstance(obj, Series):
s = SeriesWriter(
@@ -53,7 +52,6 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch',
class Writer(object):
-
def __init__(self, obj, orient, date_format, double_precision,
ensure_ascii, date_unit, default_handler=None):
self.obj = obj
@@ -123,32 +121,38 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
file. For file URLs, a host is expected. For instance, a local file
could be ``file://localhost/path/to/table.json``
- orient
+    orient : string
+ Indication of expected JSON string format.
+ Compatible JSON strings can be produced by ``to_json()`` with a
+ corresponding orient value.
+ The set of possible orients is:
+
+ - ``'split'`` : dict like
+ ``{index -> [index], columns -> [columns], data -> [values]}``
+ - ``'records'`` : list like
+ ``[{column -> value}, ... , {column -> value}]``
+ - ``'index'`` : dict like ``{index -> {column -> value}}``
+ - ``'columns'`` : dict like ``{column -> {index -> value}}``
+ - ``'values'`` : just the values array
+
+ The allowed and default values depend on the value
+ of the `typ` parameter.
- * `Series`
+ * when ``typ == 'series'``,
+ - allowed orients are ``{'split','records','index'}``
- default is ``'index'``
- - allowed values are: ``{'split','records','index'}``
- The Series index must be unique for orient ``'index'``.
- * `DataFrame`
+ * when ``typ == 'frame'``,
+ - allowed orients are ``{'split','records','index',
+ 'columns','values'}``
- default is ``'columns'``
- - allowed values are: {'split','records','index','columns','values'}
- - The DataFrame index must be unique for orients 'index' and
- 'columns'.
- - The DataFrame columns must be unique for orients 'index',
- 'columns', and 'records'.
-
- * The format of the JSON string
-
- - split : dict like
- ``{index -> [index], columns -> [columns], data -> [values]}``
- - records : list like
- ``[{column -> value}, ... , {column -> value}]``
- - index : dict like ``{index -> {column -> value}}``
- - columns : dict like ``{column -> {index -> value}}``
- - values : just the values array
+ - The DataFrame index must be unique for orients ``'index'`` and
+ ``'columns'``.
+ - The DataFrame columns must be unique for orients ``'index'``,
+ ``'columns'``, and ``'records'``.
typ : type of object to recover (series or frame), default 'frame'
dtype : boolean or dict, default True
@@ -197,7 +201,48 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
Returns
-------
- result : Series or DataFrame
+ result : Series or DataFrame, depending on the value of `typ`.
+
+ See Also
+ --------
+ DataFrame.to_json
+
+ Examples
+ --------
+
+ >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
+ ... index=['row 1', 'row 2'],
+ ... columns=['col 1', 'col 2'])
+
+ Encoding/decoding a Dataframe using ``'split'`` formatted JSON:
+
+ >>> df.to_json(orient='split')
+ '{"columns":["col 1","col 2"],
+ "index":["row 1","row 2"],
+ "data":[["a","b"],["c","d"]]}'
+ >>> pd.read_json(_, orient='split')
+ col 1 col 2
+ row 1 a b
+ row 2 c d
+
+ Encoding/decoding a Dataframe using ``'index'`` formatted JSON:
+
+ >>> df.to_json(orient='index')
+ '{"row 1":{"col 1":"a","col 2":"b"},"row 2":{"col 1":"c","col 2":"d"}}'
+ >>> pd.read_json(_, orient='index')
+ col 1 col 2
+ row 1 a b
+ row 2 c d
+
+ Encoding/decoding a Dataframe using ``'records'`` formatted JSON.
+ Note that index labels are not preserved with this encoding.
+
+ >>> df.to_json(orient='records')
+ '[{"col 1":"a","col 2":"b"},{"col 1":"c","col 2":"d"}]'
+ >>> pd.read_json(_, orient='records')
+ col 1 col 2
+ 0 a b
+ 1 c d
"""
filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf,
@@ -244,7 +289,6 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
class Parser(object):
-
_STAMP_UNITS = ('s', 'ms', 'us', 'ns')
_MIN_STAMPS = {
's': long(31536000),
@@ -445,8 +489,8 @@ def _parse_no_numpy(self):
if orient == "split":
decoded = dict((str(k), v)
for k, v in compat.iteritems(loads(
- json,
- precise_float=self.precise_float)))
+ json,
+ precise_float=self.precise_float)))
self.check_keys_split(decoded)
self.obj = Series(dtype=None, **decoded)
else:
@@ -520,8 +564,8 @@ def _parse_no_numpy(self):
elif orient == "split":
decoded = dict((str(k), v)
for k, v in compat.iteritems(loads(
- json,
- precise_float=self.precise_float)))
+ json,
+ precise_float=self.precise_float)))
self.check_keys_split(decoded)
self.obj = DataFrame(dtype=None, **decoded)
elif orient == "index":
@@ -548,7 +592,6 @@ def _process_converter(self, f, filt=None):
new_obj[i] = c
if needs_new_obj:
-
# possibly handle dup columns
new_obj = DataFrame(new_obj, index=self.obj.index)
new_obj.columns = self.obj.columns
@@ -581,9 +624,9 @@ def is_ok(col):
col_lower = col.lower()
if (col_lower.endswith('_at') or
col_lower.endswith('_time') or
- col_lower == 'modified' or
- col_lower == 'date' or
- col_lower == 'datetime' or
+ col_lower == 'modified' or
+ col_lower == 'date' or
+ col_lower == 'datetime' or
col_lower.startswith('timestamp')):
return True
return False
@@ -593,6 +636,7 @@ def is_ok(col):
lambda col, c: ((self.keep_default_dates and is_ok(col)) or
col in convert_dates))
+
# ---------------------------------------------------------------------
# JSON normalization routines
@@ -676,7 +720,7 @@ def nested_to_record(ds, prefix="", level=0):
def json_normalize(data, record_path=None, meta=None,
meta_prefix=None,
- record_prefix=None):
+ record_prefix=None, errors='raise'):
"""
"Normalize" semi-structured JSON data into a flat table
@@ -693,6 +737,8 @@ def json_normalize(data, record_path=None, meta=None,
If True, prefix records with dotted (?) path, e.g. foo.bar.field if
path to records is ['foo', 'bar']
meta_prefix : string, default None
+    errors : {'raise', 'ignore'}, default 'raise'
+        * ignore : will ignore KeyError if keys listed in meta are not
+          always present
Returns
-------
@@ -728,6 +774,7 @@ def json_normalize(data, record_path=None, meta=None,
4 Cuyahoga 1337 John Kasich Ohio OH
"""
+
def _pull_field(js, spec):
result = js
if isinstance(spec, list):
@@ -792,7 +839,13 @@ def _recursive_extract(data, path, seen_meta, level=0):
if level + 1 > len(val):
meta_val = seen_meta[key]
else:
- meta_val = _pull_field(obj, val[level:])
+ try:
+ meta_val = _pull_field(obj, val[level:])
+ except KeyError as e:
+ if errors == 'ignore':
+ meta_val = np.nan
+ else:
+                        raise KeyError("Try running with errors='ignore' "
+                                       "as the following key may not "
+                                       "always be present: " + str(e))
meta_vals[key].append(meta_val)
records.extend(recs)
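
A hedged usage sketch of the new ``errors`` option (hypothetical data; mirrors
the behaviour exercised below in ``test_json_norm.py``): with
``errors='ignore'``, a meta key that is missing from some records becomes
``NaN`` instead of raising ``KeyError``::

    from pandas.io.json import json_normalize

    data = [{'info': {'id': 1, 'tag': 'a'},
             'values': [{'x': 1}, {'x': 2}]},
            {'info': {'id': 2},              # no 'tag' in this record
             'values': [{'x': 3}]}]

    result = json_normalize(data, record_path='values',
                            meta=[['info', 'id'], ['info', 'tag']],
                            errors='ignore')
    print(result)
    #    x  info.id info.tag
    # 0  1        1        a
    # 1  2        1        a
    # 2  3        2      NaN
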
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index f8cf04e08ab03..090a21632cddb 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1456,6 +1456,8 @@ def __init__(self, src, **kwds):
def close(self):
for f in self.handles:
f.close()
+
+ # close additional handles opened by C parser (for compression)
try:
self._reader.close()
except:
@@ -1759,6 +1761,9 @@ def __init__(self, f, **kwds):
self.delimiter = kwds['delimiter']
self.quotechar = kwds['quotechar']
+ if isinstance(self.quotechar, compat.text_type):
+ self.quotechar = str(self.quotechar)
+
self.escapechar = kwds['escapechar']
self.doublequote = kwds['doublequote']
self.skipinitialspace = kwds['skipinitialspace']
@@ -2191,16 +2196,16 @@ def _handle_usecols(self, columns, usecols_key):
usecols_key is used if there are string usecols.
"""
if self.usecols is not None:
- if any([isinstance(u, string_types) for u in self.usecols]):
+ if any([isinstance(col, string_types) for col in self.usecols]):
if len(columns) > 1:
raise ValueError("If using multiple headers, usecols must "
"be integers.")
col_indices = []
- for u in self.usecols:
- if isinstance(u, string_types):
- col_indices.append(usecols_key.index(u))
+ for col in self.usecols:
+ if isinstance(col, string_types):
+ col_indices.append(usecols_key.index(col))
else:
- col_indices.append(u)
+ col_indices.append(col)
else:
col_indices = self.usecols
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 47642c2e2bc28..c9f8d32e1b504 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -507,10 +507,11 @@ def _engine_builder(con):
if isinstance(con, string_types):
try:
import sqlalchemy
- con = sqlalchemy.create_engine(con)
- return con
except ImportError:
_SQLALCHEMY_INSTALLED = False
+ else:
+ con = sqlalchemy.create_engine(con)
+ return con
return con
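
Moving ``create_engine`` into the ``else`` clause narrows the
``except ImportError`` to the ``import sqlalchemy`` statement itself, so an
``ImportError`` raised inside ``create_engine`` (e.g. a missing database
driver such as ``pg8000``) now propagates to the user instead of being
silently swallowed. The pattern in isolation (a sketch, not the pandas code)::

    def build_engine(uri):
        try:
            import sqlalchemy              # only this import is guarded
        except ImportError:
            return uri                     # sqlalchemy genuinely absent
        else:
            # errors here (e.g. a missing DB driver) reach the caller
            return sqlalchemy.create_engine(uri)
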
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index 4848db97194d9..4877728d9ec52 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -225,6 +225,59 @@ def test_nested_flattens(self):
self.assertEqual(result, expected)
+
+ def test_json_normalise_fix(self):
+ # issue 14505
+ j = {
+ "Trades": [{
+ "general": {
+ "tradeid": 100,
+ "trade_version": 1,
+ "stocks": [{
+
+ "symbol": "AAPL",
+ "name": "Apple",
+ "price": "0"
+
+ }, {
+
+ "symbol": "GOOG",
+ "name": "Google",
+ "price": "0"
+
+ }
+ ]
+ }
+ }, {
+ "general": {
+ "tradeid": 100,
+ "stocks": [{
+
+ "symbol": "AAPL",
+ "name": "Apple",
+ "price": "0"
+
+ }, {
+ "symbol": "GOOG",
+ "name": "Google",
+ "price": "0"
+
+ }
+ ]
+ }
+ }
+ ]
+ }
+        j = json_normalize(data=j['Trades'],
+                           record_path=[['general', 'stocks']],
+                           meta=[['general', 'tradeid'],
+                                 ['general', 'trade_version']],
+                           errors='ignore')
+        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
+                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
+                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
+                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
+                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
+
+ self.assertEqual(j.fillna('').to_dict(), expected)
+
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
'--pdb-failure', '-s'], exit=False)
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 0219e16391be8..3be02c55ea10a 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -17,8 +17,8 @@
import pandas.util.testing as tm
from pandas import DataFrame, Series, Index, MultiIndex
from pandas import compat
-from pandas.compat import(StringIO, BytesIO, PY3,
- range, lrange, u)
+from pandas.compat import (StringIO, BytesIO, PY3,
+ range, lrange, u)
from pandas.io.common import DtypeWarning, EmptyDataError, URLError
from pandas.io.parsers import TextFileReader, TextParser
@@ -1602,3 +1602,26 @@ def test_internal_eof_byte(self):
expected = pd.DataFrame([["1\x1a", 2]], columns=['a', 'b'])
result = self.read_csv(StringIO(data))
tm.assert_frame_equal(result, expected)
+
+ def test_file_handles(self):
+ # GH 14418 - don't close user provided file handles
+
+ fh = StringIO('a,b\n1,2')
+ self.read_csv(fh)
+ self.assertFalse(fh.closed)
+
+ with open(self.csv1, 'r') as f:
+ self.read_csv(f)
+ self.assertFalse(f.closed)
+
+ # mmap not working with python engine
+ if self.engine != 'python':
+
+ import mmap
+ with open(self.csv1, 'r') as f:
+ m = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
+ self.read_csv(m)
+ # closed attribute new in python 3.2
+ if PY3:
+ self.assertFalse(m.closed)
+ m.close()
diff --git a/pandas/io/tests/parser/quoting.py b/pandas/io/tests/parser/quoting.py
index d0f1493be0621..765cec8243a0a 100644
--- a/pandas/io/tests/parser/quoting.py
+++ b/pandas/io/tests/parser/quoting.py
@@ -9,7 +9,7 @@
import pandas.util.testing as tm
from pandas import DataFrame
-from pandas.compat import StringIO
+from pandas.compat import PY3, StringIO, u
class QuotingTests(object):
@@ -138,3 +138,16 @@ def test_double_quote(self):
result = self.read_csv(StringIO(data), quotechar='"',
doublequote=False)
tm.assert_frame_equal(result, expected)
+
+ def test_quotechar_unicode(self):
+ # See gh-14477
+ data = 'a\n1'
+ expected = DataFrame({'a': [1]})
+
+ result = self.read_csv(StringIO(data), quotechar=u('"'))
+ tm.assert_frame_equal(result, expected)
+
+ # Compared to Python 3.x, Python 2.x does not handle unicode well.
+ if PY3:
+ result = self.read_csv(StringIO(data), quotechar=u('\u0394'))
+ tm.assert_frame_equal(result, expected)
diff --git a/pandas/io/tests/parser/skiprows.py b/pandas/io/tests/parser/skiprows.py
index c9f50dec6c01e..9f01adb6fabcb 100644
--- a/pandas/io/tests/parser/skiprows.py
+++ b/pandas/io/tests/parser/skiprows.py
@@ -190,3 +190,11 @@ def test_skiprows_lineterminator(self):
skiprows=1, delim_whitespace=True,
names=['date', 'time', 'var', 'flag', 'oflag'])
tm.assert_frame_equal(df, expected)
+
+ def test_skiprows_infield_quote(self):
+ # see gh-14459
+ data = 'a"\nb"\na\n1'
+ expected = DataFrame({'a': [1]})
+
+ df = self.read_csv(StringIO(data), skiprows=2)
+ tm.assert_frame_equal(df, expected)
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index af8989baabbc0..e9d19bbd8be66 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -944,7 +944,7 @@ def test_sqlalchemy_type_mapping(self):
self.assertTrue(isinstance(
table.table.c['time'].type, sqltypes.DateTime))
- def test_to_sql_read_sql_with_database_uri(self):
+ def test_database_uri_string(self):
# Test read_sql and .to_sql method with a database URI (GH10654)
test_frame1 = self.test_frame1
@@ -963,6 +963,12 @@ def test_to_sql_read_sql_with_database_uri(self):
tm.assert_frame_equal(test_frame1, test_frame3)
tm.assert_frame_equal(test_frame1, test_frame4)
+ # using driver that will not be installed on Travis to trigger error
+ # in sqlalchemy.create_engine -> test passing of this error to user
+ db_uri = "postgresql+pg8000://user:pass@host/dbname"
+ with tm.assertRaisesRegexp(ImportError, "pg8000"):
+ sql.read_sql("select * from table", db_uri)
+
def _make_iris_table_metadata(self):
sa = sqlalchemy
metadata = sa.MetaData()
diff --git a/pandas/msgpack/__init__.py b/pandas/msgpack/__init__.py
index 0c2370df936a4..33d60a12ef0a3 100644
--- a/pandas/msgpack/__init__.py
+++ b/pandas/msgpack/__init__.py
@@ -1,11 +1,10 @@
# coding: utf-8
-# flake8: noqa
-
-from pandas.msgpack._version import version
-from pandas.msgpack.exceptions import *
from collections import namedtuple
+from pandas.msgpack.exceptions import * # noqa
+from pandas.msgpack._version import version # noqa
+
class ExtType(namedtuple('ExtType', 'code data')):
"""ExtType represents ext type in msgpack."""
@@ -18,11 +17,10 @@ def __new__(cls, code, data):
raise ValueError("code must be 0~127")
return super(ExtType, cls).__new__(cls, code, data)
+import os # noqa
-import os
-from pandas.msgpack._packer import Packer
-from pandas.msgpack._unpacker import unpack, unpackb, Unpacker
-
+from pandas.msgpack._packer import Packer # noqa
+from pandas.msgpack._unpacker import unpack, unpackb, Unpacker # noqa
def pack(o, stream, **kwargs):
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 12525c7a9c587..93a494c176b99 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -272,7 +272,7 @@ cdef class TextReader:
parser_t *parser
object file_handle, na_fvalues
object true_values, false_values
- object dsource
+ object handle
bint na_filter, verbose, has_usecols, has_mi_columns
int parser_start
list clocks
@@ -554,9 +554,9 @@ cdef class TextReader:
def close(self):
# we need to properly close an open derived
        # filehandle here, e.g. a UTFRecoder
- if self.dsource is not None:
+ if self.handle is not None:
try:
- self.dsource.close()
+ self.handle.close()
except:
pass
@@ -570,7 +570,8 @@ cdef class TextReader:
if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
raise TypeError('bad "quoting" value')
- if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
+ if not isinstance(quote_char, (str, compat.text_type,
+ bytes)) and quote_char is not None:
dtype = type(quote_char).__name__
raise TypeError('"quotechar" must be string, '
'not {dtype}'.format(dtype=dtype))
@@ -640,6 +641,7 @@ cdef class TextReader:
else:
raise ValueError('Unrecognized compression type: %s' %
self.compression)
+ self.handle = source
if isinstance(source, basestring):
if not isinstance(source, bytes):
@@ -683,8 +685,6 @@ cdef class TextReader:
raise IOError('Expected file path name or file-like object,'
' got %s type' % type(source))
- self.dsource = source
-
cdef _get_header(self):
# header is now a list of lists, so field_count should use header[0]
diff --git a/pandas/src/parser/tokenizer.c b/pandas/src/parser/tokenizer.c
index af85b7b894d26..748edc7fcacc5 100644
--- a/pandas/src/parser/tokenizer.c
+++ b/pandas/src/parser/tokenizer.c
@@ -478,9 +478,10 @@ static int end_line(parser_t *self) {
}
}
- if (self->state == SKIP_LINE || \
- self->state == QUOTE_IN_SKIP_LINE || \
- self->state == QUOTE_IN_QUOTE_IN_SKIP_LINE
+ if (self->state == START_FIELD_IN_SKIP_LINE || \
+ self->state == IN_FIELD_IN_SKIP_LINE || \
+ self->state == IN_QUOTED_FIELD_IN_SKIP_LINE || \
+ self->state == QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE
) {
TRACE(("end_line: Skipping row %d\n", self->file_lines));
// increment file line count
@@ -761,38 +762,54 @@ int tokenize_bytes(parser_t *self, size_t line_limit)
switch(self->state) {
- case SKIP_LINE:
- TRACE(("tokenize_bytes SKIP_LINE 0x%x, state %d\n", c, self->state));
+ case START_FIELD_IN_SKIP_LINE:
if (IS_TERMINATOR(c)) {
END_LINE();
} else if (IS_CARRIAGE(c)) {
self->file_lines++;
self->state = EAT_CRNL_NOP;
} else if (IS_QUOTE(c)) {
- self->state = QUOTE_IN_SKIP_LINE;
+ self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
+ } else if (IS_DELIMITER(c)) {
+ // Do nothing, we're starting a new field again.
+ } else {
+ self->state = IN_FIELD_IN_SKIP_LINE;
+ }
+ break;
+
+ case IN_FIELD_IN_SKIP_LINE:
+ if (IS_TERMINATOR(c)) {
+ END_LINE();
+ } else if (IS_CARRIAGE(c)) {
+ self->file_lines++;
+ self->state = EAT_CRNL_NOP;
+ } else if (IS_DELIMITER(c)) {
+ self->state = START_FIELD_IN_SKIP_LINE;
}
break;
- case QUOTE_IN_SKIP_LINE:
+ case IN_QUOTED_FIELD_IN_SKIP_LINE:
if (IS_QUOTE(c)) {
if (self->doublequote) {
- self->state = QUOTE_IN_QUOTE_IN_SKIP_LINE;
+ self->state = QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE;
} else {
- self->state = SKIP_LINE;
+ self->state = IN_FIELD_IN_SKIP_LINE;
}
}
break;
- case QUOTE_IN_QUOTE_IN_SKIP_LINE:
+ case QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE:
if (IS_QUOTE(c)) {
- self->state = QUOTE_IN_SKIP_LINE;
+ self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
} else if (IS_TERMINATOR(c)) {
END_LINE();
} else if (IS_CARRIAGE(c)) {
self->file_lines++;
self->state = EAT_CRNL_NOP;
+ } else if (IS_DELIMITER(c)) {
+ self->state = START_FIELD_IN_SKIP_LINE;
} else {
- self->state = SKIP_LINE;
+ self->state = IN_FIELD_IN_SKIP_LINE;
}
break;
@@ -846,9 +863,9 @@ int tokenize_bytes(parser_t *self, size_t line_limit)
// start of record
if (skip_this_line(self, self->file_lines)) {
if (IS_QUOTE(c)) {
- self->state = QUOTE_IN_SKIP_LINE;
+ self->state = IN_QUOTED_FIELD_IN_SKIP_LINE;
} else {
- self->state = SKIP_LINE;
+ self->state = IN_FIELD_IN_SKIP_LINE;
if (IS_TERMINATOR(c)) {
END_LINE();
diff --git a/pandas/src/parser/tokenizer.h b/pandas/src/parser/tokenizer.h
index 8f7ae436bb7b7..487c1265d9358 100644
--- a/pandas/src/parser/tokenizer.h
+++ b/pandas/src/parser/tokenizer.h
@@ -123,9 +123,10 @@ typedef enum {
EAT_COMMENT,
EAT_LINE_COMMENT,
WHITESPACE_LINE,
- SKIP_LINE,
- QUOTE_IN_SKIP_LINE,
- QUOTE_IN_QUOTE_IN_SKIP_LINE,
+ START_FIELD_IN_SKIP_LINE,
+ IN_FIELD_IN_SKIP_LINE,
+ IN_QUOTED_FIELD_IN_SKIP_LINE,
+ QUOTE_IN_QUOTED_FIELD_IN_SKIP_LINE,
FINISHED
} ParserState;
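
The finer-grained states above track field and quote boundaries inside skipped
rows, so a quote character in a skipped row can no longer leave the tokenizer
stuck inside a quoted section. This is observable from Python via ``skiprows``
(same data as the new test in ``skiprows.py``; a sketch, assuming the C
engine)::

    import pandas as pd
    from pandas.compat import StringIO

    # the first two rows contain bare quote characters; with the state
    # machine above they are skipped cleanly (see gh-14459)
    data = 'a"\nb"\na\n1'
    print(pd.read_csv(StringIO(data), skiprows=2))
    #    a
    # 0  1
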
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index d21db5ba52a45..e55ba3e161ed9 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -259,6 +259,14 @@ def test_constructor_dict(self):
frame = DataFrame({'A': [], 'B': []}, columns=['A', 'B'])
self.assert_index_equal(frame.index, Index([], dtype=np.int64))
+ # GH 14381
+ # Dict with None value
+ frame_none = DataFrame(dict(a=None), index=[0])
+ frame_none_list = DataFrame(dict(a=[None]), index=[0])
+ tm.assert_equal(frame_none.get_value(0, 'a'), None)
+ tm.assert_equal(frame_none_list.get_value(0, 'a'), None)
+ tm.assert_frame_equal(frame_none, frame_none_list)
+
# GH10856
# dict with scalar values should raise error, even if columns passed
with tm.assertRaises(ValueError):
diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py
index 52e8697abe850..22414a6ba8a53 100644
--- a/pandas/tests/frame/test_quantile.py
+++ b/pandas/tests/frame/test_quantile.py
@@ -262,6 +262,11 @@ def test_quantile_datetime(self):
index=[0.5], columns=[0, 1])
assert_frame_equal(result, expected)
+ # empty when numeric_only=True
+ # FIXME (gives empty frame in 0.18.1, broken in 0.19.0)
+ # result = df[['a', 'c']].quantile(.5)
+ # result = df[['a', 'c']].quantile([.5])
+
def test_quantile_invalid(self):
msg = 'percentiles should all be in the interval \\[0, 1\\]'
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
@@ -340,3 +345,95 @@ def test_quantile_box(self):
pd.Timedelta('2 days')]],
index=[0.5], columns=list('AaBbCc'))
tm.assert_frame_equal(res, exp)
+
+ def test_quantile_nan(self):
+
+ # GH 14357 - float block where some cols have missing values
+ df = DataFrame({'a': np.arange(1, 6.0), 'b': np.arange(1, 6.0)})
+ df.iloc[-1, 1] = np.nan
+
+ res = df.quantile(0.5)
+ exp = Series([3.0, 2.5], index=['a', 'b'], name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5, 0.75])
+ exp = DataFrame({'a': [3.0, 4.0], 'b': [2.5, 3.25]}, index=[0.5, 0.75])
+ tm.assert_frame_equal(res, exp)
+
+ res = df.quantile(0.5, axis=1)
+ exp = Series(np.arange(1.0, 6.0), name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5, 0.75], axis=1)
+ exp = DataFrame([np.arange(1.0, 6.0)] * 2, index=[0.5, 0.75])
+ tm.assert_frame_equal(res, exp)
+
+ # full-nan column
+ df['b'] = np.nan
+
+ res = df.quantile(0.5)
+ exp = Series([3.0, np.nan], index=['a', 'b'], name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5, 0.75])
+ exp = DataFrame({'a': [3.0, 4.0], 'b': [np.nan, np.nan]},
+ index=[0.5, 0.75])
+ tm.assert_frame_equal(res, exp)
+
+ def test_quantile_nat(self):
+
+ # full NaT column
+ df = DataFrame({'a': [pd.NaT, pd.NaT, pd.NaT]})
+
+ res = df.quantile(0.5, numeric_only=False)
+ exp = Series([pd.NaT], index=['a'], name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5], numeric_only=False)
+ exp = DataFrame({'a': [pd.NaT]}, index=[0.5])
+ tm.assert_frame_equal(res, exp)
+
+ # mixed non-null / full null column
+ df = DataFrame({'a': [pd.Timestamp('2012-01-01'),
+ pd.Timestamp('2012-01-02'),
+ pd.Timestamp('2012-01-03')],
+ 'b': [pd.NaT, pd.NaT, pd.NaT]})
+
+ res = df.quantile(0.5, numeric_only=False)
+ exp = Series([pd.Timestamp('2012-01-02'), pd.NaT], index=['a', 'b'],
+ name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5], numeric_only=False)
+ exp = DataFrame([[pd.Timestamp('2012-01-02'), pd.NaT]], index=[0.5],
+ columns=['a', 'b'])
+ tm.assert_frame_equal(res, exp)
+
+ def test_quantile_empty(self):
+
+ # floats
+ df = DataFrame(columns=['a', 'b'], dtype='float64')
+
+ res = df.quantile(0.5)
+ exp = Series([np.nan, np.nan], index=['a', 'b'], name=0.5)
+ tm.assert_series_equal(res, exp)
+
+ res = df.quantile([0.5])
+ exp = DataFrame([[np.nan, np.nan]], columns=['a', 'b'], index=[0.5])
+ tm.assert_frame_equal(res, exp)
+
+ # FIXME (gives empty frame in 0.18.1, broken in 0.19.0)
+ # res = df.quantile(0.5, axis=1)
+ # res = df.quantile([0.5], axis=1)
+
+ # ints
+ df = DataFrame(columns=['a', 'b'], dtype='int64')
+
+ # FIXME (gives empty frame in 0.18.1, broken in 0.19.0)
+ # res = df.quantile(0.5)
+
+ # datetimes
+ df = DataFrame(columns=['a', 'b'], dtype='datetime64')
+
+ # FIXME (gives NaNs instead of NaT in 0.18.1 or 0.19.0)
+ # res = df.quantile(0.5, numeric_only=False)
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index 5e5e9abda1200..12cd62f8b4cc0 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -405,3 +405,11 @@ def memory_usage(f):
# high upper bound
self.assertTrue(memory_usage(unstacked) - memory_usage(df) < 2000)
+
+ def test_info_categorical(self):
+ # GH14298
+ idx = pd.CategoricalIndex(['a', 'b'])
+ df = pd.DataFrame(np.zeros((2, 2)), index=idx, columns=idx)
+
+ buf = StringIO()
+ df.info(buf=buf)
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index 21471b1883209..b839ed6331457 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -1576,11 +1576,10 @@ def test_string_index_repr(self):
# py3/py2 repr can differ because of "u" prefix
# which also affects to displayed element size
- # suppress flake8 warnings
if PY3:
coerce = lambda x: x
else:
- coerce = unicode
+ coerce = unicode # noqa
# short
idx = pd.Index(['a', 'bb', 'ccc'])
diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py
index 9f8405bcc2e1e..c76f5ff22c534 100644
--- a/pandas/tests/indexes/test_category.py
+++ b/pandas/tests/indexes/test_category.py
@@ -278,6 +278,11 @@ def test_append(self):
# invalid objects
self.assertRaises(TypeError, lambda: ci.append(Index(['a', 'd'])))
+ # GH14298 - if base object is not categorical -> coerce to object
+ result = Index(['c', 'a']).append(ci)
+ expected = Index(list('caaabbca'))
+ tm.assert_index_equal(result, expected, exact=True)
+
def test_insert(self):
ci = self.create_index()
diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py
index b04e840ffc849..68db163be6fde 100644
--- a/pandas/tests/indexes/test_datetimelike.py
+++ b/pandas/tests/indexes/test_datetimelike.py
@@ -732,30 +732,21 @@ def test_fillna_datetime64(self):
dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)
- def test_difference_of_union(self):
- # GH14323: Test taking the union of differences of an Index.
- # Difference of DatetimeIndex does not preserve frequency,
- # so a differencing operation should not retain the freq field of the
- # original index.
- i = pd.date_range("20160920", "20160925", freq="D")
-
- a = pd.date_range("20160921", "20160924", freq="D")
- expected = pd.DatetimeIndex(["20160920", "20160925"], freq=None)
- a_diff = i.difference(a)
- tm.assert_index_equal(a_diff, expected)
- tm.assert_attr_equal('freq', a_diff, expected)
-
- b = pd.date_range("20160922", "20160925", freq="D")
- b_diff = i.difference(b)
- expected = pd.DatetimeIndex(["20160920", "20160921"], freq=None)
- tm.assert_index_equal(b_diff, expected)
- tm.assert_attr_equal('freq', b_diff, expected)
-
- union_of_diff = a_diff.union(b_diff)
- expected = pd.DatetimeIndex(["20160920", "20160921", "20160925"],
- freq=None)
- tm.assert_index_equal(union_of_diff, expected)
- tm.assert_attr_equal('freq', union_of_diff, expected)
+ def test_difference_freq(self):
+ # GH14323: difference of DatetimeIndex should not preserve frequency
+
+ index = date_range("20160920", "20160925", freq="D")
+ other = date_range("20160921", "20160924", freq="D")
+ expected = DatetimeIndex(["20160920", "20160925"], freq=None)
+ idx_diff = index.difference(other)
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
+
+ other = date_range("20160922", "20160925", freq="D")
+ idx_diff = index.difference(other)
+ expected = DatetimeIndex(["20160920", "20160921"], freq=None)
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
class TestPeriodIndex(DatetimeLike, tm.TestCase):
@@ -963,29 +954,23 @@ def test_no_millisecond_field(self):
with self.assertRaises(AttributeError):
DatetimeIndex([]).millisecond
- def test_difference_of_union(self):
- # GH14323: Test taking the union of differences of an Index.
- # Difference of Period MUST preserve frequency, but the ability
- # to union results must be preserved
- i = pd.period_range("20160920", "20160925", freq="D")
-
- a = pd.period_range("20160921", "20160924", freq="D")
- expected = pd.PeriodIndex(["20160920", "20160925"], freq='D')
- a_diff = i.difference(a)
- tm.assert_index_equal(a_diff, expected)
- tm.assert_attr_equal('freq', a_diff, expected)
-
- b = pd.period_range("20160922", "20160925", freq="D")
- b_diff = i.difference(b)
- expected = pd.PeriodIndex(["20160920", "20160921"], freq='D')
- tm.assert_index_equal(b_diff, expected)
- tm.assert_attr_equal('freq', b_diff, expected)
-
- union_of_diff = a_diff.union(b_diff)
- expected = pd.PeriodIndex(["20160920", "20160921", "20160925"],
- freq='D')
- tm.assert_index_equal(union_of_diff, expected)
- tm.assert_attr_equal('freq', union_of_diff, expected)
+ def test_difference_freq(self):
+ # GH14323: difference of Period MUST preserve frequency
+ # but the ability to union results must be preserved
+
+ index = period_range("20160920", "20160925", freq="D")
+
+ other = period_range("20160921", "20160924", freq="D")
+ expected = PeriodIndex(["20160920", "20160925"], freq='D')
+ idx_diff = index.difference(other)
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
+
+ other = period_range("20160922", "20160925", freq="D")
+ idx_diff = index.difference(other)
+ expected = PeriodIndex(["20160920", "20160921"], freq='D')
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
class TestTimedeltaIndex(DatetimeLike, tm.TestCase):
@@ -1199,27 +1184,19 @@ def test_fillna_timedelta(self):
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
self.assert_index_equal(idx.fillna('x'), exp)
- def test_difference_of_union(self):
- # GH14323: Test taking the union of differences of an Index.
- # Difference of TimedeltaIndex does not preserve frequency,
- # so a differencing operation should not retain the freq field of the
- # original index.
- i = pd.timedelta_range("0 days", "5 days", freq="D")
-
- a = pd.timedelta_range("1 days", "4 days", freq="D")
- expected = pd.TimedeltaIndex(["0 days", "5 days"], freq=None)
- a_diff = i.difference(a)
- tm.assert_index_equal(a_diff, expected)
- tm.assert_attr_equal('freq', a_diff, expected)
-
- b = pd.timedelta_range("2 days", "5 days", freq="D")
- b_diff = i.difference(b)
- expected = pd.TimedeltaIndex(["0 days", "1 days"], freq=None)
- tm.assert_index_equal(b_diff, expected)
- tm.assert_attr_equal('freq', b_diff, expected)
-
- union_of_difference = a_diff.union(b_diff)
- expected = pd.TimedeltaIndex(["0 days", "1 days", "5 days"],
- freq=None)
- tm.assert_index_equal(union_of_difference, expected)
- tm.assert_attr_equal('freq', union_of_difference, expected)
+ def test_difference_freq(self):
+ # GH14323: Difference of TimedeltaIndex should not preserve frequency
+
+ index = timedelta_range("0 days", "5 days", freq="D")
+
+ other = timedelta_range("1 days", "4 days", freq="D")
+ expected = TimedeltaIndex(["0 days", "5 days"], freq=None)
+ idx_diff = index.difference(other)
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
+
+ other = timedelta_range("2 days", "5 days", freq="D")
+ idx_diff = index.difference(other)
+ expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
+ tm.assert_index_equal(idx_diff, expected)
+ tm.assert_attr_equal('freq', idx_diff, expected)
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 26d50aa55431f..38e715fce2720 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -29,12 +29,7 @@ def setUp(self):
def create_index(self):
return RangeIndex(5)
- def test_binops(self):
- ops = [operator.add, operator.sub, operator.mul, operator.floordiv,
- operator.truediv, pow]
- scalars = [-1, 1, 2]
- idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2),
- RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1)]
+ def check_binop(self, ops, scalars, idxs):
for op in ops:
for a, b in combinations(idxs, 2):
result = op(a, b)
@@ -46,6 +41,23 @@ def test_binops(self):
expected = op(Int64Index(idx), scalar)
tm.assert_index_equal(result, expected)
+ def test_binops(self):
+ ops = [operator.add, operator.sub, operator.mul, operator.floordiv,
+ operator.truediv]
+ scalars = [-1, 1, 2]
+ idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2),
+ RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1)]
+ self.check_binop(ops, scalars, idxs)
+
+ def test_binops_pow(self):
+ # later versions of numpy don't allow powers of negative integers
+ # so test separately
+ # https://github.com/numpy/numpy/pull/8127
+ ops = [pow]
+ scalars = [1, 2]
+ idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2)]
+ self.check_binop(ops, scalars, idxs)
+
def test_too_many_names(self):
def testit():
self.index.names = ["roger", "harold"]
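
The split is needed because newer NumPy raises on integer arrays raised to
negative integer powers (numpy/numpy#8127), so ``pow`` can only be exercised
with non-negative scalars. A minimal sketch of the NumPy behaviour (assumes a
NumPy recent enough to enforce the rule)::

    import numpy as np

    a = np.arange(1, 5)      # int64
    print(a ** 2)            # fine: [ 1  4  9 16]
    try:
        a ** -1              # ValueError on newer NumPy
    except ValueError as exc:
        print(exc)           # "Integers to negative integer powers ..."
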
diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py
index 7d2517987e526..76db6c90a685f 100644
--- a/pandas/tests/series/test_quantile.py
+++ b/pandas/tests/series/test_quantile.py
@@ -184,3 +184,35 @@ def test_quantile_nat(self):
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))
+
+ def test_quantile_empty(self):
+
+ # floats
+ s = Series([], dtype='float64')
+
+ res = s.quantile(0.5)
+ self.assertTrue(np.isnan(res))
+
+ res = s.quantile([0.5])
+ exp = Series([np.nan], index=[0.5])
+ tm.assert_series_equal(res, exp)
+
+ # int
+ s = Series([], dtype='int64')
+
+ res = s.quantile(0.5)
+ self.assertTrue(np.isnan(res))
+
+ res = s.quantile([0.5])
+ exp = Series([np.nan], index=[0.5])
+ tm.assert_series_equal(res, exp)
+
+ # datetime
+ s = Series([], dtype='datetime64[ns]')
+
+ res = s.quantile(0.5)
+ self.assertTrue(res is pd.NaT)
+
+ res = s.quantile([0.5])
+ exp = Series([pd.NaT], index=[0.5])
+ tm.assert_series_equal(res, exp)
diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py
index 96213a4aec34d..4645ae24684ff 100644
--- a/pandas/tseries/base.py
+++ b/pandas/tseries/base.py
@@ -100,7 +100,7 @@ def round(self, freq, *args, **kwargs):
def floor(self, freq):
return self._round(freq, np.floor)
- @Appender(_round_doc % "floor")
+ @Appender(_round_doc % "ceil")
def ceil(self, freq):
return self._round(freq, np.ceil)
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index f68750e242f1f..70e2d2c121773 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -1453,8 +1453,9 @@ def _maybe_cast_slice_bound(self, label, side, kind):
# lower, upper form the half-open interval:
# [parsed, parsed + 1 freq)
# because label may be passed to searchsorted
- # the bounds need swapped if index is reverse sorted
- if self.is_monotonic_decreasing:
+ # the bounds need swapped if index is reverse sorted and has a
+ # length (is_monotonic_decreasing gives True for empty index)
+ if self.is_monotonic_decreasing and len(self):
return upper if side == 'left' else lower
return lower if side == 'left' else upper
else:
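
The extra ``len(self)`` guard is needed because a zero-length index is
vacuously monotonic in both directions, so ``is_monotonic_decreasing`` alone
would wrongly swap the bounds for an empty ``DatetimeIndex``. A quick
illustration::

    import pandas as pd

    empty = pd.DatetimeIndex([])
    # both are True for an empty index -- hence the len(self) check
    print(empty.is_monotonic_increasing, empty.is_monotonic_decreasing)
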
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index c13805d383e5d..aa8a5d10cd9d3 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -3911,6 +3911,18 @@ def test_slice_with_zero_step_raises(self):
self.assertRaisesRegexp(ValueError, 'slice step cannot be zero',
lambda: ts.ix[::0])
+ def test_slice_bounds_empty(self):
+ # GH 14354
+ empty_idx = DatetimeIndex(freq='1H', periods=0, end='2015')
+
+ right = empty_idx._maybe_cast_slice_bound('2015-01-02', 'right', 'loc')
+ exp = Timestamp('2015-01-02 23:59:59.999999999')
+ self.assertEqual(right, exp)
+
+ left = empty_idx._maybe_cast_slice_bound('2015-01-02', 'left', 'loc')
+ exp = Timestamp('2015-01-02 00:00:00')
+ self.assertEqual(left, exp)
+
class TestDatetime64(tm.TestCase):
"""
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 57bb01e5e0406..05517bf6cf53a 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -35,7 +35,7 @@
from pandas.core.algorithms import take_1d
import pandas.compat as compat
-from pandas.compat import(
+from pandas.compat import (
filter, map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter,
raise_with_traceback, httplib, is_platform_windows, is_platform_32bit,
PY3
diff --git a/setup.py b/setup.py
index 3f8667cd6fe42..a17dd502d7706 100755
--- a/setup.py
+++ b/setup.py
@@ -85,7 +85,11 @@ def is_platform_mac():
try:
if not _CYTHON_INSTALLED:
raise ImportError('No supported version of Cython installed.')
- from Cython.Distutils import build_ext as _build_ext
+ try:
+ from Cython.Distutils.old_build_ext import old_build_ext as _build_ext
+ except ImportError:
+ # Pre 0.25
+ from Cython.Distutils import build_ext as _build_ext
cython = True
except ImportError:
cython = False