Skip to content

Commit 27df31a

Browse files
authored
Merge branch 'master' into period_immutable
2 parents b86a7b9 + 4e9c0d1 commit 27df31a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+2310
-1935
lines changed

appveyor.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ install:
7272
- cmd: conda info -a
7373

7474
# create our env
75-
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist
75+
# Quote the version spec: cmd.exe would otherwise treat ">" as output redirection.
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython "pytest>=3.1.0" pytest-xdist
7676
- cmd: activate pandas
7777
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
7878
- cmd: echo "installing requirements from %REQ%"

asv_bench/benchmarks/timeseries.py

+14
Original file line numberDiff line numberDiff line change
@@ -510,3 +510,17 @@ def time_begin_incr_rng(self):
510510

511511
def time_begin_decr_rng(self):
512512
self.rng - self.semi_month_begin
513+
514+
515+
class DatetimeAccessor(object):
516+
def setup(self):
517+
self.N = 100000
518+
self.series = pd.Series(
519+
pd.date_range(start='1/1/2000', periods=self.N, freq='T')
520+
)
521+
522+
def time_dt_accessor(self):
523+
self.series.dt
524+
525+
def time_dt_accessor_normalize(self):
526+
self.series.dt.normalize()

ci/install_circle.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ fi
6464
# create envbuild deps
6565
echo "[create env: ${REQ_BUILD}]"
6666
time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
67-
time conda install -n pandas pytest || exit 1
67+
# Quote the version spec so the shell does not parse ">" as a redirect to a file named "=3.1.0".
time conda install -n pandas "pytest>=3.1.0" || exit 1
6868

6969
source activate pandas
7070

ci/install_travis.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ if [ -e ${REQ} ]; then
103103
time bash $REQ || exit 1
104104
fi
105105

106-
time conda install -n pandas pytest
106+
# Quote the version spec so the shell does not parse ">" as a redirect to a file named "=3.1.0".
time conda install -n pandas "pytest>=3.1.0"
107107
time pip install pytest-xdist
108108

109109
if [ "$LINT" ]; then

ci/requirements_all.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
pytest
1+
pytest>=3.1.0
22
pytest-cov
33
pytest-xdist
44
flake8

ci/requirements_dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ python-dateutil
22
pytz
33
numpy
44
cython
5-
pytest
5+
pytest>=3.1.0
66
pytest-cov
77
flake8

doc/source/contributing.rst

+20-4
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,10 @@ Like many packages, *pandas* uses `pytest
598598
extensions in `numpy.testing
599599
<http://docs.scipy.org/doc/numpy/reference/routines.testing.html>`_.
600600

601+
.. note::
602+
603+
The earliest supported pytest version is 3.1.0.
604+
601605
Writing tests
602606
~~~~~~~~~~~~~
603607

@@ -654,7 +658,9 @@ Using ``pytest``
654658
Here is an example of a self-contained set of tests that illustrate multiple features that we like to use.
655659

656660
- functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters
661+
- ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``.
657662
- using ``parametrize``: allow testing of multiple cases
663+
- to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used
658664
- ``fixture``, code for object construction, on a per-test basis
659665
- using bare ``assert`` for scalars and truth-testing
660666
- ``tm.assert_series_equal`` (and its counterpart ``tm.assert_frame_equal``), for pandas object comparisons.
@@ -673,6 +679,13 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
673679
def test_dtypes(dtype):
674680
assert str(np.dtype(dtype)) == dtype
675681
682+
@pytest.mark.parametrize('dtype', ['float32',
683+
pytest.param('int16', marks=pytest.mark.skip),
684+
pytest.param('int32',
685+
marks=pytest.mark.xfail(reason='to show how it works'))])
686+
def test_mark(dtype):
687+
assert str(np.dtype(dtype)) == 'float32'
688+
676689
@pytest.fixture
677690
def series():
678691
return pd.Series([1, 2, 3])
@@ -695,13 +708,16 @@ A test run of this yields
695708
696709
(pandas) bash-3.2$ pytest test_cool_feature.py -v
697710
=========================== test session starts ===========================
698-
platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
699-
collected 8 items
711+
platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0
712+
collected 11 items
700713
701714
tester.py::test_dtypes[int8] PASSED
702715
tester.py::test_dtypes[int16] PASSED
703716
tester.py::test_dtypes[int32] PASSED
704717
tester.py::test_dtypes[int64] PASSED
718+
tester.py::test_mark[float32] PASSED
719+
tester.py::test_mark[int16] SKIPPED
720+
tester.py::test_mark[int32] xfail
705721
tester.py::test_series[int8] PASSED
706722
tester.py::test_series[int16] PASSED
707723
tester.py::test_series[int32] PASSED
@@ -714,8 +730,8 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
714730
715731
(pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8
716732
=========================== test session starts ===========================
717-
platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
718-
collected 8 items
733+
platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0
734+
collected 11 items
719735
720736
test_cool_feature.py::test_dtypes[int8] PASSED
721737
test_cool_feature.py::test_series[int8] PASSED

doc/source/io.rst

+7
Original file line numberDiff line numberDiff line change
@@ -2020,6 +2020,13 @@ into a flat table.
20202020
.. ipython:: python
20212021
20222022
from pandas.io.json import json_normalize
2023+
data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
2024+
{'name': {'given': 'Mose', 'family': 'Regner'}},
2025+
{'id': 2, 'name': 'Faye Raker'}]
2026+
json_normalize(data)
2027+
2028+
.. ipython:: python
2029+
20232030
data = [{'state': 'Florida',
20242031
'shortname': 'FL',
20252032
'info': {

doc/source/whatsnew/v0.21.0.txt

+12-2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ Other Enhancements
126126
- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
127127
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
128128
- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
129+
- ``read_*`` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
130+
- :func:`read_sas` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
129131

130132
.. _whatsnew_0210.api_breaking:
131133

@@ -275,7 +277,7 @@ Other API Changes
275277
- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
276278
- Moved definition of ``MergeError`` to the ``pandas.errors`` module.
277279
- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`)
278-
280+
- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`)
279281

280282
.. _whatsnew_0210.deprecations:
281283

@@ -306,6 +308,7 @@ Performance Improvements
306308
~~~~~~~~~~~~~~~~~~~~~~~~
307309

308310
- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`)
311+
- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`)
309312

310313

311314
.. _whatsnew_0210.bug_fixes:
@@ -317,8 +320,11 @@ Bug Fixes
317320
Conversion
318321
^^^^^^^^^^
319322

320-
- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`)
323+
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
321324
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
325+
- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`)
326+
- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`)
327+
- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)
322328

323329

324330
Indexing
@@ -379,6 +385,7 @@ Reshaping
379385
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
380386
- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`)
381387
- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
388+
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
382389

383390
Numeric
384391
^^^^^^^
@@ -388,6 +395,9 @@ Numeric
388395
Categorical
389396
^^^^^^^^^^^
390397
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
398+
- Bug in the categorical constructor with empty values and categories causing
399+
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
400+
``Index`` with object dtype (:issue:`17248`)
391401

392402

393403
Other

pandas/_libs/hashtable.pyx

+21-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,27 @@
22

33
from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check
44

5-
from khash cimport *
5+
from khash cimport (
6+
khiter_t,
7+
8+
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
9+
kh_get_str, kh_destroy_str, kh_resize_str,
10+
11+
kh_put_strbox, kh_get_strbox, kh_init_strbox,
12+
13+
kh_int64_t, kh_init_int64, kh_resize_int64, kh_destroy_int64,
14+
kh_get_int64, kh_exist_int64, kh_put_int64,
15+
16+
kh_float64_t, kh_exist_float64, kh_put_float64, kh_init_float64,
17+
kh_get_float64, kh_destroy_float64, kh_resize_float64,
18+
19+
kh_resize_uint64, kh_exist_uint64, kh_destroy_uint64, kh_put_uint64,
20+
kh_get_uint64, kh_init_uint64,
21+
22+
kh_destroy_pymap, kh_exist_pymap, kh_init_pymap, kh_get_pymap,
23+
kh_put_pymap, kh_resize_pymap)
24+
25+
626
from numpy cimport *
727

828
from libc.stdlib cimport malloc, free

pandas/_libs/index.pyx

+7-11
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# cython: profile=False
22

3-
from numpy cimport ndarray
4-
5-
from numpy cimport (float64_t, int32_t, int64_t, uint8_t,
3+
from numpy cimport (ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t,
64
NPY_DATETIME, NPY_TIMEDELTA)
75
cimport cython
86

@@ -16,7 +14,9 @@ cimport util
1614
import numpy as np
1715

1816
cimport tslib
19-
from hashtable cimport *
17+
18+
from hashtable cimport HashTable
19+
2020
from pandas._libs import tslib, algos, hashtable as _hash
2121
from pandas._libs.tslib import Timestamp, Timedelta
2222
from datetime import datetime, timedelta
@@ -32,13 +32,9 @@ cdef extern from "datetime.h":
3232

3333
cdef int64_t iNaT = util.get_nat()
3434

35-
try:
36-
from dateutil.tz import tzutc as _du_utc
37-
import pytz
38-
UTC = pytz.utc
39-
have_pytz = True
40-
except ImportError:
41-
have_pytz = False
35+
from dateutil.tz import tzutc as _du_utc
36+
import pytz
37+
UTC = pytz.utc
4238

4339
PyDateTime_IMPORT
4440

pandas/_libs/join_func_helper.pxi.in

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# asof_join_by
1010
#----------------------------------------------------------------------
1111

12+
from hashtable cimport PyObjectHashTable, UInt64HashTable, Int64HashTable
13+
1214
{{py:
1315

1416
# table_type, by_dtype
@@ -23,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
2325
}}
2426

2527

26-
from hashtable cimport *
2728

2829
{{for table_type, by_dtype in by_dtypes}}
2930
{{for on_dtype in on_dtypes}}

pandas/_libs/lib.pyx

+14-10
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,14 @@ from numpy cimport *
1010

1111
np.import_array()
1212

13-
cdef extern from "numpy/arrayobject.h":
14-
cdef enum NPY_TYPES:
15-
NPY_intp "NPY_INTP"
16-
1713
from libc.stdlib cimport malloc, free
1814

19-
from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem,
20-
PyDict_Contains, PyDict_Keys,
21-
Py_INCREF, PyTuple_SET_ITEM,
15+
from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
2216
PyList_Check, PyFloat_Check,
2317
PyString_Check,
2418
PyBytes_Check,
25-
PyTuple_SetItem,
19+
PyUnicode_Check,
2620
PyTuple_New,
27-
PyObject_SetAttrString,
2821
PyObject_RichCompareBool,
2922
PyBytes_GET_SIZE,
3023
PyUnicode_GET_SIZE,
@@ -55,7 +48,18 @@ cdef double NAN = nan
5548
from datetime import datetime as pydatetime
5649

5750
# this is our tseries.pxd
58-
from datetime cimport *
51+
from datetime cimport (
52+
get_timedelta64_value, get_datetime64_value,
53+
npy_timedelta, npy_datetime,
54+
PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check,
55+
PyDateTime_GET_YEAR,
56+
PyDateTime_GET_MONTH,
57+
PyDateTime_GET_DAY,
58+
PyDateTime_DATE_GET_HOUR,
59+
PyDateTime_DATE_GET_MINUTE,
60+
PyDateTime_DATE_GET_SECOND,
61+
PyDateTime_IMPORT)
62+
5963

6064
from tslib cimport (convert_to_tsobject, convert_to_timedelta64,
6165
_check_all_nulls)

pandas/_libs/parsers.pyx

+9-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ cdef extern from "stdlib.h":
3232
cimport cython
3333
cimport numpy as cnp
3434

35-
from numpy cimport ndarray, uint8_t, uint64_t
35+
from numpy cimport ndarray, uint8_t, uint64_t, int64_t
3636

3737
import numpy as np
3838
cimport util
@@ -57,7 +57,14 @@ import os
5757

5858
cnp.import_array()
5959

60-
from khash cimport *
60+
from khash cimport (
61+
khiter_t,
62+
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
63+
kh_get_str, kh_destroy_str,
64+
kh_float64_t, kh_get_float64, kh_destroy_float64,
65+
kh_put_float64, kh_init_float64,
66+
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
67+
kh_destroy_strbox)
6168

6269
import sys
6370

pandas/_libs/period.pyx

+13-7
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta
22
import operator
33

44
from cpython cimport (
5+
PyUnicode_Check,
56
PyObject_RichCompareBool,
67
Py_EQ, Py_NE)
78

@@ -18,21 +19,29 @@ from pandas import compat
1819
from pandas.compat import PY2
1920

2021
cimport cython
21-
from datetime cimport *
22+
23+
from datetime cimport (
24+
is_leapyear,
25+
PyDateTime_IMPORT,
26+
pandas_datetimestruct,
27+
pandas_datetimestruct_to_datetime,
28+
pandas_datetime_to_datetimestruct,
29+
PANDAS_FR_ns,
30+
INT32_MIN)
31+
2232

2333
cimport util, lib
2434

2535
from lib cimport is_null_datetimelike, is_period
2636
from pandas._libs import tslib, lib
2737
from pandas._libs.tslib import (Timedelta, Timestamp, iNaT,
28-
NaT, have_pytz, _get_utcoffset)
38+
NaT, _get_utcoffset)
2939
from tslib cimport (
3040
maybe_get_tz,
3141
_is_utc,
3242
_is_tzlocal,
3343
_get_dst_info,
34-
_nat_scalar_rules,
35-
)
44+
_nat_scalar_rules)
3645

3746
from pandas.tseries import offsets
3847
from pandas.core.tools.datetimes import parse_time_string
@@ -611,9 +620,6 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
611620
ndarray[int64_t] trans, deltas, pos
612621
pandas_datetimestruct dts
613622

614-
if not have_pytz:
615-
raise Exception('Could not find pytz module')
616-
617623
if _is_utc(tz):
618624
for i in range(n):
619625
if stamps[i] == NPY_NAT:

0 commit comments

Comments
 (0)