Skip to content

Commit 0b9f273

Browse files
committed
Merge remote-tracking branch 'upstream/master' into windows_crlf
2 parents aa55c9a + 996f361 commit 0b9f273

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+1687
-1158
lines changed

.coveragerc

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ exclude_lines =
1717
# Don't complain if tests don't hit defensive assertion code:
1818
raise AssertionError
1919
raise NotImplementedError
20+
AbstractMethodError
2021

2122
# Don't complain if non-runnable code isn't run:
2223
if 0:

ci/appveyor-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- matplotlib
1414
- numexpr
1515
- numpy=1.12*
16-
- openpyxl
16+
- openpyxl=2.5.5
1717
- pytables
1818
- python=2.7.*
1919
- pytz

ci/appveyor-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
- matplotlib
1111
- numexpr
1212
- numpy=1.14*
13-
- openpyxl
13+
- openpyxl=2.5.5
1414
- pyarrow
1515
- pytables
1616
- python-dateutil

ci/circle-27-compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies:
88
- jinja2=2.8
99
- numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
1010
- numpy=1.9.3
11-
- openpyxl
11+
- openpyxl=2.5.5
1212
- psycopg2
1313
- pytables=3.2.2
1414
- python-dateutil=2.5.0

ci/circle-36-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- nomkl
1414
- numexpr
1515
- numpy
16-
- openpyxl
16+
- openpyxl=2.5.5
1717
- psycopg2
1818
- pymysql
1919
- pytables

ci/circle-36-locale_slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dependencies:
1414
- nomkl
1515
- numexpr
1616
- numpy
17-
- openpyxl
17+
- openpyxl=2.5.5
1818
- psycopg2
1919
- pymysql
2020
- pytables

ci/requirements-optional-conda.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ lxml
1212
matplotlib
1313
nbsphinx
1414
numexpr
15-
openpyxl
15+
openpyxl=2.5.5
1616
pyarrow
1717
pymysql
1818
pytables

ci/requirements-optional-pip.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ lxml
1414
matplotlib
1515
nbsphinx
1616
numexpr
17-
openpyxl
17+
openpyxl=2.5.5
1818
pyarrow
1919
pymysql
2020
tables
@@ -28,4 +28,4 @@ statsmodels
2828
xarray
2929
xlrd
3030
xlsxwriter
31-
xlwt
31+
xlwt

ci/travis-35-osx.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- nomkl
1313
- numexpr
1414
- numpy=1.10.4
15-
- openpyxl
15+
- openpyxl=2.5.5
1616
- pytables
1717
- python=3.5*
1818
- pytz

ci/travis-36-doc.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ dependencies:
2222
- notebook
2323
- numexpr
2424
- numpy=1.13*
25-
- openpyxl
25+
- openpyxl=2.5.5
2626
- pandoc
2727
- pyqt
2828
- pytables

ci/travis-36-slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
- matplotlib
1111
- numexpr
1212
- numpy
13-
- openpyxl
13+
- openpyxl=2.5.5
1414
- patsy
1515
- psycopg2
1616
- pymysql

ci/travis-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- nomkl
1919
- numexpr
2020
- numpy
21-
- openpyxl
21+
- openpyxl=2.5.5
2222
- psycopg2
2323
- pyarrow
2424
- pymysql

doc/source/ecosystem.rst

+28-11
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ development to remain focused around it's original requirements.
1414

1515
This is an inexhaustive list of projects that build on pandas in order to provide
1616
tools in the PyData space. For a list of projects that depend on pandas,
17-
see the
17+
see the
1818
`libraries.io usage page for pandas <https://libraries.io/pypi/pandas/usage>`_
1919
or `search pypi for pandas <https://pypi.org/search/?q=pandas>`_.
2020

@@ -44,7 +44,7 @@ ML pipeline.
4444
`Featuretools <https://github.com/featuretools/featuretools/>`__
4545
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4646

47-
Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community.
47+
Featuretools is a Python library for automated feature engineering built on top of pandas. It excels at transforming temporal and relational datasets into feature matrices for machine learning using reusable feature engineering "primitives". Users can contribute their own primitives in Python and share them with the rest of the community.
4848

4949
.. _ecosystem.visualization:
5050

@@ -149,13 +149,30 @@ for pandas ``display.`` settings.
149149
qgrid is "an interactive grid for sorting and filtering
150150
DataFrames in IPython Notebook" built with SlickGrid.
151151

152-
`Spyder <https://github.com/spyder-ide/spyder/>`__
153-
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
152+
`Spyder <https://www.spyder-ide.org/>`__
153+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
154+
155+
Spyder is a cross-platform PyQt-based IDE combining the editing, analysis,
156+
debugging and profiling functionality of a software development tool with the
157+
data exploration, interactive execution, deep inspection and rich visualization
158+
capabilities of a scientific environment like MATLAB or Rstudio.
159+
160+
Its `Variable Explorer <https://docs.spyder-ide.org/variableexplorer.html>`__
161+
allows users to view, manipulate and edit pandas ``Index``, ``Series``,
162+
and ``DataFrame`` objects like a "spreadsheet", including copying and modifying
163+
values, sorting, displaying a "heatmap", converting data types and more.
164+
Pandas objects can also be renamed, duplicated, new columns added,
165+
copied/pasted to/from the clipboard (as TSV), and saved/loaded to/from a file.
166+
Spyder can also import data from a variety of plain text and binary files
167+
or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
168+
169+
Most pandas classes, methods and data attributes can be autocompleted in
170+
Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
171+
`IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
172+
and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
173+
and render Numpydoc documentation on pandas objects in rich text with Sphinx
174+
both automatically and on-demand.
154175

155-
Spyder is a cross-platform Qt-based open-source Python IDE with
156-
editing, testing, debugging, and introspection features.
157-
Spyder can now introspect and display Pandas DataFrames and show
158-
both "column wise min/max and global min/max coloring."
159176

160177
.. _ecosystem.api:
161178

@@ -205,12 +222,12 @@ This package requires valid credentials for this API (non free).
205222
pandaSDMX is a library to retrieve and acquire statistical data
206223
and metadata disseminated in
207224
`SDMX <http://www.sdmx.org>`_ 2.1, an ISO-standard
208-
widely used by institutions such as statistics offices, central banks,
209-
and international organisations. pandaSDMX can expose datasets and related
225+
widely used by institutions such as statistics offices, central banks,
226+
and international organisations. pandaSDMX can expose datasets and related
210227
structural metadata including data flows, code-lists,
211228
and data structure definitions as pandas Series
212229
or MultiIndexed DataFrames.
213-
230+
214231
`fredapi <https://github.com/mortada/fredapi>`__
215232
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
216233
fredapi is a Python interface to the `Federal Reserve Economic Data (FRED) <http://research.stlouisfed.org/fred2/>`__

doc/source/timeseries.rst

+24-2
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,7 @@ In practice this becomes very cumbersome because we often need a very long
369369
index with a large number of timestamps. If we need timestamps on a regular
370370
frequency, we can use the :func:`date_range` and :func:`bdate_range` functions
371371
to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a
372-
**calendar day** while the default for ``bdate_range`` is a **business day**:
372+
**day** while the default for ``bdate_range`` is a **business day**:
373373

374374
.. ipython:: python
375375
@@ -886,6 +886,27 @@ normalized after the function is applied.
886886
hour.apply(pd.Timestamp('2014-01-01 23:00'))
887887
888888
889+
.. _timeseries.dayvscalendarday:
890+
891+
Day vs. CalendarDay
892+
~~~~~~~~~~~~~~~~~~~
893+
894+
:class:`Day` (``'D'``) is a timedelta-like offset that respects absolute time
895+
arithmetic and is an alias for 24 :class:`Hour`. This offset is the default
896+
argument to many pandas time-related functions like :func:`date_range` and :func:`timedelta_range`.
897+
898+
:class:`CalendarDay` (``'CD'``) is a relativedelta-like offset that respects
899+
calendar time arithmetic. :class:`CalendarDay` is useful for preserving calendar day
900+
semantics for datetimes that have daylight savings transitions, i.e. :class:`CalendarDay`
901+
will preserve the hour before the daylight savings transition.
902+
903+
.. ipython:: python
904+
905+
ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
906+
ts + pd.offsets.Day(1)
907+
ts + pd.offsets.CalendarDay(1)
908+
909+
889910
Parametric Offsets
890911
~~~~~~~~~~~~~~~~~~
891912

@@ -1176,7 +1197,8 @@ frequencies. We will refer to these aliases as *offset aliases*.
11761197

11771198
"B", "business day frequency"
11781199
"C", "custom business day frequency"
1179-
"D", "calendar day frequency"
1200+
"D", "day frequency"
1201+
"CD", "calendar day frequency"
11801202
"W", "weekly frequency"
11811203
"M", "month end frequency"
11821204
"SM", "semi-month end frequency (15th and end of month)"

doc/source/whatsnew/v0.23.5.txt

+3
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Fixed Regressions
2323
- Constructing a DataFrame with an index argument that wasn't already an
2424
instance of :class:`~pandas.core.Index` was broken in `4efb39f
2525
<https://github.com/pandas-dev/pandas/commit/4efb39f01f5880122fa38d91e12d217ef70fad9e>`_ (:issue:`22227`).
26+
- Calling :meth:`DataFrameGroupBy.rank` and :meth:`SeriesGroupBy.rank` with empty groups
27+
and ``pct=True`` was raising a ``ZeroDivisionError`` due to `c1068d9
28+
<https://github.com/pandas-dev/pandas/commit/c1068d9d242c22cb2199156f6fb82eb5759178ae>`_ (:issue:`22519`)
2629
-
2730
-
2831

doc/source/whatsnew/v0.24.0.txt

+49-2
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,46 @@ that the dates have been converted to UTC
286286
.. ipython:: python
287287
pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)
288288

289+
.. _whatsnew_0240.api_breaking.calendarday:
290+
291+
CalendarDay Offset
292+
^^^^^^^^^^^^^^^^^^
293+
294+
:class:`Day` and associated frequency alias ``'D'`` were documented to represent
295+
a calendar day; however, arithmetic and operations with :class:`Day` sometimes
296+
respected absolute time instead (i.e. ``Day(n)`` acted identically to ``Timedelta(days=n)``).
297+
298+
*Previous Behavior*:
299+
300+
.. code-block:: ipython
301+
302+
303+
In [2]: ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
304+
305+
# Respects calendar arithmetic
306+
In [3]: pd.date_range(start=ts, freq='D', periods=3)
307+
Out[3]:
308+
DatetimeIndex(['2016-10-30 00:00:00+03:00', '2016-10-31 00:00:00+02:00',
309+
'2016-11-01 00:00:00+02:00'],
310+
dtype='datetime64[ns, Europe/Helsinki]', freq='D')
311+
312+
# Respects absolute arithmetic
313+
In [4]: ts + pd.tseries.frequencies.to_offset('D')
314+
Out[4]: Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')
315+
316+
:class:`CalendarDay` and associated frequency alias ``'CD'`` are now available
317+
and respect calendar day arithmetic while :class:`Day` and frequency alias ``'D'``
318+
will now respect absolute time (:issue:`22274`, :issue:`20596`, :issue:`16980`, :issue:`8774`)
319+
See the :ref:`documentation here <timeseries.dayvscalendarday>` for more information.
320+
321+
Addition with :class:`CalendarDay` across a daylight savings time transition:
322+
323+
.. ipython:: python
324+
325+
ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki')
326+
ts + pd.offsets.Day(1)
327+
ts + pd.offsets.CalendarDay(1)
328+
289329
.. _whatsnew_0240.api_breaking.period_end_time:
290330

291331
Time values in ``dt.end_time`` and ``to_timestamp(how='end')``
@@ -486,6 +526,7 @@ Datetimelike API Changes
486526
- :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`)
487527
- :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`)
488528
- :func:`cut` and :func:`qcut` now return :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`)
529+
- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`)
489530

490531
.. _whatsnew_0240.api.other:
491532

@@ -528,7 +569,8 @@ Removal of prior version deprecations/changes
528569
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
529570
- Removal of the previously deprecated module ``pandas.core.datetools`` (:issue:`14105`, :issue:`14094`)
530571
- Strings passed into :meth:`DataFrame.groupby` that refer to both column and index levels will raise a ``ValueError`` (:issue:`14432`)
531-
-
572+
- :meth:`Index.repeat` and :meth:`MultiIndex.repeat` have renamed the ``n`` argument to ``repeats`` (:issue:`14645`)
573+
- Removal of the previously deprecated ``as_indexer`` keyword completely from ``str.match()`` (:issue:`22356`,:issue:`6581`)
532574

533575
.. _whatsnew_0240.performance:
534576

@@ -586,6 +628,8 @@ Datetimelike
586628
- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`)
587629
- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`)
588630
- Bug in :class:`DatetimeIndex` subtraction that incorrectly failed to raise ``OverflowError`` (:issue:`22492`, :issue:`22508`)
631+
- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`)
632+
-
589633

590634
Timedelta
591635
^^^^^^^^^
@@ -594,7 +638,7 @@ Timedelta
594638
- Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`)
595639
- Bug in :class:`Series` with numeric dtype when adding or subtracting an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`)
596640
- Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`)
597-
-
641+
- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`)
598642
-
599643
-
600644

@@ -669,6 +713,7 @@ Indexing
669713
- Bug where indexing with a Numpy array containing negative values would mutate the indexer (:issue:`21867`)
670714
- Bug where mixed indexes wouldn't allow integers for ``.at`` (:issue:`19860`)
671715
- ``Float64Index.get_loc`` now raises ``KeyError`` when boolean key passed. (:issue:`19087`)
716+
- Bug in :meth:`DataFrame.loc` when indexing with an :class:`IntervalIndex` (:issue:`19977`)
672717

673718
Missing
674719
^^^^^^^
@@ -709,6 +754,8 @@ Groupby/Resample/Rolling
709754
- Multiple bugs in :func:`pandas.core.Rolling.min` with ``closed='left'`` and a
710755
datetime-like index leading to incorrect results and also segfault. (:issue:`21704`)
711756
- Bug in :meth:`Resampler.apply` when passing positional arguments to applied func (:issue:`14615`).
757+
- Bug in :meth:`Series.resample` when passing ``numpy.timedelta64`` to ``loffset`` kwarg (:issue:`7687`).
758+
- Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`).
712759

713760
Sparse
714761
^^^^^^

pandas/_libs/algos_common_helper.pxi.in

+4-4
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def get_dispatch(dtypes):
4545

4646
@cython.wraparound(False)
4747
@cython.boundscheck(False)
48-
cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):
48+
def map_indices_{{name}}(ndarray[{{c_type}}] index):
4949
"""
5050
Produce a dict mapping the values of the input array to their respective
5151
locations.
@@ -542,7 +542,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
542542
cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num
543543

544544

545-
cpdef ensure_platform_int(object arr):
545+
def ensure_platform_int(object arr):
546546
# GH3033, GH1392
547547
# platform int is the size of the int pointer, e.g. np.intp
548548
if util.is_array(arr):
@@ -554,7 +554,7 @@ cpdef ensure_platform_int(object arr):
554554
return np.array(arr, dtype=np.intp)
555555

556556

557-
cpdef ensure_object(object arr):
557+
def ensure_object(object arr):
558558
if util.is_array(arr):
559559
if (<ndarray> arr).descr.type_num == NPY_OBJECT:
560560
return arr
@@ -587,7 +587,7 @@ def get_dispatch(dtypes):
587587

588588
{{for name, c_type, dtype in get_dispatch(dtypes)}}
589589

590-
cpdef ensure_{{name}}(object arr, copy=True):
590+
def ensure_{{name}}(object arr, copy=True):
591591
if util.is_array(arr):
592592
if (<ndarray> arr).descr.type_num == NPY_{{c_type}}:
593593
return arr

pandas/_libs/groupby.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil:
6767
return result
6868

6969

70+
# TODO: Is this redundant with algos.kth_smallest?
7071
cdef inline float64_t kth_smallest_c(float64_t* a,
7172
Py_ssize_t k,
7273
Py_ssize_t n) nogil:

pandas/_libs/groupby_helper.pxi.in

+6-1
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,12 @@ def group_rank_{{name}}(ndarray[float64_t, ndim=2] out,
584584

585585
if pct:
586586
for i in range(N):
587-
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
587+
# We don't include NaN values in percentage
588+
# rankings, so we assign them percentages of NaN.
589+
if out[i, 0] != out[i, 0] or out[i, 0] == NAN:
590+
out[i, 0] = NAN
591+
else:
592+
out[i, 0] = out[i, 0] / grp_sizes[i, 0]
588593
{{endif}}
589594
{{endfor}}
590595

0 commit comments

Comments
 (0)