Skip to content

Commit 6267900

Browse files
authored
Merge pull request #253 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents f5753fa + 08d296f commit 6267900

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+2195
-1965
lines changed

.github/workflows/ci.yml

-4
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@ jobs:
3232
with:
3333
fetch-depth: 0
3434

35-
- name: Looking for unwanted patterns
36-
run: ci/code_checks.sh patterns
37-
if: always()
38-
3935
- name: Cache conda
4036
uses: actions/cache@v2
4137
with:

.pre-commit-config.yaml

+22
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,29 @@ repos:
102102
# Incorrect code-block / IPython directives
103103
|\.\.\ code-block\ ::
104104
|\.\.\ ipython\ ::
105+
106+
# Check for deprecated messages without sphinx directive
107+
|(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)
105108
types_or: [python, cython, rst]
109+
- id: incorrect-backticks
110+
name: Check for backticks incorrectly rendering because of missing spaces
111+
language: pygrep
112+
entry: '[a-zA-Z0-9]\`\`?[a-zA-Z0-9]'
113+
types: [rst]
114+
files: ^doc/source/
115+
- id: seed-check-asv
116+
name: Check for unnecessary random seeds in asv benchmarks
117+
language: pygrep
118+
entry: 'np\.random\.seed'
119+
files: ^asv_bench/benchmarks
120+
exclude: ^asv_bench/benchmarks/pandas_vb_common\.py
121+
- id: invalid-ea-testing
122+
name: Check for invalid EA testing
123+
language: pygrep
124+
entry: 'tm\.assert_(series|frame)_equal'
125+
files: ^pandas/tests/extension/base
126+
types: [python]
127+
exclude: ^pandas/tests/extension/base/base\.py
106128
- id: pip-to-conda
107129
name: Generate pip dependency from conda
108130
description: This hook checks if the conda environment.yml and requirements-dev.txt are equal

ci/code_checks.sh

+2-25
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,13 @@
1111
# Usage:
1212
# $ ./ci/code_checks.sh # run all checks
1313
# $ ./ci/code_checks.sh lint # run linting only
14-
# $ ./ci/code_checks.sh patterns # check for patterns that should not exist
1514
# $ ./ci/code_checks.sh code # checks on imported code
1615
# $ ./ci/code_checks.sh doctests # run doctests
1716
# $ ./ci/code_checks.sh docstrings # validate docstring errors
1817
# $ ./ci/code_checks.sh typing # run static type analysis
1918

20-
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
21-
{ echo "Unknown command $1. Usage: $0 [lint|patterns|code|doctests|docstrings|typing]"; exit 9999; }
19+
[[ -z "$1" || "$1" == "lint" || "$1" == "code" || "$1" == "doctests" || "$1" == "docstrings" || "$1" == "typing" ]] || \
20+
{ echo "Unknown command $1. Usage: $0 [lint|code|doctests|docstrings|typing]"; exit 9999; }
2221

2322
BASE_DIR="$(dirname $0)/.."
2423
RET=0
@@ -58,28 +57,6 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5857

5958
fi
6059

61-
### PATTERNS ###
62-
if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
63-
64-
# Check for the following code in the extension array base tests: `tm.assert_frame_equal` and `tm.assert_series_equal`
65-
MSG='Check for invalid EA testing' ; echo $MSG
66-
invgrep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
67-
RET=$(($RET + $?)) ; echo $MSG "DONE"
68-
69-
MSG='Check for deprecated messages without sphinx directive' ; echo $MSG
70-
invgrep -R --include="*.py" --include="*.pyx" -E "(DEPRECATED|DEPRECATE|Deprecated)(:|,|\.)" pandas
71-
RET=$(($RET + $?)) ; echo $MSG "DONE"
72-
73-
MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
74-
invgrep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
75-
RET=$(($RET + $?)) ; echo $MSG "DONE"
76-
77-
MSG='Check for unnecessary random seeds in asv benchmarks' ; echo $MSG
78-
invgrep -R --exclude pandas_vb_common.py -E 'np.random.seed' asv_bench/benchmarks/
79-
RET=$(($RET + $?)) ; echo $MSG "DONE"
80-
81-
fi
82-
8360
### CODE ###
8461
if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then
8562

doc/source/getting_started/install.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ cross platform distribution for data analysis and scientific computing.
1212
This is the recommended installation method for most users.
1313

1414
Instructions for installing from source,
15-
`PyPI <https://pypi.org/project/pandas>`__, `ActivePython <https://www.activestate.com/activepython/downloads>`__, various Linux distributions, or a
15+
`PyPI <https://pypi.org/project/pandas>`__, `ActivePython <https://www.activestate.com/products/python/downloads/>`__, various Linux distributions, or a
1616
`development version <https://github.com/pandas-dev/pandas>`__ are also provided.
1717

1818
.. _install.version:
@@ -47,7 +47,7 @@ rest of the `SciPy <https://scipy.org/>`__ stack without needing to install
4747
anything else, and without needing to wait for any software to be compiled.
4848

4949
Installation instructions for `Anaconda <https://docs.continuum.io/anaconda/>`__
50-
`can be found here <https://docs.continuum.io/anaconda/install.html>`__.
50+
`can be found here <https://docs.continuum.io/anaconda/install/>`__.
5151

5252
A full list of the packages available as part of the
5353
`Anaconda <https://docs.continuum.io/anaconda/>`__ distribution

doc/source/getting_started/overview.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ and :class:`DataFrame` (2-dimensional), handle the vast majority of typical use
2929
cases in finance, statistics, social science, and many areas of
3030
engineering. For R users, :class:`DataFrame` provides everything that R's
3131
``data.frame`` provides and much more. pandas is built on top of `NumPy
32-
<https://www.numpy.org>`__ and is intended to integrate well within a scientific
32+
<https://numpy.org>`__ and is intended to integrate well within a scientific
3333
computing environment with many other 3rd party libraries.
3434

3535
Here are just a few of the things that pandas does well:
@@ -75,7 +75,7 @@ Some other notes
7575
specialized tool.
7676

7777
- pandas is a dependency of `statsmodels
78-
<https://www.statsmodels.org/stable/index.html>`__, making it an important part of the
78+
<https://statsmodels.org>`__, making it an important part of the
7979
statistical computing ecosystem in Python.
8080

8181
- pandas has been used extensively in production in financial applications.
@@ -168,7 +168,7 @@ The list of the Core Team members and more detailed information can be found on
168168
Institutional partners
169169
----------------------
170170

171-
The information about current institutional partners can be found on `pandas website page <https://pandas.pydata.org/about.html>`__.
171+
Information about current institutional partners can be found on the `pandas website <https://pandas.pydata.org/about/sponsors.html>`__.
172172

173173
License
174174
-------

doc/source/whatsnew/v1.3.2.rst

+4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ Bug fixes
3232
~~~~~~~~~
3333
- Bug in :meth:`pandas.read_excel` modifying the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`)
3434
- Fixed bug where 1D slices over extension types turned into N-dimensional slices over ExtensionArrays (:issue:`42430`)
35+
- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and ``window`` is an offset that covers all the rows (:issue:`42753`)
3536
- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
37+
- :meth:`.Styler.set_sticky` has amended CSS to control the column/index names and ensure the correct sticky positions (:issue:`42537`)
38+
- Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`)
39+
-
3640

3741
.. ---------------------------------------------------------------------------
3842

doc/source/whatsnew/v1.4.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Other enhancements
3434
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
3535
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
3636
- :meth:`.Styler.bar` now validates the input arguments ``width`` and ``height`` (:issue:`42511`)
37+
- Add keyword ``levels`` to :meth:`.Styler.hide_index` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
3738
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
3839
- Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
3940
- Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
@@ -236,6 +237,7 @@ Indexing
236237
- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
237238
- Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` failing to raise or incorrectly raising ``ValueError`` when passing an ``ascending`` value (:issue:`41634`)
238239
- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
240+
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`)
239241
- Bug in :meth:`DataFrame.query` where the degree sign in a backticked column name, such as \`Temp(°C)\`, was not handled when used in an expression to query a dataframe (:issue:`42826`)
240242
-
241243

@@ -274,7 +276,9 @@ Groupby/resample/rolling
274276
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
275277
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
276278
- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`)
279+
- Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` where the result would have an inconsistent index when the input Series was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`)
277280
- Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`)
281+
- Bug in :meth:`pandas.DataFrame.rolling` where operations along rows (``axis=1``) incorrectly omitted columns containing ``float16`` and ``float32`` (:issue:`41779`)
278282

279283
Reshaping
280284
^^^^^^^^^

pandas/_libs/index.pyx

+20-2
Original file line numberDiff line numberDiff line change
@@ -288,10 +288,12 @@ cdef class IndexEngine:
288288
object val
289289
int count = 0, count_missing = 0
290290
Py_ssize_t i, j, n, n_t, n_alloc
291+
bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True
291292

292293
self._ensure_mapping_populated()
293294
values = np.array(self._get_index_values(), copy=False)
294295
stargets = set(targets)
296+
295297
n = len(values)
296298
n_t = len(targets)
297299
if n > 10_000:
@@ -321,19 +323,35 @@ cdef class IndexEngine:
321323

322324
if stargets:
323325
# otherwise, map by iterating through all items in the index
326+
324327
for i in range(n):
325328
val = values[i]
326329
if val in stargets:
327330
if val not in d:
328331
d[val] = []
329332
d[val].append(i)
330333

334+
elif util.is_nan(val):
335+
# GH#35392
336+
if need_nan_check:
337+
# Do this check only once
338+
stargets_has_nan = any(util.is_nan(val) for x in stargets)
339+
need_nan_check = False
340+
341+
if stargets_has_nan:
342+
if not d_has_nan:
343+
# use a canonical nan object
344+
d[np.nan] = []
345+
d_has_nan = True
346+
d[np.nan].append(i)
347+
331348
for i in range(n_t):
332349
val = targets[i]
333350

334351
# found
335-
if val in d:
336-
for j in d[val]:
352+
if val in d or (d_has_nan and util.is_nan(val)):
353+
key = val if not util.is_nan(val) else np.nan
354+
for j in d[key]:
337355

338356
# realloc if needed
339357
if count >= n_alloc:

pandas/_libs/join.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ ctypedef fused join_t:
265265
int16_t
266266
int32_t
267267
int64_t
268+
uint8_t
269+
uint16_t
270+
uint32_t
268271
uint64_t
269272

270273

pandas/_libs/window/indexers.pyx

+4-5
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,11 @@ def calculate_variable_window_bounds(
7979
else:
8080
end[0] = 0
8181
if center:
82-
for j in range(0, num_values + 1):
83-
if (index[j] == index[0] + index_growth_sign * window_size / 2 and
84-
right_closed):
82+
end_bound = index[0] + index_growth_sign * window_size / 2
83+
for j in range(0, num_values):
84+
if (index[j] < end_bound) or (index[j] == end_bound and right_closed):
8585
end[0] = j + 1
86-
break
87-
elif index[j] >= index[0] + index_growth_sign * window_size / 2:
86+
elif index[j] >= end_bound:
8887
end[0] = j
8988
break
9089

pandas/_testing/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
use_numexpr,
106106
with_csv_dialect,
107107
)
108+
from pandas.core.api import NumericIndex
108109
from pandas.core.arrays import (
109110
DatetimeArray,
110111
PandasArray,
@@ -314,7 +315,7 @@ def makeNumericIndex(k=10, name=None, *, dtype):
314315
else:
315316
raise NotImplementedError(f"wrong dtype {dtype}")
316317

317-
return Index(values, dtype=dtype, name=name)
318+
return NumericIndex(values, dtype=dtype, name=name)
318319

319320

320321
def makeIntIndex(k=10, name=None):

pandas/conftest.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -460,6 +460,16 @@ def _create_mi_with_dt64tz_level():
460460
"uint": tm.makeUIntIndex(100),
461461
"range": tm.makeRangeIndex(100),
462462
"float": tm.makeFloatIndex(100),
463+
"num_int64": tm.makeNumericIndex(100, dtype="int64"),
464+
"num_int32": tm.makeNumericIndex(100, dtype="int32"),
465+
"num_int16": tm.makeNumericIndex(100, dtype="int16"),
466+
"num_int8": tm.makeNumericIndex(100, dtype="int8"),
467+
"num_uint64": tm.makeNumericIndex(100, dtype="uint64"),
468+
"num_uint32": tm.makeNumericIndex(100, dtype="uint32"),
469+
"num_uint16": tm.makeNumericIndex(100, dtype="uint16"),
470+
"num_uint8": tm.makeNumericIndex(100, dtype="uint8"),
471+
"num_float64": tm.makeNumericIndex(100, dtype="float64"),
472+
"num_float32": tm.makeNumericIndex(100, dtype="float32"),
463473
"bool": tm.makeBoolIndex(10),
464474
"categorical": tm.makeCategoricalIndex(100),
465475
"interval": tm.makeIntervalIndex(100),
@@ -511,7 +521,10 @@ def index_flat(request):
511521
params=[
512522
key
513523
for key in indices_dict
514-
if key not in ["int", "uint", "range", "empty", "repeats"]
524+
if not (
525+
key in ["int", "uint", "range", "empty", "repeats"]
526+
or key.startswith("num_")
527+
)
515528
and not isinstance(indices_dict[key], MultiIndex)
516529
]
517530
)

0 commit comments

Comments
 (0)