Skip to content

Commit db409f1

Browse files
author
auderson
committed
Merge remote-tracking branch 'upstream/main' into same_val_counts_for_roll_skew_kurt
2 parents 074e846 + 32999a1 commit db409f1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+1264
-780
lines changed

.pre-commit-config.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,13 @@ repos:
176176
files: ^pandas/core/
177177
exclude: ^pandas/core/api\.py$
178178
types: [python]
179+
- id: use-io-common-urlopen
180+
name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
181+
language: python
182+
entry: python scripts/use_io_common_urlopen.py
183+
files: ^pandas/
184+
exclude: ^pandas/tests/
185+
types: [python]
179186
- id: no-bool-in-core-generic
180187
name: Use bool_t instead of bool in pandas/core/generic.py
181188
entry: python scripts/no_bool_in_generic.py

LICENSES/KLIB_LICENSE

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
The MIT License
2+
3+
Copyright (c) 2008- Attractive Chaos <attractor@live.co.uk>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining
6+
a copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be
14+
included in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.

MANIFEST.in

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
include RELEASE.md
2+
include versioneer.py
23

34
graft doc
45
prune doc/build
@@ -54,9 +55,6 @@ global-exclude *.pxi
5455
# exclude the whole directory to avoid running related tests in sdist
5556
prune pandas/tests/io/parser/data
5657

57-
include versioneer.py
58-
include pandas/_version.py
59-
include pandas/io/formats/templates/*.tpl
60-
58+
# Selectively re-add *.cxx files that were excluded above
6159
graft pandas/_libs/src
6260
graft pandas/_libs/tslibs/src

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12-
- hypothesis>=5.5.3
1312
- psutil
1413
- pytest-asyncio>=0.17
1514
- boto3
@@ -27,6 +26,7 @@ dependencies:
2726
- fastparquet
2827
- fsspec
2928
- html5lib
29+
- hypothesis
3030
- gcsfs
3131
- jinja2
3232
- lxml

ci/deps/actions-38-downstream_compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ dependencies:
1010
- pytest>=6.0
1111
- pytest-cov
1212
- pytest-xdist>=1.31
13-
- hypothesis>=5.5.3
1413
- psutil
1514
- pytest-asyncio>=0.17
1615
- boto3
@@ -28,6 +27,7 @@ dependencies:
2827
- fastparquet
2928
- fsspec
3029
- html5lib
30+
- hypothesis
3131
- gcsfs
3232
- jinja2
3333
- lxml

ci/deps/actions-38-minimum_versions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ dependencies:
1111
- pytest>=6.0
1212
- pytest-cov
1313
- pytest-xdist>=1.31
14-
- hypothesis>=5.5.3
1514
- psutil
1615
- pytest-asyncio>=0.17
1716
- boto3
@@ -29,6 +28,7 @@ dependencies:
2928
- fastparquet=0.4.0
3029
- fsspec=0.7.4
3130
- html5lib=1.1
31+
- hypothesis=5.5.3
3232
- gcsfs=0.6.0
3333
- jinja2=2.11
3434
- lxml=4.5.0

ci/deps/actions-38.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12-
- hypothesis>=5.5.3
1312
- psutil
1413
- pytest-asyncio>=0.17
1514
- boto3
@@ -27,6 +26,7 @@ dependencies:
2726
- fastparquet
2827
- fsspec
2928
- html5lib
29+
- hypothesis
3030
- gcsfs
3131
- jinja2
3232
- lxml

ci/deps/actions-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12-
- hypothesis>=5.5.3
1312
- psutil
1413
- pytest-asyncio>=0.17
1514
- boto3
@@ -27,6 +26,7 @@ dependencies:
2726
- fastparquet
2827
- fsspec
2928
- html5lib
29+
- hypothesis
3030
- gcsfs
3131
- jinja2
3232
- lxml

ci/deps/circle-38-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ dependencies:
99
- pytest>=6.0
1010
- pytest-cov
1111
- pytest-xdist>=1.31
12-
- hypothesis>=5.5.3
1312
- psutil
1413
- pytest-asyncio>=0.17
1514
- boto3
@@ -27,6 +26,7 @@ dependencies:
2726
- fastparquet
2827
- fsspec
2928
- html5lib
29+
- hypothesis
3030
- gcsfs
3131
- jinja2
3232
- lxml

doc/source/development/code_style.rst

-31
This file was deleted.

doc/source/development/contributing_codebase.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,14 @@ In addition to ``./ci/code_checks.sh``, some extra checks are run by
3737
``pre-commit`` - see :ref:`here <contributing.pre-commit>` for how to
3838
run them.
3939

40-
Additional standards are outlined on the :ref:`pandas code style guide <code_style>`.
41-
4240
.. _contributing.pre-commit:
4341

4442
Pre-commit
4543
----------
4644

4745
Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
48-
like ``black``, ``flake8``, ``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_
46+
like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin),
47+
``isort``, ``cpplint``, and more using `pre-commit hooks <https://pre-commit.com/>`_.
4948
Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
5049
it is helpful to run the check yourself before submitting code. This
5150
can be done by installing ``pre-commit``::

doc/source/development/index.rst

-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ Development
1616
contributing_environment
1717
contributing_documentation
1818
contributing_codebase
19-
code_style
2019
maintaining
2120
internals
2221
test_writing

doc/source/whatsnew/v0.13.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,7 @@ Enhancements
664664
other = pd.DataFrame({'A': [1, 3, 3, 7], 'B': ['e', 'f', 'f', 'e']})
665665
mask = dfi.isin(other)
666666
mask
667-
dfi[mask.any(1)]
667+
dfi[mask.any(axis=1)]
668668
669669
- ``Series`` now supports a ``to_frame`` method to convert it to a single-column DataFrame (:issue:`5164`)
670670

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was present in the sorting column (:issue:`46589`)
1718
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
1819
-
1920

doc/source/whatsnew/v1.5.0.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ Other enhancements
9494
- :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`)
9595
- :meth:`pd.concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`)
9696
- :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
97-
-
97+
- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, and :meth:`DataFrame.cov` (:issue:`46560`)
9898

9999
.. ---------------------------------------------------------------------------
100100
.. _whatsnew_150.notable_bug_fixes:
@@ -429,8 +429,9 @@ Other Deprecations
429429
- Deprecated the behavior of :meth:`DataFrame.quantile`: in a future version the ``numeric_only`` argument will default to ``False``, so datetime/timedelta columns will be included in the result (:issue:`7308`).
430430
- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`)
431431
- Deprecated :attr:`Timedelta.delta` (:issue:`46476`)
432+
- Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`)
432433
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
433-
-
434+
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
434435

435436
.. ---------------------------------------------------------------------------
436437
.. _whatsnew_150.performance:
@@ -498,6 +499,7 @@ Conversion
498499
- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`)
499500
- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`)
500501
- Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`)
502+
- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`)
501503

502504
Strings
503505
^^^^^^^
@@ -563,6 +565,7 @@ I/O
563565
- Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`)
564566
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
565567
- Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` raising when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
568+
- Bug in :func:`read_html` where elements surrounding ``<br>`` were joined without a space between them (:issue:`29528`)
566569
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
567570
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
568571
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)

pandas/_libs/algos.pxd

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from pandas._libs.dtypes cimport numeric_t
1+
from pandas._libs.dtypes cimport (
2+
numeric_object_t,
3+
numeric_t,
4+
)
25

36

47
cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
@@ -10,3 +13,10 @@ cdef enum TiebreakEnumType:
1013
TIEBREAK_FIRST
1114
TIEBREAK_FIRST_DESCENDING
1215
TIEBREAK_DENSE
16+
17+
18+
cdef numeric_object_t get_rank_nan_fill_val(
19+
bint rank_nans_highest,
20+
numeric_object_t val,
21+
bint is_datetimelike=*,
22+
)

pandas/_libs/algos.pyx

+10-3
Original file line numberDiff line numberDiff line change
@@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
822822

823823
cdef numeric_object_t get_rank_nan_fill_val(
824824
bint rank_nans_highest,
825-
numeric_object_t[:] _=None
825+
numeric_object_t val,
826+
bint is_datetimelike=False,
826827
):
827828
"""
828829
Return the value we'll use to represent missing values when sorting depending
829830
on if we'd like missing values to end up at the top/bottom. (The second parameter
830831
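is unused, but needed for fused type specialization.) When the values are
datetimelike int64 and missing values should not rank highest, ``NPY_NAT + 1``
is returned instead of ``NPY_NAT``.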
831832
"""
833+
if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest:
834+
return NPY_NAT + 1
835+
832836
if rank_nans_highest:
833837
if numeric_object_t is object:
834838
return Infinity()
@@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val(
854858
if numeric_object_t is object:
855859
return NegInfinity()
856860
elif numeric_object_t is int64_t:
861+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
862+
# instead of NPY_NAT here causes build warnings and failure in
863+
# test_cummax_i8_at_implementation_bound
857864
return NPY_NAT
858865
elif numeric_object_t is int32_t:
859866
return util.INT32_MIN
@@ -975,7 +982,7 @@ def rank_1d(
975982
# will flip the ordering to still end up with lowest rank.
976983
# Symmetric logic applies to `na_option == 'bottom'`
977984
nans_rank_highest = ascending ^ (na_option == 'top')
978-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
985+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
979986
if nans_rank_highest:
980987
order = [masked_vals, mask]
981988
else:
@@ -1335,7 +1342,7 @@ def rank_2d(
13351342

13361343
nans_rank_highest = ascending ^ (na_option == 'top')
13371344
if check_mask:
1338-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
1345+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
13391346

13401347
if numeric_object_t is object:
13411348
mask = missing.isnaobj2d(values).view(np.uint8)

pandas/_libs/groupby.pyx

+12-29
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ from numpy.math cimport NAN
3131
cnp.import_array()
3232

3333
from pandas._libs cimport util
34-
from pandas._libs.algos cimport kth_smallest_c
34+
from pandas._libs.algos cimport (
35+
get_rank_nan_fill_val,
36+
kth_smallest_c,
37+
)
3538

3639
from pandas._libs.algos import (
3740
ensure_platform_int,
@@ -989,36 +992,16 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
989992
return False
990993

991994

992-
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
995+
cdef numeric_object_t _get_min_or_max(numeric_object_t val, bint compute_max, bint is_datetimelike):
993996
"""
994-
Find either the min or the max supported by numeric_t; 'val' is a placeholder
995-
to effectively make numeric_t an argument.
997+
Find either the min or the max supported by numeric_object_t; 'val' is a
998+
placeholder to effectively make numeric_object_t an argument.
996999
"""
997-
if numeric_t is int64_t:
998-
if compute_max and is_datetimelike:
999-
return -_int64_max
1000-
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
1001-
# instead of NPY_NAT here causes build warnings and failure in
1002-
# test_cummax_i8_at_implementation_bound
1003-
return NPY_NAT if compute_max else util.INT64_MAX
1004-
elif numeric_t is int32_t:
1005-
return util.INT32_MIN if compute_max else util.INT32_MAX
1006-
elif numeric_t is int16_t:
1007-
return util.INT16_MIN if compute_max else util.INT16_MAX
1008-
elif numeric_t is int8_t:
1009-
return util.INT8_MIN if compute_max else util.INT8_MAX
1010-
1011-
elif numeric_t is uint64_t:
1012-
return 0 if compute_max else util.UINT64_MAX
1013-
elif numeric_t is uint32_t:
1014-
return 0 if compute_max else util.UINT32_MAX
1015-
elif numeric_t is uint16_t:
1016-
return 0 if compute_max else util.UINT16_MAX
1017-
elif numeric_t is uint8_t:
1018-
return 0 if compute_max else util.UINT8_MAX
1019-
1020-
else:
1021-
return -np.inf if compute_max else np.inf
1000+
return get_rank_nan_fill_val(
1001+
not compute_max,
1002+
val=val,
1003+
is_datetimelike=is_datetimelike,
1004+
)
10221005

10231006

10241007
cdef numeric_t _get_na_val(numeric_t val, bint is_datetimelike):

0 commit comments

Comments
 (0)