Skip to content

Commit 77bc42b

Browse files
authored
Merge branch 'pandas-dev:main' into 45772-add-plot-show
2 parents cb426b5 + f976aa6 commit 77bc42b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+602
-565
lines changed

.pre-commit-config.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,13 @@ repos:
176176
files: ^pandas/core/
177177
exclude: ^pandas/core/api\.py$
178178
types: [python]
179+
- id: use-io-common-urlopen
180+
name: Use pandas.io.common.urlopen instead of urllib.request.urlopen
181+
language: python
182+
entry: python scripts/use_io_common_urlopen.py
183+
files: ^pandas/
184+
exclude: ^pandas/tests/
185+
types: [python]
179186
- id: no-bool-in-core-generic
180187
name: Use bool_t instead of bool in pandas/core/generic.py
181188
entry: python scripts/no_bool_in_generic.py

LICENSES/KLIB_LICENSE

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
The MIT License
2+
3+
Copyright (c) 2008- Attractive Chaos <attractor@live.co.uk>
4+
5+
Permission is hereby granted, free of charge, to any person obtaining
6+
a copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be
14+
included in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
SOFTWARE.

MANIFEST.in

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
include RELEASE.md
2+
include versioneer.py
23

34
graft doc
45
prune doc/build
@@ -54,9 +55,6 @@ global-exclude *.pxi
5455
# exclude the whole directory to avoid running related tests in sdist
5556
prune pandas/tests/io/parser/data
5657

57-
include versioneer.py
58-
include pandas/_version.py
59-
include pandas/io/formats/templates/*.tpl
60-
58+
# Selectively re-add *.cxx files that were excluded above
6159
graft pandas/_libs/src
6260
graft pandas/_libs/tslibs/src

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ fi
7878
### DOCSTRINGS ###
7979
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8080

81-
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
82-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SS01,SS02,SS03,SS04,SS05
81+
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05)' ; echo $MSG
82+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05
8383
RET=$(($RET + $?)) ; echo $MSG "DONE"
8484

8585
fi

doc/source/development/code_style.rst

-31
This file was deleted.

doc/source/development/contributing_codebase.rst

+2-3
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,14 @@ In addition to ``./ci/code_checks.sh``, some extra checks are run by
3737
``pre-commit`` - see :ref:`here <contributing.pre-commit>` for how to
3838
run them.
3939

40-
Additional standards are outlined on the :ref:`pandas code style guide <code_style>`.
41-
4240
.. _contributing.pre-commit:
4341

4442
Pre-commit
4543
----------
4644

4745
Additionally, :ref:`Continuous Integration <contributing.ci>` will run code formatting checks
48-
like ``black``, ``flake8``, ``isort``, and ``cpplint`` and more using `pre-commit hooks <https://pre-commit.com/>`_
46+
like ``black``, ``flake8`` (including a `pandas-dev-flaker <https://github.com/pandas-dev/pandas-dev-flaker>`_ plugin),
47+
``isort``, ``cpplint``, and more using `pre-commit hooks <https://pre-commit.com/>`_.
4948
Any warnings from these checks will cause the :ref:`Continuous Integration <contributing.ci>` to fail; therefore,
5049
it is helpful to run the check yourself before submitting code. This
5150
can be done by installing ``pre-commit``::

doc/source/development/index.rst

-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ Development
1616
contributing_environment
1717
contributing_documentation
1818
contributing_codebase
19-
code_style
2019
maintaining
2120
internals
2221
test_writing

doc/source/whatsnew/v0.13.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,7 @@ Enhancements
664664
other = pd.DataFrame({'A': [1, 3, 3, 7], 'B': ['e', 'f', 'f', 'e']})
665665
mask = dfi.isin(other)
666666
mask
667-
dfi[mask.any(1)]
667+
dfi[mask.any(axis=1)]
668668
669669
- ``Series`` now supports a ``to_frame`` method to convert it to a single-column DataFrame (:issue:`5164`)
670670

doc/source/whatsnew/v1.4.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Fixed regression in :meth:`DataFrame.nsmallest` that led to wrong results when ``np.nan`` was present in the sorting column (:issue:`46589`)
1718
- Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`)
1819
-
1920

doc/source/whatsnew/v1.5.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ Other Deprecations
429429
- Deprecated behavior of method :meth:`DataFrame.quantile`: the ``numeric_only`` argument will default to ``False``, including datetime/timedelta columns in the result (:issue:`7308`).
430430
- Deprecated :attr:`Timedelta.freq` and :attr:`Timedelta.is_populated` (:issue:`46430`)
431431
- Deprecated :attr:`Timedelta.delta` (:issue:`46476`)
432+
- Deprecated passing arguments as positional in :meth:`DataFrame.any` and :meth:`Series.any` (:issue:`44802`)
432433
- Deprecated the ``closed`` argument in :meth:`interval_range` in favor of ``inclusive`` argument; In a future version passing ``closed`` will raise (:issue:`40245`)
433434
- Deprecated the methods :meth:`DataFrame.mad`, :meth:`Series.mad`, and the corresponding groupby methods (:issue:`11787`)
434435

@@ -498,6 +499,7 @@ Conversion
498499
- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`)
499500
- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`)
500501
- Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`)
502+
- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`)
501503

502504
Strings
503505
^^^^^^^
@@ -563,6 +565,7 @@ I/O
563565
- Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`)
564566
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
565567
- Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
568+
- Bug in :func:`read_html` where elements surrounding ``<br>`` were joined without a space between them (:issue:`29528`)
566569
- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
567570
- Bug in :func:`read_excel` when reading a ``.ods`` file with newlines between xml elements (:issue:`45598`)
568571
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)

pandas/_libs/algos.pxd

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
from pandas._libs.dtypes cimport numeric_t
1+
from pandas._libs.dtypes cimport (
2+
numeric_object_t,
3+
numeric_t,
4+
)
25

36

47
cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil
@@ -10,3 +13,10 @@ cdef enum TiebreakEnumType:
1013
TIEBREAK_FIRST
1114
TIEBREAK_FIRST_DESCENDING
1215
TIEBREAK_DENSE
16+
17+
18+
cdef numeric_object_t get_rank_nan_fill_val(
19+
bint rank_nans_highest,
20+
numeric_object_t val,
21+
bint is_datetimelike=*,
22+
)

pandas/_libs/algos.pyx

+10-3
Original file line numberDiff line numberDiff line change
@@ -822,13 +822,17 @@ def is_monotonic(ndarray[numeric_object_t, ndim=1] arr, bint timelike):
822822

823823
cdef numeric_object_t get_rank_nan_fill_val(
824824
bint rank_nans_highest,
825-
numeric_object_t[:] _=None
825+
numeric_object_t val,
826+
bint is_datetimelike=False,
826827
):
827828
"""
828829
Return the value we'll use to represent missing values when sorting depending
829830
on if we'd like missing values to end up at the top/bottom. (The second parameter
830831
is unused, but needed for fused type specialization)
831832
"""
833+
if numeric_object_t is int64_t and is_datetimelike and not rank_nans_highest:
834+
return NPY_NAT + 1
835+
832836
if rank_nans_highest:
833837
if numeric_object_t is object:
834838
return Infinity()
@@ -854,6 +858,9 @@ cdef numeric_object_t get_rank_nan_fill_val(
854858
if numeric_object_t is object:
855859
return NegInfinity()
856860
elif numeric_object_t is int64_t:
861+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
862+
# instead of NPY_NAT here causes build warnings and failure in
863+
# test_cummax_i8_at_implementation_bound
857864
return NPY_NAT
858865
elif numeric_object_t is int32_t:
859866
return util.INT32_MIN
@@ -975,7 +982,7 @@ def rank_1d(
975982
# will flip the ordering to still end up with lowest rank.
976983
# Symmetric logic applies to `na_option == 'bottom'`
977984
nans_rank_highest = ascending ^ (na_option == 'top')
978-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
985+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
979986
if nans_rank_highest:
980987
order = [masked_vals, mask]
981988
else:
@@ -1335,7 +1342,7 @@ def rank_2d(
13351342

13361343
nans_rank_highest = ascending ^ (na_option == 'top')
13371344
if check_mask:
1338-
nan_fill_val = get_rank_nan_fill_val[numeric_object_t](nans_rank_highest)
1345+
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
13391346

13401347
if numeric_object_t is object:
13411348
mask = missing.isnaobj2d(values).view(np.uint8)

pandas/_libs/dtypes.pxd

-12
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,3 @@ ctypedef fused numeric_t:
3434
ctypedef fused numeric_object_t:
3535
numeric_t
3636
object
37-
38-
# i64 + u64 + all float types
39-
ctypedef fused iu_64_floating_t:
40-
float64_t
41-
float32_t
42-
int64_t
43-
uint64_t
44-
45-
# i64 + u64 + all float types + object
46-
ctypedef fused iu_64_floating_obj_t:
47-
iu_64_floating_t
48-
object

0 commit comments

Comments
 (0)