Skip to content

Commit d7824c2

Browse files
committed
Merge upstream master.
2 parents dfca181 + 72bc92e commit d7824c2

File tree

165 files changed

+1156
-1137
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

165 files changed

+1156
-1137
lines changed

.pre-commit-config.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@ repos:
1010
- id: flake8
1111
language: python_venv
1212
additional_dependencies: [flake8-comprehensions>=3.1.0]
13+
- id: flake8
14+
name: flake8-pyx
15+
language: python_venv
16+
files: \.(pyx|pxd)$
17+
types:
18+
- file
19+
args: [--append-config=flake8/cython.cfg]
20+
- id: flake8
21+
name: flake8-pxd
22+
language: python_venv
23+
files: \.pxi\.in$
24+
types:
25+
- file
26+
args: [--append-config=flake8/cython-template.cfg]
1327
- repo: https://github.com/pre-commit/mirrors-isort
1428
rev: v4.3.21
1529
hooks:

asv_bench/benchmarks/multiindex_object.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,43 @@ def time_equals_non_object_index(self):
160160
self.mi_large_slow.equals(self.idx_non_object)
161161

162162

163+
class SetOperations:
164+
165+
params = [
166+
("monotonic", "non_monotonic"),
167+
("datetime", "int", "string"),
168+
("intersection", "union", "symmetric_difference"),
169+
]
170+
param_names = ["index_structure", "dtype", "method"]
171+
172+
def setup(self, index_structure, dtype, method):
173+
N = 10 ** 5
174+
level1 = range(1000)
175+
176+
level2 = date_range(start="1/1/2000", periods=N // 1000)
177+
dates_left = MultiIndex.from_product([level1, level2])
178+
179+
level2 = range(N // 1000)
180+
int_left = MultiIndex.from_product([level1, level2])
181+
182+
level2 = tm.makeStringIndex(N // 1000).values
183+
str_left = MultiIndex.from_product([level1, level2])
184+
185+
data = {
186+
"datetime": dates_left,
187+
"int": int_left,
188+
"string": str_left,
189+
}
190+
191+
if index_structure == "non_monotonic":
192+
data = {k: mi[::-1] for k, mi in data.items()}
193+
194+
data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()}
195+
self.left = data[dtype]["left"]
196+
self.right = data[dtype]["right"]
197+
198+
def time_operation(self, index_structure, dtype, method):
199+
getattr(self.left, method)(self.right)
200+
201+
163202
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6565
flake8 --format="$FLAKE8_FORMAT" .
6666
RET=$(($RET + $?)) ; echo $MSG "DONE"
6767

68-
MSG='Linting .pyx code' ; echo $MSG
69-
flake8 --format="$FLAKE8_FORMAT" pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
68+
MSG='Linting .pyx and .pxd code' ; echo $MSG
69+
flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
7070
RET=$(($RET + $?)) ; echo $MSG "DONE"
7171

72-
MSG='Linting .pxd and .pxi.in' ; echo $MSG
73-
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
72+
MSG='Linting .pxi.in' ; echo $MSG
73+
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
7474
RET=$(($RET + $?)) ; echo $MSG "DONE"
7575

7676
echo "flake8-rst --version"

doc/source/development/contributing_docstring.rst

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear
937937
for the user reading. It comes at the cost of some complexity when writing.
938938

939939
Each shared docstring will have a base template with variables, like
940-
``%(klass)s``. The variables filled in later on using the ``Substitution``
941-
decorator. Finally, docstrings can be appended to with the ``Appender``
942-
decorator.
940+
``{klass}``. The variables are filled in later on using the ``doc`` decorator.
941+
Finally, docstrings can also be appended to with the ``doc`` decorator.
943942

944943
In this example, we'll create a parent docstring normally (this is like
945944
``pandas.core.generic.NDFrame``). Then we'll have two children (like
946945
``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
947-
substitute the children's class names in this docstring.
946+
substitute the class names in this docstring.
948947

949948
.. code-block:: python
950949
951950
class Parent:
951+
@doc(klass="Parent")
952952
def my_function(self):
953-
"""Apply my function to %(klass)s."""
953+
"""Apply my function to {klass}."""
954954
...
955955
956956
957957
class ChildA(Parent):
958-
@Substitution(klass="ChildA")
959-
@Appender(Parent.my_function.__doc__)
958+
@doc(Parent.my_function, klass="ChildA")
960959
def my_function(self):
961960
...
962961
963962
964963
class ChildB(Parent):
965-
@Substitution(klass="ChildB")
966-
@Appender(Parent.my_function.__doc__)
964+
@doc(Parent.my_function, klass="ChildB")
967965
def my_function(self):
968966
...
969967
@@ -972,18 +970,16 @@ The resulting docstrings are
972970
.. code-block:: python
973971
974972
>>> print(Parent.my_function.__doc__)
975-
Apply my function to %(klass)s.
973+
Apply my function to Parent.
976974
>>> print(ChildA.my_function.__doc__)
977975
Apply my function to ChildA.
978976
>>> print(ChildB.my_function.__doc__)
979977
Apply my function to ChildB.
980978
981-
Notice two things:
979+
Notice:
982980

983981
1. We "append" the parent docstring to the children docstrings, which are
984982
initially empty.
985-
2. Python decorators are applied inside out. So the order is Append then
986-
Substitution, even though Substitution comes first in the file.
987983

988984
Our files will often contain a module-level ``_shared_doc_kwargs`` with some
989985
common substitution values (things like ``klass``, ``axes``, etc).
@@ -992,14 +988,13 @@ You can substitute and append in one shot with something like
992988

993989
.. code-block:: python
994990
995-
@Appender(template % _shared_doc_kwargs)
991+
@doc(template, **_shared_doc_kwargs)
996992
def my_function(self):
997993
...
998994
999995
where ``template`` may come from a module-level ``_shared_docs`` dictionary
1000996
mapping function names to docstrings. Wherever possible, we prefer using
1001-
``Appender`` and ``Substitution``, since the docstring-writing processes is
1002-
slightly closer to normal.
997+
``doc``, since the docstring-writing process is slightly closer to normal.
1003998

1004999
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
10051000
``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``

doc/source/whatsnew/v1.0.2.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,21 @@ Fixed regressions
2828
Bug fixes
2929
~~~~~~~~~
3030

31+
**Categorical**
32+
33+
- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`)
34+
3135
**I/O**
3236

3337
- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
38+
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
39+
40+
41+
42+
**Experimental dtypes**
43+
44+
- Fixed bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`).
45+
- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
3446

3547
.. ---------------------------------------------------------------------------
3648

doc/source/whatsnew/v1.1.0.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Backwards incompatible API changes
7575
Deprecations
7676
~~~~~~~~~~~~
7777
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`)
78+
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
7879
-
7980
-
8081

@@ -177,6 +178,16 @@ MultiIndex
177178
index=[["a", "a", "b", "b"], [1, 2, 1, 2]])
178179
# Rows are now ordered as the requested keys
179180
df.loc[(['b', 'a'], [2, 1]), :]
181+
182+
- Bug in :meth:`MultiIndex.intersection` where order was not guaranteed to be preserved when ``sort=False``. (:issue:`31325`)
183+
184+
.. ipython:: python
185+
186+
left = pd.MultiIndex.from_arrays([["b", "a"], [2, 1]])
187+
right = pd.MultiIndex.from_arrays([["a", "b", "c"], [1, 2, 3]])
188+
# Common elements are now guaranteed to be ordered by the left side
189+
left.intersection(right, sort=False)
190+
180191
-
181192

182193
I/O

flake8/cython-template.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[flake8]
2+
filename = *.pxi.in
3+
select = E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
4+

flake8/cython.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[flake8]
2+
filename = *.pyx,*.pxd
3+
select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411

pandas/_config/config.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,6 @@ class option_context:
395395
396396
Examples
397397
--------
398-
399398
>>> with option_context('display.max_rows', 10, 'display.max_columns', 5):
400399
... ...
401400
"""
@@ -550,7 +549,6 @@ def _select_options(pat: str) -> List[str]:
550549
551550
if pat=="all", returns all registered options
552551
"""
553-
554552
# short-circuit for exact key
555553
if pat in _registered_options:
556554
return [pat]
@@ -573,7 +571,6 @@ def _get_root(key: str) -> Tuple[Dict[str, Any], str]:
573571

574572
def _is_deprecated(key: str) -> bool:
575573
""" Returns True if the given option has been deprecated """
576-
577574
key = key.lower()
578575
return key in _deprecated_options
579576

@@ -586,7 +583,6 @@ def _get_deprecated_option(key: str):
586583
-------
587584
DeprecatedOption (namedtuple) if key is deprecated, None otherwise
588585
"""
589-
590586
try:
591587
d = _deprecated_options[key]
592588
except KeyError:
@@ -611,7 +607,6 @@ def _translate_key(key: str) -> str:
611607
if key is deprecated and a replacement key is defined, will return the
612608
replacement key, otherwise returns `key` as-is
613609
"""
614-
615610
d = _get_deprecated_option(key)
616611
if d:
617612
return d.rkey or key
@@ -627,7 +622,6 @@ def _warn_if_deprecated(key: str) -> bool:
627622
-------
628623
bool - True if `key` is deprecated, False otherwise.
629624
"""
630-
631625
d = _get_deprecated_option(key)
632626
if d:
633627
if d.msg:
@@ -649,7 +643,6 @@ def _warn_if_deprecated(key: str) -> bool:
649643

650644
def _build_option_description(k: str) -> str:
651645
""" Builds a formatted description of a registered option and prints it """
652-
653646
o = _get_registered_option(k)
654647
d = _get_deprecated_option(k)
655648

@@ -674,7 +667,6 @@ def _build_option_description(k: str) -> str:
674667

675668
def pp_options_list(keys: Iterable[str], width=80, _print: bool = False):
676669
""" Builds a concise listing of available options, grouped by prefix """
677-
678670
from textwrap import wrap
679671
from itertools import groupby
680672

@@ -723,8 +715,8 @@ def config_prefix(prefix):
723715
Warning: This is not thread-safe, and won't work properly if you import
724716
the API functions into your module using the "from x import y" construct.
725717
726-
Example:
727-
718+
Example
719+
-------
728720
import pandas._config.config as cf
729721
with cf.config_prefix("display.font"):
730722
cf.register_option("color", "red")
@@ -738,7 +730,6 @@ def config_prefix(prefix):
738730
will register options "display.font.color", "display.font.size", set the
739731
value of "display.font.size"... and so on.
740732
"""
741-
742733
# Note: reset_option relies on set_option, and on key directly
743734
# it does not fit in to this monkey-patching scheme
744735

@@ -801,7 +792,6 @@ def is_instance_factory(_type) -> Callable[[Any], None]:
801792
ValueError if x is not an instance of `_type`
802793
803794
"""
804-
805795
if isinstance(_type, (tuple, list)):
806796
_type = tuple(_type)
807797
type_repr = "|".join(map(str, _type))
@@ -848,7 +838,6 @@ def is_nonnegative_int(value: Optional[int]) -> None:
848838
ValueError
849839
When the value is not None or is a negative integer
850840
"""
851-
852841
if value is None:
853842
return
854843

pandas/_config/localization.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
6161
bool
6262
Whether the passed locale can be set
6363
"""
64-
6564
try:
6665
with set_locale(lc, lc_var=lc_var):
6766
pass

pandas/_libs/lib.pyx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,8 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool:
571571
if PyArray_Check(x) and PyArray_Check(y):
572572
if not array_equivalent_object(x, y):
573573
return False
574+
elif (x is C_NA) ^ (y is C_NA):
575+
return False
574576
elif not (PyObject_RichCompareBool(x, y, Py_EQ) or
575577
(x is None or is_nan(x)) and (y is None or is_nan(y))):
576578
return False
@@ -1005,7 +1007,7 @@ _TYPE_MAP = {
10051007
'complex64': 'complex',
10061008
'complex128': 'complex',
10071009
'c': 'complex',
1008-
'string': 'bytes',
1010+
'string': 'string',
10091011
'S': 'bytes',
10101012
'U': 'string',
10111013
'bool': 'boolean',

pandas/_libs/sparse_op_helper.pxi.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
235235
{{dtype}}_t yfill):
236236
cdef:
237237
IntIndex out_index
238-
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
238+
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
239239
int32_t xloc, yloc
240240
int32_t[:] xindices, yindices, out_indices
241241
{{dtype}}_t[:] x, y

pandas/_libs/tslibs/util.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ cdef extern from "numpy/ndarrayobject.h":
4242
bint PyArray_IsIntegerScalar(obj) nogil
4343
bint PyArray_Check(obj) nogil
4444

45-
cdef extern from "numpy/npy_common.h":
45+
cdef extern from "numpy/npy_common.h":
4646
int64_t NPY_MIN_INT64
4747

4848

0 commit comments

Comments
 (0)