Skip to content

Commit bca582e

Browse files
Daniel SaxtonDaniel Saxton
Daniel Saxton
authored and
Daniel Saxton
committed
Merge branch 'master' into bool-idx
2 parents d7fc3b7 + 8a7fbbe commit bca582e

File tree

171 files changed

+2435
-2387
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

171 files changed

+2435
-2387
lines changed

.pre-commit-config.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,20 @@ repos:
1010
- id: flake8
1111
language: python_venv
1212
additional_dependencies: [flake8-comprehensions>=3.1.0]
13+
- id: flake8
14+
name: flake8-pyx
15+
language: python_venv
16+
files: \.(pyx|pxd)$
17+
types:
18+
- file
19+
args: [--append-config=flake8/cython.cfg]
20+
- id: flake8
21+
name: flake8-pxd
22+
language: python_venv
23+
files: \.pxi\.in$
24+
types:
25+
- file
26+
args: [--append-config=flake8/cython-template.cfg]
1327
- repo: https://github.com/pre-commit/mirrors-isort
1428
rev: v4.3.21
1529
hooks:

asv_bench/benchmarks/multiindex_object.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,4 +160,43 @@ def time_equals_non_object_index(self):
160160
self.mi_large_slow.equals(self.idx_non_object)
161161

162162

163+
class SetOperations:
164+
165+
params = [
166+
("monotonic", "non_monotonic"),
167+
("datetime", "int", "string"),
168+
("intersection", "union", "symmetric_difference"),
169+
]
170+
param_names = ["index_structure", "dtype", "method"]
171+
172+
def setup(self, index_structure, dtype, method):
173+
N = 10 ** 5
174+
level1 = range(1000)
175+
176+
level2 = date_range(start="1/1/2000", periods=N // 1000)
177+
dates_left = MultiIndex.from_product([level1, level2])
178+
179+
level2 = range(N // 1000)
180+
int_left = MultiIndex.from_product([level1, level2])
181+
182+
level2 = tm.makeStringIndex(N // 1000).values
183+
str_left = MultiIndex.from_product([level1, level2])
184+
185+
data = {
186+
"datetime": dates_left,
187+
"int": int_left,
188+
"string": str_left,
189+
}
190+
191+
if index_structure == "non_monotonic":
192+
data = {k: mi[::-1] for k, mi in data.items()}
193+
194+
data = {k: {"left": mi, "right": mi[:-1]} for k, mi in data.items()}
195+
self.left = data[dtype]["left"]
196+
self.right = data[dtype]["right"]
197+
198+
def time_operation(self, index_structure, dtype, method):
199+
getattr(self.left, method)(self.right)
200+
201+
163202
from .pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6565
flake8 --format="$FLAKE8_FORMAT" .
6666
RET=$(($RET + $?)) ; echo $MSG "DONE"
6767

68-
MSG='Linting .pyx code' ; echo $MSG
69-
flake8 --format="$FLAKE8_FORMAT" pandas --filename=*.pyx --select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
68+
MSG='Linting .pyx and .pxd code' ; echo $MSG
69+
flake8 --format="$FLAKE8_FORMAT" pandas --append-config=flake8/cython.cfg
7070
RET=$(($RET + $?)) ; echo $MSG "DONE"
7171

72-
MSG='Linting .pxd and .pxi.in' ; echo $MSG
73-
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
72+
MSG='Linting .pxi.in' ; echo $MSG
73+
flake8 --format="$FLAKE8_FORMAT" pandas/_libs --append-config=flake8/cython-template.cfg
7474
RET=$(($RET + $?)) ; echo $MSG "DONE"
7575

7676
echo "flake8-rst --version"

doc/source/development/contributing_docstring.rst

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -937,33 +937,31 @@ classes. This helps us keep docstrings consistent, while keeping things clear
937937
for the user reading. It comes at the cost of some complexity when writing.
938938

939939
Each shared docstring will have a base template with variables, like
940-
``%(klass)s``. The variables filled in later on using the ``Substitution``
941-
decorator. Finally, docstrings can be appended to with the ``Appender``
942-
decorator.
940+
``{klass}``. The variables are filled in later on using the ``doc`` decorator.
941+
Finally, docstrings can also be appended to with the ``doc`` decorator.
943942

944943
In this example, we'll create a parent docstring normally (this is like
945944
``pandas.core.generic.NDFrame``). Then we'll have two children (like
946945
``pandas.core.series.Series`` and ``pandas.core.frame.DataFrame``). We'll
947-
substitute the children's class names in this docstring.
946+
substitute the class names in this docstring.
948947

949948
.. code-block:: python
950949
951950
class Parent:
951+
@doc(klass="Parent")
952952
def my_function(self):
953-
"""Apply my function to %(klass)s."""
953+
"""Apply my function to {klass}."""
954954
...
955955
956956
957957
class ChildA(Parent):
958-
@Substitution(klass="ChildA")
959-
@Appender(Parent.my_function.__doc__)
958+
@doc(Parent.my_function, klass="ChildA")
960959
def my_function(self):
961960
...
962961
963962
964963
class ChildB(Parent):
965-
@Substitution(klass="ChildB")
966-
@Appender(Parent.my_function.__doc__)
964+
@doc(Parent.my_function, klass="ChildB")
967965
def my_function(self):
968966
...
969967
@@ -972,18 +970,16 @@ The resulting docstrings are
972970
.. code-block:: python
973971
974972
>>> print(Parent.my_function.__doc__)
975-
Apply my function to %(klass)s.
973+
Apply my function to Parent.
976974
>>> print(ChildA.my_function.__doc__)
977975
Apply my function to ChildA.
978976
>>> print(ChildB.my_function.__doc__)
979977
Apply my function to ChildB.
980978
981-
Notice two things:
979+
Notice:
982980

983981
1. We "append" the parent docstring to the children docstrings, which are
984982
initially empty.
985-
2. Python decorators are applied inside out. So the order is Append then
986-
Substitution, even though Substitution comes first in the file.
987983

988984
Our files will often contain a module-level ``_shared_doc_kwargs`` with some
989985
common substitution values (things like ``klass``, ``axes``, etc).
@@ -992,14 +988,13 @@ You can substitute and append in one shot with something like
992988

993989
.. code-block:: python
994990
995-
@Appender(template % _shared_doc_kwargs)
991+
@doc(template, **_shared_doc_kwargs)
996992
def my_function(self):
997993
...
998994
999995
where ``template`` may come from a module-level ``_shared_docs`` dictionary
1000996
mapping function names to docstrings. Wherever possible, we prefer using
1001-
``Appender`` and ``Substitution``, since the docstring-writing processes is
1002-
slightly closer to normal.
997+
``doc``, since the docstring-writing process is slightly closer to normal.
1003998

1004999
See ``pandas.core.generic.NDFrame.fillna`` for an example template, and
10051000
``pandas.core.series.Series.fillna`` and ``pandas.core.frame.DataFrame.fillna``

doc/source/getting_started/install.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,23 @@ The commands in this table will install pandas for Python 3 from your distributi
163163
to get the newest version of pandas, it's recommended to install using the ``pip`` or ``conda``
164164
methods described above.
165165

166+
Handling ImportErrors
167+
~~~~~~~~~~~~~~~~~~~~~~
168+
169+
If you encounter an ImportError, it usually means that Python couldn't find pandas in the list of available
170+
libraries. Python internally has a list of directories it searches through to find packages. You can
171+
obtain these directories with::
172+
173+
import sys
174+
sys.path
175+
176+
One way you could be encountering this error is if you have multiple Python installations on your system
177+
and you don't have pandas installed in the Python installation you're currently using.
178+
In Linux/Mac you can run ``which python`` on your terminal and it will tell you which Python installation you're
179+
using. If it's something like "/usr/bin/python", you're using the Python from the system, which is not recommended.
180+
181+
It is highly recommended to use ``conda``, for quick installation and for package and dependency updates.
182+
You can find simple installation instructions for pandas in this document: `installation instructions </getting_started.html>`__.
166183

167184
Installing from source
168185
~~~~~~~~~~~~~~~~~~~~~~

doc/source/whatsnew/v1.0.2.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Fixed regression in :meth:`DataFrame.to_excel` when ``columns`` kwarg is passed (:issue:`31677`)
1919
- Fixed regression in :meth:`Series.align` when ``other`` is a DataFrame and ``method`` is not None (:issue:`31785`)
2020
- Fixed regression in :meth:`pandas.core.groupby.RollingGroupby.apply` where the ``raw`` parameter was ignored (:issue:`31754`)
21+
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.Rolling.corr>` when using a time offset (:issue:`31789`)
2122
-
2223

2324
.. ---------------------------------------------------------------------------
@@ -27,9 +28,21 @@ Fixed regressions
2728
Bug fixes
2829
~~~~~~~~~
2930

31+
**Categorical**
32+
33+
- Fixed bug where :meth:`Categorical.from_codes` improperly raised a ``ValueError`` when passed nullable integer codes. (:issue:`31779`)
34+
3035
**I/O**
3136

3237
- Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
38+
- Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
39+
40+
41+
42+
**Experimental dtypes**
43+
44+
- Fix bug in :meth:`DataFrame.convert_dtypes` for columns that were already using the ``"string"`` dtype (:issue:`31731`).
45+
- Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`)
3346

3447
.. ---------------------------------------------------------------------------
3548

doc/source/whatsnew/v1.1.0.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Other enhancements
6969
^^^^^^^^^^^^^^^^^^
7070

7171
- :class:`Styler` may now render CSS more efficiently where multiple cells have the same styling (:issue:`30876`)
72+
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
7273
-
7374
-
7475

@@ -100,6 +101,7 @@ Backwards incompatible API changes
100101
Deprecations
101102
~~~~~~~~~~~~
102103
- Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated and will raise in a future version. Either convert the list to a tuple, or pass the slice directly instead (:issue:`31333`)
104+
- :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`)
103105
-
104106
-
105107

@@ -202,6 +204,16 @@ MultiIndex
202204
index=[["a", "a", "b", "b"], [1, 2, 1, 2]])
203205
# Rows are now ordered as the requested keys
204206
df.loc[(['b', 'a'], [2, 1]), :]
207+
208+
- Bug where :meth:`MultiIndex.intersection` was not guaranteed to preserve order when ``sort=False``. (:issue:`31325`)
209+
210+
.. ipython:: python
211+
212+
left = pd.MultiIndex.from_arrays([["b", "a"], [2, 1]])
213+
right = pd.MultiIndex.from_arrays([["a", "b", "c"], [1, 2, 3]])
214+
# Common elements are now guaranteed to be ordered by the left side
215+
left.intersection(right, sort=False)
216+
205217
-
206218

207219
I/O
@@ -219,6 +231,8 @@ Plotting
219231

220232
- :func:`.plot` for line/bar now accepts color by dictionary (:issue:`8193`).
221233
-
234+
- Bug in :meth:`DataFrame.boxplot` and :meth:`DataFrame.plot.boxplot` lost color attributes of ``boxprops``, ``whiskerprops``, ``capprops`` and ``medianprops`` (:issue:`30346`)
235+
222236

223237
Groupby/resample/rolling
224238
^^^^^^^^^^^^^^^^^^^^^^^^

flake8/cython-template.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
[flake8]
2+
filename = *.pxi.in
3+
select = E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
4+

flake8/cython.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[flake8]
2+
filename = *.pyx,*.pxd
3+
select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411

pandas/_config/config.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,6 @@ def _select_options(pat: str) -> List[str]:
550550
551551
if pat=="all", returns all registered options
552552
"""
553-
554553
# short-circuit for exact key
555554
if pat in _registered_options:
556555
return [pat]
@@ -573,7 +572,6 @@ def _get_root(key: str) -> Tuple[Dict[str, Any], str]:
573572

574573
def _is_deprecated(key: str) -> bool:
575574
""" Returns True if the given option has been deprecated """
576-
577575
key = key.lower()
578576
return key in _deprecated_options
579577

@@ -586,7 +584,6 @@ def _get_deprecated_option(key: str):
586584
-------
587585
DeprecatedOption (namedtuple) if key is deprecated, None otherwise
588586
"""
589-
590587
try:
591588
d = _deprecated_options[key]
592589
except KeyError:
@@ -611,7 +608,6 @@ def _translate_key(key: str) -> str:
611608
if key is deprecated and a replacement key is defined, will return the
612609
replacement key, otherwise returns `key` as-is
613610
"""
614-
615611
d = _get_deprecated_option(key)
616612
if d:
617613
return d.rkey or key
@@ -627,7 +623,6 @@ def _warn_if_deprecated(key: str) -> bool:
627623
-------
628624
bool - True if `key` is deprecated, False otherwise.
629625
"""
630-
631626
d = _get_deprecated_option(key)
632627
if d:
633628
if d.msg:
@@ -649,7 +644,6 @@ def _warn_if_deprecated(key: str) -> bool:
649644

650645
def _build_option_description(k: str) -> str:
651646
""" Builds a formatted description of a registered option and prints it """
652-
653647
o = _get_registered_option(k)
654648
d = _get_deprecated_option(k)
655649

@@ -674,7 +668,6 @@ def _build_option_description(k: str) -> str:
674668

675669
def pp_options_list(keys: Iterable[str], width=80, _print: bool = False):
676670
""" Builds a concise listing of available options, grouped by prefix """
677-
678671
from textwrap import wrap
679672
from itertools import groupby
680673

@@ -738,7 +731,6 @@ def config_prefix(prefix):
738731
will register options "display.font.color", "display.font.size", set the
739732
value of "display.font.size"... and so on.
740733
"""
741-
742734
# Note: reset_option relies on set_option, and on key directly
743735
# it does not fit in to this monkey-patching scheme
744736

@@ -801,7 +793,6 @@ def is_instance_factory(_type) -> Callable[[Any], None]:
801793
ValueError if x is not an instance of `_type`
802794
803795
"""
804-
805796
if isinstance(_type, (tuple, list)):
806797
_type = tuple(_type)
807798
type_repr = "|".join(map(str, _type))
@@ -848,7 +839,6 @@ def is_nonnegative_int(value: Optional[int]) -> None:
848839
ValueError
849840
When the value is not None or is a negative integer
850841
"""
851-
852842
if value is None:
853843
return
854844

pandas/_config/localization.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ def can_set_locale(lc: str, lc_var: int = locale.LC_ALL) -> bool:
6161
bool
6262
Whether the passed locale can be set
6363
"""
64-
6564
try:
6665
with set_locale(lc, lc_var=lc_var):
6766
pass

pandas/_libs/lib.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1005,7 +1005,7 @@ _TYPE_MAP = {
10051005
'complex64': 'complex',
10061006
'complex128': 'complex',
10071007
'c': 'complex',
1008-
'string': 'bytes',
1008+
'string': 'string',
10091009
'S': 'bytes',
10101010
'U': 'string',
10111011
'bool': 'boolean',

pandas/_libs/sparse_op_helper.pxi.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_,
235235
{{dtype}}_t yfill):
236236
cdef:
237237
IntIndex out_index
238-
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
238+
Py_ssize_t xi = 0, yi = 0, out_i = 0 # fp buf indices
239239
int32_t xloc, yloc
240240
int32_t[:] xindices, yindices, out_indices
241241
{{dtype}}_t[:] x, y

pandas/_libs/tslibs/util.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ cdef extern from "numpy/ndarrayobject.h":
4242
bint PyArray_IsIntegerScalar(obj) nogil
4343
bint PyArray_Check(obj) nogil
4444

45-
cdef extern from "numpy/npy_common.h":
45+
cdef extern from "numpy/npy_common.h":
4646
int64_t NPY_MIN_INT64
4747

4848

0 commit comments

Comments
 (0)