Skip to content

Commit f3b53ad

Browse files
committed
Merge branch 'main' into pandas-devgh-52343-timestamp-from-positional
2 parents b4c389e + faeedad commit f3b53ad

File tree

140 files changed

+1618
-665
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

140 files changed

+1618
-665
lines changed

.pre-commit-config.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ repos:
2424
hooks:
2525
- id: black
2626
- repo: https://github.com/astral-sh/ruff-pre-commit
27-
rev: v0.0.285
27+
rev: v0.0.287
2828
hooks:
2929
- id: ruff
3030
args: [--exit-non-zero-on-fix]
@@ -34,7 +34,7 @@ repos:
3434
alias: ruff-selected-autofixes
3535
args: [--select, "ANN001,ANN204", --fix-only, --exit-non-zero-on-fix]
3636
- repo: https://github.com/jendrikseipp/vulture
37-
rev: 'v2.7'
37+
rev: 'v2.9.1'
3838
hooks:
3939
- id: vulture
4040
entry: python scripts/run_vulture.py
@@ -84,7 +84,7 @@ repos:
8484
'--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size'
8585
]
8686
- repo: https://github.com/pylint-dev/pylint
87-
rev: v3.0.0a6
87+
rev: v3.0.0a7
8888
hooks:
8989
- id: pylint
9090
stages: [manual]
@@ -124,7 +124,7 @@ repos:
124124
types: [text] # overwrite types: [rst]
125125
types_or: [python, rst]
126126
- repo: https://github.com/sphinx-contrib/sphinx-lint
127-
rev: v0.6.7
127+
rev: v0.6.8
128128
hooks:
129129
- id: sphinx-lint
130130
- repo: local

asv_bench/benchmarks/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def time_setitem(self, multiple_chunks):
9090
self.array[i] = "foo"
9191

9292
def time_setitem_list(self, multiple_chunks):
93-
indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
93+
indexer = list(range(50)) + list(range(-1000, 0, 50))
9494
self.array[indexer] = ["foo"] * len(indexer)
9595

9696
def time_setitem_slice(self, multiple_chunks):

asv_bench/benchmarks/frame_methods.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -693,20 +693,30 @@ def time_frame_sort_values(self, ascending):
693693
self.df.sort_values(by="A", ascending=ascending)
694694

695695

696-
class SortIndexByColumns:
697-
def setup(self):
696+
class SortMultiKey:
697+
params = [True, False]
698+
param_names = ["monotonic"]
699+
700+
def setup(self, monotonic):
698701
N = 10000
699702
K = 10
700-
self.df = DataFrame(
703+
df = DataFrame(
701704
{
702705
"key1": tm.makeStringIndex(N).values.repeat(K),
703706
"key2": tm.makeStringIndex(N).values.repeat(K),
704707
"value": np.random.randn(N * K),
705708
}
706709
)
710+
if monotonic:
711+
df = df.sort_values(["key1", "key2"])
712+
self.df_by_columns = df
713+
self.df_by_index = df.set_index(["key1", "key2"])
714+
715+
def time_sort_values(self, monotonic):
716+
self.df_by_columns.sort_values(by=["key1", "key2"])
707717

708-
def time_frame_sort_values_by_columns(self):
709-
self.df.sort_values(by=["key1", "key2"])
718+
def time_sort_index(self, monotonic):
719+
self.df_by_index.sort_index()
710720

711721

712722
class Quantile:

asv_bench/benchmarks/join_merge.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -360,14 +360,14 @@ class MergeCategoricals:
360360
def setup(self):
361361
self.left_object = DataFrame(
362362
{
363-
"X": np.random.choice(range(0, 10), size=(10000,)),
363+
"X": np.random.choice(range(10), size=(10000,)),
364364
"Y": np.random.choice(["one", "two", "three"], size=(10000,)),
365365
}
366366
)
367367

368368
self.right_object = DataFrame(
369369
{
370-
"X": np.random.choice(range(0, 10), size=(10000,)),
370+
"X": np.random.choice(range(10), size=(10000,)),
371371
"Z": np.random.choice(["jjj", "kkk", "sss"], size=(10000,)),
372372
}
373373
)

doc/cheatsheet/README.md

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Pandas Cheat Sheet
2+
3+
The Pandas Cheat Sheet was created using Microsoft PowerPoint 2013.
4+
To create the PDF version, within PowerPoint, simply do a "Save As"
5+
and pick "PDF" as the format.
6+
7+
This cheat sheet, originally written by Irv Lustig, [Princeton Consultants](https://www.princetonoptimization.com/), was inspired by the [RStudio Data Wrangling Cheatsheet](https://www.rstudio.com/wp-content/uploads/2015/02/data-wrangling-cheatsheet.pdf).
8+
9+
| Topic | PDF | PPT |
10+
|------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
11+
| Pandas_Cheat_Sheet | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
12+
| Pandas_Cheat_Sheet_JA | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://github.com/pandas-dev/pandas/blob/main/doc/cheatsheet/Pandas_Cheat_Sheet_JA.pptx" target="_parent"><img src="https://img.shields.io/badge/Open in PPT-B7472A?style=flat-square&logo=microsoft-powerpoint&logoColor=white"/></a> |
13+
14+
15+
**Alternative**
16+
17+
Alternatively, if you want to complement your learning, you can use the Pandas Cheat sheets
18+
developed by [DataCamp](https://www.datacamp.com/) in "PDF", "Google Colab" and "Streamlit" formats.
19+
20+
| Topic | PDF | Streamlit | Google Colab |
21+
|-------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
22+
| Pandas | <a href="https://github.com/fralfaro/DS-Cheat-Sheets/blob/main/docs/files/pandas_cs.pdf" target="_parent"><img src="https://img.shields.io/badge/Open in PDF-%23FF0000.svg?style=flat-square&logo=adobe&logoColor=white"/></a> | <a href="https://ds-cheat-sheets-pandas.streamlit.app/" target="_parent"><img src="https://static.streamlit.io/badges/streamlit_badge_black_white.svg"/></a> | <a href="https://colab.research.google.com/github/fralfaro/DS-Cheat-Sheets/blob/main/docs/examples/pandas/pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a> |

doc/cheatsheet/README.txt

-8
This file was deleted.

doc/make.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -159,10 +159,10 @@ def _get_page_title(self, page):
159159
Open the rst file `page` and extract its title.
160160
"""
161161
fname = os.path.join(SOURCE_PATH, f"{page}.rst")
162-
option_parser = docutils.frontend.OptionParser(
163-
components=(docutils.parsers.rst.Parser,)
162+
doc = docutils.utils.new_document(
163+
"<doc>",
164+
docutils.frontend.get_default_settings(docutils.parsers.rst.Parser),
164165
)
165-
doc = docutils.utils.new_document("<doc>", option_parser.get_default_values())
166166
with open(fname, encoding="utf-8") as f:
167167
data = f.read()
168168

doc/source/development/debugging_extensions.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,16 @@ By default building pandas from source will generate a release build. To generat
2121

2222
pip install -ve . --no-build-isolation --config-settings=builddir="debug" --config-settings=setup-args="-Dbuildtype=debug"
2323

24+
.. note::
25+
26+
conda environments update CFLAGS/CPPFLAGS with flags that are geared towards generating releases. If using conda, you may need to set ``CFLAGS="$CFLAGS -O0"`` and ``CPPFLAGS="$CPPFLAGS -O0"`` to ensure optimizations are turned off for debugging.
27+
2428
By specifying ``builddir="debug"`` all of the targets will be built and placed in the debug directory relative to the project root. This helps to keep your debug and release artifacts separate; you are of course able to choose a different directory name or omit altogether if you do not care to separate build types.
2529

2630
Editor support
2731
--------------
2832

29-
The meson build system generates a `compilation database <https://clang.llvm.org/docs/JSONCompilationDatabase.html>`_ automatically and places it in the build directory. Many language servers and IDEs can use this information to provide code-completion, go-to-defintion and error checking support as you type.
33+
The meson build system generates a `compilation database <https://clang.llvm.org/docs/JSONCompilationDatabase.html>`_ automatically and places it in the build directory. Many language servers and IDEs can use this information to provide code-completion, go-to-definition and error checking support as you type.
3034

3135
How each language server / IDE chooses to look for the compilation database may vary. When in doubt you may want to create a symlink at the root of the project that points to the compilation database in your build directory. Assuming you used *debug* as your directory name, you can run::
3236

doc/source/getting_started/intro_tutorials/01_table_oriented.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ between square brackets ``[]``.
106106
</ul>
107107

108108
.. note::
109-
If you are familiar to Python
109+
If you are familiar with Python
110110
:ref:`dictionaries <python:tut-dictionaries>`, the selection of a
111-
single column is very similar to selection of dictionary values based on
111+
single column is very similar to the selection of dictionary values based on
112112
the key.
113113

114114
You can create a ``Series`` from scratch as well:

doc/source/user_guide/io.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1811,8 +1811,8 @@ Writing JSON
18111811
A ``Series`` or ``DataFrame`` can be converted to a valid JSON string. Use ``to_json``
18121812
with optional parameters:
18131813

1814-
* ``path_or_buf`` : the pathname or buffer to write the output
1815-
This can be ``None`` in which case a JSON string is returned
1814+
* ``path_or_buf`` : the pathname or buffer to write the output.
1815+
This can be ``None`` in which case a JSON string is returned.
18161816
* ``orient`` :
18171817

18181818
``Series``:

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 2.1
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v2.1.1
2728
v2.1.0
2829

2930
Version 2.0

doc/source/whatsnew/v0.15.2.rst

+49-13
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,61 @@ API changes
2424
- Indexing in ``MultiIndex`` beyond lex-sort depth is now supported, though
2525
a lexically sorted index will have a better performance. (:issue:`2646`)
2626

27-
.. ipython:: python
28-
:okexcept:
29-
:okwarning:
27+
.. code-block:: ipython
28+
29+
In [1]: df = pd.DataFrame({'jim':[0, 0, 1, 1],
30+
...: 'joe':['x', 'x', 'z', 'y'],
31+
...: 'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
32+
...:
3033
31-
df = pd.DataFrame({'jim':[0, 0, 1, 1],
32-
'joe':['x', 'x', 'z', 'y'],
33-
'jolie':np.random.rand(4)}).set_index(['jim', 'joe'])
34-
df
35-
df.index.lexsort_depth
34+
In [2]: df
35+
Out[2]:
36+
jolie
37+
jim joe
38+
0 x 0.126970
39+
x 0.966718
40+
1 z 0.260476
41+
y 0.897237
42+
43+
[4 rows x 1 columns]
44+
45+
In [3]: df.index.lexsort_depth
46+
Out[3]: 1
3647
3748
# in prior versions this would raise a KeyError
3849
# will now show a PerformanceWarning
39-
df.loc[(1, 'z')]
50+
In [4]: df.loc[(1, 'z')]
51+
Out[4]:
52+
jolie
53+
jim joe
54+
1 z 0.260476
55+
56+
[1 rows x 1 columns]
4057
4158
# lexically sorting
42-
df2 = df.sort_index()
43-
df2
44-
df2.index.lexsort_depth
45-
df2.loc[(1,'z')]
59+
In [5]: df2 = df.sort_index()
60+
61+
In [6]: df2
62+
Out[6]:
63+
jolie
64+
jim joe
65+
0 x 0.126970
66+
x 0.966718
67+
1 y 0.897237
68+
z 0.260476
69+
70+
[4 rows x 1 columns]
71+
72+
In [7]: df2.index.lexsort_depth
73+
Out[7]: 2
74+
75+
In [8]: df2.loc[(1,'z')]
76+
Out[8]:
77+
jolie
78+
jim joe
79+
1 z 0.260476
80+
81+
[1 rows x 1 columns]
4682
4783
- Bug in unique of Series with ``category`` dtype, which returned all categories regardless
4884
whether they were "used" or not (see :issue:`8559` for the discussion).

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ value. (:issue:`17054`)
286286

287287
.. ipython:: python
288288
289+
from io import StringIO
289290
result = pd.read_html(StringIO("""
290291
<table>
291292
<thead>

doc/source/whatsnew/v2.1.1.rst

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
.. _whatsnew_211:
2+
3+
What's new in 2.1.1 (September XX, 2023)
4+
----------------------------------------
5+
6+
These are the changes in pandas 2.1.1. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_211.regressions:
13+
14+
Fixed regressions
15+
~~~~~~~~~~~~~~~~~
16+
- Fixed regression in :func:`concat` when the :class:`DataFrame` objects have two different extension dtypes (:issue:`54848`)
17+
- Fixed regression in :func:`merge` when merging over a PyArrow string index (:issue:`54894`)
18+
- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtype`` is a dict for ``engine="python"`` (:issue:`54868`)
19+
- Fixed regression in :func:`read_csv` when ``delim_whitespace`` is True (:issue:`54918`, :issue:`54931`)
20+
- Fixed regression in :meth:`.GroupBy.get_group` raising for ``axis=1`` (:issue:`54858`)
21+
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
22+
- Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``items`` (:issue:`54980`)
23+
- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
24+
- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
25+
- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
26+
- Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)
27+
- Fixed regression in :meth:`Series.value_counts` raising for numeric data if ``bins`` was specified (:issue:`54857`)
28+
- Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`)
29+
30+
.. ---------------------------------------------------------------------------
31+
.. _whatsnew_211.bug_fixes:
32+
33+
Bug fixes
34+
~~~~~~~~~
35+
- Fixed bug for :class:`ArrowDtype` raising ``NotImplementedError`` for fixed-size list (:issue:`55000`)
36+
- Fixed bug in :meth:`DataFrame.stack` with ``future_stack=True`` and columns a non-:class:`MultiIndex` consisting of tuples (:issue:`54948`)
37+
- Fixed bug in :meth:`Series.pct_change` and :meth:`DataFrame.pct_change` showing unnecessary ``FutureWarning`` (:issue:`54981`)
38+
39+
.. ---------------------------------------------------------------------------
40+
.. _whatsnew_211.other:
41+
42+
Other
43+
~~~~~
44+
-
45+
46+
.. ---------------------------------------------------------------------------
47+
.. _whatsnew_211.contributors:
48+
49+
Contributors
50+
~~~~~~~~~~~~

doc/source/whatsnew/v2.2.0.rst

+6-3
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ Deprecations
145145
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
146146
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
147147
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
148+
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
148149
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
149150
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
150151
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
@@ -157,19 +158,21 @@ Deprecations
157158

158159
Performance improvements
159160
~~~~~~~~~~~~~~~~~~~~~~~~
161+
- Performance improvement in :meth:`DataFrame.to_dict` when converting a :class:`DataFrame` to a dictionary (:issue:`50990`)
162+
- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
160163
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)
161-
-
162164

163165
.. ---------------------------------------------------------------------------
164166
.. _whatsnew_220.bug_fixes:
165167

166168
Bug fixes
167169
~~~~~~~~~
168170
- Bug in :class:`AbstractHolidayCalendar` where timezone data was not propagated when computing holiday observances (:issue:`54580`)
171+
- Bug in :class:`pandas.core.window.Rolling` where duplicate datetimelike indexes are treated as consecutive rather than equal with ``closed='left'`` and ``closed='neither'`` (:issue:`20712`)
169172

170173
Categorical
171174
^^^^^^^^^^^
172-
-
175+
- :meth:`Categorical.isin` raising ``InvalidIndexError`` for categorical containing overlapping :class:`Interval` values (:issue:`34974`)
173176
-
174177

175178
Datetimelike
@@ -243,7 +246,7 @@ Groupby/resample/rolling
243246

244247
Reshaping
245248
^^^^^^^^^
246-
-
249+
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
247250
-
248251

249252
Sparse

pandas/_libs/meson.build

+2-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ libs_sources = {
6969
'index': {'sources': ['index.pyx', _index_class_helper]},
7070
'indexing': {'sources': ['indexing.pyx']},
7171
'internals': {'sources': ['internals.pyx']},
72-
'interval': {'sources': ['interval.pyx', _intervaltree_helper]},
72+
'interval': {'sources': ['interval.pyx', _intervaltree_helper],
73+
'deps': _khash_primitive_helper_dep},
7374
'join': {'sources': ['join.pyx', _khash_primitive_helper],
7475
'deps': _khash_primitive_helper_dep},
7576
'lib': {'sources': ['lib.pyx', 'src/parser/tokenizer.c']},

pandas/_libs/src/parser/tokenizer.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,8 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
664664
((!self->delim_whitespace && c == ' ' && self->skipinitialspace))
665665

666666
// applied when in a field
667-
#define IS_DELIMITER(c) ((c == delimiter) || (delim_whitespace && isblank(c)))
667+
#define IS_DELIMITER(c) \
668+
((!delim_whitespace && c == delimiter) || (delim_whitespace && isblank(c)))
668669

669670
#define _TOKEN_CLEANUP() \
670671
self->stream_len = slen; \

0 commit comments

Comments
 (0)