Skip to content

Commit cdbe28d

Browse files
authored
Merge branch 'pandas-dev:main' into main
2 parents 44e3863 + 05d12b5 commit cdbe28d

File tree

88 files changed

+1672
-1342
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+1672
-1342
lines changed

asv_bench/benchmarks/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def time_setitem(self, multiple_chunks):
9393
self.array[i] = "foo"
9494

9595
def time_setitem_list(self, multiple_chunks):
96-
indexer = list(range(0, 50)) + list(range(-50, 0))
96+
indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
9797
self.array[indexer] = ["foo"] * len(indexer)
9898

9999
def time_setitem_slice(self, multiple_chunks):

asv_bench/benchmarks/io/json.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ def time_float_longint_str_lines(self):
294294
class ToJSONMem:
295295
def setup_cache(self):
296296
df = DataFrame([[1]])
297-
frames = {"int": df, "float": df.astype(float)}
297+
df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="T"))
298+
frames = {"int": df, "float": df.astype(float), "datetime": df2}
298299

299300
return frames
300301

@@ -308,5 +309,10 @@ def peakmem_float(self, frames):
308309
for _ in range(100_000):
309310
df.to_json()
310311

312+
def peakmem_time(self, frames):
313+
df = frames["datetime"]
314+
for _ in range(10_000):
315+
df.to_json(orient="table")
316+
311317

312318
from ..pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

+1-11
Original file line numberDiff line numberDiff line change
@@ -85,27 +85,17 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8585

8686
MSG='Partially validate docstrings (RT02)' ; echo $MSG
8787
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
88-
pandas.Series.align \
89-
pandas.Series.dt.total_seconds \
90-
pandas.Series.cat.rename_categories \
91-
pandas.Series.cat.reorder_categories \
92-
pandas.Series.cat.add_categories \
93-
pandas.Series.cat.remove_categories \
94-
pandas.Series.cat.remove_unused_categories \
9588
pandas.Index.all \
9689
pandas.Index.any \
9790
pandas.MultiIndex.drop \
9891
pandas.DatetimeIndex.to_pydatetime \
9992
pandas.TimedeltaIndex.to_pytimedelta \
100-
pandas.core.groupby.SeriesGroupBy.apply \
101-
pandas.core.groupby.DataFrameGroupBy.apply \
10293
pandas.io.formats.style.Styler.export \
10394
pandas.api.extensions.ExtensionArray.astype \
10495
pandas.api.extensions.ExtensionArray.dropna \
10596
pandas.api.extensions.ExtensionArray.isna \
10697
pandas.api.extensions.ExtensionArray.repeat \
107-
pandas.api.extensions.ExtensionArray.unique \
108-
pandas.DataFrame.align
98+
pandas.api.extensions.ExtensionArray.unique
10999
RET=$(($RET + $?)) ; echo $MSG "DONE"
110100

111101
fi

doc/source/development/maintaining.rst

+13-6
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,8 @@ which will be triggered when the tag is pushed.
458458
git checkout master
459459
git pull --ff-only upstream master
460460
git checkout -B RLS-<version>
461-
sed -i 's/BUILD_COMMIT: "v.*/BUILD_COMMIT: "'<version>'"/' azure/windows.yml azure/posix.yml
462-
sed -i 's/BUILD_COMMIT="v.*/BUILD_COMMIT="'<version>'"/' .travis.yml
461+
sed -i 's/BUILD_COMMIT: "v.*/BUILD_COMMIT: "'v<version>'"/' azure/windows.yml azure/posix.yml
462+
sed -i 's/BUILD_COMMIT="v.*/BUILD_COMMIT="'v<version>'"/' .travis.yml
463463
git commit -am "RLS <version>"
464464
git push -u origin RLS-<version>
465465

@@ -474,14 +474,21 @@ which will be triggered when the tag is pushed.
474474
Post-Release
475475
````````````
476476

477-
1. Close the milestone and the issue for the released version.
477+
1. Update symlink to stable documentation by logging in to our web server, and
478+
editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<latest-version>``.
478479

479-
2. Create a new issue for the next release, with the estimated date or release.
480+
2. If releasing a major or minor release, open a PR in our source code to update
481+
``web/pandas/versions.json``, to have the desired versions in the documentation
482+
dropdown menu.
480483

481-
3. Open a PR with the placeholder for the release notes of the next version. See
484+
3. Close the milestone and the issue for the released version.
485+
486+
4. Create a new issue for the next release, with the estimated date of release.
487+
488+
5. Open a PR with the placeholder for the release notes of the next version. See
482489
for example `the PR for 1.5.3 <https://github.com/pandas-dev/pandas/pull/49843/files>`_.
483490

484-
4. Announce the new release in the official channels (use previous announcements
491+
6. Announce the new release in the official channels (use previous announcements
485492
for reference):
486493

487494
- The pandas-dev and pydata mailing lists

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.5
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.5.4
2728
v1.5.3
2829
v1.5.2
2930
v1.5.1

doc/source/whatsnew/v1.5.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ Other
4343
Contributors
4444
~~~~~~~~~~~~
4545

46-
.. contributors:: v1.5.1..v1.5.2|HEAD
46+
.. contributors:: v1.5.1..v1.5.2

doc/source/whatsnew/v1.5.3.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,4 @@ Other
5555
Contributors
5656
~~~~~~~~~~~~
5757

58-
.. contributors:: v1.5.2..v1.5.3|HEAD
58+
.. contributors:: v1.5.2..v1.5.3

doc/source/whatsnew/v1.5.4.rst

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
.. _whatsnew_154:
2+
3+
What's new in 1.5.4 (March XX, 2023)
4+
--------------------------------------
5+
6+
These are the changes in pandas 1.5.4. See :ref:`release` for a full changelog
7+
including other versions of pandas.
8+
9+
{{ header }}
10+
11+
.. ---------------------------------------------------------------------------
12+
.. _whatsnew_154.regressions:
13+
14+
Fixed regressions
15+
~~~~~~~~~~~~~~~~~
16+
-
17+
18+
.. ---------------------------------------------------------------------------
19+
.. _whatsnew_154.bug_fixes:
20+
21+
Bug fixes
22+
~~~~~~~~~
23+
-
24+
25+
.. ---------------------------------------------------------------------------
26+
.. _whatsnew_154.other:
27+
28+
Other
29+
~~~~~
30+
-
31+
32+
.. ---------------------------------------------------------------------------
33+
.. _whatsnew_154.contributors:
34+
35+
Contributors
36+
~~~~~~~~~~~~
37+
38+
.. contributors:: v1.5.3..v1.5.4|HEAD

doc/source/whatsnew/v2.0.0.rst

+41-1
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ Other enhancements
161161
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
162162
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
163163
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
164+
- :meth:`Series.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`48304`)
164165
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
165166
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
166167
- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
@@ -453,6 +454,37 @@ Now, the axes return an empty :class:`RangeIndex`.
453454
pd.Series().index
454455
pd.DataFrame().axes
455456
457+
.. _whatsnew_200.api_breaking.to_latex:
458+
459+
DataFrame to LaTeX has a new render engine
460+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
461+
462+
The existing :meth:`DataFrame.to_latex` has been restructured to utilise the
463+
extended implementation previously available under :meth:`.Styler.to_latex`.
464+
The arguments signature is similar, albeit ``col_space`` has been removed since
465+
it is ignored by LaTeX engines. This render engine also requires ``jinja2`` as a
466+
dependency which needs to be installed, since rendering is based upon jinja2 templates.
467+
468+
The pandas options below are no longer used and will be removed in future releases.
469+
The alternative options giving similar functionality are indicated below:
470+
471+
- ``display.latex.escape``: replaced with ``styler.format.escape``,
472+
- ``display.latex.longtable``: replaced with ``styler.latex.environment``,
473+
- ``display.latex.multicolumn``, ``display.latex.multicolumn_format`` and
474+
``display.latex.multirow``: replaced with ``styler.sparse.rows``,
475+
``styler.sparse.columns``, ``styler.latex.multirow_align`` and
476+
``styler.latex.multicol_align``,
477+
- ``display.latex.repr``: replaced with ``styler.render.repr``,
478+
- ``display.max_rows`` and ``display.max_columns``: replaced with
479+
``styler.render.max_rows``, ``styler.render.max_columns`` and
480+
``styler.render.max_elements``.
481+
482+
Note that the behaviour of ``_repr_latex_`` is also changed. Previously
483+
setting ``display.latex.repr`` would generate LaTeX only when using nbconvert for a
484+
Jupyter Notebook, and not when the user is running the notebook. Now the
485+
``styler.render.repr`` option allows control of the specific output
486+
within Jupyter Notebooks for operations (not just on nbconvert). See :issue:`39911`.
487+
456488
.. _whatsnew_200.api_breaking.deps:
457489

458490
Increased minimum versions for dependencies
@@ -618,6 +650,7 @@ Removal of prior version deprecations/changes
618650
- Removed deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` (:issue:`49397`)
619651
- Removed deprecated :meth:`.Styler.where` (:issue:`49397`)
620652
- Removed deprecated :meth:`.Styler.render` (:issue:`49397`)
653+
- Removed deprecated argument ``col_space`` in :meth:`DataFrame.to_latex` (:issue:`47970`)
621654
- Removed deprecated argument ``null_color`` in :meth:`.Styler.highlight_null` (:issue:`49397`)
622655
- Removed deprecated argument ``check_less_precise`` in :meth:`.testing.assert_frame_equal`, :meth:`.testing.assert_extension_array_equal`, :meth:`.testing.assert_series_equal`, :meth:`.testing.assert_index_equal` (:issue:`30562`)
623656
- Removed deprecated ``null_counts`` argument in :meth:`DataFrame.info`. Use ``show_counts`` instead (:issue:`37999`)
@@ -792,6 +825,7 @@ Removal of prior version deprecations/changes
792825
- Changed behavior of comparison of ``NaT`` with a ``datetime.date`` object; these now raise on inequality comparisons (:issue:`39196`)
793826
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
794827
- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
828+
- Changed behaviour of :meth:`DataFrame.to_latex` to now use the Styler implementation via :meth:`.Styler.to_latex` (:issue:`47970`)
795829
- Changed behavior of :meth:`Series.__setitem__` with an integer key and a :class:`Float64Index` when the key is not present in the index; previously we treated the key as positional (behaving like ``series.iloc[key] = val``), now we treat it as a label (behaving like ``series.loc[key] = val``), consistent with :meth:`Series.__getitem__` behavior (:issue:`33469`)
796830
- Removed ``na_sentinel`` argument from :func:`factorize`, :meth:`.Index.factorize`, and :meth:`.ExtensionArray.factorize` (:issue:`47157`)
797831
- Changed behavior of :meth:`Series.diff` and :meth:`DataFrame.diff` with :class:`ExtensionDtype` dtypes whose arrays do not implement ``diff``, these now raise ``TypeError`` rather than casting to numpy (:issue:`31025`)
@@ -850,7 +884,7 @@ Performance improvements
850884
- Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
851885
- Performance improvement in :meth:`~arrays.IntervalArray.from_tuples` (:issue:`50620`)
852886
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
853-
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
887+
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` (:issue:`50248`, :issue:`50632`)
854888
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
855889
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`49973`)
856890
- Performance improvement when parsing strings to :class:`BooleanDtype` (:issue:`50613`)
@@ -868,11 +902,15 @@ Performance improvements
868902
- Performance improvement in :func:`merge` when not merging on the index - the new index will now be :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`49478`)
869903
- Performance improvement in :meth:`DataFrame.to_dict` and :meth:`Series.to_dict` when using any non-object dtypes (:issue:`46470`)
870904
- Performance improvement in :func:`read_html` when there are multiple tables (:issue:`49929`)
905+
- Performance improvement in :class:`Period` constructor when constructing from a string or integer (:issue:`38312`)
871906
- Performance improvement in :func:`to_datetime` when using ``'%Y%m%d'`` format (:issue:`17410`)
872907
- Performance improvement in :func:`to_datetime` when format is given or can be inferred (:issue:`50465`)
908+
- Performance improvement in :meth:`Series.median` for nullable dtypes (:issue:`50838`)
873909
- Performance improvement in :func:`read_csv` when passing :func:`to_datetime` lambda-function to ``date_parser`` and inputs have mixed timezone offsets (:issue:`35296`)
910+
- Performance improvement in :func:`isna` and :func:`isnull` (:issue:`50658`)
874911
- Performance improvement in :meth:`.SeriesGroupBy.value_counts` with categorical dtype (:issue:`46202`)
875912
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
913+
- Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
876914

877915
.. ---------------------------------------------------------------------------
878916
.. _whatsnew_200.bug_fixes:
@@ -1020,8 +1058,10 @@ I/O
10201058
- Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`)
10211059
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
10221060
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
1061+
- Bug in :func:`read_csv` unnecessarily overflowing for extension array dtype when containing ``NA`` (:issue:`32134`)
10231062
- Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`)
10241063
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
1064+
- Bug in :func:`read_xml` where file-like objects failed when iterparse is used (:issue:`50641`)
10251065

10261066
Period
10271067
^^^^^^

pandas/_libs/algos.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -1366,7 +1366,7 @@ def rank_2d(
13661366
nan_fill_val = get_rank_nan_fill_val(nans_rank_highest, <numeric_object_t>0)
13671367

13681368
if numeric_object_t is object:
1369-
mask = missing.isnaobj2d(values).view(np.uint8)
1369+
mask = missing.isnaobj(values).view(np.uint8)
13701370
elif numeric_object_t is float64_t or numeric_object_t is float32_t:
13711371
mask = np.isnan(values).view(np.uint8)
13721372
else:

pandas/_libs/missing.pyi

-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,5 @@ def isposinf_scalar(val: object) -> bool: ...
1313
def isneginf_scalar(val: object) -> bool: ...
1414
def checknull(val: object, inf_as_na: bool = ...) -> bool: ...
1515
def isnaobj(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
16-
def isnaobj2d(arr: np.ndarray, inf_as_na: bool = ...) -> npt.NDArray[np.bool_]: ...
1716
def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
1817
def is_float_nan(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

pandas/_libs/missing.pyx

+15-47
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ from sys import maxsize
44

55
cimport cython
66
from cython cimport Py_ssize_t
7+
78
import numpy as np
89

910
cimport numpy as cnp
1011
from numpy cimport (
12+
flatiter,
1113
float64_t,
1214
int64_t,
1315
ndarray,
@@ -197,56 +199,22 @@ cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
197199
result : ndarray (dtype=np.bool_)
198200
"""
199201
cdef:
200-
Py_ssize_t i, n
202+
Py_ssize_t i, n = arr.size
201203
object val
202-
ndarray[uint8_t] result
203-
204-
assert arr.ndim == 1, "'arr' must be 1-D."
205-
206-
n = len(arr)
207-
result = np.empty(n, dtype=np.uint8)
208-
for i in range(n):
209-
val = arr[i]
210-
result[i] = checknull(val, inf_as_na=inf_as_na)
211-
return result.view(np.bool_)
212-
213-
214-
@cython.wraparound(False)
215-
@cython.boundscheck(False)
216-
def isnaobj2d(arr: ndarray, inf_as_na: bool = False) -> ndarray:
217-
"""
218-
Return boolean mask denoting which elements of a 2-D array are na-like,
219-
according to the criteria defined in `checknull`:
220-
- None
221-
- nan
222-
- NaT
223-
- np.datetime64 representation of NaT
224-
- np.timedelta64 representation of NaT
225-
- NA
226-
- Decimal("NaN")
227-
228-
Parameters
229-
----------
230-
arr : ndarray
231-
232-
Returns
233-
-------
234-
result : ndarray (dtype=np.bool_)
235-
"""
236-
cdef:
237-
Py_ssize_t i, j, n, m
238-
object val
239-
ndarray[uint8_t, ndim=2] result
240-
241-
assert arr.ndim == 2, "'arr' must be 2-D."
204+
bint is_null
205+
ndarray result = np.empty((<object>arr).shape, dtype=np.uint8)
206+
flatiter it = cnp.PyArray_IterNew(arr)
207+
flatiter it2 = cnp.PyArray_IterNew(result)
242208

243-
n, m = (<object>arr).shape
244-
result = np.zeros((n, m), dtype=np.uint8)
245209
for i in range(n):
246-
for j in range(m):
247-
val = arr[i, j]
248-
if checknull(val, inf_as_na=inf_as_na):
249-
result[i, j] = 1
210+
# The PyArray_GETITEM and PyArray_ITER_NEXT are faster
211+
# equivalents to `val = arr.flat[i]`
212+
val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
213+
cnp.PyArray_ITER_NEXT(it)
214+
is_null = checknull(val, inf_as_na=inf_as_na)
215+
# Dereference pointer (set value)
216+
(<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
217+
cnp.PyArray_ITER_NEXT(it2)
250218
return result.view(np.bool_)
251219

252220

pandas/_libs/src/ujson/python/objToJSON.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -350,13 +350,15 @@ static char *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc,
350350
static char *NpyDateTimeToIsoCallback(JSOBJ Py_UNUSED(unused),
351351
JSONTypeContext *tc, size_t *len) {
352352
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
353-
return int64ToIso(GET_TC(tc)->longValue, base, len);
353+
GET_TC(tc)->cStr = int64ToIso(GET_TC(tc)->longValue, base, len);
354+
return GET_TC(tc)->cStr;
354355
}
355356

356357
/* JSON callback. returns a char* and mutates the pointer to *len */
357358
static char *NpyTimeDeltaToIsoCallback(JSOBJ Py_UNUSED(unused),
358359
JSONTypeContext *tc, size_t *len) {
359-
return int64ToIsoDuration(GET_TC(tc)->longValue, len);
360+
GET_TC(tc)->cStr = int64ToIsoDuration(GET_TC(tc)->longValue, len);
361+
return GET_TC(tc)->cStr;
360362
}
361363

362364
/* JSON callback */

0 commit comments

Comments
 (0)