Skip to content

Commit 3d63f21

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 05b8503 + adfc78b commit 3d63f21

File tree

83 files changed

+504
-425
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+504
-425
lines changed

asv_bench/benchmarks/arithmetic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def setup(self, op, shape):
144144
# should already be the case, but just to be sure
145145
df._consolidate_inplace()
146146

147-
# TODO: GH#33198 the setting here shoudlnt need two steps
147+
# TODO: GH#33198 the setting here shouldn't need two steps
148148
arr1 = np.random.randn(n_rows, max(n_cols // 4, 3)).astype("f8")
149149
arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8")
150150
arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8")

asv_bench/benchmarks/io/csv.py

+12
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,18 @@ def time_frame_date_formatting(self):
6767
self.data.to_csv(self.fname, date_format="%Y%m%d")
6868

6969

70+
class ToCSVDatetimeIndex(BaseIO):
71+
72+
fname = "__test__.csv"
73+
74+
def setup(self):
75+
rng = date_range("2000", periods=100_000, freq="S")
76+
self.data = DataFrame({"a": 1}, index=rng)
77+
78+
def time_frame_date_formatting_index(self):
79+
self.data.to_csv(self.fname, date_format="%Y-%m-%d %H:%M:%S")
80+
81+
7082
class ToCSVDatetimeBig(BaseIO):
7183

7284
fname = "__test__.csv"

ci/deps/actions-38-db.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ dependencies:
1212
- pytest-cov>=2.10.1 # this is only needed in the coverage build, ref: GH 35737
1313

1414
# pandas dependencies
15-
- aiobotocore<2.0.0
15+
- aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild
1616
- beautifulsoup4
1717
- boto3
1818
- botocore>=1.11

doc/source/whatsnew/v1.4.0.rst

+6
Original file line numberDiff line numberDiff line change
@@ -537,7 +537,9 @@ Other Deprecations
537537
- Deprecated passing arguments as positional for :func:`read_fwf` other than ``filepath_or_buffer`` (:issue:`41485`)
538538
- Deprecated passing ``skipna=None`` for :meth:`DataFrame.mad` and :meth:`Series.mad`, pass ``skipna=True`` instead (:issue:`44580`)
539539
- Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`)
540+
- Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`)
540541
- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating that the arguments signature may change to more closely emulate the arguments of :meth:`.Styler.to_latex` in future versions (:issue:`44411`)
542+
- Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
541543
-
542544

543545
.. ---------------------------------------------------------------------------
@@ -589,6 +591,7 @@ Performance improvements
589591
- Performance improvement in :meth:`Series.to_frame` (:issue:`43558`)
590592
- Performance improvement in :meth:`Series.mad` (:issue:`43010`)
591593
- Performance improvement in :func:`merge` (:issue:`43332`)
594+
- Performance improvement in :func:`to_csv` when index column is a datetime and is formatted (:issue:`39413`)
592595
- Performance improvement in :func:`read_csv` when ``index_col`` was set with a numeric column (:issue:`44158`)
593596
- Performance improvement in :func:`concat` (:issue:`43354`)
594597
-
@@ -749,6 +752,7 @@ I/O
749752
- Bug in :func:`read_csv` raising ``ValueError`` when names was longer than header but equal to data rows for ``engine="python"`` (:issue:`38453`)
750753
- Bug in :class:`ExcelWriter`, where ``engine_kwargs`` were not passed through to all engines (:issue:`43442`)
751754
- Bug in :func:`read_csv` raising ``ValueError`` when ``parse_dates`` was used with ``MultiIndex`` columns (:issue:`8991`)
755+
- Bug in :func:`read_csv` not raising a ``ValueError`` when ``\n`` was specified as ``delimiter`` or ``sep`` which conflicts with ``lineterminator`` (:issue:`43528`)
752756
- Bug in :func:`read_csv` converting columns to numeric after date parsing failed (:issue:`11019`)
753757
- Bug in :func:`read_csv` not replacing ``NaN`` values with ``np.nan`` before attempting date conversion (:issue:`26203`)
754758
- Bug in :func:`read_csv` raising ``AttributeError`` when attempting to read a .csv file and infer index column dtype from a nullable integer type (:issue:`44079`)
@@ -806,6 +810,7 @@ Reshaping
806810
- Bug in :func:`crosstab` would fail when inputs are lists or tuples (:issue:`44076`)
807811
- Bug in :meth:`DataFrame.append` failing to retain ``index.name`` when appending a list of :class:`Series` objects (:issue:`44109`)
808812
- Fixed metadata propagation in :meth:`DataFrame.apply` method, consequently fixing the same issue for :meth:`DataFrame.transform`, :meth:`DataFrame.nunique` and :meth:`DataFrame.mode` (:issue:`28283`)
813+
- Bug in :func:`concat` casting levels of :class:`MultiIndex` to float if they only consist of missing values (:issue:`44900`)
809814
- Bug in :meth:`DataFrame.stack` with ``ExtensionDtype`` columns incorrectly raising (:issue:`43561`)
810815
- Bug in :meth:`Series.unstack` with object doing unwanted type inference on resulting columns (:issue:`44595`)
811816
- Bug in :class:`MultiIndex` failing join operations with overlapping ``IntervalIndex`` levels (:issue:`44096`)
@@ -856,6 +861,7 @@ Other
856861
- Bug in :meth:`DataFrame.shift` with ``axis=1`` and ``ExtensionDtype`` columns incorrectly raising when an incompatible ``fill_value`` is passed (:issue:`44564`)
857862
- Bug in :meth:`DataFrame.diff` when passing a NumPy integer object instead of an ``int`` object (:issue:`44572`)
858863
- Bug in :meth:`Series.replace` raising ``ValueError`` when using ``regex=True`` with a :class:`Series` containing ``np.nan`` values (:issue:`43344`)
864+
- Bug in :meth:`DataFrame.to_records` where an incorrect ``n`` was used when missing names were replaced by ``level_n`` (:issue:`44818`)
859865

860866
.. ***DO NOT USE THIS SECTION***
861867

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ dependencies:
105105

106106
- pytables>=3.6.1 # pandas.read_hdf, DataFrame.to_hdf
107107
- s3fs>=0.4.0 # file IO when using 's3://...' path
108-
- aiobotocore<2.0.0
108+
- aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild
109109
- fsspec>=0.7.4 # for generic remote file operations
110110
- gcsfs>=0.6.0 # file IO when using 'gcs://...' path
111111
- sqlalchemy # pandas.read_sql, DataFrame.to_sql

pandas/_libs/parsers.pyx

+18-10
Original file line numberDiff line numberDiff line change
@@ -558,18 +558,11 @@ cdef class TextReader:
558558
pass
559559

560560
def __dealloc__(self):
561-
self.close()
561+
_close(self)
562562
parser_del(self.parser)
563563

564-
def close(self) -> None:
565-
# also preemptively free all allocated memory
566-
parser_free(self.parser)
567-
if self.true_set:
568-
kh_destroy_str_starts(self.true_set)
569-
self.true_set = NULL
570-
if self.false_set:
571-
kh_destroy_str_starts(self.false_set)
572-
self.false_set = NULL
564+
def close(self):
565+
_close(self)
573566

574567
def _set_quoting(self, quote_char: str | bytes | None, quoting: int):
575568
if not isinstance(quoting, int):
@@ -1292,6 +1285,21 @@ cdef class TextReader:
12921285
return None
12931286

12941287

1288+
# Factor out code common to TextReader.__dealloc__ and TextReader.close
1289+
# It cannot be a class method, since calling self.close() in __dealloc__
1290+
# causes a class attribute lookup and violates best practices
1291+
# https://cython.readthedocs.io/en/latest/src/userguide/special_methods.html#finalization-method-dealloc
1292+
cdef _close(TextReader reader):
1293+
# also preemptively free all allocated memory
1294+
parser_free(reader.parser)
1295+
if reader.true_set:
1296+
kh_destroy_str_starts(reader.true_set)
1297+
reader.true_set = NULL
1298+
if reader.false_set:
1299+
kh_destroy_str_starts(reader.false_set)
1300+
reader.false_set = NULL
1301+
1302+
12951303
cdef:
12961304
object _true_values = [b'True', b'TRUE', b'true']
12971305
object _false_values = [b'False', b'FALSE', b'false']

pandas/compat/__init__.py

-4
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
from pandas._typing import F
1414
from pandas.compat.numpy import (
1515
is_numpy_dev,
16-
np_array_datetime64_compat,
17-
np_datetime64_compat,
1816
np_version_under1p19,
1917
np_version_under1p20,
2018
)
@@ -130,8 +128,6 @@ def get_lzma_file():
130128

131129
__all__ = [
132130
"is_numpy_dev",
133-
"np_array_datetime64_compat",
134-
"np_datetime64_compat",
135131
"np_version_under1p19",
136132
"np_version_under1p20",
137133
"pa_version_under1p01",

pandas/compat/numpy/__init__.py

-41
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
""" support numpy compatibility across versions """
2-
3-
import re
4-
52
import numpy as np
63

74
from pandas.util.version import Version
@@ -29,44 +26,6 @@
2926
)
3027

3128

32-
_tz_regex = re.compile("[+-]0000$")
33-
34-
35-
def _tz_replacer(tstring):
36-
if isinstance(tstring, str):
37-
if tstring.endswith("Z"):
38-
tstring = tstring[:-1]
39-
elif _tz_regex.search(tstring):
40-
tstring = tstring[:-5]
41-
return tstring
42-
43-
44-
def np_datetime64_compat(tstring: str, unit: str = "ns"):
45-
"""
46-
provide compat for construction of strings to numpy datetime64's with
47-
tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
48-
warning, when need to pass '2015-01-01 09:00:00'
49-
"""
50-
tstring = _tz_replacer(tstring)
51-
return np.datetime64(tstring, unit)
52-
53-
54-
def np_array_datetime64_compat(arr, dtype="M8[ns]"):
55-
"""
56-
provide compat for construction of an array of strings to a
57-
np.array(..., dtype=np.datetime64(..))
58-
tz-changes in 1.11 that make '2015-01-01 09:00:00Z' show a deprecation
59-
warning, when need to pass '2015-01-01 09:00:00'
60-
"""
61-
# is_list_like; can't import as it would be circular
62-
if hasattr(arr, "__iter__") and not isinstance(arr, (str, bytes)):
63-
arr = [_tz_replacer(s) for s in arr]
64-
else:
65-
arr = _tz_replacer(arr)
66-
67-
return np.array(arr, dtype=dtype)
68-
69-
7029
__all__ = [
7130
"np",
7231
"_np_version",

pandas/compat/pickle_compat.py

-3
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@ def load_reduce(self):
3535
args = stack.pop()
3636
func = stack[-1]
3737

38-
if len(args) and type(args[0]) is type:
39-
n = args[0].__name__ # noqa
40-
4138
try:
4239
stack[-1] = func(*args)
4340
return

pandas/conftest.py

+15
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,21 @@ def index_flat(request):
559559
index_flat2 = index_flat
560560

561561

562+
@pytest.fixture(
563+
params=[
564+
key
565+
for key in indices_dict
566+
if not isinstance(indices_dict[key], MultiIndex) and indices_dict[key].is_unique
567+
]
568+
)
569+
def index_flat_unique(request):
570+
"""
571+
index_flat with uniqueness requirement.
572+
"""
573+
key = request.param
574+
return indices_dict[key].copy()
575+
576+
562577
@pytest.fixture(
563578
params=[
564579
key

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# flake8: noqa
1+
# flake8: noqa:F401
22

33
from pandas._libs import (
44
NaT,

pandas/core/arrays/categorical.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,14 @@ def __init__(
422422
# We remove null values here, then below will re-insert
423423
# them, grep "full_codes"
424424
arr_list = [values[idx] for idx in np.where(~null_mask)[0]]
425-
arr = sanitize_array(arr_list, None)
425+
426+
# GH#44900 Do not cast to float if we have only missing values
427+
if arr_list or arr.dtype == "object":
428+
sanitize_dtype = None
429+
else:
430+
sanitize_dtype = arr.dtype
431+
432+
arr = sanitize_array(arr_list, None, dtype=sanitize_dtype)
426433
values = arr
427434

428435
if dtype.categories is None:
@@ -2377,7 +2384,7 @@ def describe(self):
23772384

23782385
return result
23792386

2380-
def isin(self, values) -> np.ndarray:
2387+
def isin(self, values) -> npt.NDArray[np.bool_]:
23812388
"""
23822389
Check whether `values` are contained in Categorical.
23832390
@@ -2394,7 +2401,7 @@ def isin(self, values) -> np.ndarray:
23942401
23952402
Returns
23962403
-------
2397-
isin : numpy.ndarray (bool dtype)
2404+
np.ndarray[bool]
23982405
23992406
Raises
24002407
------
@@ -2457,6 +2464,16 @@ def replace(self, to_replace, value, inplace: bool = False):
24572464
[3, 2, 3, 3]
24582465
Categories (2, int64): [2, 3]
24592466
"""
2467+
# GH#44929 deprecation
2468+
warn(
2469+
"Categorical.replace is deprecated and will be removed in a future "
2470+
"version. Use Series.replace directly instead.",
2471+
FutureWarning,
2472+
stacklevel=find_stack_level(),
2473+
)
2474+
return self._replace(to_replace=to_replace, value=value, inplace=inplace)
2475+
2476+
def _replace(self, *, to_replace, value, inplace: bool = False):
24602477
inplace = validate_bool_kwarg(inplace, "inplace")
24612478
cat = self if inplace else self.copy()
24622479

pandas/core/arrays/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def asi8(self) -> npt.NDArray[np.int64]:
291291
# ----------------------------------------------------------------
292292
# Rendering Methods
293293

294-
def _format_native_types(self, na_rep="NaT", date_format=None):
294+
def _format_native_types(self, *, na_rep="NaT", date_format=None):
295295
"""
296296
Helper method for astype when converting to strings.
297297

pandas/core/arrays/datetimes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ def astype(self, dtype, copy: bool = True):
670670

671671
@dtl.ravel_compat
672672
def _format_native_types(
673-
self, na_rep="NaT", date_format=None, **kwargs
673+
self, *, na_rep="NaT", date_format=None, **kwargs
674674
) -> npt.NDArray[np.object_]:
675675
from pandas.io.formats.format import get_format_datetime64_from_values
676676

pandas/core/arrays/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -632,7 +632,7 @@ def _formatter(self, boxed: bool = False):
632632

633633
@dtl.ravel_compat
634634
def _format_native_types(
635-
self, na_rep="NaT", date_format=None, **kwargs
635+
self, *, na_rep="NaT", date_format=None, **kwargs
636636
) -> np.ndarray:
637637
"""
638638
actually format my specific types

pandas/core/arrays/sparse/array.py

+13-28
Original file line numberDiff line numberDiff line change
@@ -181,9 +181,8 @@ def _sparse_array_op(
181181
ltype = SparseDtype(subtype, left.fill_value)
182182
rtype = SparseDtype(subtype, right.fill_value)
183183

184-
# TODO(GH-23092): pass copy=False. Need to fix astype_nansafe
185-
left = left.astype(ltype)
186-
right = right.astype(rtype)
184+
left = left.astype(ltype, copy=False)
185+
right = right.astype(rtype, copy=False)
187186
dtype = ltype.subtype
188187
else:
189188
dtype = ltype
@@ -233,6 +232,15 @@ def _sparse_array_op(
233232
right.fill_value,
234233
)
235234

235+
if name == "divmod":
236+
# result is a 2-tuple
237+
# error: Incompatible return value type (got "Tuple[SparseArray,
238+
# SparseArray]", expected "SparseArray")
239+
return ( # type: ignore[return-value]
240+
_wrap_result(name, result[0], index, fill[0], dtype=result_dtype),
241+
_wrap_result(name, result[1], index, fill[1], dtype=result_dtype),
242+
)
243+
236244
if result_dtype is None:
237245
result_dtype = result.dtype
238246

@@ -1224,30 +1232,8 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True):
12241232
else:
12251233
return self.copy()
12261234
dtype = self.dtype.update_dtype(dtype)
1227-
# error: Item "ExtensionDtype" of "Union[ExtensionDtype, str, dtype[Any],
1228-
# Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object],
1229-
# None]" has no attribute "_subtype_with_str"
1230-
# error: Item "str" of "Union[ExtensionDtype, str, dtype[Any], Type[str],
1231-
# Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" has no
1232-
# attribute "_subtype_with_str"
1233-
# error: Item "dtype[Any]" of "Union[ExtensionDtype, str, dtype[Any], Type[str],
1234-
# Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" has no
1235-
# attribute "_subtype_with_str"
1236-
# error: Item "ABCMeta" of "Union[ExtensionDtype, str, dtype[Any], Type[str],
1237-
# Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" has no
1238-
# attribute "_subtype_with_str"
1239-
# error: Item "type" of "Union[ExtensionDtype, str, dtype[Any], Type[str],
1240-
# Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" has no
1241-
# attribute "_subtype_with_str"
1242-
# error: Item "None" of "Union[ExtensionDtype, str, dtype[Any], Type[str],
1243-
# Type[float], Type[int], Type[complex], Type[bool], Type[object], None]" has no
1244-
# attribute "_subtype_with_str"
1245-
subtype = pandas_dtype(dtype._subtype_with_str) # type: ignore[union-attr]
1246-
# TODO copy=False is broken for astype_nansafe with int -> float, so cannot
1247-
# passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456
1248-
sp_values = astype_nansafe(self.sp_values, subtype, copy=True)
1249-
if sp_values is self.sp_values and copy:
1250-
sp_values = sp_values.copy()
1235+
subtype = pandas_dtype(dtype._subtype_with_str)
1236+
sp_values = astype_nansafe(self.sp_values, subtype, copy=copy)
12511237

12521238
# error: Argument 1 to "_simple_new" of "SparseArray" has incompatible type
12531239
# "ExtensionArray"; expected "ndarray"
@@ -1646,7 +1632,6 @@ def _arith_method(self, other, op):
16461632
else:
16471633
other = np.asarray(other)
16481634
with np.errstate(all="ignore"):
1649-
# TODO: look into _wrap_result
16501635
if len(self) != len(other):
16511636
raise AssertionError(
16521637
f"length mismatch: {len(self)} vs. {len(other)}"

pandas/core/arrays/sparse/dtype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def is_dtype(cls, dtype: object) -> bool:
292292
return True
293293
return isinstance(dtype, np.dtype) or dtype == "Sparse"
294294

295-
def update_dtype(self, dtype):
295+
def update_dtype(self, dtype) -> SparseDtype:
296296
"""
297297
Convert the SparseDtype to a new dtype.
298298

0 commit comments

Comments
 (0)