Skip to content

Commit 59cd733

Browse files
author
Sylvain MARIE
committed
Merge branch 'main' of https://github.com/pandas-dev/pandas into feature/46405_set_locale_bug
2 parents 149556f + 37e6239 commit 59cd733

File tree

97 files changed

+1765
-728
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

97 files changed

+1765
-728
lines changed

.pre-commit-config.yaml

+8
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,11 @@ repos:
229229
entry: python scripts/validate_min_versions_in_sync.py
230230
language: python
231231
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
232+
- id: flake8-pyi
233+
name: flake8-pyi
234+
entry: flake8 --extend-ignore=E301,E302,E305,E701,E704
235+
types: [pyi]
236+
language: python
237+
additional_dependencies:
238+
- flake8==4.0.1
239+
- flake8-pyi==22.5.1

asv_bench/benchmarks/io/sql.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def setup(self, connection):
3939
index=tm.makeStringIndex(N),
4040
)
4141
self.df.loc[1000:3000, "float_with_nan"] = np.nan
42+
self.df["date"] = self.df["datetime"].dt.date
43+
self.df["time"] = self.df["datetime"].dt.time
4244
self.df["datetime_string"] = self.df["datetime"].astype(str)
4345
self.df.to_sql(self.table_name, self.con, if_exists="replace")
4446

@@ -53,7 +55,16 @@ class WriteSQLDtypes:
5355

5456
params = (
5557
["sqlalchemy", "sqlite"],
56-
["float", "float_with_nan", "string", "bool", "int", "datetime"],
58+
[
59+
"float",
60+
"float_with_nan",
61+
"string",
62+
"bool",
63+
"int",
64+
"date",
65+
"time",
66+
"datetime",
67+
],
5768
)
5869
param_names = ["connection", "dtype"]
5970

@@ -78,6 +89,8 @@ def setup(self, connection, dtype):
7889
index=tm.makeStringIndex(N),
7990
)
8091
self.df.loc[1000:3000, "float_with_nan"] = np.nan
92+
self.df["date"] = self.df["datetime"].dt.date
93+
self.df["time"] = self.df["datetime"].dt.time
8194
self.df["datetime_string"] = self.df["datetime"].astype(str)
8295
self.df.to_sql(self.table_name, self.con, if_exists="replace")
8396

@@ -105,6 +118,8 @@ def setup(self):
105118
index=tm.makeStringIndex(N),
106119
)
107120
self.df.loc[1000:3000, "float_with_nan"] = np.nan
121+
self.df["date"] = self.df["datetime"].dt.date
122+
self.df["time"] = self.df["datetime"].dt.time
108123
self.df["datetime_string"] = self.df["datetime"].astype(str)
109124
self.df.to_sql(self.table_name, self.con, if_exists="replace")
110125

@@ -122,7 +137,16 @@ def time_read_sql_table_parse_dates(self):
122137

123138
class ReadSQLTableDtypes:
124139

125-
params = ["float", "float_with_nan", "string", "bool", "int", "datetime"]
140+
params = [
141+
"float",
142+
"float_with_nan",
143+
"string",
144+
"bool",
145+
"int",
146+
"date",
147+
"time",
148+
"datetime",
149+
]
126150
param_names = ["dtype"]
127151

128152
def setup(self, dtype):
@@ -141,6 +165,8 @@ def setup(self, dtype):
141165
index=tm.makeStringIndex(N),
142166
)
143167
self.df.loc[1000:3000, "float_with_nan"] = np.nan
168+
self.df["date"] = self.df["datetime"].dt.date
169+
self.df["time"] = self.df["datetime"].dt.time
144170
self.df["datetime_string"] = self.df["datetime"].astype(str)
145171
self.df.to_sql(self.table_name, self.con, if_exists="replace")
146172

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
from pandas import offsets
5+
6+
7+
class DatetimeStrftime:
8+
timeout = 1500
9+
params = [1000, 10000]
10+
param_names = ["obs"]
11+
12+
def setup(self, obs):
13+
d = "2018-11-29"
14+
dt = "2018-11-26 11:18:27.0"
15+
self.data = pd.DataFrame(
16+
{
17+
"dt": [np.datetime64(dt)] * obs,
18+
"d": [np.datetime64(d)] * obs,
19+
"r": [np.random.uniform()] * obs,
20+
}
21+
)
22+
23+
def time_frame_date_to_str(self, obs):
24+
self.data["d"].astype(str)
25+
26+
def time_frame_date_formatting_default(self, obs):
27+
self.data["d"].dt.strftime(date_format="%Y-%m-%d")
28+
29+
def time_frame_date_formatting_custom(self, obs):
30+
self.data["d"].dt.strftime(date_format="%Y---%m---%d")
31+
32+
def time_frame_datetime_to_str(self, obs):
33+
self.data["dt"].astype(str)
34+
35+
def time_frame_datetime_formatting_default_date_only(self, obs):
36+
self.data["dt"].dt.strftime(date_format="%Y-%m-%d")
37+
38+
def time_frame_datetime_formatting_default(self, obs):
39+
self.data["dt"].dt.strftime(date_format="%Y-%m-%d %H:%M:%S")
40+
41+
def time_frame_datetime_formatting_default_with_float(self, obs):
42+
self.data["dt"].dt.strftime(date_format="%Y-%m-%d %H:%M:%S.%f")
43+
44+
def time_frame_datetime_formatting_custom(self, obs):
45+
self.data["dt"].dt.strftime(date_format="%Y-%m-%d --- %H:%M:%S")
46+
47+
48+
class BusinessHourStrftime:
49+
timeout = 1500
50+
params = [1000, 10000]
51+
param_names = ["obs"]
52+
53+
def setup(self, obs):
54+
self.data = pd.DataFrame(
55+
{
56+
"off": [offsets.BusinessHour()] * obs,
57+
}
58+
)
59+
60+
def time_frame_offset_str(self, obs):
61+
self.data["off"].apply(str)
62+
63+
def time_frame_offset_repr(self, obs):
64+
self.data["off"].apply(repr)

doc/source/reference/testing.rst

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Exceptions and warnings
2626

2727
errors.AbstractMethodError
2828
errors.AccessorRegistrationWarning
29+
errors.CSSWarning
2930
errors.DataError
3031
errors.DtypeWarning
3132
errors.DuplicateLabelError

doc/source/whatsnew/v1.4.4.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ Fixed regressions
2424
Bug fixes
2525
~~~~~~~~~
2626
- The ``FutureWarning`` raised when passing arguments (other than ``filepath_or_buffer``) as positional in :func:`read_csv` is now raised at the correct stacklevel (:issue:`47385`)
27-
-
27+
- Bug in :meth:`DataFrame.to_sql` when ``method`` was a ``callable`` that did not return an ``int`` and would raise a ``TypeError`` (:issue:`46891`)
2828

2929
.. ---------------------------------------------------------------------------
3030

doc/source/whatsnew/v1.5.0.rst

+15-1
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,9 @@ Other enhancements
275275
- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and :class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`)
276276
- Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`)
277277
- Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files)
278+
- :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`)
279+
- :class:`Series` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) will now successfully operate when the dtype is numeric and ``numeric_only=True`` is provided; previously this would raise a ``NotImplementedError`` (:issue:`47500`)
280+
-
278281

279282
.. ---------------------------------------------------------------------------
280283
.. _whatsnew_150.notable_bug_fixes:
@@ -766,7 +769,8 @@ Other Deprecations
766769
- Deprecated the argument ``na_sentinel`` in :func:`factorize`, :meth:`Index.factorize`, and :meth:`.ExtensionArray.factorize`; pass ``use_na_sentinel=True`` instead to use the sentinel ``-1`` for NaN values and ``use_na_sentinel=False`` instead of ``na_sentinel=None`` to encode NaN values (:issue:`46910`)
767770
- Deprecated :meth:`DataFrameGroupBy.transform` not aligning the result when the UDF returned DataFrame (:issue:`45648`)
768771
- Clarified warning from :func:`to_datetime` when delimited dates can't be parsed in accordance with the specified ``dayfirst`` argument (:issue:`46210`)
769-
772+
- Deprecated :class:`Series` and :class:`Resampler` reducers (e.g. ``min``, ``max``, ``sum``, ``mean``) raising a ``NotImplementedError`` when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
773+
- Deprecated :meth:`Series.rank` returning an empty result when the dtype is non-numeric and ``numeric_only=True`` is provided; this will raise a ``TypeError`` in a future version (:issue:`47500`)
770774

771775
.. ---------------------------------------------------------------------------
772776
.. _whatsnew_150.performance:
@@ -792,6 +796,10 @@ Performance improvements
792796
- Performance improvement in :func:`read_excel` when ``nrows`` argument provided (:issue:`32727`)
793797
- Performance improvement in :meth:`.Styler.to_excel` when applying repeated CSS formats (:issue:`47371`)
794798
- Performance improvement in :meth:`MultiIndex.is_monotonic_increasing` (:issue:`47458`)
799+
- Performance improvement in :class:`BusinessHour` ``str`` and ``repr`` (:issue:`44764`)
800+
- Performance improvement in datetime arrays string formatting when one of the default strftime formats ``"%Y-%m-%d %H:%M:%S"`` or ``"%Y-%m-%d %H:%M:%S.%f"`` is used (:issue:`44764`)
801+
- Performance improvement in :meth:`Series.to_sql` and :meth:`DataFrame.to_sql` (:class:`SQLiteTable`) when processing time arrays (:issue:`44764`)
802+
-
795803

796804
.. ---------------------------------------------------------------------------
797805
.. _whatsnew_150.bug_fixes:
@@ -848,6 +856,7 @@ Conversion
848856
- Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`)
849857
- Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`)
850858
- Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`)
859+
- Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`)
851860

852861
Strings
853862
^^^^^^^
@@ -878,8 +887,10 @@ Indexing
878887
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
879888
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
880889
- Bug in :meth:`DataFrame.loc` when setting values to a column and right hand side is a dictionary (:issue:`47216`)
890+
- Bug in :meth:`DataFrame.loc` when setting a :class:`DataFrame` not aligning index in some cases (:issue:`47578`)
881891
- Bug in :meth:`Series.__setitem__` with ``datetime64[ns]`` dtype, an all-``False`` boolean mask, and an incompatible value incorrectly casting to ``object`` instead of retaining ``datetime64[ns]`` dtype (:issue:`45967`)
882892
- Bug in :meth:`Index.__getitem__` raising ``ValueError`` when indexer is from boolean dtype with ``NA`` (:issue:`45806`)
893+
- Bug in :meth:`Series.__setitem__` losing precision when enlarging :class:`Series` with scalar (:issue:`32346`)
883894
- Bug in :meth:`Series.mask` with ``inplace=True`` or setting values with a boolean mask with small integer dtypes incorrectly raising (:issue:`45750`)
884895
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
885896
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
@@ -929,12 +940,14 @@ I/O
929940
- Bug in :func:`read_parquet` when ``engine="fastparquet"`` where the file was not closed on error (:issue:`46555`)
930941
- :meth:`to_html` now excludes the ``border`` attribute from ``<table>`` elements when ``border`` keyword is set to ``False``.
931942
- Bug in :func:`read_sas` with certain types of compressed SAS7BDAT files (:issue:`35545`)
943+
- Bug in :func:`read_excel` not forward filling :class:`MultiIndex` when no names were given (:issue:`47487`)
932944
- Bug in :func:`read_sas` returned ``None`` rather than an empty DataFrame for SAS7BDAT files with zero rows (:issue:`18198`)
933945
- Bug in :class:`StataWriter` where value labels were always written with default encoding (:issue:`46750`)
934946
- Bug in :class:`StataWriterUTF8` where some valid characters were removed from variable names (:issue:`47276`)
935947
- Bug in :meth:`DataFrame.to_excel` when writing an empty dataframe with :class:`MultiIndex` (:issue:`19543`)
936948
- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x40 control bytes (:issue:`31243`)
937949
- Bug in :func:`read_sas` that scrambled column names (:issue:`31243`)
950+
- Bug in :func:`read_sas` with RLE-compressed SAS7BDAT files that contain 0x00 control bytes (:issue:`47099`)
938951
-
939952

940953
Period
@@ -990,6 +1003,7 @@ Reshaping
9901003
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
9911004
- Bug in :meth:`DataFrame.align` when aligning a :class:`MultiIndex` to a :class:`Series` with another :class:`MultiIndex` (:issue:`46001`)
9921005
- Bug in concatenation with ``IntegerDtype``, or ``FloatingDtype`` arrays where the resulting dtype did not mirror the behavior of the non-nullable dtypes (:issue:`46379`)
1006+
- Bug in :func:`concat` losing dtype of columns when ``join="outer"`` and ``sort=True`` (:issue:`47329`)
9931007
- Bug in :func:`concat` not sorting the column names when ``None`` is included (:issue:`47331`)
9941008
- Bug in :func:`concat` with identical key leads to error when indexing :class:`MultiIndex` (:issue:`46519`)
9951009
- Bug in :meth:`DataFrame.join` with a list when using suffixes to join DataFrames with duplicate column names (:issue:`46396`)

pandas/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# flake8: noqa
2+
from __future__ import annotations
23

34
__docformat__ = "restructuredtext"
45

@@ -185,7 +186,7 @@
185186
__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"]
186187

187188

188-
def __dir__():
189+
def __dir__() -> list[str]:
189190
# GH43028
190191
# Int64Index etc. are deprecated, but we still want them to be available in the dir.
191192
# Remove in Pandas 2.0, when we remove Int64Index etc. from the code base.

pandas/_config/config.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
Callable,
6161
Generic,
6262
Iterable,
63+
Iterator,
6364
NamedTuple,
6465
cast,
6566
)
@@ -435,13 +436,13 @@ def __init__(self, *args) -> None:
435436

436437
self.ops = list(zip(args[::2], args[1::2]))
437438

438-
def __enter__(self):
439+
def __enter__(self) -> None:
439440
self.undo = [(pat, _get_option(pat, silent=True)) for pat, val in self.ops]
440441

441442
for pat, val in self.ops:
442443
_set_option(pat, val, silent=True)
443444

444-
def __exit__(self, *args):
445+
def __exit__(self, *args) -> None:
445446
if self.undo:
446447
for pat, val in self.undo:
447448
_set_option(pat, val, silent=True)
@@ -733,7 +734,7 @@ def pp(name: str, ks: Iterable[str]) -> list[str]:
733734

734735

735736
@contextmanager
736-
def config_prefix(prefix):
737+
def config_prefix(prefix) -> Iterator[None]:
737738
"""
738739
contextmanager for multiple invocations of API with a common prefix
739740

pandas/_libs/algos.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t:
324324

325325
@cython.boundscheck(False)
326326
@cython.wraparound(False)
327+
@cython.cdivision(True)
327328
def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
328329
cdef:
329330
Py_ssize_t i, j, xi, yi, N, K
@@ -356,8 +357,8 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
356357
nobs += 1
357358
dx = vx - meanx
358359
dy = vy - meany
359-
meanx += 1 / nobs * dx
360-
meany += 1 / nobs * dy
360+
meanx += 1. / nobs * dx
361+
meany += 1. / nobs * dy
361362
ssqdmx += (vx - meanx) * dx
362363
ssqdmy += (vy - meany) * dy
363364
covxy += (vx - meanx) * dy

pandas/_libs/index.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class BaseMultiIndexCodesEngine:
6969
) -> npt.NDArray[np.intp]: ...
7070

7171
class ExtensionEngine:
72-
def __init__(self, values: "ExtensionArray"): ...
72+
def __init__(self, values: ExtensionArray): ...
7373
def __contains__(self, val: object) -> bool: ...
7474
def get_loc(self, val: object) -> int | slice | np.ndarray: ...
7575
def get_indexer(self, values: np.ndarray) -> npt.NDArray[np.intp]: ...

0 commit comments

Comments
 (0)