Skip to content

Commit e0fddd8

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into depr_fallback_agg_dict
Conflicts: pandas/tests/resample/test_resample_api.py
2 parents a1f7277 + 5441d4e commit e0fddd8

File tree

14 files changed

+180
-26
lines changed

14 files changed

+180
-26
lines changed

.pre-commit-config.yaml

+9
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,15 @@ repos:
8181
- flake8-comprehensions==3.1.0
8282
- flake8-bugbear==21.3.2
8383
- pandas-dev-flaker==0.2.0
84+
- repo: local
85+
hooks:
86+
- id: pyright
87+
name: pyright
88+
entry: pyright
89+
language: node
90+
pass_filenames: false
91+
types: [python]
92+
additional_dependencies: ['[email protected]']
8493
- repo: local
8594
hooks:
8695
- id: flake8-rst

asv_bench/benchmarks/io/style.py

+14
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ def peakmem_classes_render(self, cols, rows):
3434
self._style_classes()
3535
self.st._render_html(True, True)
3636

37+
def time_tooltips_render(self, cols, rows):
38+
self._style_tooltips()
39+
self.st._render_html(True, True)
40+
41+
def peakmem_tooltips_render(self, cols, rows):
42+
self._style_tooltips()
43+
self.st._render_html(True, True)
44+
3745
def time_format_render(self, cols, rows):
3846
self._style_format()
3947
self.st._render_html(True, True)
@@ -77,3 +85,9 @@ def _style_apply_format_hide(self):
7785
self.st.format("{:.3f}")
7886
self.st.hide_index(self.st.index[1:])
7987
self.st.hide_columns(self.st.columns[1:])
88+
89+
def _style_tooltips(self):
90+
ttips = DataFrame("abc", index=self.df.index[::2], columns=self.df.columns[::2])
91+
self.st = self.df.style.set_tooltips(ttips)
92+
self.st.hide_index(self.st.index[12:])
93+
self.st.hide_columns(self.st.columns[12:])

doc/source/whatsnew/v1.4.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ Categorical
379379
Datetimelike
380380
^^^^^^^^^^^^
381381
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
382+
- Bug in :func:`to_datetime` with ``format`` and ``pandas.NA`` was raising ``ValueError`` (:issue:`42957`)
382383
- :func:`to_datetime` would silently swap ``MM/DD/YYYY`` and ``DD/MM/YYYY`` formats if the given ``dayfirst`` option could not be respected - now, a warning is raised in the case of delimited date strings (e.g. ``31-12-2012``) (:issue:`12585`)
383384
-
384385

@@ -427,6 +428,7 @@ Indexing
427428
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.nan`` (:issue:`35392`)
428429
- Bug in :meth:`DataFrame.query` did not handle the degree sign in a backticked column name, such as \`Temp(°C)\`, used in an expression to query a dataframe (:issue:`42826`)
429430
- Bug in :meth:`DataFrame.drop` where the error message did not show missing labels with commas when raising ``KeyError`` (:issue:`42881`)
431+
- Bug in :meth:`DataFrame.query` where method calls in query strings led to errors when the ``numexpr`` package was installed. (:issue:`22435`)
430432
- Bug in :meth:`DataFrame.nlargest` and :meth:`Series.nlargest` where sorted result did not count indexes containing ``np.nan`` (:issue:`28984`)
431433

432434

pandas/_libs/tslibs/strptime.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ from numpy cimport (
2020
ndarray,
2121
)
2222

23+
from pandas._libs.missing cimport checknull_with_nat_and_na
2324
from pandas._libs.tslibs.nattype cimport (
2425
NPY_NAT,
2526
c_nat_strings as nat_strings,
26-
checknull_with_nat,
2727
)
2828
from pandas._libs.tslibs.np_datetime cimport (
2929
check_dts_bounds,
@@ -134,7 +134,7 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
134134
iresult[i] = NPY_NAT
135135
continue
136136
else:
137-
if checknull_with_nat(val):
137+
if checknull_with_nat_and_na(val):
138138
iresult[i] = NPY_NAT
139139
continue
140140
else:

pandas/core/computation/expr.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,10 @@ def f(self, *args, **kwargs):
265265
return f
266266

267267

268-
_T = TypeVar("_T", bound="BaseExprVisitor")
268+
# should be bound by BaseExprVisitor but that creates a circular dependency:
269+
# _T is used in disallow, but disallow is used to define BaseExprVisitor
270+
# https://github.com/microsoft/pyright/issues/2315
271+
_T = TypeVar("_T")
269272

270273

271274
def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
@@ -279,11 +282,13 @@ def disallow(nodes: set[str]) -> Callable[[type[_T]], type[_T]]:
279282
"""
280283

281284
def disallowed(cls: type[_T]) -> type[_T]:
282-
cls.unsupported_nodes = ()
285+
# error: "Type[_T]" has no attribute "unsupported_nodes"
286+
cls.unsupported_nodes = () # type: ignore[attr-defined]
283287
for node in nodes:
284288
new_method = _node_not_implemented(node)
285289
name = f"visit_{node}"
286-
cls.unsupported_nodes += (name,)
290+
# error: "Type[_T]" has no attribute "unsupported_nodes"
291+
cls.unsupported_nodes += (name,) # type: ignore[attr-defined]
287292
setattr(cls, name, new_method)
288293
return cls
289294

@@ -702,7 +707,8 @@ def visit_Call(self, node, side=None, **kwargs):
702707
if key.arg:
703708
kwargs[key.arg] = self.visit(key.value).value
704709

705-
return self.const_type(res(*new_args, **kwargs), self.env)
710+
name = self.env.add_tmp(res(*new_args, **kwargs))
711+
return self.term_type(name=name, env=self.env)
706712

707713
def translate_In(self, op):
708714
return op

pandas/core/groupby/generic.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Mapping,
2222
TypeVar,
2323
Union,
24+
cast,
2425
)
2526
import warnings
2627

@@ -30,7 +31,9 @@
3031
from pandas._typing import (
3132
ArrayLike,
3233
FrameOrSeries,
34+
Manager,
3335
Manager2D,
36+
SingleManager,
3437
)
3538
from pandas.util._decorators import (
3639
Appender,
@@ -80,7 +83,6 @@
8083
Index,
8184
MultiIndex,
8285
all_indexes_same,
83-
default_index,
8486
)
8587
from pandas.core.series import Series
8688
from pandas.core.util.numba_ import maybe_use_numba
@@ -159,19 +161,21 @@ def pinner(cls):
159161
class SeriesGroupBy(GroupBy[Series]):
160162
_apply_allowlist = base.series_apply_allowlist
161163

162-
def _wrap_agged_manager(self, mgr: Manager2D) -> Series:
163-
single = mgr.iget(0)
164+
def _wrap_agged_manager(self, mgr: Manager) -> Series:
165+
if mgr.ndim == 1:
166+
mgr = cast(SingleManager, mgr)
167+
single = mgr
168+
else:
169+
mgr = cast(Manager2D, mgr)
170+
single = mgr.iget(0)
164171
ser = self.obj._constructor(single, name=self.obj.name)
165172
# NB: caller is responsible for setting ser.index
166173
return ser
167174

168-
def _get_data_to_aggregate(self) -> Manager2D:
175+
def _get_data_to_aggregate(self) -> SingleManager:
169176
ser = self._obj_with_exclusions
170177
single = ser._mgr
171-
columns = default_index(1)
172-
# Much faster than using ser.to_frame() since we avoid inferring columns
173-
# from scalar
174-
return single.to_2d_mgr(columns)
178+
return single
175179

176180
def _iterate_slices(self) -> Iterable[Series]:
177181
yield self._selected_obj

pandas/core/groupby/groupby.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -1745,6 +1745,8 @@ def count(self) -> Series | DataFrame:
17451745
ids, _, ngroups = self.grouper.group_info
17461746
mask = ids != -1
17471747

1748+
is_series = data.ndim == 1
1749+
17481750
def hfunc(bvalues: ArrayLike) -> ArrayLike:
17491751
# TODO(2DEA): reshape would not be necessary with 2D EAs
17501752
if bvalues.ndim == 1:
@@ -1754,6 +1756,10 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
17541756
masked = mask & ~isna(bvalues)
17551757

17561758
counted = lib.count_level_2d(masked, labels=ids, max_bin=ngroups, axis=1)
1759+
if is_series:
1760+
assert counted.ndim == 2
1761+
assert counted.shape[0] == 1
1762+
return counted[0]
17571763
return counted
17581764

17591765
new_mgr = data.grouped_reduce(hfunc)
@@ -2702,7 +2708,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
27022708
mgr = self._get_data_to_aggregate()
27032709

27042710
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
2705-
if len(res_mgr.items) != len(mgr.items):
2711+
if not is_ser and len(res_mgr.items) != len(mgr.items):
27062712
warnings.warn(
27072713
"Dropping invalid columns in "
27082714
f"{type(self).__name__}.quantile is deprecated. "
@@ -3134,14 +3140,15 @@ def blk_func(values: ArrayLike) -> ArrayLike:
31343140
obj = self._obj_with_exclusions
31353141

31363142
# Operate block-wise instead of column-by-column
3137-
orig_ndim = obj.ndim
3143+
is_ser = obj.ndim == 1
31383144
mgr = self._get_data_to_aggregate()
31393145

31403146
if numeric_only:
31413147
mgr = mgr.get_numeric_data()
31423148

31433149
res_mgr = mgr.grouped_reduce(blk_func, ignore_failures=True)
3144-
if len(res_mgr.items) != len(mgr.items):
3150+
3151+
if not is_ser and len(res_mgr.items) != len(mgr.items):
31453152
howstr = how.replace("group_", "")
31463153
warnings.warn(
31473154
"Dropping invalid columns in "
@@ -3162,7 +3169,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
31623169
# We should never get here
31633170
raise TypeError("All columns were dropped in grouped_reduce")
31643171

3165-
if orig_ndim == 1:
3172+
if is_ser:
31663173
out = self._wrap_agged_manager(res_mgr)
31673174
out.index = self.grouper.result_index
31683175
else:

pandas/core/internals/base.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
)
1111

1212
from pandas._typing import (
13+
ArrayLike,
1314
DtypeObj,
1415
Shape,
1516
)
@@ -18,7 +19,10 @@
1819
from pandas.core.dtypes.cast import find_common_type
1920

2021
from pandas.core.base import PandasObject
21-
from pandas.core.indexes.api import Index
22+
from pandas.core.indexes.api import (
23+
Index,
24+
default_index,
25+
)
2226

2327
T = TypeVar("T", bound="DataManager")
2428

@@ -171,6 +175,23 @@ def setitem_inplace(self, indexer, value) -> None:
171175
"""
172176
self.array[indexer] = value
173177

178+
def grouped_reduce(self, func, ignore_failures: bool = False):
179+
"""
180+
ignore_failures : bool, default False
181+
Not used; for compatibility with ArrayManager/BlockManager.
182+
"""
183+
184+
arr = self.array
185+
res = func(arr)
186+
index = default_index(len(res))
187+
188+
mgr = type(self).from_array(res, index)
189+
return mgr
190+
191+
@classmethod
192+
def from_array(cls, arr: ArrayLike, index: Index):
193+
raise AbstractMethodError(cls)
194+
174195

175196
def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
176197
"""

pandas/core/reshape/tile.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""
22
Quantilization functions and related stuff
33
"""
4+
from __future__ import annotations
5+
46
from typing import (
57
Any,
68
Callable,

pandas/core/tools/timedeltas.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
timedelta support tools
33
"""
4+
from __future__ import annotations
45

56
import numpy as np
67

pandas/io/formats/style_render.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ def _translate(
294294
d.update({"table_attributes": table_attr})
295295

296296
if self.tooltips:
297-
d = self.tooltips._translate(self.data, self.uuid, d)
297+
d = self.tooltips._translate(self, d)
298298

299299
return d
300300

@@ -1508,7 +1508,7 @@ def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
15081508
},
15091509
]
15101510

1511-
def _translate(self, styler_data: DataFrame | Series, uuid: str, d: dict):
1511+
def _translate(self, styler: StylerRenderer, d: dict):
15121512
"""
15131513
Mutate the render dictionary to allow for tooltips:
15141514
@@ -1529,21 +1529,23 @@ def _translate(self, styler_data: DataFrame | Series, uuid: str, d: dict):
15291529
-------
15301530
render_dict : Dict
15311531
"""
1532-
self.tt_data = self.tt_data.reindex_like(styler_data)
1533-
1532+
self.tt_data = self.tt_data.reindex_like(styler.data)
15341533
if self.tt_data.empty:
15351534
return d
15361535

15371536
name = self.class_name
1538-
15391537
mask = (self.tt_data.isna()) | (self.tt_data.eq("")) # empty string = no ttip
15401538
self.table_styles = [
15411539
style
15421540
for sublist in [
1543-
self._pseudo_css(uuid, name, i, j, str(self.tt_data.iloc[i, j]))
1541+
self._pseudo_css(styler.uuid, name, i, j, str(self.tt_data.iloc[i, j]))
15441542
for i in range(len(self.tt_data.index))
15451543
for j in range(len(self.tt_data.columns))
1546-
if not mask.iloc[i, j]
1544+
if not (
1545+
mask.iloc[i, j]
1546+
or i in styler.hidden_rows
1547+
or j in styler.hidden_columns
1548+
)
15471549
]
15481550
for style in sublist
15491551
]

pandas/tests/frame/test_query_eval.py

+20
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,26 @@ def test_check_tz_aware_index_query(self, tz_aware_fixture):
731731
result = df.reset_index().query('"2018-01-03 00:00:00+00" < time')
732732
tm.assert_frame_equal(result, expected)
733733

734+
def test_method_calls_in_query(self):
735+
# https://github.com/pandas-dev/pandas/issues/22435
736+
n = 10
737+
df = DataFrame({"a": 2 * np.random.rand(n), "b": np.random.rand(n)})
738+
expected = df[df["a"].astype("int") == 0]
739+
result = df.query(
740+
"a.astype('int') == 0", engine=self.engine, parser=self.parser
741+
)
742+
tm.assert_frame_equal(result, expected)
743+
744+
df = DataFrame(
745+
{
746+
"a": np.where(np.random.rand(n) < 0.5, np.nan, np.random.randn(n)),
747+
"b": np.random.randn(n),
748+
}
749+
)
750+
expected = df[df["a"].notnull()]
751+
result = df.query("a.notnull()", engine=self.engine, parser=self.parser)
752+
tm.assert_frame_equal(result, expected)
753+
734754

735755
@td.skip_if_no_ne
736756
class TestDataFrameQueryNumExprPython(TestDataFrameQueryNumExprPandas):

pandas/tests/tools/test_to_datetime.py

+22
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,28 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input_s, expected):
177177
result = to_datetime(input_s, format="%Y%m%d", errors="coerce")
178178
tm.assert_series_equal(result, expected)
179179

180+
@pytest.mark.parametrize(
181+
"data, format, expected",
182+
[
183+
([pd.NA], "%Y%m%d%H%M%S", DatetimeIndex(["NaT"])),
184+
([pd.NA], None, DatetimeIndex(["NaT"])),
185+
(
186+
[pd.NA, "20210202202020"],
187+
"%Y%m%d%H%M%S",
188+
DatetimeIndex(["NaT", "2021-02-02 20:20:20"]),
189+
),
190+
(["201010", pd.NA], "%y%m%d", DatetimeIndex(["2020-10-10", "NaT"])),
191+
(["201010", pd.NA], "%d%m%y", DatetimeIndex(["2010-10-20", "NaT"])),
192+
(["201010", pd.NA], None, DatetimeIndex(["2010-10-20", "NaT"])),
193+
([None, np.nan, pd.NA], None, DatetimeIndex(["NaT", "NaT", "NaT"])),
194+
([None, np.nan, pd.NA], "%Y%m%d", DatetimeIndex(["NaT", "NaT", "NaT"])),
195+
],
196+
)
197+
def test_to_datetime_with_NA(self, data, format, expected):
198+
# GH#42957
199+
result = to_datetime(data, format=format)
200+
tm.assert_index_equal(result, expected)
201+
180202
@pytest.mark.parametrize("cache", [True, False])
181203
def test_to_datetime_format_integer(self, cache):
182204
# GH 10178

0 commit comments

Comments
 (0)