Skip to content

Commit ae5f4b7

Browse files
Merge remote-tracking branch 'upstream/master' into fix-20432
2 parents b90967d + 64f0844 commit ae5f4b7

File tree

12 files changed

+190
-113
lines changed

12 files changed

+190
-113
lines changed

ci/run_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ if [[ "not network" == *"$PATTERN"* ]]; then
1010
fi
1111

1212
if [ "$COVERAGE" ]; then
13-
COVERAGE="-s --cov=pandas --cov-report=xml"
13+
COVERAGE="-s --cov=pandas --cov-report=xml --cov-append"
1414
fi
1515

1616
# If no X server is found, we use xvfb to emulate it

codecov.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ coverage:
88
status:
99
project:
1010
default:
11-
target: '72'
11+
target: '82'
1212
patch:
1313
default:
1414
target: '50'

doc/source/whatsnew/v1.3.0.rst

+26
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,31 @@ both XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`)
110110
111111
For more, see :ref:`io.xml` in the user guide on IO tools.
112112

113+
Styler Upgrades
114+
^^^^^^^^^^^^^^^
115+
116+
We provided some focused development on :class:`.Styler`, including altering methods
117+
to accept more universal CSS language for arguments, such as ``'color:red;'`` instead of
118+
``[('color', 'red')]`` (:issue:`39564`). This is also added to the built-in methods
119+
to allow custom CSS highlighting instead of default background coloring (:issue:`40242`).
120+
121+
The :meth:`.Styler.apply` now consistently allows functions with ``ndarray`` output to
122+
allow more flexible development of UDFs when ``axis`` is ``None``, ``0`` or ``1`` (:issue:`39393`).
123+
124+
:meth:`.Styler.set_tooltips` is a new method that allows adding on hover tooltips to
125+
enhance interactive displays (:issue:`35643`). :meth:`.Styler.set_td_classes`, which was recently
126+
introduced in v1.2.0 (:issue:`36159`) to allow adding specific CSS classes to data cells, has
127+
been made as performant as :meth:`.Styler.apply` and :meth:`.Styler.applymap` (:issue:`40453`),
128+
if not more performant in some cases. The overall performance of HTML
129+
render times has been considerably improved to
130+
match :meth:`DataFrame.to_html` (:issue:`39952` :issue:`37792` :issue:`40425`).
131+
132+
The :meth:`.Styler.format` has had upgrades to easily format missing data,
133+
precision, and perform HTML escaping (:issue:`40437` :issue:`40134`). There have been numerous other bug fixes to
134+
properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`40356` :issue:`39807` :issue:`39889` :issue:`39627`).
135+
136+
Documentation has also seen major revisions in light of new features (:issue:`39720` :issue:`39317` :issue:`40493`).
137+
113138
.. _whatsnew_130.dataframe_honors_copy_with_dict:
114139

115140
DataFrame constructor honors ``copy=False`` with dict
@@ -572,6 +597,7 @@ Conversion
572597
- Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
573598
- Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
574599
- Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
600+
- Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`)
575601
- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
576602
-
577603

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ dependencies:
7878
- bottleneck>=1.2.1
7979
- ipykernel
8080
- ipython>=7.11.1
81+
- decorator=4 # temporary pin (dependency of IPython), see GH-40768
8182
- jinja2 # pandas.Styler
8283
- matplotlib>=2.2.2 # pandas.plotting, Series.plot, DataFrame.plot
8384
- numexpr>=2.6.8

pandas/core/arrays/string_.py

+3
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,9 @@ def astype(self, dtype, copy=True):
327327
arr[mask] = "0"
328328
values = arr.astype(dtype.numpy_dtype)
329329
return FloatingArray(values, mask, copy=False)
330+
elif isinstance(dtype, ExtensionDtype):
331+
cls = dtype.construct_array_type()
332+
return cls._from_sequence(self, dtype=dtype, copy=copy)
330333
elif np.issubdtype(dtype, np.floating):
331334
arr = self._ndarray.copy()
332335
mask = self.isna()

pandas/core/internals/blocks.py

+13-107
Original file line numberDiff line numberDiff line change
@@ -1106,128 +1106,34 @@ def interpolate(
11061106
# If there are no NAs, then interpolate is a no-op
11071107
return [self] if inplace else [self.copy()]
11081108

1109-
# a fill na type method
11101109
try:
11111110
m = missing.clean_fill_method(method)
11121111
except ValueError:
11131112
m = None
1113+
if m is None and self.dtype.kind != "f":
1114+
# only deal with floats
1115+
# bc we already checked that can_hold_na, we dont have int dtype here
1116+
# TODO: make a copy if not inplace?
1117+
return [self]
11141118

1115-
if m is not None:
1116-
if fill_value is not None:
1117-
# similar to validate_fillna_kwargs
1118-
raise ValueError("Cannot pass both fill_value and method")
1119-
1120-
return self._interpolate_with_fill(
1121-
method=m,
1122-
axis=axis,
1123-
inplace=inplace,
1124-
limit=limit,
1125-
limit_area=limit_area,
1126-
downcast=downcast,
1127-
)
1128-
# validate the interp method
1129-
m = missing.clean_interp_method(method, **kwargs)
1130-
1131-
assert index is not None # for mypy
1119+
data = self.values if inplace else self.values.copy()
1120+
data = cast(np.ndarray, data) # bc overridden by ExtensionBlock
11321121

1133-
return self._interpolate(
1134-
method=m,
1135-
index=index,
1122+
interp_values = missing.interpolate_array_2d(
1123+
data,
1124+
method=method,
11361125
axis=axis,
1126+
index=index,
11371127
limit=limit,
11381128
limit_direction=limit_direction,
11391129
limit_area=limit_area,
11401130
fill_value=fill_value,
1141-
inplace=inplace,
1142-
downcast=downcast,
11431131
**kwargs,
11441132
)
11451133

1146-
@final
1147-
def _interpolate_with_fill(
1148-
self,
1149-
method: str = "pad",
1150-
axis: int = 0,
1151-
inplace: bool = False,
1152-
limit: Optional[int] = None,
1153-
limit_area: Optional[str] = None,
1154-
downcast: Optional[str] = None,
1155-
) -> List[Block]:
1156-
""" fillna but using the interpolate machinery """
1157-
inplace = validate_bool_kwarg(inplace, "inplace")
1158-
1159-
assert self._can_hold_na # checked by caller
1160-
1161-
values = self.values if inplace else self.values.copy()
1162-
1163-
values = missing.interpolate_2d(
1164-
values,
1165-
method=method,
1166-
axis=axis,
1167-
limit=limit,
1168-
limit_area=limit_area,
1169-
)
1170-
1171-
values = maybe_coerce_values(values)
1172-
blocks = [self.make_block_same_class(values)]
1173-
return self._maybe_downcast(blocks, downcast)
1174-
1175-
@final
1176-
def _interpolate(
1177-
self,
1178-
method: str,
1179-
index: Index,
1180-
fill_value: Optional[Any] = None,
1181-
axis: int = 0,
1182-
limit: Optional[int] = None,
1183-
limit_direction: str = "forward",
1184-
limit_area: Optional[str] = None,
1185-
inplace: bool = False,
1186-
downcast: Optional[str] = None,
1187-
**kwargs,
1188-
) -> List[Block]:
1189-
""" interpolate using scipy wrappers """
1190-
inplace = validate_bool_kwarg(inplace, "inplace")
1191-
data = self.values if inplace else self.values.copy()
1192-
1193-
# only deal with floats
1194-
if self.dtype.kind != "f":
1195-
# bc we already checked that can_hold_na, we dont have int dtype here
1196-
return [self]
1197-
1198-
if is_valid_na_for_dtype(fill_value, self.dtype):
1199-
fill_value = self.fill_value
1200-
1201-
if method in ("krogh", "piecewise_polynomial", "pchip"):
1202-
if not index.is_monotonic:
1203-
raise ValueError(
1204-
f"{method} interpolation requires that the index be monotonic."
1205-
)
1206-
# process 1-d slices in the axis direction
1207-
1208-
def func(yvalues: np.ndarray) -> np.ndarray:
1209-
1210-
# process a 1-d slice, returning it
1211-
# should the axis argument be handled below in apply_along_axis?
1212-
# i.e. not an arg to missing.interpolate_1d
1213-
return missing.interpolate_1d(
1214-
xvalues=index,
1215-
yvalues=yvalues,
1216-
method=method,
1217-
limit=limit,
1218-
limit_direction=limit_direction,
1219-
limit_area=limit_area,
1220-
fill_value=fill_value,
1221-
bounds_error=False,
1222-
**kwargs,
1223-
)
1224-
1225-
# interp each column independently
1226-
interp_values = np.apply_along_axis(func, axis, data)
12271134
interp_values = maybe_coerce_values(interp_values)
1228-
1229-
blocks = [self.make_block_same_class(interp_values)]
1230-
return self._maybe_downcast(blocks, downcast)
1135+
nbs = [self.make_block_same_class(interp_values)]
1136+
return self._maybe_downcast(nbs, downcast)
12311137

12321138
def take_nd(
12331139
self,

pandas/core/missing.py

+109-3
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@
3636
is_numeric_v_string_like,
3737
needs_i8_conversion,
3838
)
39-
from pandas.core.dtypes.missing import isna
39+
from pandas.core.dtypes.missing import (
40+
is_valid_na_for_dtype,
41+
isna,
42+
na_value_for_dtype,
43+
)
4044

4145
if TYPE_CHECKING:
4246
from pandas import Index
@@ -145,7 +149,7 @@ def clean_fill_method(method, allow_nearest: bool = False):
145149
]
146150

147151

148-
def clean_interp_method(method: str, **kwargs) -> str:
152+
def clean_interp_method(method: str, index: Index, **kwargs) -> str:
149153
order = kwargs.get("order")
150154

151155
if method in ("spline", "polynomial") and order is None:
@@ -155,6 +159,12 @@ def clean_interp_method(method: str, **kwargs) -> str:
155159
if method not in valid:
156160
raise ValueError(f"method must be one of {valid}. Got '{method}' instead.")
157161

162+
if method in ("krogh", "piecewise_polynomial", "pchip"):
163+
if not index.is_monotonic:
164+
raise ValueError(
165+
f"{method} interpolation requires that the index be monotonic."
166+
)
167+
158168
return method
159169

160170

@@ -195,6 +205,102 @@ def find_valid_index(values, *, how: str) -> Optional[int]:
195205
return idxpos
196206

197207

208+
def interpolate_array_2d(
209+
data: np.ndarray,
210+
method: str = "pad",
211+
axis: int = 0,
212+
index: Optional[Index] = None,
213+
limit: Optional[int] = None,
214+
limit_direction: str = "forward",
215+
limit_area: Optional[str] = None,
216+
fill_value: Optional[Any] = None,
217+
coerce: bool = False,
218+
downcast: Optional[str] = None,
219+
**kwargs,
220+
):
221+
"""
222+
Wrapper to dispatch to either interpolate_2d or interpolate_2d_with_fill.
223+
"""
224+
try:
225+
m = clean_fill_method(method)
226+
except ValueError:
227+
m = None
228+
229+
if m is not None:
230+
if fill_value is not None:
231+
# similar to validate_fillna_kwargs
232+
raise ValueError("Cannot pass both fill_value and method")
233+
234+
interp_values = interpolate_2d(
235+
data,
236+
method=m,
237+
axis=axis,
238+
limit=limit,
239+
limit_area=limit_area,
240+
)
241+
else:
242+
assert index is not None # for mypy
243+
244+
interp_values = interpolate_2d_with_fill(
245+
data=data,
246+
index=index,
247+
axis=axis,
248+
method=method,
249+
limit=limit,
250+
limit_direction=limit_direction,
251+
limit_area=limit_area,
252+
fill_value=fill_value,
253+
**kwargs,
254+
)
255+
return interp_values
256+
257+
258+
def interpolate_2d_with_fill(
259+
data: np.ndarray, # floating dtype
260+
index: Index,
261+
axis: int,
262+
method: str = "linear",
263+
limit: Optional[int] = None,
264+
limit_direction: str = "forward",
265+
limit_area: Optional[str] = None,
266+
fill_value: Optional[Any] = None,
267+
**kwargs,
268+
) -> np.ndarray:
269+
"""
270+
Column-wise application of interpolate_1d.
271+
272+
Notes
273+
-----
274+
The signature differs from interpolate_1d because it only
275+
includes what is needed for Block.interpolate.
276+
"""
277+
# validate the interp method
278+
clean_interp_method(method, index, **kwargs)
279+
280+
if is_valid_na_for_dtype(fill_value, data.dtype):
281+
fill_value = na_value_for_dtype(data.dtype, compat=False)
282+
283+
def func(yvalues: np.ndarray) -> np.ndarray:
284+
# process 1-d slices in the axis direction, returning it
285+
286+
# should the axis argument be handled below in apply_along_axis?
287+
# i.e. not an arg to interpolate_1d
288+
return interpolate_1d(
289+
xvalues=index,
290+
yvalues=yvalues,
291+
method=method,
292+
limit=limit,
293+
limit_direction=limit_direction,
294+
limit_area=limit_area,
295+
fill_value=fill_value,
296+
bounds_error=False,
297+
**kwargs,
298+
)
299+
300+
# interp each column independently
301+
return np.apply_along_axis(func, axis, data)
302+
303+
198304
def interpolate_1d(
199305
xvalues: Index,
200306
yvalues: np.ndarray,
@@ -638,7 +744,7 @@ def interpolate_2d(
638744
Perform an actual interpolation of values, values will be made 2-d if
639745
needed fills inplace, returns the result.
640746
641-
Parameters
747+
Parameters
642748
----------
643749
values: array-like
644750
Input array.

pandas/tests/indexes/datetimes/test_misc.py

+1
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ def test_datetime_name_accessors(self, time_locale):
366366
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
367367
name = name.capitalize()
368368
assert dti.day_name(locale=time_locale)[day] == name
369+
assert dti.day_name(locale=None)[day] == eng_name
369370
ts = Timestamp(datetime(2016, 4, day))
370371
assert ts.day_name(locale=time_locale) == name
371372
dti = dti.append(DatetimeIndex([pd.NaT]))

pandas/tests/indexes/period/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -589,7 +589,7 @@ def test_where(self, klass):
589589
def test_where_other(self):
590590
i = period_range("20130101", periods=5, freq="D")
591591
for arr in [np.nan, NaT]:
592-
result = i.where(notna(i), other=np.nan)
592+
result = i.where(notna(i), other=arr)
593593
expected = i
594594
tm.assert_index_equal(result, expected)
595595

pandas/tests/series/accessors/test_dt_accessor.py

+1
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ def test_dt_accessor_datetime_name_accessors(self, time_locale):
443443
for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
444444
name = name.capitalize()
445445
assert s.dt.day_name(locale=time_locale)[day] == name
446+
assert s.dt.day_name(locale=None)[day] == eng_name
446447
s = s.append(Series([pd.NaT]))
447448
assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1])
448449

0 commit comments

Comments
 (0)