Skip to content

Commit bae65ec

Browse files
committed
Merge branch 'master' into styler_bar_colors
2 parents d395c4d + 556f437 commit bae65ec

40 files changed

+486
-329
lines changed

.github/workflows/database.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ on:
44
push:
55
branches:
66
- master
7+
- 1.3.x
78
pull_request:
89
branches:
910
- master
@@ -79,7 +80,7 @@ jobs:
7980
- uses: conda-incubator/setup-miniconda@v2
8081
with:
8182
activate-environment: pandas-dev
82-
channel-priority: flexible
83+
channel-priority: strict
8384
environment-file: ${{ matrix.ENV_FILE }}
8485
use-only-tar-bz2: true
8586

asv_bench/benchmarks/algorithms.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ def setup(self, unique, sort, dtype):
4444
raise NotImplementedError
4545

4646
data = {
47-
"int": pd.Int64Index(np.arange(N)),
48-
"uint": pd.UInt64Index(np.arange(N)),
49-
"float": pd.Float64Index(np.random.randn(N)),
47+
"int": pd.Index(np.arange(N), dtype="int64"),
48+
"uint": pd.Index(np.arange(N), dtype="uint64"),
49+
"float": pd.Index(np.random.randn(N), dtype="float64"),
5050
"object": string_index,
5151
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
5252
"datetime64[ns, tz]": pd.date_range(
@@ -76,9 +76,9 @@ class Duplicated:
7676
def setup(self, unique, keep, dtype):
7777
N = 10 ** 5
7878
data = {
79-
"int": pd.Int64Index(np.arange(N)),
80-
"uint": pd.UInt64Index(np.arange(N)),
81-
"float": pd.Float64Index(np.random.randn(N)),
79+
"int": pd.Index(np.arange(N), dtype="int64"),
80+
"uint": pd.Index(np.arange(N), dtype="uint64"),
81+
"float": pd.Index(np.random.randn(N), dtype="float64"),
8282
"string": tm.makeStringIndex(N),
8383
"datetime64[ns]": pd.date_range("2011-01-01", freq="H", periods=N),
8484
"datetime64[ns, tz]": pd.date_range(

asv_bench/benchmarks/io/style.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,14 @@ def peakmem_format_render(self, cols, rows):
4242
self._style_format()
4343
self.st._render_html(True, True)
4444

45+
def time_apply_format_hide_render(self, cols, rows):
46+
self._style_apply_format_hide()
47+
self.st._render_html(True, True)
48+
49+
def peakmem_apply_format_hide_render(self, cols, rows):
50+
self._style_apply_format_hide()
51+
self.st._render_html(True, True)
52+
4553
def _style_apply(self):
4654
def _apply_func(s):
4755
return [
@@ -63,3 +71,9 @@ def _style_format(self):
6371
self.st = self.df.style.format(
6472
"{:,.3f}", subset=IndexSlice["row_1":f"row_{ir}", "float_1":f"float_{ic}"]
6573
)
74+
75+
def _style_apply_format_hide(self):
76+
self.st = self.df.style.applymap(lambda v: "color: red;")
77+
self.st.format("{:.3f}")
78+
self.st.hide_index(self.st.index[1:])
79+
self.st.hide_columns(self.st.columns[1:])

asv_bench/benchmarks/sparse.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,4 +180,19 @@ def time_min_max(self, func, fill_value):
180180
getattr(self.sp_arr, func)()
181181

182182

183+
class Take:
184+
185+
params = ([np.array([0]), np.arange(100_000), np.full(100_000, -1)], [True, False])
186+
param_names = ["indices", "allow_fill"]
187+
188+
def setup(self, indices, allow_fill):
189+
N = 1_000_000
190+
fill_value = 0.0
191+
arr = make_array(N, 1e-5, fill_value, np.float64)
192+
self.sp_arr = SparseArray(arr, fill_value=fill_value)
193+
194+
def time_take(self, indices, allow_fill):
195+
self.sp_arr.take(indices, allow_fill=allow_fill)
196+
197+
183198
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.3.4.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Fixed regressions
1919
- Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`)
2020
- Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`)
2121
- Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`)
22+
- Fixed regression in :meth:`pandas.read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`)
2223
-
2324

2425
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ Styler
7979
- Keyword arguments ``level`` and ``names`` added to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for additional control of visibility of MultiIndexes and index names (:issue:`25475`, :issue:`43404`, :issue:`43346`)
8080
- Global options have been extended to configure default ``Styler`` properties including formatting and encoding and mathjax options and LaTeX (:issue:`41395`)
8181
- Naive sparsification is now possible for LaTeX without the multirow package (:issue:`43369`)
82+
- :meth:`Styler.to_html` omits CSSStyle rules for hidden table elements (:issue:`43619`)
8283

8384
Formerly Styler relied on ``display.html.use_mathjax``, which has now been replaced by ``styler.html.mathjax``.
8485

@@ -123,6 +124,7 @@ Other enhancements
123124
- Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
124125
- Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
125126
- Attempting to write into a file in missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions missing parent directory; the same is true for :class:`Series` counterparts (:issue:`24306`)
127+
- :meth:`IntegerArray.all`, :meth:`IntegerArray.any`, :meth:`FloatingArray.any`, and :meth:`FloatingArray.all` use Kleene logic (:issue:`41967`)
126128
- Added support for nullable boolean and integer types in :meth:`DataFrame.to_stata`, :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`40855`)
127129
-
128130

@@ -354,6 +356,7 @@ Performance improvements
354356
- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
355357
- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)
356358
- :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`)
359+
- Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`)
357360
-
358361

359362
.. ---------------------------------------------------------------------------

pandas/_libs/index.pyx

Lines changed: 10 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ cdef class IndexEngine:
8484
if self.over_size_threshold and self.is_monotonic_increasing:
8585
if not self.is_unique:
8686
return self._get_loc_duplicates(val)
87-
values = self._get_index_values()
87+
values = self.values
8888

8989
self._check_type(val)
9090
try:
@@ -116,7 +116,7 @@ cdef class IndexEngine:
116116
Py_ssize_t diff
117117

118118
if self.is_monotonic_increasing:
119-
values = self._get_index_values()
119+
values = self.values
120120
try:
121121
left = values.searchsorted(val, side='left')
122122
right = values.searchsorted(val, side='right')
@@ -139,7 +139,7 @@ cdef class IndexEngine:
139139
cdef:
140140
ndarray[uint8_t, ndim=1, cast=True] indexer
141141

142-
indexer = self._get_index_values() == val
142+
indexer = self.values == val
143143
return self._unpack_bool_indexer(indexer, val)
144144

145145
cdef _unpack_bool_indexer(self,
@@ -199,7 +199,7 @@ cdef class IndexEngine:
199199
cdef:
200200
bint is_unique
201201
try:
202-
values = self._get_index_values()
202+
values = self.values
203203
self.monotonic_inc, self.monotonic_dec, is_unique = \
204204
self._call_monotonic(values)
205205
except TypeError:
@@ -214,17 +214,14 @@ cdef class IndexEngine:
214214
self.unique = 1
215215
self.need_unique_check = 0
216216

217-
cdef ndarray _get_index_values(self):
218-
return self.values
219-
220217
cdef _call_monotonic(self, values):
221218
return algos.is_monotonic(values, timelike=False)
222219

223220
def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
224-
return algos.backfill(self._get_index_values(), other, limit=limit)
221+
return algos.backfill(self.values, other, limit=limit)
225222

226223
def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
227-
return algos.pad(self._get_index_values(), other, limit=limit)
224+
return algos.pad(self.values, other, limit=limit)
228225

229226
cdef _make_hash_table(self, Py_ssize_t n):
230227
raise NotImplementedError
@@ -243,7 +240,7 @@ cdef class IndexEngine:
243240

244241
if not self.is_mapping_populated:
245242

246-
values = self._get_index_values()
243+
values = self.values
247244
self.mapping = self._make_hash_table(len(values))
248245
self._call_map_locations(values)
249246

@@ -291,7 +288,7 @@ cdef class IndexEngine:
291288
bint d_has_nan = False, stargets_has_nan = False, need_nan_check = True
292289

293290
self._ensure_mapping_populated()
294-
values = np.array(self._get_index_values(), copy=False)
291+
values = self.values
295292
stargets = set(targets)
296293

297294
n = len(values)
@@ -411,9 +408,6 @@ cdef class ObjectEngine(IndexEngine):
411408

412409
cdef class DatetimeEngine(Int64Engine):
413410

414-
cdef str _get_box_dtype(self):
415-
return 'M8[ns]'
416-
417411
cdef int64_t _unbox_scalar(self, scalar) except? -1:
418412
# NB: caller is responsible for ensuring tzawareness compat
419413
# before we get here
@@ -431,16 +425,13 @@ cdef class DatetimeEngine(Int64Engine):
431425
if self.over_size_threshold and self.is_monotonic_increasing:
432426
if not self.is_unique:
433427
return self._get_loc_duplicates(conv)
434-
values = self._get_index_values()
428+
values = self.values
435429
loc = values.searchsorted(conv, side='left')
436430
return values[loc] == conv
437431

438432
self._ensure_mapping_populated()
439433
return conv in self.mapping
440434

441-
cdef ndarray _get_index_values(self):
442-
return self.values.view('i8')
443-
444435
cdef _call_monotonic(self, values):
445436
return algos.is_monotonic(values, timelike=True)
446437

@@ -462,7 +453,7 @@ cdef class DatetimeEngine(Int64Engine):
462453
if self.over_size_threshold and self.is_monotonic_increasing:
463454
if not self.is_unique:
464455
return self._get_loc_duplicates(conv)
465-
values = self._get_index_values()
456+
values = self.values
466457

467458
loc = values.searchsorted(conv, side='left')
468459

@@ -479,35 +470,9 @@ cdef class DatetimeEngine(Int64Engine):
479470
except KeyError:
480471
raise KeyError(val)
481472

482-
def get_indexer_non_unique(self, ndarray targets):
483-
# we may get datetime64[ns] or timedelta64[ns], cast these to int64
484-
return super().get_indexer_non_unique(targets.view("i8"))
485-
486-
def get_indexer(self, ndarray values) -> np.ndarray:
487-
self._ensure_mapping_populated()
488-
if values.dtype != self._get_box_dtype():
489-
return np.repeat(-1, len(values)).astype(np.intp)
490-
values = np.asarray(values).view('i8')
491-
return self.mapping.lookup(values)
492-
493-
def get_pad_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
494-
if other.dtype != self._get_box_dtype():
495-
return np.repeat(-1, len(other)).astype(np.intp)
496-
other = np.asarray(other).view('i8')
497-
return algos.pad(self._get_index_values(), other, limit=limit)
498-
499-
def get_backfill_indexer(self, other: np.ndarray, limit=None) -> np.ndarray:
500-
if other.dtype != self._get_box_dtype():
501-
return np.repeat(-1, len(other)).astype(np.intp)
502-
other = np.asarray(other).view('i8')
503-
return algos.backfill(self._get_index_values(), other, limit=limit)
504-
505473

506474
cdef class TimedeltaEngine(DatetimeEngine):
507475

508-
cdef str _get_box_dtype(self):
509-
return 'm8[ns]'
510-
511476
cdef int64_t _unbox_scalar(self, scalar) except? -1:
512477
if not (isinstance(scalar, _Timedelta) or scalar is NaT):
513478
raise TypeError(scalar)

pandas/_libs/index_class_helper.pxi.in

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,11 @@ cdef class {{name}}Engine(IndexEngine):
4949
# Returns ndarray[bool] or int
5050
cdef:
5151
ndarray[uint8_t, ndim=1, cast=True] indexer
52-
ndarray[intp_t, ndim=1] found
5352
ndarray[{{dtype}}_t, ndim=1] values
54-
int count = 0
5553

5654
self._check_type(val)
5755

58-
values = self._get_index_values()
56+
values = self.values
5957
try:
6058
with warnings.catch_warnings():
6159
# e.g. if values is float64 and `val` is a str, suppress warning

pandas/core/algorithms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def unique(values):
422422
"""
423423
values = _ensure_arraylike(values)
424424

425-
if is_extension_array_dtype(values):
425+
if is_extension_array_dtype(values.dtype):
426426
# Dispatch to extension dtype's unique.
427427
return values.unique()
428428

0 commit comments

Comments
 (0)