Skip to content

Commit 0977799

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents ec017f9 + 648eb40 commit 0977799

39 files changed

+798
-365
lines changed

doc/source/reference/style.rst

+3
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ Style application
3535
Styler.apply
3636
Styler.applymap
3737
Styler.format
38+
Styler.hide_index
39+
Styler.hide_columns
3840
Styler.set_td_classes
3941
Styler.set_table_styles
4042
Styler.set_table_attributes
4143
Styler.set_tooltips
4244
Styler.set_caption
45+
Styler.set_sticky
4346
Styler.set_properties
4447
Styler.set_uuid
4548
Styler.clear

doc/source/user_guide/style.ipynb

+22-6
Original file line numberDiff line numberDiff line change
@@ -1405,7 +1405,26 @@
14051405
"source": [
14061406
"### Sticky Headers\n",
14071407
"\n",
1408-
"If you display a large matrix or DataFrame in a notebook, but you want to always see the column and row headers you can use the following CSS to make them stick. We might make this into an API function later."
1408+
"If you display a large matrix or DataFrame in a notebook, but you want to always see the column and row headers you can use the [.set_sticky][sticky] method which manipulates the table styles CSS.\n",
1409+
"\n",
1410+
"[sticky]: ../reference/api/pandas.io.formats.style.Styler.set_sticky.rst"
1411+
]
1412+
},
1413+
{
1414+
"cell_type": "code",
1415+
"execution_count": null,
1416+
"metadata": {},
1417+
"outputs": [],
1418+
"source": [
1419+
"bigdf = pd.DataFrame(np.random.randn(16, 100))\n",
1420+
"bigdf.style.set_sticky(axis=\"index\")"
1421+
]
1422+
},
1423+
{
1424+
"cell_type": "markdown",
1425+
"metadata": {},
1426+
"source": [
1427+
"It is also possible to stick MultiIndexes and even only specific levels."
14091428
]
14101429
},
14111430
{
@@ -1414,11 +1433,8 @@
14141433
"metadata": {},
14151434
"outputs": [],
14161435
"source": [
1417-
"bigdf = pd.DataFrame(np.random.randn(15, 100))\n",
1418-
"bigdf.style.set_table_styles([\n",
1419-
" {'selector': 'thead th', 'props': 'position: sticky; top:0; background-color:salmon;'},\n",
1420-
" {'selector': 'tbody th', 'props': 'position: sticky; left:0; background-color:lightgreen;'} \n",
1421-
"])"
1436+
"bigdf.index = pd.MultiIndex.from_product([[\"A\",\"B\"],[0,1],[0,1,2,3]])\n",
1437+
"bigdf.style.set_sticky(axis=\"index\", pixel_size=18, levels=[1,2])"
14221438
]
14231439
},
14241440
{

doc/source/whatsnew/v1.3.0.rst

+7
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ which has been revised and improved (:issue:`39720`, :issue:`39317`, :issue:`404
138138
- Added the option ``styler.render.max_elements`` to avoid browser overload when styling large DataFrames (:issue:`40712`)
139139
- Added the method :meth:`.Styler.to_latex` (:issue:`21673`), which also allows some limited CSS conversion (:issue:`40731`)
140140
- Added the method :meth:`.Styler.to_html` (:issue:`13379`)
141+
- Added the method :meth:`.Styler.set_sticky` to make index and column headers permanently visible in scrolling HTML frames (:issue:`29072`)
141142

142143
.. _whatsnew_130.enhancements.dataframe_honors_copy_with_dict:
143144

@@ -986,6 +987,7 @@ Indexing
986987
^^^^^^^^
987988
- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
988989
- Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
990+
- Bug in :meth:`IntervalIndex.get_indexer` when ``target`` has ``CategoricalDtype`` and both the index and the target contain NA values (:issue:`41934`)
989991
- Bug in :meth:`Series.loc` raising a ``ValueError`` when input was filtered with a Boolean list and values to set were a list with lower dimension (:issue:`20438`)
990992
- Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
991993
- Bug in :meth:`DataFrame.__setitem__` raising a ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)
@@ -1025,6 +1027,7 @@ Indexing
10251027
- Bug in :meth:`PeriodIndex.get_loc` failing to raise a ``KeyError`` when given a :class:`Period` with a mismatched ``freq`` (:issue:`41670`)
10261028
- Bug in ``.loc.__getitem__`` with a :class:`UInt64Index` and negative-integer keys raising ``OverflowError`` instead of ``KeyError`` in some cases, wrapping around to positive integers in others (:issue:`41777`)
10271029
- Bug in :meth:`Index.get_indexer` failing to raise ``ValueError`` in some cases with invalid ``method``, ``limit``, or ``tolerance`` arguments (:issue:`41918`)
1030+
- Bug in slicing a :class:`Series` or :class:`DataFrame` with a :class:`TimedeltaIndex` where passing an invalid string raised ``ValueError`` instead of ``TypeError`` (:issue:`41821`)
10281031

10291032
Missing
10301033
^^^^^^^
@@ -1041,6 +1044,7 @@ MultiIndex
10411044
- Bug in :meth:`MultiIndex.intersection` duplicating ``NaN`` in the result (:issue:`38623`)
10421045
- Bug in :meth:`MultiIndex.equals` incorrectly returning ``True`` when the :class:`MultiIndex` contained ``NaN`` even when they are differently ordered (:issue:`38439`)
10431046
- Bug in :meth:`MultiIndex.intersection` always returning an empty result when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
1047+
- Bug in :meth:`MultiIndex.difference` incorrectly raising ``TypeError`` when indexes contain non-sortable entries (:issue:`41915`)
10441048
- Bug in :meth:`MultiIndex.reindex` raising a ``ValueError`` when used on an empty :class:`MultiIndex` and indexing only a specific level (:issue:`41170`)
10451049
- Bug in :meth:`MultiIndex.reindex` raising ``TypeError`` when reindexing against a flat :class:`Index` (:issue:`41707`)
10461050

@@ -1080,6 +1084,7 @@ I/O
10801084
- Bug in the conversion from PyArrow to pandas (e.g. for reading Parquet) with nullable dtypes and a PyArrow array whose data buffer size is not a multiple of the dtype size (:issue:`40896`)
10811085
- Bug in :func:`read_excel` would raise an error when pandas could not determine the file type even though the user specified the ``engine`` argument (:issue:`41225`)
10821086
- Bug in :func:`read_clipboard` where copying from an Excel file shifted values into the wrong column if there were null values in the first column (:issue:`41108`)
1087+
- Bug in :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` raising a ``TypeError`` when trying to append a string column to an incompatible column (:issue:`41897`)
10831088

10841089
Period
10851090
^^^^^^
@@ -1139,6 +1144,8 @@ Groupby/resample/rolling
11391144
- Bug in :class:`DataFrameGroupBy` aggregations incorrectly failing to drop columns with invalid dtypes for that aggregation when there are no valid columns (:issue:`41291`)
11401145
- Bug in :meth:`DataFrame.rolling.__iter__` where ``on`` was not assigned to the index of the resulting objects (:issue:`40373`)
11411146
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.DataFrameGroupBy.agg` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`41647`)
1147+
- Bug in :class:`DataFrameGroupBy` methods ``agg``, ``transform``, ``sum``, ``bfill``, ``ffill``, ``pad``, ``pct_change``, ``shift``, ``ohlc`` dropping ``.columns.names`` (:issue:`41497`)
1148+
11421149

11431150
Reshaping
11441151
^^^^^^^^^

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ Missing
167167

168168
MultiIndex
169169
^^^^^^^^^^
170-
-
170+
- Bug in :meth:`MultiIndex.reindex` when passing a ``level`` that corresponds to an ``ExtensionDtype`` level (:issue:`42043`)
171171
-
172172

173173
I/O

pandas/_libs/algos.pyx

+50-52
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,32 @@ ctypedef fused rank_t:
931931
int64_t
932932

933933

934+
cdef rank_t get_rank_nan_fill_val(bint rank_nans_highest, rank_t[:] _=None):
935+
"""
936+
Return the value we'll use to represent missing values when sorting depending
937+
on if we'd like missing values to end up at the top/bottom. (The second parameter
938+
is unused, but needed for fused type specialization)
939+
"""
940+
if rank_nans_highest:
941+
if rank_t is object:
942+
return Infinity()
943+
elif rank_t is int64_t:
944+
return util.INT64_MAX
945+
elif rank_t is uint64_t:
946+
return util.UINT64_MAX
947+
else:
948+
return np.inf
949+
else:
950+
if rank_t is object:
951+
return NegInfinity()
952+
elif rank_t is int64_t:
953+
return NPY_NAT
954+
elif rank_t is uint64_t:
955+
return 0
956+
else:
957+
return -np.inf
958+
959+
934960
@cython.wraparound(False)
935961
@cython.boundscheck(False)
936962
def rank_1d(
@@ -980,7 +1006,7 @@ def rank_1d(
9801006
ndarray[rank_t, ndim=1] masked_vals
9811007
rank_t[:] masked_vals_memview
9821008
uint8_t[:] mask
983-
bint keep_na, check_labels, check_mask
1009+
bint keep_na, nans_rank_highest, check_labels, check_mask
9841010
rank_t nan_fill_val
9851011

9861012
tiebreak = tiebreakers[ties_method]
@@ -1026,27 +1052,12 @@ def rank_1d(
10261052
# If descending, fill with highest value since descending
10271053
# will flip the ordering to still end up with lowest rank.
10281054
# Symmetric logic applies to `na_option == 'bottom'`
1029-
if ascending ^ (na_option == 'top'):
1030-
if rank_t is object:
1031-
nan_fill_val = Infinity()
1032-
elif rank_t is int64_t:
1033-
nan_fill_val = util.INT64_MAX
1034-
elif rank_t is uint64_t:
1035-
nan_fill_val = util.UINT64_MAX
1036-
else:
1037-
nan_fill_val = np.inf
1055+
nans_rank_highest = ascending ^ (na_option == 'top')
1056+
nan_fill_val = get_rank_nan_fill_val[rank_t](nans_rank_highest)
1057+
if nans_rank_highest:
10381058
order = (masked_vals, mask, labels)
10391059
else:
1040-
if rank_t is object:
1041-
nan_fill_val = NegInfinity()
1042-
elif rank_t is int64_t:
1043-
nan_fill_val = NPY_NAT
1044-
elif rank_t is uint64_t:
1045-
nan_fill_val = 0
1046-
else:
1047-
nan_fill_val = -np.inf
1048-
1049-
order = (masked_vals, ~(np.array(mask, copy=False)), labels)
1060+
order = (masked_vals, ~(np.asarray(mask)), labels)
10501061

10511062
np.putmask(masked_vals, mask, nan_fill_val)
10521063
# putmask doesn't accept a memoryview, so we assign as a separate step
@@ -1073,14 +1084,11 @@ def rank_1d(
10731084
check_mask,
10741085
check_labels,
10751086
keep_na,
1087+
pct,
10761088
N,
10771089
)
1078-
if pct:
1079-
for i in range(N):
1080-
if grp_sizes[i] != 0:
1081-
out[i] = out[i] / grp_sizes[i]
10821090

1083-
return np.array(out)
1091+
return np.asarray(out)
10841092

10851093

10861094
@cython.wraparound(False)
@@ -1097,6 +1105,7 @@ cdef void rank_sorted_1d(
10971105
bint check_mask,
10981106
bint check_labels,
10991107
bint keep_na,
1108+
bint pct,
11001109
Py_ssize_t N,
11011110
) nogil:
11021111
"""
@@ -1108,7 +1117,7 @@ cdef void rank_sorted_1d(
11081117
out : float64_t[::1]
11091118
Array to store computed ranks
11101119
grp_sizes : int64_t[::1]
1111-
Array to store group counts.
1120+
Array to store group counts, only used if pct=True
11121121
labels : See rank_1d.__doc__
11131122
sort_indexer : intp_t[:]
11141123
Array of indices which sorts masked_vals
@@ -1118,12 +1127,14 @@ cdef void rank_sorted_1d(
11181127
Array where entries are True if the value is missing, False otherwise
11191128
tiebreak : TiebreakEnumType
11201129
See rank_1d.__doc__ for the different modes
1121-
check_mask : bint
1130+
check_mask : bool
11221131
If False, assumes the mask is all False to skip mask indexing
1123-
check_labels : bint
1132+
check_labels : bool
11241133
If False, assumes all labels are the same to skip group handling logic
1125-
keep_na : bint
1134+
keep_na : bool
11261135
Whether or not to keep nulls
1136+
pct : bool
1137+
Compute percentage rank of data within each group
11271138
N : Py_ssize_t
11281139
The number of elements to rank. Note: it is not always true that
11291140
N == len(out) or N == len(masked_vals) (see `nancorr_spearman` usage for why)
@@ -1342,6 +1353,11 @@ cdef void rank_sorted_1d(
13421353
grp_start = i + 1
13431354
grp_vals_seen = 1
13441355

1356+
if pct:
1357+
for i in range(N):
1358+
if grp_sizes[i] != 0:
1359+
out[i] = out[i] / grp_sizes[i]
1360+
13451361

13461362
def rank_2d(
13471363
ndarray[rank_t, ndim=2] in_arr,
@@ -1362,11 +1378,11 @@ def rank_2d(
13621378
ndarray[rank_t, ndim=2] values
13631379
ndarray[intp_t, ndim=2] argsort_indexer
13641380
ndarray[uint8_t, ndim=2] mask
1365-
rank_t val, nan_value
1381+
rank_t val, nan_fill_val
13661382
float64_t count, sum_ranks = 0.0
13671383
int tiebreak = 0
13681384
int64_t idx
1369-
bint check_mask, condition, keep_na
1385+
bint check_mask, condition, keep_na, nans_rank_highest
13701386

13711387
tiebreak = tiebreakers[ties_method]
13721388

@@ -1384,27 +1400,9 @@ def rank_2d(
13841400
if values.dtype != np.object_:
13851401
values = values.astype('O')
13861402

1403+
nans_rank_highest = ascending ^ (na_option == 'top')
13871404
if check_mask:
1388-
if ascending ^ (na_option == 'top'):
1389-
if rank_t is object:
1390-
nan_value = Infinity()
1391-
elif rank_t is float64_t:
1392-
nan_value = np.inf
1393-
1394-
# int64 and datetimelike
1395-
else:
1396-
nan_value = util.INT64_MAX
1397-
1398-
else:
1399-
if rank_t is object:
1400-
nan_value = NegInfinity()
1401-
elif rank_t is float64_t:
1402-
nan_value = -np.inf
1403-
1404-
# int64 and datetimelike
1405-
else:
1406-
nan_value = NPY_NAT
1407-
1405+
nan_fill_val = get_rank_nan_fill_val[rank_t](nans_rank_highest)
14081406
if rank_t is object:
14091407
mask = missing.isnaobj2d(values)
14101408
elif rank_t is float64_t:
@@ -1414,7 +1412,7 @@ def rank_2d(
14141412
else:
14151413
mask = values == NPY_NAT
14161414

1417-
np.putmask(values, mask, nan_value)
1415+
np.putmask(values, mask, nan_fill_val)
14181416
else:
14191417
mask = np.zeros_like(values, dtype=bool)
14201418

pandas/_libs/algos_take_helper.pxi.in

-25
Original file line numberDiff line numberDiff line change
@@ -9,31 +9,6 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
99
# ----------------------------------------------------------------------
1010

1111

12-
@cython.wraparound(False)
13-
@cython.boundscheck(False)
14-
def take_1d_intp_intp(
15-
const intp_t[:] values,
16-
const intp_t[:] indexer,
17-
intp_t[::1] out,
18-
intp_t fill_value=-1,
19-
):
20-
cdef:
21-
Py_ssize_t i, n, idx
22-
intp_t fv
23-
24-
n = indexer.shape[0]
25-
26-
fv = fill_value
27-
28-
with nogil:
29-
for i in range(n):
30-
idx = indexer[i]
31-
if idx == -1:
32-
out[i] = fv
33-
else:
34-
out[i] = values[idx]
35-
36-
3712
{{py:
3813

3914
# c_type_in, c_type_out

0 commit comments

Comments
 (0)