Skip to content

Commit 631cf65

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ref-numeric-validate_fill_value
2 parents 214820d + 1cc030e commit 631cf65

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+1656
-1301
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Here are just a few of the things that pandas does well:
6060
and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
6161
- [**Time series**][timeseries]-specific functionality: date range
6262
generation and frequency conversion, moving window statistics,
63-
date shifting and lagging.
63+
date shifting and lagging
6464

6565

6666
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data

asv_bench/benchmarks/reshape.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,10 @@ def setup(self):
103103
nidvars = 20
104104
N = 5000
105105
self.letters = list("ABCD")
106-
yrvars = [l + str(num) for l, num in product(self.letters, range(1, nyrs + 1))]
106+
yrvars = [
107+
letter + str(num)
108+
for letter, num in product(self.letters, range(1, nyrs + 1))
109+
]
107110
columns = [str(i) for i in range(nidvars)] + yrvars
108111
self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), columns=columns)
109112
self.df["id"] = self.df.index

doc/source/user_guide/style.ipynb

+30-4
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,8 @@
793793
"source": [
794794
"The next option you have is \"table styles\".\n",
795795
"These are styles that apply to the table as a whole, but don't look at the data.\n",
796-
"Certain stylings, including pseudo-selectors like `:hover` can only be used this way."
796+
"Certain stylings, including pseudo-selectors like `:hover` can only be used this way.\n",
797+
"These can also be used to set specific row or column based class selectors, as will be shown."
797798
]
798799
},
799800
{
@@ -831,9 +832,32 @@
831832
"The value for `props` should be a list of tuples of `('attribute', 'value')`.\n",
832833
"\n",
833834
"`table_styles` are extremely flexible, but not as fun to type out by hand.\n",
834-
"We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here."
835+
"We hope to collect some useful ones either in pandas, or preferably in a new package that [builds on top of](#Extensibility) the tools here.\n",
836+
"\n",
837+
"`table_styles` can be used to add column and row based class descriptors. For large tables this can increase performance by avoiding repetitive individual css for each cell, and it can also simplify style construction in some cases.\n",
838+
"If `table_styles` is given as a dictionary, each key should be a specified column or index value and this will map to specific class CSS selectors of the given column or row.\n",
839+
"\n",
840+
"Note that `Styler.set_table_styles` will overwrite existing styles but can be chained by setting the `overwrite` argument to `False`."
835841
]
836842
},
843+
{
844+
"cell_type": "code",
845+
"execution_count": null,
846+
"outputs": [],
847+
"source": [
848+
"html = html.set_table_styles({\n",
849+
" 'B': [dict(selector='', props=[('color', 'green')])],\n",
850+
" 'C': [dict(selector='td', props=[('color', 'red')])], \n",
851+
" }, overwrite=False)\n",
852+
"html"
853+
],
854+
"metadata": {
855+
"collapsed": false,
856+
"pycharm": {
857+
"name": "#%%\n"
858+
}
859+
}
860+
},
837861
{
838862
"cell_type": "markdown",
839863
"metadata": {},
@@ -922,10 +946,12 @@
922946
"- DataFrame only `(use Series.to_frame().style)`\n",
923947
"- The index and columns must be unique\n",
924948
"- No large repr, and performance isn't great; this is intended for summary DataFrames\n",
925-
"- You can only style the *values*, not the index or columns\n",
949+
"- You can only style the *values*, not the index or columns (except with `table_styles` above)\n",
926950
"- You can only apply styles, you can't insert new HTML entities\n",
927951
"\n",
928-
"Some of these will be addressed in the future.\n"
952+
"Some of these will be addressed in the future.\n",
953+
"Performance can suffer when adding styles to each cell in a large DataFrame.\n",
954+
"It is recommended to apply table or column based styles where possible to limit overall HTML length, as well as setting a shorter UUID to avoid unnecessary repeated data transmission. \n"
929955
]
930956
},
931957
{

doc/source/whatsnew/v1.1.5.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Fixed regressions
1717
- Regression in addition of a timedelta-like scalar to a :class:`DatetimeIndex` raising incorrectly (:issue:`37295`)
1818
- Fixed regression in :meth:`Series.groupby` raising when the :class:`Index` of the :class:`Series` had a tuple as its name (:issue:`37755`)
1919
- Fixed regression in :meth:`DataFrame.loc` and :meth:`Series.loc` for ``__setitem__`` when one-dimensional tuple was given to select from :class:`MultiIndex` (:issue:`37711`)
20-
-
20+
- Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`)
2121

2222
.. ---------------------------------------------------------------------------
2323

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ Other enhancements
232232
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
233233
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
234234
- :meth:`DataFrame.hist` now supports time series (datetime) data (:issue:`32590`)
235+
- :meth:`Styler.set_table_styles` now allows the direct styling of rows and columns and can be chained (:issue:`35607`)
235236
- ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
236237
- :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
237238
- :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
@@ -619,6 +620,7 @@ Indexing
619620
- Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using listlike indexer that contains elements that are in the index's ``categories`` but not in the index itself failing to raise ``KeyError`` (:issue:`37901`)
620621
- Bug in :meth:`DataFrame.iloc` and :meth:`Series.iloc` aligning objects in ``__setitem__`` (:issue:`22046`)
621622
- Bug in :meth:`DataFrame.loc` did not raise ``KeyError`` when missing combination was given with ``slice(None)`` for remaining levels (:issue:`19556`)
623+
- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`)
622624
- Bug when inserting a boolean label into a :class:`DataFrame` with numeric :class:`Index` columns incorrectly casting to integer (:issue:`36319`)
623625

624626
Missing

pandas/_libs/index_class_helper.pxi.in

+14-16
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,29 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1010

1111
{{py:
1212

13-
# name, dtype, hashtable_name
14-
dtypes = [('Float64', 'float64', 'Float64'),
15-
('Float32', 'float32', 'Float64'),
16-
('Int64', 'int64', 'Int64'),
17-
('Int32', 'int32', 'Int64'),
18-
('Int16', 'int16', 'Int64'),
19-
('Int8', 'int8', 'Int64'),
20-
('UInt64', 'uint64', 'UInt64'),
21-
('UInt32', 'uint32', 'UInt64'),
22-
('UInt16', 'uint16', 'UInt64'),
23-
('UInt8', 'uint8', 'UInt64'),
13+
# name, dtype
14+
dtypes = [('Float64', 'float64'),
15+
('Float32', 'float32'),
16+
('Int64', 'int64'),
17+
('Int32', 'int32'),
18+
('Int16', 'int16'),
19+
('Int8', 'int8'),
20+
('UInt64', 'uint64'),
21+
('UInt32', 'uint32'),
22+
('UInt16', 'uint16'),
23+
('UInt8', 'uint8'),
2424
]
2525
}}
2626

27-
{{for name, dtype, hashtable_name in dtypes}}
27+
{{for name, dtype in dtypes}}
2828

2929

3030
cdef class {{name}}Engine(IndexEngine):
3131
# constructor-caller is responsible for ensuring that vgetter()
3232
# returns an ndarray with dtype {{dtype}}_t
3333

3434
cdef _make_hash_table(self, Py_ssize_t n):
35-
return _hash.{{hashtable_name}}HashTable(n)
35+
return _hash.{{name}}HashTable(n)
3636

3737
{{if name not in {'Float64', 'Float32'} }}
3838
cdef _check_type(self, object val):
@@ -41,9 +41,7 @@ cdef class {{name}}Engine(IndexEngine):
4141
{{endif}}
4242

4343
cdef void _call_map_locations(self, values):
44-
# self.mapping is of type {{hashtable_name}}HashTable,
45-
# so convert dtype of values
46-
self.mapping.map_locations(algos.ensure_{{hashtable_name.lower()}}(values))
44+
self.mapping.map_locations(algos.ensure_{{name.lower()}}(values))
4745

4846
cdef _maybe_get_bool_indexer(self, object val):
4947
cdef:

pandas/_testing.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -749,19 +749,19 @@ def assert_index_equal(
749749
"""
750750
__tracebackhide__ = True
751751

752-
def _check_types(l, r, obj="Index"):
752+
def _check_types(left, right, obj="Index"):
753753
if exact:
754-
assert_class_equal(l, r, exact=exact, obj=obj)
754+
assert_class_equal(left, right, exact=exact, obj=obj)
755755

756756
# Skip exact dtype checking when `check_categorical` is False
757757
if check_categorical:
758-
assert_attr_equal("dtype", l, r, obj=obj)
758+
assert_attr_equal("dtype", left, right, obj=obj)
759759

760760
# allow string-like to have different inferred_types
761-
if l.inferred_type in ("string"):
762-
assert r.inferred_type in ("string")
761+
if left.inferred_type in ("string"):
762+
assert right.inferred_type in ("string")
763763
else:
764-
assert_attr_equal("inferred_type", l, r, obj=obj)
764+
assert_attr_equal("inferred_type", left, right, obj=obj)
765765

766766
def _get_ilevel_values(index, level):
767767
# accept level number only
@@ -1147,9 +1147,9 @@ def _raise(left, right, err_msg):
11471147
)
11481148

11491149
diff = 0
1150-
for l, r in zip(left, right):
1150+
for left_arr, right_arr in zip(left, right):
11511151
# count up differences
1152-
if not array_equivalent(l, r, strict_nan=strict_nan):
1152+
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan):
11531153
diff += 1
11541154

11551155
diff = diff * 100.0 / left.size

pandas/conftest.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@
3333

3434
import pandas.util._test_decorators as td
3535

36+
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype
37+
3638
import pandas as pd
37-
from pandas import DataFrame, Series
39+
from pandas import DataFrame, Interval, Period, Series, Timedelta, Timestamp
3840
import pandas._testing as tm
3941
from pandas.core import ops
4042
from pandas.core.indexes.api import Index, MultiIndex
@@ -687,6 +689,26 @@ def float_frame():
687689
return DataFrame(tm.getSeriesData())
688690

689691

692+
# ----------------------------------------------------------------
693+
# Scalars
694+
# ----------------------------------------------------------------
695+
@pytest.fixture(
696+
params=[
697+
(Interval(left=0, right=5), IntervalDtype("int64")),
698+
(Interval(left=0.1, right=0.5), IntervalDtype("float64")),
699+
(Period("2012-01", freq="M"), "period[M]"),
700+
(Period("2012-02-01", freq="D"), "period[D]"),
701+
(
702+
Timestamp("2011-01-01", tz="US/Eastern"),
703+
DatetimeTZDtype(tz="US/Eastern"),
704+
),
705+
(Timedelta(seconds=500), "timedelta64[ns]"),
706+
]
707+
)
708+
def ea_scalar_and_dtype(request):
709+
return request.param
710+
711+
690712
# ----------------------------------------------------------------
691713
# Operators & Operations
692714
# ----------------------------------------------------------------

pandas/core/algorithms.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
from pandas.core.indexers import validate_indices
6161

6262
if TYPE_CHECKING:
63-
from pandas import Categorical, DataFrame, Series
63+
from pandas import Categorical, DataFrame, Index, Series
6464

6565
_shared_docs: Dict[str, str] = {}
6666

@@ -433,10 +433,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
433433
return cast("Categorical", comps).isin(values)
434434

435435
if needs_i8_conversion(comps):
436-
# Dispatch to DatetimeLikeIndexMixin.isin
437-
from pandas import Index
438-
439-
return Index(comps).isin(values)
436+
# Dispatch to DatetimeLikeArrayMixin.isin
437+
return array(comps).isin(values)
440438

441439
comps, dtype = _ensure_data(comps)
442440
values, _ = _ensure_data(values, dtype=dtype)
@@ -542,7 +540,7 @@ def factorize(
542540
sort: bool = False,
543541
na_sentinel: Optional[int] = -1,
544542
size_hint: Optional[int] = None,
545-
) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
543+
) -> Tuple[np.ndarray, Union[np.ndarray, "Index"]]:
546544
"""
547545
Encode the object as an enumerated type or categorical variable.
548546

pandas/core/arrays/datetimelike.py

+56-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
6363

6464
from pandas.core import nanops, ops
65-
from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
65+
from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts
6666
from pandas.core.arraylike import OpsMixin
6767
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
6868
import pandas.core.common as com
@@ -101,6 +101,8 @@ class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
101101
_generate_range
102102
"""
103103

104+
# _infer_matches -> which infer_dtype strings are close enough to our own
105+
_infer_matches: Tuple[str, ...]
104106
_is_recognized_dtype: Callable[[DtypeObj], bool]
105107
_recognized_scalars: Tuple[Type, ...]
106108
_data: np.ndarray
@@ -697,6 +699,59 @@ def map(self, mapper):
697699

698700
return Index(self).map(mapper).array
699701

702+
def isin(self, values) -> np.ndarray:
703+
"""
704+
Compute boolean array of whether each value is found in the
705+
passed set of values.
706+
707+
Parameters
708+
----------
709+
values : set or sequence of values
710+
711+
Returns
712+
-------
713+
ndarray[bool]
714+
"""
715+
if not hasattr(values, "dtype"):
716+
values = np.asarray(values)
717+
718+
if values.dtype.kind in ["f", "i", "u", "c"]:
719+
# TODO: de-duplicate with equals, validate_comparison_value
720+
return np.zeros(self.shape, dtype=bool)
721+
722+
if not isinstance(values, type(self)):
723+
inferrable = [
724+
"timedelta",
725+
"timedelta64",
726+
"datetime",
727+
"datetime64",
728+
"date",
729+
"period",
730+
]
731+
if values.dtype == object:
732+
inferred = lib.infer_dtype(values, skipna=False)
733+
if inferred not in inferrable:
734+
if inferred == "string":
735+
pass
736+
737+
elif "mixed" in inferred:
738+
return isin(self.astype(object), values)
739+
else:
740+
return np.zeros(self.shape, dtype=bool)
741+
742+
try:
743+
values = type(self)._from_sequence(values)
744+
except ValueError:
745+
return isin(self.astype(object), values)
746+
747+
try:
748+
self._check_compatible_with(values)
749+
except (TypeError, ValueError):
750+
# Includes tzawareness mismatch and IncompatibleFrequencyError
751+
return np.zeros(self.shape, dtype=bool)
752+
753+
return isin(self.asi8, values.asi8)
754+
700755
# ------------------------------------------------------------------
701756
# Null Handling
702757

pandas/core/arrays/datetimes.py

+1
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
154154
_scalar_type = Timestamp
155155
_recognized_scalars = (datetime, np.datetime64)
156156
_is_recognized_dtype = is_datetime64_any_dtype
157+
_infer_matches = ("datetime", "datetime64", "date")
157158

158159
# define my properties & methods for delegation
159160
_bool_ops = [

0 commit comments

Comments
 (0)