Skip to content

Commit 7d562c0

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 5328ffc + 2fadd59 commit 7d562c0

33 files changed

+654
-392
lines changed

doc/source/whatsnew/v1.2.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,6 @@ Datetimelike
607607
- Bug in :meth:`Series.isin` with ``datetime64[ns]`` dtype and :meth:`.DatetimeIndex.isin` failing to consider timezone-aware and timezone-naive datetimes as always different (:issue:`35728`)
608608
- Bug in :meth:`Series.isin` with ``PeriodDtype`` dtype and :meth:`PeriodIndex.isin` failing to consider arguments with different ``PeriodDtype`` as always different (:issue:`37528`)
609609
- Bug in :class:`Period` constructor not correctly handling nanoseconds in the ``value`` argument (:issue:`34621` and :issue:`17053`)
610-
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
611610

612611
Timedelta
613612
^^^^^^^^^
@@ -747,6 +746,7 @@ I/O
747746
- :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
748747
- :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`)
749748
- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`)
749+
- :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`)
750750

751751
Period
752752
^^^^^^
@@ -858,7 +858,7 @@ Other
858858
- Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`)
859859
- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`)
860860
- Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`)
861-
861+
- Bug in :func:`.is_bool_dtype` raising when passed a valid string such as ``"boolean"`` (:issue:`38386`)
862862

863863

864864
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.3.0.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ Other enhancements
2020
^^^^^^^^^^^^^^^^^^
2121

2222
- Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
23-
-
23+
- Improved error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
2424

2525
.. ---------------------------------------------------------------------------
2626
@@ -133,7 +133,7 @@ Deprecations
133133

134134
Performance improvements
135135
~~~~~~~~~~~~~~~~~~~~~~~~
136-
136+
- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
137137
-
138138
-
139139

@@ -217,7 +217,11 @@ MultiIndex
217217
I/O
218218
^^^
219219

220+
- Bug in :func:`read_csv` with ``engine="python"`` interpreting an ``NA`` value as a comment when the ``NA`` value contains the comment string (:issue:`34002`)
221+
- Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
222+
- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
220223
- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` are specified for ``engine="c"`` (:issue:`33699`)
224+
- Allow custom error values for the ``parse_dates`` argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
221225
-
222226

223227
Period

pandas/_libs/parsers.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,8 @@ cdef class TextReader:
738738
elif self.names is None and nuse < passed_count:
739739
self.leading_cols = field_count - passed_count
740740
elif passed_count != field_count:
741-
raise ValueError('Passed header names '
742-
'mismatches usecols')
741+
raise ValueError('Number of passed names did not match number of '
742+
'header fields in the file')
743743
# oh boy, #2442, #2981
744744
elif self.allow_leading_cols and passed_count < field_count:
745745
self.leading_cols = field_count - passed_count

pandas/_typing.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,5 +146,12 @@
146146
CompressionOptions = Optional[Union[str, CompressionDict]]
147147

148148

149-
# type of float formatter in DataFrameFormatter
149+
# types in DataFrameFormatter
150+
FormattersType = Union[
151+
List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
152+
]
153+
ColspaceType = Mapping[Label, Union[str, int]]
150154
FloatFormatType = Union[str, Callable, "EngFormatter"]
155+
ColspaceArgType = Union[
156+
str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]]
157+
]

pandas/core/algorithms.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
is_float_dtype,
3737
is_integer,
3838
is_integer_dtype,
39+
is_interval_dtype,
3940
is_list_like,
4041
is_numeric_dtype,
4142
is_object_dtype,
@@ -63,7 +64,7 @@
6364

6465
if TYPE_CHECKING:
6566
from pandas import Categorical, DataFrame, Index, Series
66-
from pandas.core.arrays import DatetimeArray, TimedeltaArray
67+
from pandas.core.arrays import DatetimeArray, IntervalArray, TimedeltaArray
6768

6869
_shared_docs: Dict[str, str] = {}
6970

@@ -453,7 +454,10 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
453454
# handle categoricals
454455
return cast("Categorical", comps).isin(values)
455456

456-
if needs_i8_conversion(comps.dtype):
457+
elif is_interval_dtype(comps.dtype):
458+
return cast("IntervalArray", comps).isin(values)
459+
460+
elif needs_i8_conversion(comps.dtype):
457461
# Dispatch to DatetimeLikeArrayMixin.isin
458462
return array(comps).isin(values)
459463
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):

pandas/core/arrays/interval.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414
intervals_to_interval_bounds,
1515
)
1616
from pandas._libs.missing import NA
17+
from pandas._typing import ArrayLike
1718
from pandas.compat.numpy import function as nv
1819
from pandas.util._decorators import Appender
1920

2021
from pandas.core.dtypes.cast import maybe_convert_platform
2122
from pandas.core.dtypes.common import (
2223
is_categorical_dtype,
2324
is_datetime64_any_dtype,
25+
is_dtype_equal,
2426
is_float_dtype,
2527
is_integer_dtype,
2628
is_interval_dtype,
@@ -29,6 +31,7 @@
2931
is_scalar,
3032
is_string_dtype,
3133
is_timedelta64_dtype,
34+
needs_i8_conversion,
3235
pandas_dtype,
3336
)
3437
from pandas.core.dtypes.dtypes import IntervalDtype
@@ -40,7 +43,7 @@
4043
)
4144
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
4245

43-
from pandas.core.algorithms import take, value_counts
46+
from pandas.core.algorithms import isin, take, value_counts
4447
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
4548
from pandas.core.arrays.categorical import Categorical
4649
import pandas.core.common as com
@@ -1435,6 +1438,43 @@ def contains(self, other):
14351438
other < self._right if self.open_right else other <= self._right
14361439
)
14371440

1441+
def isin(self, values) -> np.ndarray:
1442+
if not hasattr(values, "dtype"):
1443+
values = np.array(values)
1444+
values = extract_array(values, extract_numpy=True)
1445+
1446+
if is_interval_dtype(values.dtype):
1447+
if self.closed != values.closed:
1448+
# not comparable -> no overlap
1449+
return np.zeros(self.shape, dtype=bool)
1450+
1451+
if is_dtype_equal(self.dtype, values.dtype):
1452+
# GH#38353 instead of casting to object, operating on a
1453+
# complex128 ndarray is much more performant.
1454+
1455+
# error: "ArrayLike" has no attribute "view" [attr-defined]
1456+
left = self._combined.view("complex128") # type:ignore[attr-defined]
1457+
right = values._combined.view("complex128")
1458+
return np.in1d(left, right)
1459+
1460+
elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
1461+
values.left.dtype
1462+
):
1463+
# not comparable -> no overlap
1464+
return np.zeros(self.shape, dtype=bool)
1465+
1466+
return isin(self.astype(object), values.astype(object))
1467+
1468+
@property
1469+
def _combined(self) -> ArrayLike:
1470+
left = self.left._values.reshape(-1, 1)
1471+
right = self.right._values.reshape(-1, 1)
1472+
if needs_i8_conversion(left.dtype):
1473+
comb = left._concat_same_type([left, right], axis=1)
1474+
else:
1475+
comb = np.concatenate([left, right], axis=1)
1476+
return comb
1477+
14381478

14391479
def maybe_convert_platform_interval(values):
14401480
"""

pandas/core/dtypes/cast.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import numpy as np
2222

23-
from pandas._libs import lib, tslib
23+
from pandas._libs import lib, missing as libmissing, tslib
2424
from pandas._libs.tslibs import (
2525
NaT,
2626
OutOfBoundsDatetime,
@@ -519,6 +519,11 @@ def maybe_promote(dtype, fill_value=np.nan):
519519
Upcasted from dtype argument if necessary.
520520
fill_value
521521
Upcasted from fill_value argument if necessary.
522+
523+
Raises
524+
------
525+
ValueError
526+
If fill_value is a non-scalar and dtype is not object.
522527
"""
523528
if not is_scalar(fill_value) and not is_object_dtype(dtype):
524529
# with object dtype there is nothing to promote, and the user can
@@ -550,6 +555,9 @@ def maybe_promote(dtype, fill_value=np.nan):
550555
dtype = np.dtype(np.object_)
551556
elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)):
552557
dtype = np.dtype(np.object_)
558+
elif is_valid_nat_for_dtype(fill_value, dtype):
559+
# e.g. pd.NA, which is not accepted by Timestamp constructor
560+
fill_value = np.datetime64("NaT", "ns")
553561
else:
554562
try:
555563
fill_value = Timestamp(fill_value).to_datetime64()
@@ -563,6 +571,9 @@ def maybe_promote(dtype, fill_value=np.nan):
563571
):
564572
# TODO: What about str that can be a timedelta?
565573
dtype = np.dtype(np.object_)
574+
elif is_valid_nat_for_dtype(fill_value, dtype):
575+
# e.g pd.NA, which is not accepted by the Timedelta constructor
576+
fill_value = np.timedelta64("NaT", "ns")
566577
else:
567578
try:
568579
fv = Timedelta(fill_value)
@@ -636,7 +647,7 @@ def maybe_promote(dtype, fill_value=np.nan):
636647
# e.g. mst is np.complex128 and dtype is np.complex64
637648
dtype = mst
638649

639-
elif fill_value is None:
650+
elif fill_value is None or fill_value is libmissing.NA:
640651
if is_float_dtype(dtype) or is_complex_dtype(dtype):
641652
fill_value = np.nan
642653
elif is_integer_dtype(dtype):
@@ -646,7 +657,8 @@ def maybe_promote(dtype, fill_value=np.nan):
646657
fill_value = dtype.type("NaT", "ns")
647658
else:
648659
dtype = np.dtype(np.object_)
649-
fill_value = np.nan
660+
if fill_value is not libmissing.NA:
661+
fill_value = np.nan
650662
else:
651663
dtype = np.dtype(np.object_)
652664

pandas/core/dtypes/common.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1397,7 +1397,7 @@ def is_bool_dtype(arr_or_dtype) -> bool:
13971397
# guess this
13981398
return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean"
13991399
elif is_extension_array_dtype(arr_or_dtype):
1400-
return getattr(arr_or_dtype, "dtype", arr_or_dtype)._is_boolean
1400+
return getattr(dtype, "_is_boolean", False)
14011401

14021402
return issubclass(dtype.type, np.bool_)
14031403

0 commit comments

Comments
 (0)