Skip to content

Commit dea7dce

Browse files
committed
Merge remote-tracking branch 'upstream/master' into unique-index
2 parents 2a37fc0 + ae71459 commit dea7dce

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+2274
-308
lines changed

ci/code_checks.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
188188
set -o pipefail
189189
if [[ "$AZURE" == "true" ]]; then
190190
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
191-
! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
191+
! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
192192
else
193-
! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
193+
! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
194194
fi
195195
RET=$(($RET + $?)) ; echo $MSG "DONE"
196196
fi

doc/source/whatsnew/v1.0.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Categorical
122122
^^^^^^^^^^^
123123

124124
- Added test to assert the :func:`fillna` raises the correct ValueError message when the value isn't a value from categories (:issue:`13628`)
125+
- Bug in :meth:`Categorical.astype` where ``NaN`` values were handled incorrectly when casting to int (:issue:`28406`)
125126
-
126127
-
127128

@@ -184,6 +185,7 @@ Indexing
184185
- Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`)
185186
- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)
186187
- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
188+
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
187189

188190
Missing
189191
^^^^^^^
@@ -240,7 +242,7 @@ Sparse
240242
ExtensionArray
241243
^^^^^^^^^^^^^^
242244

243-
-
245+
- Bug in :class:`arrays.PandasArray` when setting a scalar string (:issue:`28118`, :issue:`28150`).
244246
-
245247

246248

environment.yml

+6
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ dependencies:
3636
- nbsphinx
3737
- pandoc
3838

39+
# web (jinja2 is also needed, but it's also an optional pandas dependency)
40+
- markdown
41+
- feedparser
42+
- pyyaml
43+
- requests
44+
3945
# testing
4046
- boto3
4147
- botocore>=1.11

pandas/_libs/testing.pyx

+3-5
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,9 @@ cpdef assert_almost_equal(a, b,
143143
from pandas.util.testing import assert_attr_equal
144144
assert_attr_equal('dtype', a, b, obj=obj)
145145

146-
try:
147-
if array_equivalent(a, b, strict_nan=True):
148-
return True
149-
except:
150-
pass
146+
if array_equivalent(a, b, strict_nan=True):
147+
return True
148+
151149
else:
152150
na, nb = len(a), len(b)
153151

pandas/core/arrays/categorical.py

+3
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,9 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
520520
if dtype == self.dtype:
521521
return self
522522
return self._set_dtype(dtype)
523+
if is_integer_dtype(dtype) and self.isna().any():
524+
msg = "Cannot convert float NaN to integer"
525+
raise ValueError(msg)
523526
return np.array(self, dtype=dtype, copy=copy)
524527

525528
@cache_readonly

pandas/core/arrays/numpy_.py

+2-9
Original file line numberDiff line numberDiff line change
@@ -235,15 +235,8 @@ def __setitem__(self, key, value):
235235
if not lib.is_scalar(value):
236236
value = np.asarray(value)
237237

238-
values = self._ndarray
239-
t = np.result_type(value, values)
240-
if t != self._ndarray.dtype:
241-
values = values.astype(t, casting="safe")
242-
values[key] = value
243-
self._dtype = PandasDtype(t)
244-
self._ndarray = values
245-
else:
246-
self._ndarray[key] = value
238+
value = np.asarray(value, dtype=self._ndarray.dtype)
239+
self._ndarray[key] = value
247240

248241
def __len__(self) -> int:
249242
return len(self._ndarray)

pandas/core/dtypes/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,7 @@ def array_equivalent(left, right, strict_nan=False):
445445
if not isinstance(right_value, float) or not np.isnan(right_value):
446446
return False
447447
else:
448-
if left_value != right_value:
448+
if np.any(left_value != right_value):
449449
return False
450450
return True
451451

pandas/core/indexes/base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -4735,13 +4735,13 @@ def set_value(self, arr, key, value):
47354735
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
47364736
def get_indexer_non_unique(self, target):
47374737
target = ensure_index(target)
4738-
if is_categorical(target):
4739-
target = target.astype(target.dtype.categories.dtype)
47404738
pself, ptarget = self._maybe_promote(target)
47414739
if pself is not self or ptarget is not target:
47424740
return pself.get_indexer_non_unique(ptarget)
47434741

4744-
if self.is_all_dates:
4742+
if is_categorical(target):
4743+
tgt_values = np.asarray(target)
4744+
elif self.is_all_dates:
47454745
tgt_values = target.asi8
47464746
else:
47474747
tgt_values = target._ndarray_values
@@ -4753,7 +4753,7 @@ def get_indexer_for(self, target, **kwargs):
47534753
"""
47544754
Guaranteed return of an indexer even when non-unique.
47554755
4756-
This dispatches to get_indexer or get_indexer_nonunique
4756+
This dispatches to get_indexer or get_indexer_non_unique
47574757
as appropriate.
47584758
47594759
Returns

pandas/core/indexes/numeric.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas._libs import index as libindex
66
from pandas.util._decorators import Appender, cache_readonly
77

8+
from pandas.core.dtypes.cast import astype_nansafe
89
from pandas.core.dtypes.common import (
910
is_bool,
1011
is_bool_dtype,
@@ -367,12 +368,11 @@ def astype(self, dtype, copy=True):
367368
"values are required for conversion"
368369
).format(dtype=dtype)
369370
raise TypeError(msg)
370-
elif (
371-
is_integer_dtype(dtype) and not is_extension_array_dtype(dtype)
372-
) and self.hasnans:
371+
elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
373372
# TODO(jreback); this can change once we have an EA Index type
374373
# GH 13149
375-
raise ValueError("Cannot convert NA to integer")
374+
arr = astype_nansafe(self.values, dtype=dtype)
375+
return Int64Index(arr)
376376
return super().astype(dtype, copy=copy)
377377

378378
@Appender(_index_shared_docs["_convert_scalar_indexer"])

pandas/tests/arrays/test_numpy.py

+15
Original file line numberDiff line numberDiff line change
@@ -211,3 +211,18 @@ def test_basic_binop():
211211
result = x + x
212212
expected = PandasArray(np.array([2, 4, 6]))
213213
tm.assert_extension_array_equal(result, expected)
214+
215+
216+
@pytest.mark.parametrize("dtype", [None, object])
217+
def test_setitem_object_typecode(dtype):
218+
arr = PandasArray(np.array(["a", "b", "c"], dtype=dtype))
219+
arr[0] = "t"
220+
expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype))
221+
tm.assert_extension_array_equal(arr, expected)
222+
223+
224+
def test_setitem_no_coercion():
225+
# https://github.com/pandas-dev/pandas/issues/28150
226+
arr = PandasArray(np.array([1, 2, 3]))
227+
with pytest.raises(ValueError, match="int"):
228+
arr[0] = "a"

pandas/tests/dtypes/test_missing.py

+14
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,20 @@ def test_array_equivalent_str():
360360
)
361361

362362

363+
def test_array_equivalent_nested():
364+
# reached in groupby aggregations, make sure we use np.any when checking
365+
# if the comparison is truthy
366+
left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
367+
right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object)
368+
369+
assert array_equivalent(left, right, strict_nan=True)
370+
assert not array_equivalent(left, right[::-1], strict_nan=True)
371+
372+
left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object)
373+
right = np.array([50, 40])
374+
assert not array_equivalent(left, right, strict_nan=True)
375+
376+
363377
@pytest.mark.parametrize(
364378
"dtype, na_value",
365379
[

pandas/tests/extension/test_categorical.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pytest
2020

2121
import pandas as pd
22-
from pandas import Categorical
22+
from pandas import Categorical, CategoricalIndex, Timestamp
2323
from pandas.api.types import CategoricalDtype
2424
from pandas.tests.extension import base
2525
import pandas.util.testing as tm
@@ -197,7 +197,15 @@ def test_searchsorted(self, data_for_sorting):
197197

198198

199199
class TestCasting(base.BaseCastingTests):
200-
pass
200+
@pytest.mark.parametrize("cls", [Categorical, CategoricalIndex])
201+
@pytest.mark.parametrize("values", [[1, np.nan], [Timestamp("2000"), pd.NaT]])
202+
def test_cast_nan_to_int(self, cls, values):
203+
# GH 28406
204+
s = cls(values)
205+
206+
msg = "Cannot (cast|convert)"
207+
with pytest.raises((ValueError, TypeError), match=msg):
208+
s.astype(int)
201209

202210

203211
class TestArithmeticOps(base.BaseArithmeticOpsTests):

pandas/tests/indexes/interval/test_astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def test_subtype_integer(self, subtype):
143143
tm.assert_index_equal(result, expected)
144144

145145
# raises with NA
146-
msg = "Cannot convert NA to integer"
146+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
147147
with pytest.raises(ValueError, match=msg):
148148
index.insert(0, np.nan).astype(dtype)
149149

pandas/tests/indexes/test_numeric.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -242,10 +242,17 @@ def test_astype(self):
242242
# GH 13149
243243
for dtype in ["int16", "int32", "int64"]:
244244
i = Float64Index([0, 1.1, np.NAN])
245-
msg = "Cannot convert NA to integer"
245+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
246246
with pytest.raises(ValueError, match=msg):
247247
i.astype(dtype)
248248

249+
def test_cannot_cast_inf_to_int(self):
250+
idx = pd.Float64Index([1, 2, np.inf])
251+
252+
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
253+
with pytest.raises(ValueError, match=msg):
254+
idx.astype(int)
255+
249256
def test_type_coercion_fail(self, any_int_dtype):
250257
# see gh-15832
251258
msg = "Trying to coerce float values to integers"

pandas/tests/io/json/conftest.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
import pytest
2+
3+
4+
@pytest.fixture(params=["split", "records", "index", "columns", "values"])
5+
def orient(request):
6+
"""
7+
Fixture for orients excluding the table format.
8+
"""
9+
return request.param

0 commit comments

Comments
 (0)