Skip to content

Commit 33e2c63

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 28e24bd + 85c221a commit 33e2c63

File tree

11 files changed

+101
-79
lines changed

11 files changed

+101
-79
lines changed

.github/workflows/posix.yml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ jobs:
3131
[actions-38-slow.yaml, "slow", "", "", "", "", ""],
3232
[actions-38-locale.yaml, "not slow and not network", "language-pack-zh-hans xsel", "zh_CN.utf8", "zh_CN.utf8", "", ""],
3333
[actions-39-slow.yaml, "slow", "", "", "", "", ""],
34+
[actions-pypy-38.yaml, "not slow and not clipboard", "", "", "", "", ""],
3435
[actions-39-numpydev.yaml, "not slow and not network", "xsel", "", "", "deprecate", "-W error"],
3536
[actions-39.yaml, "not slow and not clipboard", "", "", "", "", ""]
3637
]

ci/deps/actions-pypy-38.yaml

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
name: pandas-dev
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
# TODO: Add the rest of the dependencies in here
6+
# once the other plentiful failures/segfaults
7+
# with base pandas have been dealt with
8+
- python=3.8
9+
10+
# tools
11+
- cython>=0.29.24
12+
- pytest>=6.0
13+
- pytest-cov
14+
- pytest-xdist>=1.31
15+
- hypothesis>=5.5.3
16+
17+
# required
18+
- numpy
19+
- python-dateutil
20+
- pytz

doc/source/whatsnew/v1.3.5.rst

-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ Fixed regressions
2222
- Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`)
2323
- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`)
2424
- Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`)
25-
- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
2625

2726

2827
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -660,7 +660,7 @@ Conversion
660660

661661
Strings
662662
^^^^^^^
663-
-
663+
- Fixed bug in checking for ``string[pyarrow]`` dtype incorrectly raising an ImportError when pyarrow is not installed (:issue:`44327`)
664664
-
665665

666666
Interval
@@ -702,6 +702,7 @@ Indexing
702702
- Bug in :meth:`DataFrame.loc.__setitem__` changing dtype when indexer was completely ``False`` (:issue:`37550`)
703703
- Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning a boolean mask instead of an array of integers for a non-unique and non-monotonic index (:issue:`44084`)
704704
- Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`)
705+
- Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`)
705706
-
706707

707708
Missing
@@ -710,6 +711,7 @@ Missing
710711
- Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`)
711712
- Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`)
712713
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`)
714+
- Bug in :meth:`Index.fillna` incorrectly returning an un-filled :class:`Index` when NA values are present and the ``downcast`` argument is specified. This now raises ``NotImplementedError`` instead; do not pass the ``downcast`` argument (:issue:`44873`)
713715
-
714716

715717
MultiIndex

pandas/core/dtypes/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ def is_dtype_equal(source, target) -> bool:
613613
src = get_dtype(source)
614614
if isinstance(src, ExtensionDtype):
615615
return src == target
616-
except (TypeError, AttributeError):
616+
except (TypeError, AttributeError, ImportError):
617617
return False
618618
elif isinstance(source, str):
619619
return is_dtype_equal(target, source)
@@ -622,7 +622,7 @@ def is_dtype_equal(source, target) -> bool:
622622
source = get_dtype(source)
623623
target = get_dtype(target)
624624
return source == target
625-
except (TypeError, AttributeError):
625+
except (TypeError, AttributeError, ImportError):
626626

627627
# invalid comparison
628628
# object == category will hit this

pandas/core/indexes/base.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -2723,13 +2723,18 @@ def fillna(self, value=None, downcast=None):
27232723
DataFrame.fillna : Fill NaN values of a DataFrame.
27242724
Series.fillna : Fill NaN values of a Series.
27252725
"""
2726+
27262727
value = self._require_scalar(value)
27272728
if self.hasnans:
27282729
result = self.putmask(self._isnan, value)
27292730
if downcast is None:
27302731
# no need to care metadata other than name
2731-
# because it can't have freq if
2732+
# because it can't have freq if it has NaTs
27322733
return Index._with_infer(result, name=self.name)
2734+
raise NotImplementedError(
2735+
f"{type(self).__name__}.fillna does not support 'downcast' "
2736+
"argument values other than 'None'."
2737+
)
27332738
return self._view()
27342739

27352740
def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:

pandas/core/indexes/category.py

-14
Original file line numberDiff line numberDiff line change
@@ -377,20 +377,6 @@ def __contains__(self, key: Any) -> bool:
377377

378378
return contains(self, key, container=self._engine)
379379

380-
@doc(Index.fillna)
381-
def fillna(self, value, downcast=None):
382-
value = self._require_scalar(value)
383-
try:
384-
cat = self._data.fillna(value)
385-
except (ValueError, TypeError):
386-
# invalid fill_value
387-
if not self.hasnans:
388-
# nothing to fill, we can get away without casting
389-
return self.copy()
390-
return self.astype(object).fillna(value, downcast=downcast)
391-
392-
return type(self)._simple_new(cat, name=self.name)
393-
394380
# TODO(2.0): remove reindex once non-unique deprecation is enforced
395381
def reindex(
396382
self, target, method=None, level=None, limit=None, tolerance=None

pandas/tests/dtypes/test_common.py

+7
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def test_period_dtype(self, dtype):
116116
"float": np.dtype(np.float64),
117117
"object": np.dtype(object),
118118
"category": com.pandas_dtype("category"),
119+
"string": pd.StringDtype(),
119120
}
120121

121122

@@ -129,6 +130,12 @@ def test_dtype_equal(name1, dtype1, name2, dtype2):
129130
assert not com.is_dtype_equal(dtype1, dtype2)
130131

131132

133+
@pytest.mark.parametrize("name,dtype", list(dtypes.items()), ids=lambda x: str(x))
134+
def test_pyarrow_string_import_error(name, dtype):
135+
# GH-44276
136+
assert not com.is_dtype_equal(dtype, "string[pyarrow]")
137+
138+
132139
@pytest.mark.parametrize(
133140
"dtype1,dtype2",
134141
[

pandas/tests/indexes/categorical/test_fillna.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,19 @@ def test_fillna_categorical(self):
2525
tm.assert_index_equal(result, expected)
2626

2727
def test_fillna_copies_with_no_nas(self):
28-
# Nothing to fill, should still get a copy
28+
# Nothing to fill, should still get a copy for the Categorical method,
29+
# but OK to get a view on CategoricalIndex method
2930
ci = CategoricalIndex([0, 1, 1])
30-
cat = ci._data
3131
result = ci.fillna(0)
32-
assert result._values._ndarray is not cat._ndarray
33-
assert result._values._ndarray.base is None
32+
assert result is not ci
33+
assert tm.shares_memory(result, ci)
3434

35-
# Same check directly on the Categorical object
35+
# But at the EA level we always get a copy.
36+
cat = ci._data
3637
result = cat.fillna(0)
3738
assert result._ndarray is not cat._ndarray
3839
assert result._ndarray.base is None
40+
assert not tm.shares_memory(result, cat)
3941

4042
def test_fillna_validates_with_no_nas(self):
4143
# We validate the fill value even if fillna is a no-op

pandas/tests/indexes/common.py

+5
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,11 @@ def test_fillna(self, index):
516516

517517
idx = type(index)(values)
518518

519+
msg = "does not support 'downcast'"
520+
with pytest.raises(NotImplementedError, match=msg):
521+
# For now at least, we only raise if there are NAs present
522+
idx.fillna(idx[0], downcast="infer")
523+
519524
expected = np.array([False] * len(idx), dtype=bool)
520525
expected[1] = True
521526
tm.assert_numpy_array_equal(idx._isnan, expected)

pandas/tests/strings/test_strings.py

+50-55
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,11 @@ def test_repeat_mixed_object():
137137
tm.assert_series_equal(result, expected)
138138

139139

140-
def test_repeat_with_null(any_string_dtype):
140+
@pytest.mark.parametrize("arg, repeat", [[None, 4], ["b", None]])
141+
def test_repeat_with_null(any_string_dtype, arg, repeat):
141142
# GH: 31632
142-
ser = Series(["a", None], dtype=any_string_dtype)
143-
result = ser.str.repeat([3, 4])
144-
expected = Series(["aaa", np.nan], dtype=any_string_dtype)
145-
tm.assert_series_equal(result, expected)
146-
147-
ser = Series(["a", "b"], dtype=any_string_dtype)
148-
result = ser.str.repeat([3, None])
143+
ser = Series(["a", arg], dtype=any_string_dtype)
144+
result = ser.str.repeat([3, repeat])
149145
expected = Series(["aaa", np.nan], dtype=any_string_dtype)
150146
tm.assert_series_equal(result, expected)
151147

@@ -397,27 +393,28 @@ def test_index_not_found_raises(index_or_series, any_string_dtype):
397393
obj.str.index("DE")
398394

399395

400-
def test_index_wrong_type_raises(index_or_series, any_string_dtype):
396+
@pytest.mark.parametrize("method", ["index", "rindex"])
397+
def test_index_wrong_type_raises(index_or_series, any_string_dtype, method):
401398
obj = index_or_series([], dtype=any_string_dtype)
402399
msg = "expected a string object, not int"
403400

404401
with pytest.raises(TypeError, match=msg):
405-
obj.str.index(0)
406-
407-
with pytest.raises(TypeError, match=msg):
408-
obj.str.rindex(0)
402+
getattr(obj.str, method)(0)
409403

410404

411-
def test_index_missing(any_string_dtype):
405+
@pytest.mark.parametrize(
406+
"method, exp",
407+
[
408+
["index", [1, 1, 0]],
409+
["rindex", [3, 1, 2]],
410+
],
411+
)
412+
def test_index_missing(any_string_dtype, method, exp):
412413
ser = Series(["abcb", "ab", "bcbe", np.nan], dtype=any_string_dtype)
413414
expected_dtype = np.float64 if any_string_dtype == "object" else "Int64"
414415

415-
result = ser.str.index("b")
416-
expected = Series([1, 1, 0, np.nan], dtype=expected_dtype)
417-
tm.assert_series_equal(result, expected)
418-
419-
result = ser.str.rindex("b")
420-
expected = Series([3, 1, 2, np.nan], dtype=expected_dtype)
416+
result = getattr(ser.str, method)("b")
417+
expected = Series(exp + [np.nan], dtype=expected_dtype)
421418
tm.assert_series_equal(result, expected)
422419

423420

@@ -488,53 +485,51 @@ def test_slice_replace(start, stop, repl, expected, any_string_dtype):
488485
tm.assert_series_equal(result, expected)
489486

490487

491-
def test_strip_lstrip_rstrip(any_string_dtype):
488+
@pytest.mark.parametrize(
489+
"method, exp",
490+
[
491+
["strip", ["aa", "bb", np.nan, "cc"]],
492+
["lstrip", ["aa ", "bb \n", np.nan, "cc "]],
493+
["rstrip", [" aa", " bb", np.nan, "cc"]],
494+
],
495+
)
496+
def test_strip_lstrip_rstrip(any_string_dtype, method, exp):
492497
ser = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype)
493498

494-
result = ser.str.strip()
495-
expected = Series(["aa", "bb", np.nan, "cc"], dtype=any_string_dtype)
496-
tm.assert_series_equal(result, expected)
497-
498-
result = ser.str.lstrip()
499-
expected = Series(["aa ", "bb \n", np.nan, "cc "], dtype=any_string_dtype)
500-
tm.assert_series_equal(result, expected)
501-
502-
result = ser.str.rstrip()
503-
expected = Series([" aa", " bb", np.nan, "cc"], dtype=any_string_dtype)
499+
result = getattr(ser.str, method)()
500+
expected = Series(exp, dtype=any_string_dtype)
504501
tm.assert_series_equal(result, expected)
505502

506503

507-
def test_strip_lstrip_rstrip_mixed_object():
504+
@pytest.mark.parametrize(
505+
"method, exp",
506+
[
507+
["strip", ["aa", np.nan, "bb"]],
508+
["lstrip", ["aa ", np.nan, "bb \t\n"]],
509+
["rstrip", [" aa", np.nan, " bb"]],
510+
],
511+
)
512+
def test_strip_lstrip_rstrip_mixed_object(method, exp):
508513
ser = Series([" aa ", np.nan, " bb \t\n", True, datetime.today(), None, 1, 2.0])
509514

510-
result = ser.str.strip()
511-
expected = Series(["aa", np.nan, "bb", np.nan, np.nan, np.nan, np.nan, np.nan])
512-
tm.assert_series_equal(result, expected)
513-
514-
result = ser.str.lstrip()
515-
expected = Series(
516-
["aa ", np.nan, "bb \t\n", np.nan, np.nan, np.nan, np.nan, np.nan]
517-
)
518-
tm.assert_series_equal(result, expected)
519-
520-
result = ser.str.rstrip()
521-
expected = Series([" aa", np.nan, " bb", np.nan, np.nan, np.nan, np.nan, np.nan])
515+
result = getattr(ser.str, method)()
516+
expected = Series(exp + [np.nan, np.nan, np.nan, np.nan, np.nan])
522517
tm.assert_series_equal(result, expected)
523518

524519

525-
def test_strip_lstrip_rstrip_args(any_string_dtype):
520+
@pytest.mark.parametrize(
521+
"method, exp",
522+
[
523+
["strip", ["ABC", " BNSD", "LDFJH "]],
524+
["lstrip", ["ABCxx", " BNSD", "LDFJH xx"]],
525+
["rstrip", ["xxABC", "xx BNSD", "LDFJH "]],
526+
],
527+
)
528+
def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp):
526529
ser = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype)
527530

528-
result = ser.str.strip("x")
529-
expected = Series(["ABC", " BNSD", "LDFJH "], dtype=any_string_dtype)
530-
tm.assert_series_equal(result, expected)
531-
532-
result = ser.str.lstrip("x")
533-
expected = Series(["ABCxx", " BNSD", "LDFJH xx"], dtype=any_string_dtype)
534-
tm.assert_series_equal(result, expected)
535-
536-
result = ser.str.rstrip("x")
537-
expected = Series(["xxABC", "xx BNSD", "LDFJH "], dtype=any_string_dtype)
531+
result = getattr(ser.str, method)("x")
532+
expected = Series(exp, dtype=any_string_dtype)
538533
tm.assert_series_equal(result, expected)
539534

540535

0 commit comments

Comments
 (0)