Skip to content

Commit 2b4d109

Browse files
committed
Merge remote-tracking branch 'upstream/master' into string-use-inf-as-na
2 parents df626f5 + 8aa7072 commit 2b4d109

File tree

8 files changed

+55
-115
lines changed

8 files changed

+55
-115
lines changed

ci/deps/azure-37-numpydev.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ dependencies:
1414
- pytz
1515
- pip
1616
- pip:
17-
- cython==0.29.16
18-
# GH#33507 cython 3.0a1 is causing TypeErrors 2020-04-13
17+
- cython>=0.29.16
1918
- "git+git://github.com/dateutil/dateutil.git"
2019
- "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
2120
- "--pre"

doc/source/user_guide/enhancingperf.rst

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -396,7 +396,7 @@ Consider the following toy example of doubling each observation:
396396
1000 loops, best of 3: 233 us per loop
397397
398398
# Custom function with numba
399-
In [7]: %timeit (df['col1_doubled'] = double_every_value_withnumba(df['a'].to_numpy())
399+
In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df['a'].to_numpy())
400400
1000 loops, best of 3: 145 us per loop
401401
402402
Caveats
@@ -599,13 +599,6 @@ identifier.
599599
The ``inplace`` keyword determines whether this assignment will be performed
600600
on the original ``DataFrame`` or return a copy with the new column.
601601

602-
.. warning::
603-
604-
For backwards compatibility, ``inplace`` defaults to ``True`` if not
605-
specified. This will change in a future version of pandas - if your
606-
code depends on an inplace assignment you should update to explicitly
607-
set ``inplace=True``.
608-
609602
.. ipython:: python
610603
611604
df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
@@ -614,7 +607,7 @@ on the original ``DataFrame`` or return a copy with the new column.
614607
df.eval('a = 1', inplace=True)
615608
df
616609
617-
When ``inplace`` is set to ``False``, a copy of the ``DataFrame`` with the
610+
When ``inplace`` is set to ``False``, the default, a copy of the ``DataFrame`` with the
618611
new or modified columns is returned and the original frame is unchanged.
619612

620613
.. ipython:: python
@@ -653,11 +646,6 @@ whether the query modifies the original frame.
653646
df.query('a > 2', inplace=True)
654647
df
655648
656-
.. warning::
657-
658-
Unlike with ``eval``, the default value for ``inplace`` for ``query``
659-
is ``False``. This is consistent with prior versions of pandas.
660-
661649
Local variables
662650
~~~~~~~~~~~~~~~
663651

doc/source/whatsnew/v1.1.0.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,7 @@ Numeric
569569
- Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
570570
- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
571571
- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
572-
-
572+
- Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
573573

574574
Conversion
575575
^^^^^^^^^^
@@ -732,7 +732,7 @@ ExtensionArray
732732

733733
- Fixed bug where :meth:`Series.value_counts` would raise on empty input of ``Int64`` dtype (:issue:`33317`)
734734
- Fixed bug where :meth:`StringArray.isna` would return ``False`` for NA values when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33655`)
735-
735+
-
736736

737737
Other
738738
^^^^^

pandas/core/frame.py

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@
8484
validate_numeric_casting,
8585
)
8686
from pandas.core.dtypes.common import (
87-
ensure_float64,
8887
ensure_int64,
8988
ensure_platform_int,
9089
infer_dtype_from_object,
@@ -7871,16 +7870,16 @@ def corr(self, method="pearson", min_periods=1) -> "DataFrame":
78717870
numeric_df = self._get_numeric_data()
78727871
cols = numeric_df.columns
78737872
idx = cols.copy()
7874-
mat = numeric_df.values
7873+
mat = numeric_df.astype(float, copy=False).to_numpy()
78757874

78767875
if method == "pearson":
7877-
correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods)
7876+
correl = libalgos.nancorr(mat, minp=min_periods)
78787877
elif method == "spearman":
7879-
correl = libalgos.nancorr_spearman(ensure_float64(mat), minp=min_periods)
7878+
correl = libalgos.nancorr_spearman(mat, minp=min_periods)
78807879
elif method == "kendall" or callable(method):
78817880
if min_periods is None:
78827881
min_periods = 1
7883-
mat = ensure_float64(mat).T
7882+
mat = mat.T
78847883
corrf = nanops.get_corr_func(method)
78857884
K = len(cols)
78867885
correl = np.empty((K, K), dtype=float)
@@ -8006,19 +8005,19 @@ def cov(self, min_periods=None) -> "DataFrame":
80068005
numeric_df = self._get_numeric_data()
80078006
cols = numeric_df.columns
80088007
idx = cols.copy()
8009-
mat = numeric_df.values
8008+
mat = numeric_df.astype(float, copy=False).to_numpy()
80108009

80118010
if notna(mat).all():
80128011
if min_periods is not None and min_periods > len(mat):
8013-
baseCov = np.empty((mat.shape[1], mat.shape[1]))
8014-
baseCov.fill(np.nan)
8012+
base_cov = np.empty((mat.shape[1], mat.shape[1]))
8013+
base_cov.fill(np.nan)
80158014
else:
8016-
baseCov = np.cov(mat.T)
8017-
baseCov = baseCov.reshape((len(cols), len(cols)))
8015+
base_cov = np.cov(mat.T)
8016+
base_cov = base_cov.reshape((len(cols), len(cols)))
80188017
else:
8019-
baseCov = libalgos.nancorr(ensure_float64(mat), cov=True, minp=min_periods)
8018+
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)
80208019

8021-
return self._constructor(baseCov, index=idx, columns=cols)
8020+
return self._constructor(base_cov, index=idx, columns=cols)
80228021

80238022
def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series:
80248023
"""

pandas/core/indexes/datetimelike.py

Lines changed: 4 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Base and utility classes for tseries type pandas objects.
33
"""
4-
from datetime import datetime, timedelta
4+
from datetime import datetime
55
from typing import Any, List, Optional, Union, cast
66

77
import numpy as np
@@ -16,18 +16,14 @@
1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
1818
is_bool_dtype,
19-
is_datetime64_any_dtype,
2019
is_dtype_equal,
2120
is_integer,
2221
is_list_like,
23-
is_object_dtype,
2422
is_period_dtype,
2523
is_scalar,
26-
is_timedelta64_dtype,
2724
)
2825
from pandas.core.dtypes.concat import concat_compat
2926
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
30-
from pandas.core.dtypes.missing import isna
3127

3228
from pandas.core import algorithms
3329
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
@@ -46,7 +42,6 @@
4642
from pandas.core.tools.timedeltas import to_timedelta
4743

4844
from pandas.tseries.frequencies import DateOffset
49-
from pandas.tseries.offsets import Tick
5045

5146
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
5247

@@ -77,33 +72,13 @@ def wrapper(left, right):
7772
return wrapper
7873

7974

80-
def _make_wrapped_arith_op_with_freq(opname: str):
81-
"""
82-
Dispatch the operation to the underlying ExtensionArray, and infer
83-
the appropriate frequency for the result.
84-
"""
85-
meth = make_wrapped_arith_op(opname)
86-
87-
def wrapped(self, other):
88-
result = meth(self, other)
89-
if result is NotImplemented:
90-
return NotImplemented
91-
92-
new_freq = self._get_addsub_freq(other, result)
93-
result._freq = new_freq
94-
return result
95-
96-
wrapped.__name__ = opname
97-
return wrapped
98-
99-
10075
@inherit_names(
10176
["inferred_freq", "_isnan", "_resolution", "resolution"],
10277
DatetimeLikeArrayMixin,
10378
cache=True,
10479
)
10580
@inherit_names(
106-
["mean", "asi8", "_box_func"], DatetimeLikeArrayMixin,
81+
["mean", "asi8", "freq", "freqstr", "_box_func"], DatetimeLikeArrayMixin,
10782
)
10883
class DatetimeIndexOpsMixin(ExtensionIndex):
10984
"""
@@ -437,44 +412,8 @@ def _partial_date_slice(
437412
# --------------------------------------------------------------------
438413
# Arithmetic Methods
439414

440-
def _get_addsub_freq(self, other, result) -> Optional[DateOffset]:
441-
"""
442-
Find the freq we expect the result of an addition/subtraction operation
443-
to have.
444-
"""
445-
if is_period_dtype(self.dtype):
446-
if is_period_dtype(result.dtype):
447-
# Only used for ops that stay PeriodDtype
448-
return self.freq
449-
return None
450-
elif self.freq is None:
451-
return None
452-
elif lib.is_scalar(other) and isna(other):
453-
return None
454-
455-
elif isinstance(other, (Tick, timedelta, np.timedelta64)):
456-
new_freq = None
457-
if isinstance(self.freq, Tick):
458-
new_freq = self.freq
459-
return new_freq
460-
461-
elif isinstance(other, DateOffset):
462-
# otherwise just DatetimeArray
463-
return None # TODO: Should we infer if it matches self.freq * n?
464-
elif isinstance(other, (datetime, np.datetime64)):
465-
return self.freq
466-
467-
elif is_timedelta64_dtype(other):
468-
return None # TODO: shouldnt we be able to do self.freq + other.freq?
469-
elif is_object_dtype(other):
470-
return None # TODO: is this quite right? sometimes we unpack singletons
471-
elif is_datetime64_any_dtype(other):
472-
return None # TODO: shouldnt we be able to do self.freq + other.freq?
473-
else:
474-
raise NotImplementedError
475-
476-
__add__ = _make_wrapped_arith_op_with_freq("__add__")
477-
__sub__ = _make_wrapped_arith_op_with_freq("__sub__")
415+
__add__ = make_wrapped_arith_op("__add__")
416+
__sub__ = make_wrapped_arith_op("__sub__")
478417
__radd__ = make_wrapped_arith_op("__radd__")
479418
__rsub__ = make_wrapped_arith_op("__rsub__")
480419
__pow__ = make_wrapped_arith_op("__pow__")
@@ -643,25 +582,6 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
643582
_is_monotonic_increasing = Index.is_monotonic_increasing
644583
_is_monotonic_decreasing = Index.is_monotonic_decreasing
645584
_is_unique = Index.is_unique
646-
_freq = lib.no_default
647-
648-
@property
649-
def freq(self):
650-
"""
651-
In limited circumstances, our freq may differ from that of our _data.
652-
"""
653-
if self._freq is not lib.no_default:
654-
return self._freq
655-
return self._data.freq
656-
657-
@property
658-
def freqstr(self):
659-
"""
660-
Return the frequency object as a string if its set, otherwise None.
661-
"""
662-
if self.freq is None:
663-
return None
664-
return self.freq.freqstr
665585

666586
def _with_freq(self, freq):
667587
arr = self._data._with_freq(freq)

pandas/core/indexes/period.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _new_PeriodIndex(cls, **d):
7070
PeriodArray,
7171
wrap=True,
7272
)
73-
@inherit_names(["is_leap_year", "freq", "freqstr", "_format_native_types"], PeriodArray)
73+
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
7474
class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
7575
"""
7676
Immutable ndarray holding ordinal values indicating regular periods in time.

pandas/tests/frame/methods/test_cov_corr.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ def test_cov(self, float_frame, float_string_frame):
5858
)
5959
tm.assert_frame_equal(result, expected)
6060

61+
@pytest.mark.parametrize(
62+
"other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])]
63+
)
64+
def test_cov_nullable_integer(self, other_column):
65+
# https://github.com/pandas-dev/pandas/issues/33803
66+
data = pd.DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
67+
result = data.cov()
68+
arr = np.array([[0.5, 0.5], [0.5, 1.0]])
69+
expected = pd.DataFrame(arr, columns=["a", "b"], index=["a", "b"])
70+
tm.assert_frame_equal(result, expected)
71+
6172

6273
class TestDataFrameCorr:
6374
# DataFrame.corr(), as opposed to DataFrame.corrwith
@@ -153,6 +164,22 @@ def test_corr_int(self):
153164
df3.cov()
154165
df3.corr()
155166

167+
@td.skip_if_no_scipy
168+
@pytest.mark.parametrize(
169+
"nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])]
170+
)
171+
@pytest.mark.parametrize(
172+
"other_column",
173+
[pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])],
174+
)
175+
@pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"])
176+
def test_corr_nullable_integer(self, nullable_column, other_column, method):
177+
# https://github.com/pandas-dev/pandas/issues/33803
178+
data = pd.DataFrame({"a": nullable_column, "b": other_column})
179+
result = data.corr(method=method)
180+
expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
181+
tm.assert_frame_equal(result, expected)
182+
156183

157184
class TestDataFrameCorrWith:
158185
def test_corrwith(self, datetime_frame):

pandas/tests/indexes/datetimelike.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,3 +96,10 @@ def test_map_dictlike(self, mapper):
9696
expected = pd.Index([np.nan] * len(index))
9797
result = index.map(mapper([], []))
9898
tm.assert_index_equal(result, expected)
99+
100+
def test_getitem_preserves_freq(self):
101+
index = self.create_index()
102+
assert index.freq is not None
103+
104+
result = index[:]
105+
assert result.freq == index.freq

0 commit comments

Comments
 (0)