Skip to content

BUG: Series.asof fails for all NaN Series (GH15713) #15758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 15 commits into from
Closed
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -930,3 +930,5 @@ Bug Fixes
- Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
- Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
- Bug in ``pd.read_msgpack`` which did not allow to load dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, in the future, if you put the whatsnew notes in one of the blank lines in Bug Fixes (these are left there on purpose), you won't get merge conflicts.

- Bug in ``Series.asof`` which raised an error if the series contained only ``NaN`` values (:issue:`15713`)
8 changes: 8 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3972,6 +3972,14 @@ def asof(self, where, subset=None):
where = Index(where) if is_list else Index([where])

nulls = self.isnull() if is_series else self[subset].isnull().any(1)
if nulls.all():
if is_series:
return pd.Series(np.nan, index=where, name=self.name)
elif is_list:
return pd.DataFrame(np.nan, index=where, columns=self.columns)
else:
return pd.Series(np.nan, index=self.columns, name=where[0])

locs = self.index.asof_locs(where, ~(nulls.values))

# mask the missing
Expand Down
47 changes: 36 additions & 11 deletions pandas/tests/frame/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@
from pandas import (DataFrame, date_range, Timestamp, Series,
to_datetime)

from pandas.util.testing import assert_frame_equal, assert_series_equal
import pandas.util.testing as tm

from .common import TestData


class TestFrameAsof(TestData, tm.TestCase):

def setUp(self):
self.N = N = 50
rng = date_range('1/1/1990', periods=N, freq='53s')
self.rng = date_range('1/1/1990', periods=N, freq='53s')
self.df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
index=rng)
index=self.rng)

def test_basic(self):

df = self.df.copy()
df.loc[15:30, 'A'] = np.nan
dates = date_range('1/1/1990', periods=self.N * 3,
Expand All @@ -39,7 +36,6 @@ def test_basic(self):
self.assertTrue((rs == 14).all(1).all())

def test_subset(self):

N = 10
rng = date_range('1/1/1990', periods=N, freq='53s')
df = DataFrame({'A': np.arange(N), 'B': np.arange(N)},
Expand All @@ -51,19 +47,19 @@ def test_subset(self):
# with a subset of A should be the same
result = df.asof(dates, subset='A')
expected = df.asof(dates)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

# same with A/B
result = df.asof(dates, subset=['A', 'B'])
expected = df.asof(dates)
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

# B gives self.df.asof
result = df.asof(dates, subset='B')
expected = df.resample('25s', closed='right').ffill().reindex(dates)
expected.iloc[20:] = 9

assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_missing(self):
# GH 15118
Expand All @@ -75,9 +71,38 @@ def test_missing(self):
result = df.asof('1989-12-31')

expected = Series(index=['A', 'B'], name=Timestamp('1989-12-31'))
assert_series_equal(result, expected)
tm.assert_series_equal(result, expected)

result = df.asof(to_datetime(['1989-12-31']))
expected = DataFrame(index=to_datetime(['1989-12-31']),
columns=['A', 'B'], dtype='float64')
assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_all_nans(self):
# GH 15713
# DataFrame is all nans
result = DataFrame([np.nan]).asof([0])
expected = DataFrame([np.nan])
tm.assert_frame_equal(result, expected)

# testing non-default indexes, multiple inputs
dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
result = DataFrame(np.nan, index=self.rng, columns=['A']).asof(dates)
expected = DataFrame(np.nan, index=dates, columns=['A'])
tm.assert_frame_equal(result, expected)

# testing multiple columns
dates = date_range('1/1/1990', periods=self.N * 3, freq='25s')
result = DataFrame(np.nan, index=self.rng,
columns=['A', 'B', 'C']).asof(dates)
expected = DataFrame(np.nan, index=dates, columns=['A', 'B', 'C'])
tm.assert_frame_equal(result, expected)

# testing scalar input
result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof([3])
expected = DataFrame(np.nan, index=[3], columns=['A', 'B'])
tm.assert_frame_equal(result, expected)

result = DataFrame(np.nan, index=[1, 2], columns=['A', 'B']).asof(3)
expected = Series(np.nan, index=['A', 'B'], name=3)
tm.assert_series_equal(result, expected)
25 changes: 25 additions & 0 deletions pandas/tests/series/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,28 @@ def test_errors(self):
s = Series(np.random.randn(N), index=rng)
with self.assertRaises(ValueError):
s.asof(s.index[0], subset='foo')

def test_all_nans(self):
# GH 15713
# series is all nans
result = Series([np.nan]).asof([0])
expected = Series([np.nan])
tm.assert_series_equal(result, expected)

# testing non-default indexes
N = 50
rng = date_range('1/1/1990', periods=N, freq='53s')

dates = date_range('1/1/1990', periods=N * 3, freq='25s')
result = Series(np.nan, index=rng).asof(dates)
expected = Series(np.nan, index=dates)
tm.assert_series_equal(result, expected)

# testing scalar input
date = date_range('1/1/1990', periods=N * 3, freq='25s')[0]
result = Series(np.nan, index=rng).asof(date)
assert isnull(result)

# test name is propagated
result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5])
self.assertEqual(result.name, 'test')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a `tm.assert_series_equal` against the full expected result, not just a check of the name.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But I'll do it on merge.