Skip to content

Commit b9c96d1

Browse files
committed
ENH: Initial pass at implementing DataFrame.asof, GH 2941
This can almost certainly be made quicker; I am still digging into the internals to understand the various underlying indexers.
1 parent efc4a08 commit b9c96d1

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed

pandas/core/frame.py

+58
Original file line numberDiff line numberDiff line change
@@ -2737,6 +2737,64 @@ def _maybe_casted_values(index, labels=None):
27372737
#----------------------------------------------------------------------
27382738
# Reindex-based selection methods
27392739

2740+
def asof(self, where, skipna='percolumn'):
    """
    Return last good (non-null) value for each column of DataFrame for the
    requested dates. Definition of 'good' value controlled by skipna
    argument.

    If there is no good value, NaN is returned.

    Parameters
    ----------
    where : date or sequence of dates
        A string is first converted with ``datetools.to_datetime``. A
        sequence that is not already an Index is wrapped in one before the
        row lookup.
    skipna : {'any', 'all', 'none', 'percolumn'}, default 'percolumn'
        * any: Ignore/skip rows where any of the columns are null.
        * all: Ignore/skip rows where all of the columns are null.
        * none: Don't ignore/skip any rows.
        * percolumn: Ignore/skip null rows for each column separately.
          Equivalent to df.apply(lambda s: s.asof(where)).

    Notes
    -----
    Dates are assumed to be sorted

    Returns
    -------
    Series if where is a date, DataFrame if where is a sequence of dates.

    Raises
    ------
    ValueError
        If skipna is not one of 'percolumn', 'none', 'any' or 'all'.
    """
    if isinstance(where, compat.string_types):
        where = datetools.to_datetime(where)

    # Build a boolean mask over the rows marking which ones are eligible
    # ("good") to be returned. The per-column mode is delegated to Series.asof.
    if skipna == 'percolumn':
        return self.apply(lambda s: s.asof(where))
    elif skipna == 'none':
        # Builtin bool instead of the np.bool alias, which newer NumPy
        # releases no longer provide.
        row_mask = np.ones((self.shape[0],), dtype=bool)
    elif skipna == 'any':
        row_mask = ~(self.isnull().any(axis=1).values)
    elif skipna == 'all':
        row_mask = ~(self.isnull().all(axis=1).values)
    else:
        raise ValueError("skipna must be one of percolumn, none, any, all.")

    # Scalar request: look up a single row and return it as a Series.
    if not hasattr(where, '__iter__'):
        loc = self.index.asof_locs(Index([where]), row_mask)[0]
        if loc == -1:
            # -1 is treated as "no eligible row": return an all-NaN Series.
            return Series(index=self.columns, data=np.nan)

        s = self.iloc[loc, :].copy()
        s.name = None
        return s

    # The scalar branch above always hands asof_locs an Index; do the same
    # for arbitrary sequences (e.g. a plain list of dates) so both paths
    # agree. NOTE(review): presumably asof_locs requires an Index-like
    # argument -- confirm against Index.asof_locs.
    if not isinstance(where, Index):
        where = Index(where)

    # take uses tiling from the end with negative values, but we want blanks
    # this is almost certainly a slow way of doing it, but it will do for now.
    locs = self.index.asof_locs(where, row_mask)

    must_blank = np.nonzero(locs == -1)[0]
    new_df = self.take(locs).copy()
    new_df.iloc[must_blank, :] = None
    new_df.index = where
    return new_df
2797+
27402798
def dropna(self, axis=0, how='any', thresh=None, subset=None,
27412799
inplace=False):
27422800
"""

pandas/tests/test_frame.py

+72
Original file line numberDiff line numberDiff line change
@@ -2266,6 +2266,78 @@ def test_get_axis(self):
22662266
assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None)
22672267
assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, None)
22682268

2269+
def test_asof(self):
    # Source frame: four rows spaced three days apart, with nulls placed so
    # that every skipna mode yields a different answer.
    source_dates = date_range('2014/01/02', periods=4, freq='3D')
    frame = pd.DataFrame(data={'a': ["a", None, "b", "c"],
                               'b': [1, None, 2, 3],
                               'c': [1, None, None, 3],
                               'd': [None, None, 2, 3]},
                         index=source_dates)

    # Query dates start one day before the first source row.
    test_dates = date_range('2014/01/01', periods=5, freq='3D')

    def build_expected(a, b, c, d):
        # Expected frames always share the query dates as their index.
        return pd.DataFrame(data={'a': a, 'b': b, 'c': c, 'd': d},
                            index=test_dates)

    # skipna='none' is the simplest case: rows are returned as they are
    none_frame = frame.asof(test_dates, skipna='none')
    # the result must be indexed by the requested dates
    self.assertTrue((none_frame.index == test_dates).all())
    assert_frame_equal(
        none_frame,
        build_expected(a=[None, "a", None, "b", "c"],
                       b=[None, 1, None, 2, 3],
                       c=[None, 1, None, None, 3],
                       d=[None, None, None, 2, 3]))

    # skipna='any'
    any_frame = frame.asof(test_dates, skipna='any')
    assert_frame_equal(
        any_frame,
        build_expected(a=[None, None, None, None, "c"],
                       b=[None, None, None, None, 3],
                       c=[None, None, None, None, 3],
                       d=[None, None, None, None, 3]))

    # skipna='all'
    all_frame = frame.asof(test_dates, skipna='all')
    assert_frame_equal(
        all_frame,
        build_expected(a=[None, "a", "a", "b", "c"],
                       b=[None, 1, 1, 2, 3],
                       c=[None, 1, 1, None, 3],
                       d=[None, None, None, 2, 3]))

    # skipna='percolumn' is the most involved case
    percolumn_frame = frame.asof(test_dates, skipna='percolumn')
    assert_frame_equal(
        percolumn_frame,
        build_expected(a=[None, "a", "a", "b", "c"],
                       b=[None, 1, 1, 2, 3],
                       c=[None, 1, 1, 1, 3],
                       d=[None, None, None, 2, 3]))

    # Scalar lookups return an unnamed Series
    before_first = frame.asof(test_dates[0], skipna='none')
    self.assertIsNone(before_first.name)
    self.assertTrue(isnull(before_first).all())

    third_none = frame.asof(test_dates[2], skipna='none')
    self.assertIsNone(third_none.name)
    assert_series_equal(none_frame.iloc[2, :], third_none)

    third_any = frame.asof(test_dates[2], skipna='any')
    self.assertIsNone(third_any.name)
    self.assertTrue(isnull(third_any).all())

    third_all = frame.asof(test_dates[2], skipna='all')
    self.assertIsNone(third_all.name)
    assert_series_equal(all_frame.iloc[2, :], third_all)

    third_percolumn = frame.asof(test_dates[2], skipna='percolumn')
    self.assertIsNone(third_percolumn.name)
    assert_series_equal(percolumn_frame.iloc[2, :], third_percolumn)
2340+
22692341
def test_set_index(self):
22702342
idx = Index(np.arange(len(self.mixed_frame)))
22712343

0 commit comments

Comments
 (0)