Skip to content

Commit 5c92545

Browse files
committed
ENH: Initial pass at implementing DataFrame.asof, GH 2941
Implements DataFrame.asof with several selectable strategies for skipping rows that contain missing values. The default strategy is equivalent to df.apply(lambda s: s.asof(where)).
1 parent 814dbe8 commit 5c92545

File tree

2 files changed

+141
-1
lines changed

2 files changed

+141
-1
lines changed

pandas/core/frame.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from pandas.core.indexing import (maybe_droplevels,
3535
convert_to_index_sliceable,
3636
check_bool_indexer)
37-
from pandas.core.internals import (BlockManager,
37+
from pandas.core.internals import (BlockManager, make_block,
3838
create_block_manager_from_arrays,
3939
create_block_manager_from_blocks)
4040
from pandas.core.series import Series
@@ -2737,6 +2737,69 @@ def _maybe_casted_values(index, labels=None):
27372737
#----------------------------------------------------------------------
27382738
# Reindex-based selection methods
27392739

2740+
def asof(self, where, skipna='percolumn'):
    """
    Return the last good (non-null) value for each column of the DataFrame
    as of the requested date(s). What counts as a 'good' row is controlled
    by the skipna argument.

    If there is no good value, NaN is returned.

    Parameters
    ----------
    where : date or sequence of dates
        A string is converted with datetools.to_datetime and treated as a
        single date. A sequence that is not already an Index is wrapped in
        one.
    skipna : {'any', 'all', 'none', 'percolumn'}, default 'percolumn'
        * any: Ignore/skip rows where any of the columns are null.
        * all: Ignore/skip rows where all of the columns are null.
        * none: Don't ignore/skip any rows.
        * percolumn: Ignore/skip null rows for each column separately.
          Equivalent to df.apply(lambda s: s.asof(where)).

    Returns
    -------
    Series if where is a date, DataFrame if where is a sequence of dates.
    The DataFrame is indexed by where and keeps the original columns.

    Raises
    ------
    ValueError
        If skipna is not one of the four supported options.

    Notes
    -----
    Dates are assumed to be sorted.
    """
    if isinstance(where, compat.string_types):
        where = datetools.to_datetime(where)

    # Build a boolean mask over the rows: True marks rows that may be
    # returned as the "last good" row for a lookup date.
    if skipna == 'percolumn':
        # Each column decides independently; delegate to Series.asof.
        return self.apply(lambda s: s.asof(where))
    elif skipna == 'none':
        # Builtin bool rather than the np.bool alias, which newer NumPy
        # releases no longer provide.
        row_mask = np.ones((self.shape[0],), dtype=bool)
    elif skipna == 'any':
        row_mask = ~(self.isnull().any(axis=1).values)
    elif skipna == 'all':
        row_mask = ~(self.isnull().all(axis=1).values)
    else:
        raise ValueError("skipna must be one of percolumn, none, any, all.")

    if not hasattr(where, '__iter__'):
        # Scalar lookup: a location of -1 means no usable row at or before
        # the requested date.
        loc = self.index.asof_locs(Index([where]), row_mask)[0]
        if loc == -1:
            return Series(index=self.columns, data=np.nan)

        s = self.iloc[loc, :].copy()
        s.name = None
        return s

    # The scalar path hands asof_locs an Index; do the same here so plain
    # sequences (lists, arrays) of dates are accepted as well, and so the
    # result's row axis is an Index.
    if not isinstance(where, Index):
        where = Index(where)

    locs = self.index.asof_locs(where, row_mask)

    new_blocks = []
    for block in self._data.blocks:
        # Select the matched rows from every block in one take.
        new_values = com.take_2d_multi(block.values, [None, locs])
        # make_block (not block.make_block_same_class) lets the block type
        # be re-inferred from new_values: an integer block may need to
        # become float once missing entries appear.
        # NOTE(review): confirm whether make_block_same_class could handle
        # that upcast.
        new_blocks.append(make_block(new_values, block.mgr_locs))

    new_mgr = create_block_manager_from_blocks(
        new_blocks, [self._data.axes[0], where])
    return self._constructor(new_mgr)
2802+
27402803
def dropna(self, axis=0, how='any', thresh=None, subset=None,
27412804
inplace=False):
27422805
"""

pandas/tests/test_frame.py

+77
Original file line numberDiff line numberDiff line change
@@ -2270,6 +2270,83 @@ def test_get_axis(self):
22702270
assertRaisesRegexp(ValueError, 'No axis.*None', f._get_axis_name, None)
22712271
assertRaisesRegexp(ValueError, 'No axis named', f._get_axis_number, None)
22722272

2273+
def test_asof(self):
    # Exercises DataFrame.asof for every skipna mode, first with a sequence
    # of lookup dates (DataFrame result) and then with scalar dates (Series
    # result).
    frame_index = date_range('2014/01/02', periods=4, freq='3D')
    df = pd.DataFrame(data={'a': ["a", None, "b", "c"],
                            'b': [1, None, 2, 3],
                            'c': [1, None, None, 3],
                            'd': [None, None, 2, 3]},
                      index=frame_index)

    # Lookup dates start one day before the frame's first row, so the first
    # lookup never has a row to fall back on.
    test_dates = date_range('2014/01/01', periods=5, freq='3D')

    def expected_frame(a, b, c, d):
        # Expected result for a sequence lookup: one row per lookup date.
        return pd.DataFrame(data={'a': a, 'b': b, 'c': c, 'd': d},
                            index=test_dates)

    # skipna='none' is the simplest case: the latest row is taken as-is,
    # nulls included.
    frame_none = df.asof(test_dates, skipna='none')
    # the result must be indexed by the lookup dates
    self.assertTrue((frame_none.index == test_dates).all())
    assert_frame_equal(
        frame_none,
        expected_frame(a=[None, "a", None, "b", "c"],
                       b=[None, 1, None, 2, 3],
                       c=[None, 1, None, None, 3],
                       d=[None, None, None, 2, 3]))

    # skipna='any': only the fully populated last row qualifies.
    frame_any = df.asof(test_dates, skipna='any')
    assert_frame_equal(
        frame_any,
        expected_frame(a=[None, None, None, None, "c"],
                       b=[None, None, None, None, 3],
                       c=[None, None, None, None, 3],
                       d=[None, None, None, None, 3]))

    # skipna='all': only the entirely-null second row is skipped.
    frame_all = df.asof(test_dates, skipna='all')
    assert_frame_equal(
        frame_all,
        expected_frame(a=[None, "a", "a", "b", "c"],
                       b=[None, 1, 1, 2, 3],
                       c=[None, 1, 1, None, 3],
                       d=[None, None, None, 2, 3]))

    # skipna='percolumn' is the most involved case: every column looks back
    # to its own last non-null value.
    frame_percolumn = df.asof(test_dates, skipna='percolumn')
    assert_frame_equal(
        frame_percolumn,
        expected_frame(a=[None, "a", "a", "b", "c"],
                       b=[None, 1, 1, 2, 3],
                       c=[None, 1, 1, 1, 3],
                       d=[None, None, None, 2, 3]))

    # Scalar lookups return an unnamed Series.
    before_start = df.asof(test_dates[0], skipna='none')
    self.assertIsNone(before_start.name)
    self.assertTrue(isnull(before_start).all())

    third_date = test_dates[2]

    scalar_none = df.asof(third_date, skipna='none')
    self.assertIsNone(scalar_none.name)
    want_none = frame_none.iloc[2, :]
    want_none.name = None
    assert_series_equal(want_none, scalar_none)

    scalar_any = df.asof(third_date, skipna='any')
    self.assertIsNone(scalar_any.name)
    self.assertTrue(isnull(scalar_any).all())

    scalar_all = df.asof(third_date, skipna='all')
    self.assertIsNone(scalar_all.name)
    want_all = frame_all.iloc[2, :]
    want_all.name = None
    assert_series_equal(want_all, scalar_all)

    scalar_percolumn = df.asof(third_date, skipna='percolumn')
    self.assertIsNone(scalar_percolumn.name)
    want_percolumn = df.apply(lambda s: s.asof(test_dates[2]))
    assert_series_equal(want_percolumn, scalar_percolumn)
2349+
22732350
def test_set_index(self):
22742351
idx = Index(np.arange(len(self.mixed_frame)))
22752352

0 commit comments

Comments
 (0)