Skip to content

Issue #806 DataFrame/Series.align can now specify fill_value or fill_method #807

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 23, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 29 additions & 19 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,8 @@ def lookup(self, row_labels, col_labels):
#----------------------------------------------------------------------
# Reindexing and alignment

def align(self, other, join='outer', axis=None, level=None, copy=True):
def align(self, other, join='outer', axis=None, level=None, copy=True,
fill_value=None, fill_method=None):
"""
Align two DataFrame objects on their index and columns with the
specified join method for each axis Index
Expand All @@ -1683,6 +1684,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
level : int or name
Broadcast across a level, matching Index values on the
passed MultiIndex level
copy : boolean, default True
Always returns new objects. If copy=False and no reindexing is
required then original objects are returned.
fill_value : object, default None
fill_method : str, default None

Returns
-------
Expand All @@ -1691,15 +1697,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
"""
if isinstance(other, DataFrame):
return self._align_frame(other, join=join, axis=axis, level=level,
copy=copy)
copy=copy,
fill_value=fill_value,
fill_method=fill_method)
elif isinstance(other, Series):
return self._align_series(other, join=join, axis=axis, level=level,
copy=copy)
copy=copy,
fill_value=fill_value,
fill_method=fill_method)
else: # pragma: no cover
raise TypeError('unsupported type: %s' % type(other))

def _align_frame(self, other, join='outer', axis=None, level=None,
copy=True):
copy=True, fill_value=None, fill_method=None):
# defaults
join_index, join_columns = None, None
ilidx, iridx = None, None
Expand All @@ -1721,10 +1731,15 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
join_columns, clidx, copy)
right = other._reindex_with_indexers(join_index, iridx,
join_columns, cridx, copy)
return left, right
fill_na = (fill_value is not None) or (fill_method is not None)
if fill_na:
return (left.fillna(fill_value, method=fill_method),
right.fillna(fill_value, method=fill_method))
else:
return left, right

def _align_series(self, other, join='outer', axis=None, level=None,
copy=True):
copy=True, fill_value=None, fill_method=None):
fdata = self._data
if axis == 0:
join_index = self.index
Expand Down Expand Up @@ -1753,7 +1768,13 @@ def _align_series(self, other, join='outer', axis=None, level=None,

left_result = DataFrame(fdata)
right_result = other if ridx is None else other.reindex(join_index)
return left_result, right_result

fill_na = (fill_value is not None) or (fill_method is not None)
if fill_na:
return (left_result.fillna(fill_value, method=fill_method),
right_result.fillna(fill_value, method=fill_method))
else:
return left_result, right_result

def reindex(self, index=None, columns=None, method=None, level=None,
copy=True):
Expand Down Expand Up @@ -4080,18 +4101,6 @@ def _to_sdict(data, columns):
else: # pragma: no cover
raise TypeError('No logic to handle %s type' % type(data[0]))

def _list_to_sdict(data, columns):
    """
    Convert a list of tuples or a list of lists via _convert_object_array.

    Empty ``data`` short-circuits to ``({}, columns)``, with ``columns``
    replaced by an empty list when it is None. Otherwise the rows are
    converted to an object array (the converter is picked by looking at
    the type of the first row only), the array is transposed and wrapped
    in a list, and the result is passed to ``_convert_object_array``
    together with ``columns``.
    """
    if len(data) == 0:
        # nothing to convert
        if columns is None:
            columns = []
        return {}, columns

    if isinstance(data[0], tuple):
        to_array = lib.to_object_array_tuples
    else:
        # list of lists
        to_array = lib.to_object_array

    content = list(to_array(data).T)
    return _convert_object_array(content, columns)

def _list_of_series_to_sdict(data, columns):
from pandas.core.index import _get_combined_index

Expand All @@ -4107,6 +4116,7 @@ def _list_of_series_to_sdict(data, columns):
else:
return values, columns


def _list_of_dict_to_sdict(data, columns):
if columns is None:
gen = (x.keys() for x in data)
Expand Down
12 changes: 10 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,8 @@ def apply(self, func):
mapped = lib.map_infer(self.values, func)
return Series(mapped, index=self.index, name=self.name)

def align(self, other, join='outer', level=None, copy=True):
def align(self, other, join='outer', level=None, copy=True,
fill_value=None, fill_method=None):
"""
Align two Series objects with the specified join method

Expand All @@ -1800,6 +1801,8 @@ def align(self, other, join='outer', level=None, copy=True):
copy : boolean, default True
Always return new objects. If copy=False and no reindexing is
required, the same object will be returned (for better performance)
fill_value : object, default None
fill_method : str, default None

Returns
-------
Expand All @@ -1812,7 +1815,12 @@ def align(self, other, join='outer', level=None, copy=True):

left = self._reindex_indexer(join_index, lidx, copy)
right = other._reindex_indexer(join_index, ridx, copy)
return left, right
fill_na = (fill_value is not None) or (fill_method is not None)
if fill_na:
return (left.fillna(fill_value, method=fill_method),
right.fillna(fill_value, method=fill_method))
else:
return left, right

def _reindex_indexer(self, new_index, indexer, copy):
if indexer is not None:
Expand Down
19 changes: 17 additions & 2 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3180,8 +3180,15 @@ def test_align(self):

# axis = 0
other = self.frame.ix[:-5, :3]
af, bf = self.frame.align(other, axis=0)
self.assert_(bf.columns.equals(other.columns))
af, bf = self.frame.align(other, axis=0, fill_value=-1)
self.assert_(bf.columns.equals(other.columns))
#test fill value
join_idx = self.frame.index.join(other.index)
diff_a = self.frame.index.diff(join_idx)
diff_b = other.index.diff(join_idx)
diff_a_vals = af.reindex(diff_a).values
diff_b_vals = bf.reindex(diff_b).values
self.assert_((diff_a_vals == -1).all())

af, bf = self.frame.align(other, join='right', axis=0)
self.assert_(bf.columns.equals(other.columns))
Expand All @@ -3194,6 +3201,14 @@ def test_align(self):
self.assert_(bf.columns.equals(self.frame.columns))
self.assert_(bf.index.equals(other.index))

#test fill value
join_idx = self.frame.index.join(other.index)
diff_a = self.frame.index.diff(join_idx)
diff_b = other.index.diff(join_idx)
diff_a_vals = af.reindex(diff_a).values
diff_b_vals = bf.reindex(diff_b).values
self.assert_((diff_a_vals == -1).all())

af, bf = self.frame.align(other, join='inner', axis=1)
self.assert_(bf.columns.equals(other.columns))

Expand Down
15 changes: 12 additions & 3 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1783,10 +1783,18 @@ def test_apply(self):
assert_series_equal(result, self.ts * 2)

def test_align(self):
def _check_align(a, b, how='left'):
aa, ab = a.align(b, join=how)

def _check_align(a, b, how='left', fill=None):
aa, ab = a.align(b, join=how, fill_value=fill)
join_index = a.index.join(b.index, how=how)
if fill is not None:
diff_a = aa.index.diff(join_index)
diff_b = ab.index.diff(join_index)
if len(diff_a) > 0:
self.assert_((aa.reindex(diff_a) == fill).all())
if len(diff_b) > 0:
self.assert_((ab.reindex(diff_b) == fill).all())

ea = a.reindex(join_index)
eb = b.reindex(join_index)

Expand All @@ -1795,6 +1803,7 @@ def _check_align(a, b, how='left'):

for kind in JOIN_TYPES:
_check_align(self.ts[2:], self.ts[:-5])
_check_align(self.ts[2:], self.ts[:-5], fill=-1)

# empty left
_check_align(self.ts[:0], self.ts[:-5])
Expand Down