diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f1e1f18765083..48bf326c416de 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1669,7 +1669,8 @@ def lookup(self, row_labels, col_labels):
     #----------------------------------------------------------------------
     # Reindexing and alignment
 
-    def align(self, other, join='outer', axis=None, level=None, copy=True):
+    def align(self, other, join='outer', axis=None, level=None, copy=True,
+              fill_value=None, fill_method=None):
         """
         Align two DataFrame object on their index and columns with the
         specified join method for each axis Index
@@ -1683,6 +1684,14 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
         level : int or name
             Broadcast across a level, matching Index values on the
             passed MultiIndex level
+        copy : boolean, default True
+            Always returns new objects. If copy=False and no reindexing is
+            required then original objects are returned.
+        fill_value : object, default None
+            Value passed to fillna on both aligned objects
+        fill_method : str, default None
+            Method passed to fillna on both aligned objects. Filling is
+            skipped when both fill_value and fill_method are None
 
         Returns
         -------
@@ -1691,15 +1700,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True):
         """
         if isinstance(other, DataFrame):
             return self._align_frame(other, join=join, axis=axis, level=level,
-                                     copy=copy)
+                                     copy=copy,
+                                     fill_value=fill_value,
+                                     fill_method=fill_method)
         elif isinstance(other, Series):
             return self._align_series(other, join=join, axis=axis, level=level,
-                                      copy=copy)
+                                      copy=copy,
+                                      fill_value=fill_value,
+                                      fill_method=fill_method)
         else:  # pragma: no cover
             raise TypeError('unsupported type: %s' % type(other))
 
     def _align_frame(self, other, join='outer', axis=None, level=None,
-                     copy=True):
+                     copy=True, fill_value=None, fill_method=None):
         # defaults
         join_index, join_columns = None, None
         ilidx, iridx = None, None
@@ -1721,10 +1734,15 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
                                            join_columns, clidx, copy)
         right = other._reindex_with_indexers(join_index, iridx,
                                              join_columns, cridx, copy)
-        return left, right
+        fill_na = (fill_value is not None) or (fill_method is not None)
+        if fill_na:
+            return (left.fillna(fill_value, method=fill_method),
+                    right.fillna(fill_value, method=fill_method))
+        else:
+            return left, right
 
     def _align_series(self, other, join='outer', axis=None, level=None,
-                      copy=True):
+                      copy=True, fill_value=None, fill_method=None):
         fdata = self._data
         if axis == 0:
             join_index = self.index
@@ -1753,7 +1771,13 @@ def _align_series(self, other, join='outer', axis=None, level=None,
 
         left_result = DataFrame(fdata)
         right_result = other if ridx is None else other.reindex(join_index)
-        return left_result, right_result
+
+        fill_na = (fill_value is not None) or (fill_method is not None)
+        if fill_na:
+            return (left_result.fillna(fill_value, method=fill_method),
+                    right_result.fillna(fill_value, method=fill_method))
+        else:
+            return left_result, right_result
 
     def reindex(self, index=None, columns=None, method=None, level=None,
                 copy=True):
@@ -4080,18 +4104,6 @@ def _to_sdict(data, columns):
     else:  # pragma: no cover
         raise TypeError('No logic to handle %s type' % type(data[0]))
 
-def _list_to_sdict(data, columns):
-    if len(data) > 0 and isinstance(data[0], tuple):
-        content = list(lib.to_object_array_tuples(data).T)
-    elif len(data) > 0:
-        # list of lists
-        content = list(lib.to_object_array(data).T)
-    else:
-        if columns is None:
-            columns = []
-        return {}, columns
-    return _convert_object_array(content, columns)
-
 def _list_of_series_to_sdict(data, columns):
     from pandas.core.index import _get_combined_index
 
@@ -4107,6 +4119,7 @@ def _list_of_series_to_sdict(data, columns):
     else:
         return values, columns
 
+
 def _list_of_dict_to_sdict(data, columns):
     if columns is None:
         gen = (x.keys() for x in data)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 374b7b917ef18..4e3774687f1dd 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1786,7 +1786,8 @@ def apply(self, func):
             mapped = lib.map_infer(self.values, func)
             return Series(mapped, index=self.index, name=self.name)
 
-    def align(self, other, join='outer', level=None, copy=True):
+    def align(self, other, join='outer', level=None, copy=True,
+              fill_value=None, fill_method=None):
         """
         Align two Series object with the specified join method
 
@@ -1800,6 +1801,11 @@ def align(self, other, join='outer', level=None, copy=True):
         copy : boolean, default True
             Always return new objects. If copy=False and no reindexing is
             required, the same object will be returned (for better performance)
+        fill_value : object, default None
+            Value passed to fillna on both aligned objects
+        fill_method : str, default None
+            Method passed to fillna on both aligned objects. Filling is
+            skipped when both fill_value and fill_method are None
 
         Returns
         -------
@@ -1812,7 +1818,12 @@ def align(self, other, join='outer', level=None, copy=True):
 
         left = self._reindex_indexer(join_index, lidx, copy)
         right = other._reindex_indexer(join_index, ridx, copy)
-        return left, right
+        fill_na = (fill_value is not None) or (fill_method is not None)
+        if fill_na:
+            return (left.fillna(fill_value, method=fill_method),
+                    right.fillna(fill_value, method=fill_method))
+        else:
+            return left, right
 
     def _reindex_indexer(self, new_index, indexer, copy):
         if indexer is not None:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 5fd54466d92b3..6f460dfaa79a1 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -3180,8 +3180,16 @@ def test_align(self):
 
         # axis = 0
         other = self.frame.ix[:-5, :3]
-        af, bf = self.frame.align(other, axis=0)
+        af, bf = self.frame.align(other, axis=0, fill_value=-1)
         self.assert_(bf.columns.equals(other.columns))
+        # rows that exist only because of the outer join must hold fill_value
+        join_idx = self.frame.index.join(other.index, how='outer')
+        diff_a = join_idx.diff(self.frame.index)
+        diff_b = join_idx.diff(other.index)
+        diff_a_vals = af.reindex(diff_a).values
+        diff_b_vals = bf.reindex(diff_b).values
+        self.assert_((diff_a_vals == -1).all())
+        self.assert_((diff_b_vals == -1).all())
 
         af, bf = self.frame.align(other, join='right', axis=0)
         self.assert_(bf.columns.equals(other.columns))
@@ -3194,6 +3202,14 @@ def test_align(self):
         self.assert_(bf.columns.equals(self.frame.columns))
         self.assert_(bf.index.equals(other.index))
 
+        #test fill value
+        join_idx = self.frame.index.join(other.index)
+        diff_a = self.frame.index.diff(join_idx)
+        diff_b = other.index.diff(join_idx)
+        diff_a_vals = af.reindex(diff_a).values
+        diff_b_vals = bf.reindex(diff_b).values
+        self.assert_((diff_a_vals == -1).all())
+
         af, bf = self.frame.align(other, join='inner', axis=1)
         self.assert_(bf.columns.equals(other.columns))
 
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 425b5a8fad1a6..c36b8c9b27d4f 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -1783,10 +1783,22 @@ def test_apply(self):
         assert_series_equal(result, self.ts * 2)
 
     def test_align(self):
-        def _check_align(a, b, how='left'):
-            aa, ab = a.align(b, join=how)
+        def _check_align(a, b, how='left', fill=None):
+            aa, ab = a.align(b, join=how, fill_value=fill)
 
             join_index = a.index.join(b.index, how=how)
+            if fill is not None:
+                diff_a = aa.index.diff(join_index)
+                diff_b = ab.index.diff(join_index)
+                if len(diff_a) > 0:
+                    self.assert_((aa.reindex(diff_a) == fill).all())
+                if len(diff_b) > 0:
+                    self.assert_((ab.reindex(diff_b) == fill).all())
+
             ea = a.reindex(join_index)
             eb = b.reindex(join_index)
+
+            if fill is not None:
+                ea = ea.fillna(fill)
+                eb = eb.fillna(fill)
 
@@ -1795,6 +1807,7 @@ def _check_align(a, b, how='left'):
 
         for kind in JOIN_TYPES:
             _check_align(self.ts[2:], self.ts[:-5])
+            _check_align(self.ts[2:], self.ts[:-5], fill=-1)
 
         # empty left
         _check_align(self.ts[:0], self.ts[:-5])