From 86c2250fa955753efa8c224f6c6e005cdb9cbd19 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 8 Feb 2012 23:55:15 -0500 Subject: [PATCH 1/7] TST: test coverage, pep8 --- pandas/core/frame.py | 75 ++++++++++++++++++------------ pandas/sparse/tests/test_sparse.py | 8 ++++ 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 44d08c50276b9..bb490a92d72f5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -228,8 +228,8 @@ class DataFrame(NDFrame): _het_axis = 1 _AXIS_NUMBERS = { - 'index' : 0, - 'columns' : 1 + 'index': 0, + 'columns': 1 } _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) @@ -246,8 +246,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, data : numpy ndarray (structured or homogeneous), dict, or DataFrame Dict can contain Series, arrays, constants, or list-like objects index : Index or array-like - Index to use for resulting frame. Will default to np.arange(n) if no - indexing information part of input data and no index provided + Index to use for resulting frame. Will default to np.arange(n) if + no indexing information part of input data and no index provided columns : Index or array-like Will default to np.arange(n) if not column labels provided dtype : dtype, default None @@ -257,7 +257,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, Examples -------- - >>> d = {'col1' : ts1, 'col2' : ts2} + >>> d = {'col1': ts1, 'col2': ts2} >>> df = DataFrame(data=d, index=index) >>> df2 = DataFrame(np.random.randn(10, 5)) >>> df3 = DataFrame(np.random.randn(10, 5), @@ -535,7 +535,8 @@ def __contains__(self, key): # Python 2 division methods if not py3compat.PY3: __div__ = _arith_method(operator.div, '__div__', default_axis=None) - __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None) + __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', + default_axis=None) def __neg__(self): arr = operator.neg(self.values) @@ -855,7 +856,7 @@ def _helper_csvexcel(self, writer, na_rep=None, cols=None, header=True, index_label = [] for i, name in enumerate(self.index.names): if name is None: - name = '' # 'level_%d' % i + name = '' index_label.append(name) else: index_label = self.index.name @@ -892,7 +893,7 @@ def _helper_csvexcel(self, writer, na_rep=None, cols=None, header=True, if index: if nlevels == 1: row_fields = [idx] - else: # handle MultiIndex + else: # handle MultiIndex row_fields = list(idx) for i, col in enumerate(cols): val = series[col].get(idx) @@ -960,8 +961,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', cols=None, if close: f.close() - def to_excel(self, excel_writer, sheet_name = 'sheet1', na_rep='', cols=None, header=True, - index=True, index_label=None): + def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', + cols=None, header=True, index=True, index_label=None): """ Write DataFrame to a excel sheet @@ -987,8 +988,8 @@ def to_excel(self, excel_writer, sheet_name = 'sheet1', na_rep='', cols=None, he Notes ----- If passing an existing ExcelWriter object, then the sheet will be added - to the existing workbook. This can be used to save different DataFrames - to one workbook + to the existing workbook. This can be used to save different + DataFrames to one workbook >>> writer = ExcelWriter('output.xlsx') >>> df1.to_excel(writer,'sheet1') >>> df2.to_excel(writer,'sheet2') @@ -1000,8 +1001,9 @@ def to_excel(self, excel_writer, sheet_name = 'sheet1', na_rep='', cols=None, he excel_writer = ExcelWriter(excel_writer) need_save = True excel_writer.cur_sheet = sheet_name - self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols, header=header, - index=index, index_label=index_label, encoding=None) + self._helper_csvexcel(excel_writer, na_rep=na_rep, cols=cols, + header=header, index=index, + index_label=index_label, encoding=None) if need_save: excel_writer.save() @@ -1657,8 +1659,8 @@ def lookup(self, row_labels, col_labels): def align(self, other, join='outer', axis=None, level=None, copy=True): """ - Align two DataFrame object on their index and columns with the specified - join method for each axis Index + Align two DataFrame object on their index and columns with the + specified join method for each axis Index Parameters ---------- @@ -2084,7 +2086,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): agg_obj = self if subset is not None: agg_axis_name = self._get_axis_name(agg_axis) - agg_obj = self.reindex(**{agg_axis_name : subset}) + agg_obj = self.reindex(**{agg_axis_name: subset}) count = agg_obj.count(axis=agg_axis) @@ -2102,7 +2104,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None): labels = self._get_axis(axis) new_labels = labels[mask] - return self.reindex(**{axis_name : new_labels}) + return self.reindex(**{axis_name: new_labels}) def drop_duplicates(self, cols=None, take_last=False): """ @@ -2280,7 +2282,8 @@ def reorder_levels(self, order, axis=0): ------- type of caller (new object) """ - if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover + if not isinstance(self._get_axis(axis), + MultiIndex): # pragma: no cover raise Exception('Can only reorder levels on a hierarchical axis.') result = self.copy() @@ -2751,7 +2754,8 @@ def asfreq(self, freq, method=None): if isinstance(freq, datetools.DateOffset): dateRange = DateRange(self.index[0], self.index[-1], offset=freq) else: - dateRange = DateRange(self.index[0], self.index[-1], time_rule=freq) + dateRange = DateRange(self.index[0], self.index[-1], + time_rule=freq) return self.reindex(dateRange, method=method) @@ -2864,8 +2868,8 @@ def apply(self, func, axis=0, broadcast=False, raw=False, Notes ----- - Function passed should not have side effects. If the result is a Series, - it should have the same index + Function passed should not have side effects. If the result is a + Series, it should have the same index Returns ------- @@ -3038,7 +3042,8 @@ def append(self, other, ignore_index=False, verify_integrity=True): if isinstance(other, dict): other = Series(other) if other.name is None and not ignore_index: - raise Exception('Can only append a Series if ignore_index=True') + raise Exception('Can only append a Series if ' + 'ignore_index=True') index = None if other.name is None else [other.name] other = other.reindex(self.columns, copy=False) @@ -3114,7 +3119,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='', if isinstance(other, Series): assert(other.name is not None) - other = DataFrame({other.name : other}) + other = DataFrame({other.name: other}) if isinstance(other, DataFrame): return merge(self, other, left_on=on, how=how, @@ -3343,7 +3348,8 @@ def _count_level(self, level, axis=0, numeric_only=False): if axis == 1: frame = frame.T - mask = notnull(frame.values).view(np.uint8) # python 2.5 + # python 2.5 + mask = notnull(frame.values).view(np.uint8) level_index = frame.index.levels[level] counts = lib.count_level_2d(mask, frame.index.labels[level], @@ -3687,8 +3693,8 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, """ import pandas.tools.plotting as plots import matplotlib.pyplot as plt - ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, **kwds) + ax = plots.boxplot(self, column=column, by=by, ax=ax, + fontsize=fontsize, grid=grid, rot=rot, **kwds) plt.draw_if_interactive() return ax @@ -3791,7 +3797,7 @@ def _bar_plot(self, axes, subplots=False, use_index=True, grid=True, bottom=np.zeros(N), linewidth=1, **kwds) ax.set_title(col) else: - rects.append(ax.bar(xinds + i * 0.5/K, y, 0.5/K, + rects.append(ax.bar(xinds + i * 0.5 / K, y, 0.5 / K, bottom=np.zeros(N), label=col, color=colors[i % len(colors)], **kwds)) labels.append(col) @@ -3907,7 +3913,7 @@ def group_agg(values, bounds, f): else: right_bound = bounds[i + 1] - result[i] = f(values[left_bound : right_bound]) + result[i] = f(values[left_bound:right_bound]) return result @@ -4027,6 +4033,7 @@ def _rec_to_dict(arr): return columns, sdict + def _list_to_sdict(data, columns): if len(data) > 0 and isinstance(data[0], tuple): content = list(lib.to_object_array_tuples(data).T) @@ -4039,6 +4046,7 @@ def _list_to_sdict(data, columns): return {}, columns return _convert_object_array(content, columns) + def _list_of_dict_to_sdict(data, columns): if columns is None: gen = (x.keys() for x in data) @@ -4047,6 +4055,7 @@ def _list_of_dict_to_sdict(data, columns): content = list(lib.dicts_to_array(data, list(columns)).T) return _convert_object_array(content, columns) + def _convert_object_array(content, columns): if columns is None: columns = range(len(content)) @@ -4059,6 +4068,7 @@ def _convert_object_array(content, columns): for c, vals in zip(columns, content)) return sdict, columns + def _homogenize(data, index, columns, dtype=None): from pandas.core.series import _sanitize_array @@ -4104,9 +4114,11 @@ def _homogenize(data, index, columns, dtype=None): return homogenized + def _put_str(s, space): return ('%s' % s)[:space].ljust(space) + def _is_sequence(x): try: iter(x) @@ -4115,6 +4127,7 @@ def _is_sequence(x): except Exception: return False + def install_ipython_completers(): # pragma: no cover """Register the DataFrame type with IPython's tab completion machinery, so that it knows about accessing column names as attributes.""" @@ -4125,6 +4138,7 @@ def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.columns \ if isinstance(c, basestring) and py3compat.isidentifier(c)] + # Importing IPython brings in about 200 modules, so we want to avoid it unless # we're in IPython (when those modules are loaded anyway). if "IPython" in sys.modules: # pragma: no cover @@ -4133,6 +4147,7 @@ def complete_dataframe(obj, prev_completions): except Exception: pass + def _indexer_from_factorized(labels, shape, compress=True): from pandas.core.groupby import get_group_index, _compress_group_index @@ -4149,6 +4164,7 @@ def _indexer_from_factorized(labels, shape, compress=True): return indexer + def _lexsort_indexer(keys): labels = [] shape = [] @@ -4163,6 +4179,7 @@ def _lexsort_indexer(keys): shape.append(len(rizer.uniques)) return _indexer_from_factorized(labels, shape) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index af638f8d214aa..4635f31430fda 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -864,6 +864,9 @@ def _compare_to_dense(a, b, da, db, op): _compare_to_dense(s, frame, s, frame.to_dense(), op) + # it works! + result = self.frame + self.frame.ix[:, ['A', 'B']] + def test_op_corners(self): empty = self.empty + self.empty self.assert_(not empty) @@ -1126,6 +1129,11 @@ def _check_frame(frame): reindexed['G'] = reindexed['A'] self.assert_('G' not in self.frame) + def test_take(self): + result = self.frame.take([1, 0, 2], axis=1) + expected = self.frame.reindex(columns=['B', 'A', 'C']) + assert_sp_frame_equal(result, expected) + def test_density(self): df = SparseDataFrame({'A' : [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B' : [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], From 2546ac784d4733d0e2f2ecff386e55072ff238db Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 9 Feb 2012 00:08:30 -0500 Subject: [PATCH 2/7] TST: test coverage --- pandas/sparse/frame.py | 21 +++++++-------------- pandas/sparse/tests/test_sparse.py | 4 ++-- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 4686f35a6f1d8..d0749ea59e3e2 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -408,26 +408,16 @@ def xs(self, key, axis=0, copy=False): # Arithmetic-related methods def _combine_frame(self, other, func, fill_value=None, level=None): - new_index = self.index.union(other.index) - new_columns = self.columns.union(other.columns) + this, other = self.align(other, join='outer', level=level, + copy=False) + new_index, new_columns = this.index, this.columns if fill_value is not None or level is not None: raise NotImplementedError - this = self - if self.index is not new_index: - this = self.reindex(new_index) - other = other.reindex(new_index) - if not self and not other: return SparseDataFrame(index=new_index) - if not other: - return self * nan - - if not self: - return other * nan - new_data = {} for col in new_columns: if col in this and col in other: @@ -535,7 +525,10 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, for col in columns: if col not in self: continue - new_arrays[col] = com.take_1d(self[col].values, row_indexer) + if row_indexer is not None: + new_arrays[col] = com.take_1d(self[col].values, row_indexer) + else: + new_arrays[col] = self[col] return self._constructor(new_arrays, index=index, columns=columns) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 4635f31430fda..eb989b012553c 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -872,10 +872,10 @@ def test_op_corners(self): self.assert_(not empty) foo = self.frame + self.empty - assert_sp_frame_equal(foo, self.frame * np.nan) + assert_frame_equal(foo, self.frame * np.nan) foo = self.empty + self.frame - assert_sp_frame_equal(foo, self.frame * np.nan) + assert_frame_equal(foo, self.frame * np.nan) def test_scalar_ops(self): pass From 0430ff75e9fe25518771a3ddda4062f3c62c9743 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 20 Feb 2012 22:01:27 -0500 Subject: [PATCH 3/7] Added fill_value argument to Series/DataFrame.align --- pandas/core/frame.py | 16 +++++++++++----- pandas/core/series.py | 7 +++++-- pandas/tests/test_frame.py | 19 +++++++++++++++++-- pandas/tests/test_series.py | 21 ++++++++++++++------- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bb490a92d72f5..6dfbd25f13feb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1657,7 +1657,8 @@ def lookup(self, row_labels, col_labels): #---------------------------------------------------------------------- # Reindexing and alignment - def align(self, other, join='outer', axis=None, level=None, copy=True): + def align(self, other, join='outer', axis=None, level=None, copy=True, + fill_value=None): """ Align two DataFrame object on their index and columns with the specified join method for each axis Index @@ -1671,6 +1672,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True): level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level + copy : boolean, default True + Always returns new objects. If copy=False and no reindexing is + required then original objects are returned. + fill_value : object, default None + Fills na's if not None Returns ------- @@ -1687,7 +1693,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True): raise TypeError('unsupported type: %s' % type(other)) def _align_frame(self, other, join='outer', axis=None, level=None, - copy=True): + copy=True, fill_value=None): # defaults join_index, join_columns = None, None ilidx, iridx = None, None @@ -1709,10 +1715,10 @@ def _align_frame(self, other, join='outer', axis=None, level=None, join_columns, clidx, copy) right = other._reindex_with_indexers(join_index, iridx, join_columns, cridx, copy) - return left, right + return left.fillna(fill_value), right.fillna(fill_value) def _align_series(self, other, join='outer', axis=None, level=None, - copy=True): + copy=True, fill_value=None): fdata = self._data if axis == 0: join_index = self.index @@ -1741,7 +1747,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, left_result = DataFrame(fdata) right_result = other if ridx is None else other.reindex(join_index) - return left_result, right_result + return left_result.fillna(fill_value), right_result.fillna(fill_value) def reindex(self, index=None, columns=None, method=None, level=None, copy=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index 3105248d60f21..853d6477c22c5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1712,7 +1712,8 @@ def apply(self, func): mapped = lib.map_infer(self.values, func) return Series(mapped, index=self.index, name=self.name) - def align(self, other, join='outer', level=None, copy=True): + def align(self, other, join='outer', level=None, copy=True, + fill_value=None): """ Align two Series object with the specified join method @@ -1726,6 +1727,8 @@ def align(self, other, join='outer', level=None, copy=True): copy : boolean, default True Always return new objects. If copy=False and no reindexing is required, the same object will be returned (for better performance) + fill_value : object, default None + Fills na's if not None Returns ------- @@ -1738,7 +1741,7 @@ def align(self, other, join='outer', level=None, copy=True): left = self._reindex_indexer(join_index, lidx, copy) right = other._reindex_indexer(join_index, ridx, copy) - return left, right + return left.fillna(fill_value), right.fillna(fill_value) def _reindex_indexer(self, new_index, indexer, copy): if indexer is not None: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index dd77103654f0d..f0c86db7e8518 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3102,8 +3102,15 @@ def test_align(self): # axis = 0 other = self.frame.ix[:-5, :3] - af, bf = self.frame.align(other, axis=0) - self.assert_(bf.columns.equals(other.columns)) + af, bf = self.frame.align(other, axis=0, fill_value=-1) + self.assert_(bf.columns.equals(other.columns)) + #test fill value + join_idx = self.frame.index.join(other.index) + diff_a = self.frame.index.diff(join_idx) + diff_b = other.index.diff(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + self.assert_((diff_a_vals == -1).all()) af, bf = self.frame.align(other, join='right', axis=0) self.assert_(bf.columns.equals(other.columns)) @@ -3116,6 +3123,14 @@ def test_align(self): self.assert_(bf.columns.equals(self.frame.columns)) self.assert_(bf.index.equals(other.index)) + #test fill value + join_idx = self.frame.index.join(other.index) + diff_a = self.frame.index.diff(join_idx) + diff_b = other.index.diff(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + self.assert_((diff_a_vals == -1).all()) + af, bf = self.frame.align(other, join='inner', axis=1) self.assert_(bf.columns.equals(other.columns)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 8ba2ab50cc9dd..07da55aa09d43 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1719,10 +1719,16 @@ def test_apply(self): assert_series_equal(result, self.ts * 2) def test_align(self): - def _check_align(a, b, how='left'): - aa, ab = a.align(b, join=how) - + def _check_align(a, b, how='left', fill=None): + aa, ab = a.align(b, join=how, fill_value=fill) + join_index = a.index.join(b.index, how=how) + if fill is not None: + diff_a = a.index.diff(join_index) + self.asset_((aa.reindex(diff_a) == fill).all()) + diff_b = b.index.diff(join_index) + self.assert_((ab.reindex(diff_b) == fill).all()) + ea = a.reindex(join_index) eb = b.reindex(join_index) @@ -1730,16 +1736,17 @@ def _check_align(a, b, how='left'): assert_series_equal(ab, eb) for kind in JOIN_TYPES: - _check_align(self.ts[2:], self.ts[:-5]) + _check_align(self.ts[2:], self.ts[:-5], kind) + _check_align(self.ts[2:], self.ts[:-5], kind, -1) # empty left - _check_align(self.ts[:0], self.ts[:-5]) + _check_align(self.ts[:0], self.ts[:-5], kind) # empty right - _check_align(self.ts[:-5], self.ts[:0]) + _check_align(self.ts[:-5], self.ts[:0], kind) # both empty - _check_align(self.ts[:0], self.ts[:0]) + _check_align(self.ts[:0], self.ts[:0], kind) def test_align_nocopy(self): b = self.ts[:5].copy() From c1ac80a98fa551916c7e7d32b1a86b9518746775 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 8 Feb 2012 23:55:15 -0500 Subject: [PATCH 4/7] TST: test coverage, pep8 --- pandas/core/frame.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1e1f18765083..56729662381dc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4080,18 +4080,6 @@ def _to_sdict(data, columns): else: # pragma: no cover raise TypeError('No logic to handle %s type' % type(data[0])) -def _list_to_sdict(data, columns): - if len(data) > 0 and isinstance(data[0], tuple): - content = list(lib.to_object_array_tuples(data).T) - elif len(data) > 0: - # list of lists - content = list(lib.to_object_array(data).T) - else: - if columns is None: - columns = [] - return {}, columns - return _convert_object_array(content, columns) - def _list_of_series_to_sdict(data, columns): from pandas.core.index import _get_combined_index From 49ad2153fa4bd870b1a1bdf4eb401ecc1d483ac1 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 20 Feb 2012 22:01:27 -0500 Subject: [PATCH 5/7] Added fill_value argument to Series/DataFrame.align --- pandas/core/frame.py | 16 +++++++++++----- pandas/core/series.py | 7 +++++-- pandas/tests/test_frame.py | 19 +++++++++++++++++-- pandas/tests/test_series.py | 21 ++++++++++++++------- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56729662381dc..e860632dd52bb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1669,7 +1669,8 @@ def lookup(self, row_labels, col_labels): #---------------------------------------------------------------------- # Reindexing and alignment - def align(self, other, join='outer', axis=None, level=None, copy=True): + def align(self, other, join='outer', axis=None, level=None, copy=True, + fill_value=None): """ Align two DataFrame object on their index and columns with the specified join method for each axis Index @@ -1683,6 +1684,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True): level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level + copy : boolean, default True + Always returns new objects. If copy=False and no reindexing is + required then original objects are returned. + fill_value : object, default None + Fills na's if not None Returns ------- @@ -1699,7 +1705,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True): raise TypeError('unsupported type: %s' % type(other)) def _align_frame(self, other, join='outer', axis=None, level=None, - copy=True): + copy=True, fill_value=None): # defaults join_index, join_columns = None, None ilidx, iridx = None, None @@ -1721,10 +1727,10 @@ def _align_frame(self, other, join='outer', axis=None, level=None, join_columns, clidx, copy) right = other._reindex_with_indexers(join_index, iridx, join_columns, cridx, copy) - return left, right + return left.fillna(fill_value), right.fillna(fill_value) def _align_series(self, other, join='outer', axis=None, level=None, - copy=True): + copy=True, fill_value=None): fdata = self._data if axis == 0: join_index = self.index @@ -1753,7 +1759,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, left_result = DataFrame(fdata) right_result = other if ridx is None else other.reindex(join_index) - return left_result, right_result + return left_result.fillna(fill_value), right_result.fillna(fill_value) def reindex(self, index=None, columns=None, method=None, level=None, copy=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index 374b7b917ef18..f469498506073 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1786,7 +1786,8 @@ def apply(self, func): mapped = lib.map_infer(self.values, func) return Series(mapped, index=self.index, name=self.name) - def align(self, other, join='outer', level=None, copy=True): + def align(self, other, join='outer', level=None, copy=True, + fill_value=None): """ Align two Series object with the specified join method @@ -1800,6 +1801,8 @@ def align(self, other, join='outer', level=None, copy=True): copy : boolean, default True Always return new objects. If copy=False and no reindexing is required, the same object will be returned (for better performance) + fill_value : object, default None + Fills na's if not None Returns ------- @@ -1812,7 +1815,7 @@ def align(self, other, join='outer', level=None, copy=True): left = self._reindex_indexer(join_index, lidx, copy) right = other._reindex_indexer(join_index, ridx, copy) - return left, right + return left.fillna(fill_value), right.fillna(fill_value) def _reindex_indexer(self, new_index, indexer, copy): if indexer is not None: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5fd54466d92b3..6f460dfaa79a1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -3180,8 +3180,15 @@ def test_align(self): # axis = 0 other = self.frame.ix[:-5, :3] - af, bf = self.frame.align(other, axis=0) - self.assert_(bf.columns.equals(other.columns)) + af, bf = self.frame.align(other, axis=0, fill_value=-1) + self.assert_(bf.columns.equals(other.columns)) + #test fill value + join_idx = self.frame.index.join(other.index) + diff_a = self.frame.index.diff(join_idx) + diff_b = other.index.diff(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + self.assert_((diff_a_vals == -1).all()) af, bf = self.frame.align(other, join='right', axis=0) self.assert_(bf.columns.equals(other.columns)) @@ -3194,6 +3201,14 @@ def test_align(self): self.assert_(bf.columns.equals(self.frame.columns)) self.assert_(bf.index.equals(other.index)) + #test fill value + join_idx = self.frame.index.join(other.index) + diff_a = self.frame.index.diff(join_idx) + diff_b = other.index.diff(join_idx) + diff_a_vals = af.reindex(diff_a).values + diff_b_vals = bf.reindex(diff_b).values + self.assert_((diff_a_vals == -1).all()) + af, bf = self.frame.align(other, join='inner', axis=1) self.assert_(bf.columns.equals(other.columns)) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 425b5a8fad1a6..e6e5c655bee5c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1783,10 +1783,16 @@ def test_apply(self): assert_series_equal(result, self.ts * 2) def test_align(self): - def _check_align(a, b, how='left'): - aa, ab = a.align(b, join=how) - + def _check_align(a, b, how='left', fill=None): + aa, ab = a.align(b, join=how, fill_value=fill) + join_index = a.index.join(b.index, how=how) + if fill is not None: + diff_a = a.index.diff(join_index) + self.asset_((aa.reindex(diff_a) == fill).all()) + diff_b = b.index.diff(join_index) + self.assert_((ab.reindex(diff_b) == fill).all()) + ea = a.reindex(join_index) eb = b.reindex(join_index) @@ -1794,16 +1800,17 @@ def _check_align(a, b, how='left'): assert_series_equal(ab, eb) for kind in JOIN_TYPES: - _check_align(self.ts[2:], self.ts[:-5]) + _check_align(self.ts[2:], self.ts[:-5], kind) + _check_align(self.ts[2:], self.ts[:-5], kind, -1) # empty left - _check_align(self.ts[:0], self.ts[:-5]) + _check_align(self.ts[:0], self.ts[:-5], kind) # empty right - _check_align(self.ts[:-5], self.ts[:0]) + _check_align(self.ts[:-5], self.ts[:0], kind) # both empty - _check_align(self.ts[:0], self.ts[:0]) + _check_align(self.ts[:0], self.ts[:0], kind) def test_align_nocopy(self): b = self.ts[:5].copy() From cc1c347b5d2bc001d89810a26c0dfda24318993d Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 20 Feb 2012 22:55:31 -0500 Subject: [PATCH 6/7] allows to specify fill_method as well as fill_value --- pandas/core/frame.py | 31 +++++++++++++++++++++++-------- pandas/core/series.py | 11 ++++++++--- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b8b918364da0..48bf326c416de 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1670,7 +1670,7 @@ def lookup(self, row_labels, col_labels): # Reindexing and alignment def align(self, other, join='outer', axis=None, level=None, copy=True, - fill_value=None): + fill_value=None, fill_method=None): """ Align two DataFrame object on their index and columns with the specified join method for each axis Index @@ -1688,7 +1688,7 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, Always returns new objects. If copy=False and no reindexing is required then original objects are returned. fill_value : object, default None - Fills na's if not None + fill_method : str, default None Returns ------- @@ -1697,15 +1697,19 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, """ if isinstance(other, DataFrame): return self._align_frame(other, join=join, axis=axis, level=level, - copy=copy) + copy=copy, + fill_value=fill_value, + fill_method=fill_method) elif isinstance(other, Series): return self._align_series(other, join=join, axis=axis, level=level, - copy=copy) + copy=copy, + fill_value=fill_value, + fill_method=fill_method) else: # pragma: no cover raise TypeError('unsupported type: %s' % type(other)) def _align_frame(self, other, join='outer', axis=None, level=None, - copy=True, fill_value=None): + copy=True, fill_value=None, fill_method=None): # defaults join_index, join_columns = None, None ilidx, iridx = None, None @@ -1727,10 +1731,15 @@ def _align_frame(self, other, join='outer', axis=None, level=None, join_columns, clidx, copy) right = other._reindex_with_indexers(join_index, iridx, join_columns, cridx, copy) - return left.fillna(fill_value), right.fillna(fill_value) + fill_na = (fill_value is not None) or (fill_method is not None) + if fill_na: + return (left.fillna(fill_value, method=fill_method), + right.fillna(fill_value, method=fill_method)) + else: + return left, right def _align_series(self, other, join='outer', axis=None, level=None, - copy=True, fill_value=None): + copy=True, fill_value=None, fill_method=None): fdata = self._data if axis == 0: join_index = self.index @@ -1759,7 +1768,13 @@ def _align_series(self, other, join='outer', axis=None, level=None, left_result = DataFrame(fdata) right_result = other if ridx is None else other.reindex(join_index) - return left_result.fillna(fill_value), right_result.fillna(fill_value) + + fill_na = (fill_value is not None) or (fill_method is not None) + if fill_na: + return (left_result.fillna(fill_value, fill_method=fill_method), + right_result.fillna(fill_value, fill_method=fill_method)) + else: + return left_result, right_result def reindex(self, index=None, columns=None, method=None, level=None, copy=True): diff --git a/pandas/core/series.py b/pandas/core/series.py index f469498506073..4e3774687f1dd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1787,7 +1787,7 @@ def apply(self, func): return Series(mapped, index=self.index, name=self.name) def align(self, other, join='outer', level=None, copy=True, - fill_value=None): + fill_value=None, fill_method=None): """ Align two Series object with the specified join method @@ -1802,7 +1802,7 @@ def align(self, other, join='outer', level=None, copy=True, Always return new objects. If copy=False and no reindexing is required, the same object will be returned (for better performance) fill_value : object, default None - Fills na's if not None + fill_method : str, default 'pad' Returns ------- @@ -1815,7 +1815,12 @@ def align(self, other, join='outer', level=None, copy=True, left = self._reindex_indexer(join_index, lidx, copy) right = other._reindex_indexer(join_index, ridx, copy) - return left.fillna(fill_value), right.fillna(fill_value) + fill_na = (fill_value is not None) or (fill_method is not None) + if fill_na: + return (left.fillna(fill_value, method=fill_method), + right.fillna(fill_value, method=fill_method)) + else: + return left, right def _reindex_indexer(self, new_index, indexer, copy): if indexer is not None: From 6a1381f77272d3a3280c54a87ec8686767d3b494 Mon Sep 17 00:00:00 2001 From: Chang She Date: Wed, 22 Feb 2012 07:42:48 -0500 Subject: [PATCH 7/7] fixed bug in new test case in test_align --- pandas/tests/test_series.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index e6e5c655bee5c..c36b8c9b27d4f 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1788,10 +1788,12 @@ def _check_align(a, b, how='left', fill=None): join_index = a.index.join(b.index, how=how) if fill is not None: - diff_a = a.index.diff(join_index) - self.asset_((aa.reindex(diff_a) == fill).all()) - diff_b = b.index.diff(join_index) - self.assert_((ab.reindex(diff_b) == fill).all()) + diff_a = aa.index.diff(join_index) + diff_b = ab.index.diff(join_index) + if len(diff_a) > 0: + self.assert_((aa.reindex(diff_a) == fill).all()) + if len(diff_b) > 0: + self.assert_((ab.reindex(diff_b) == fill).all()) ea = a.reindex(join_index) eb = b.reindex(join_index) @@ -1800,17 +1802,17 @@ def _check_align(a, b, how='left', fill=None): assert_series_equal(ab, eb) for kind in JOIN_TYPES: - _check_align(self.ts[2:], self.ts[:-5], kind) - _check_align(self.ts[2:], self.ts[:-5], kind, -1) + _check_align(self.ts[2:], self.ts[:-5]) + _check_align(self.ts[2:], self.ts[:-5], -1) # empty left - _check_align(self.ts[:0], self.ts[:-5], kind) + _check_align(self.ts[:0], self.ts[:-5]) # empty right - _check_align(self.ts[:-5], self.ts[:0], kind) + _check_align(self.ts[:-5], self.ts[:0]) # both empty - _check_align(self.ts[:0], self.ts[:0], kind) + _check_align(self.ts[:0], self.ts[:0]) def test_align_nocopy(self): b = self.ts[:5].copy()