From 5fc463c6ffa8e1607f15b5784bd6925e460a9488 Mon Sep 17 00:00:00 2001
From: jreback
Date: Sun, 16 Feb 2014 12:14:35 -0500
Subject: [PATCH] BUG: DataFrame.shift with axis=1 was raising (GH6371)

---
 doc/source/release.rst     |  1 +
 pandas/core/generic.py     |  2 +-
 pandas/core/internals.py   | 19 +++++++------------
 pandas/tests/test_frame.py | 44 ++++++++++++++++++++++--------------------
 4 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 78dd6fbb2a5e3..b0944629ccbb2 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -111,6 +111,7 @@ Bug Fixes
 
 - Bug in ``pd.eval`` when parsing strings with possible tokens like ``'&'`` (:issue:`6351`)
 - Bug correctly handle placements of ``-inf`` in Panels when dividing by integer 0 (:issue:`6178`)
+- ``DataFrame.shift`` with ``axis=1`` was raising (:issue:`6371`)
 
 pandas 0.13.1
 -------------
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index fdec1d2955e90..4e206770ce69c 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -3170,7 +3170,7 @@ def shift(self, periods=1, freq=None, axis=0, **kwds):
 
         if freq is None and not len(kwds):
             block_axis = self._get_block_manager_axis(axis)
-            indexer = com._shift_indexer(len(self), periods)
+            indexer = com._shift_indexer(len(self._get_axis(axis)), periods)
             new_data = self._data.shift(indexer=indexer, periods=periods, axis=block_axis)
         else:
             return self.tshift(periods, freq, **kwds)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index d09191ce53868..be64bea1c7c23 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -934,19 +934,14 @@ def shift(self, indexer, periods, axis=0):
         # that, handle boolean etc also
         new_values, fill_value = com._maybe_upcast(new_values)
 
-        # 1-d
-        if self.ndim == 1:
-            if periods > 0:
-                new_values[:periods] = fill_value
-            else:
-                new_values[periods:] = fill_value
-
-        # 2-d
+        axis_indexer = [ slice(None) ] * self.ndim
+        if periods > 0:
+            axis_indexer[axis] = slice(None,periods)
         else:
-            if periods > 0:
-                new_values[:, :periods] = fill_value
-            else:
-                new_values[:, periods:] = fill_value
+            axis_indexer = [ slice(None) ] * self.ndim
+            axis_indexer[axis] = slice(periods,None)
+        new_values[tuple(axis_indexer)] = fill_value
+
         return [make_block(new_values, self.items, self.ref_items,
                            ndim=self.ndim, fastpath=True)]
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 7146cd8b725ae..da5a106425099 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -27,16 +27,13 @@
 from numpy.testing import assert_array_equal
 import numpy.ma.mrecords as mrecords
 
-import pandas as pan
 import pandas.core.nanops as nanops
 import pandas.core.common as com
 import pandas.core.format as fmt
 import pandas.core.datetools as datetools
-from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
-                             MultiIndex, DatetimeIndex, Timestamp)
-from pandas import date_range
+from pandas import (DataFrame, Index, Series, notnull, isnull,
+                    MultiIndex, DatetimeIndex, Timestamp, date_range, read_csv)
 import pandas as pd
-from pandas.io.parsers import read_csv
 from pandas.parser import CParserError
 from pandas.util.misc import is_little_endian
@@ -3740,7 +3737,7 @@ def test_to_dict(self):
     def test_to_records_dt64(self):
         df = DataFrame([["one", "two", "three"],
                         ["four", "five", "six"]],
-                       index=pan.date_range("2012-01-01", "2012-01-02"))
+                       index=date_range("2012-01-01", "2012-01-02"))
         self.assertEqual(df.to_records()['index'][0], df.index[0])
 
         rs = df.to_records(convert_datetime64=False)
@@ -5883,7 +5880,7 @@ def create_cols(name):
         #### this is a bug in read_csv right now ####
         #df_dt.ix[30:50,1:3] = np.nan
 
-        df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
+        df = pd.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
 
         # dtype
         dtypes = dict()
@@ -5893,7 +5890,7 @@ def create_cols(name):
 
         with ensure_clean() as filename:
             df.to_csv(filename)
-            rs = pan.read_csv(filename, index_col=0, dtype=dtypes, parse_dates=create_cols('date'))
+            rs = read_csv(filename, index_col=0, dtype=dtypes, parse_dates=create_cols('date'))
             assert_frame_equal(rs, df)
 
     def test_to_csv_dups_cols(self):
@@ -5911,7 +5908,7 @@ def test_to_csv_dups_cols(self):
         df_bool = DataFrame(True,index=df_float.index,columns=lrange(3))
         df_object = DataFrame('foo',index=df_float.index,columns=lrange(3))
         df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=lrange(3))
-        df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True)
+        df = pd.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True)
 
         cols = []
         for i in range(5):
@@ -5955,7 +5952,7 @@ def test_to_csv_chunking(self):
         for chunksize in [10000,50000,100000]:
             with ensure_clean() as filename:
                 aa.to_csv(filename,chunksize=chunksize)
-                rs = pan.read_csv(filename,index_col=0)
+                rs = read_csv(filename,index_col=0)
                 assert_frame_equal(rs, aa)
 
     def test_to_csv_bug(self):
@@ -5966,7 +5963,7 @@ def test_to_csv_bug(self):
 
         with ensure_clean() as path:
             newdf.to_csv(path)
-            recons = pan.read_csv(path, index_col=0)
+            recons = read_csv(path, index_col=0)
             assert_frame_equal(recons, newdf, check_names=False)  # don't check_names as t != 1
 
     def test_to_csv_unicode(self):
@@ -5975,11 +5972,11 @@ def test_to_csv_unicode(self):
 
         with ensure_clean() as path:
             df.to_csv(path, encoding='UTF-8')
-            df2 = pan.read_csv(path, index_col=0, encoding='UTF-8')
+            df2 = read_csv(path, index_col=0, encoding='UTF-8')
             assert_frame_equal(df, df2)
 
             df.to_csv(path, encoding='UTF-8', index=False)
-            df2 = pan.read_csv(path, index_col=None, encoding='UTF-8')
+            df2 = read_csv(path, index_col=None, encoding='UTF-8')
             assert_frame_equal(df, df2)
 
     def test_to_csv_unicode_index_col(self):
@@ -5993,14 +5990,14 @@ def test_to_csv_unicode_index_col(self):
         df.to_csv(buf, encoding='UTF-8')
         buf.seek(0)
 
-        df2 = pan.read_csv(buf, index_col=0, encoding='UTF-8')
+        df2 = read_csv(buf, index_col=0, encoding='UTF-8')
         assert_frame_equal(df, df2)
 
     def test_to_csv_stringio(self):
         buf = StringIO()
         self.frame.to_csv(buf)
         buf.seek(0)
-        recons = pan.read_csv(buf, index_col=0)
+        recons = read_csv(buf, index_col=0)
         assert_frame_equal(recons, self.frame, check_names=False)  # TODO to_csv drops column name
 
     def test_to_csv_float_format(self):
@@ -6013,7 +6010,7 @@ def test_to_csv_float_format(self):
 
             df.to_csv(filename, float_format='%.2f')
 
-            rs = pan.read_csv(filename, index_col=0)
+            rs = read_csv(filename, index_col=0)
             xp = DataFrame([[0.12, 0.23, 0.57],
                             [12.32, 123123.20, 321321.20]],
                            index=['A', 'B'], columns=['X', 'Y', 'Z'])
@@ -6359,7 +6356,7 @@ def test_asfreq_datetimeindex(self):
         tm.assert_isinstance(ts.index, DatetimeIndex)
 
     def test_at_time_between_time_datetimeindex(self):
-        index = pan.date_range("2012-01-01", "2012-01-05", freq='30min')
+        index = date_range("2012-01-01", "2012-01-05", freq='30min')
         df = DataFrame(randn(len(index), 5), index=index)
         akey = time(12, 0, 0)
         bkey = slice(time(13, 0, 0), time(14, 0, 0))
@@ -8009,12 +8006,11 @@ def test_replace_with_dict_with_bool_keys(self):
             df.replace({'asdf': 'asdb', True: 'yes'})
 
     def test_combine_multiple_frames_dtypes(self):
-        from pandas import concat
 
         # GH 2759
         A = DataFrame(data=np.ones((10, 2)), columns=['foo', 'bar'],
                       dtype=np.float64)
         B = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
-        results = concat((A, B), axis=1).get_dtype_counts()
+        results = pd.concat((A, B), axis=1).get_dtype_counts()
         expected = Series(dict( float64 = 2, float32 = 2 ))
         assert_series_equal(results,expected)
@@ -8994,6 +8990,14 @@ def test_shift(self):
         assertRaisesRegexp(ValueError, 'does not match PeriodIndex freq',
                            ps.shift, freq='D')
 
+
+        # shift other axis
+        # GH 6371
+        df = DataFrame(np.random.rand(10,5))
+        expected = pd.concat([DataFrame(np.nan,index=df.index,columns=[0]),df.iloc[:,0:-1]],ignore_index=True,axis=1)
+        result = df.shift(1,axis=1)
+        assert_frame_equal(result,expected)
+
     def test_shift_bool(self):
         df = DataFrame({'high': [True, False],
                         'low': [False, False]})
@@ -11339,7 +11343,7 @@ def test_columns_with_dups(self):
         df_bool = DataFrame(True,index=df_float.index,columns=df_float.columns)
         df_object = DataFrame('foo',index=df_float.index,columns=df_float.columns)
         df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=df_float.columns)
-        df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
+        df = pd.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1)
 
         result = df._data._set_ref_locs()
         self.assertEqual(len(result), len(df.columns))