diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 9e48702ad2b0a..eedcce82c733d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -477,7 +477,7 @@ def test_binary_ops_align(self): result = getattr(df, op)(x, level='second', axis=0) expected = (pd.concat([opa(df.loc[idx[:, i], :], v) - for i, v in x.iteritems()]) + for i, v in x.iteritems()]) .reindex_like(df).sortlevel()) assert_frame_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 252250c5a55b8..4b17736dd149a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -28,14 +28,14 @@ def test_ints(self): expected = Series(np.array([0, 2, 1, 1, 0, 2, np.nan, 0])) tm.assert_series_equal(result, expected) - s = pd.Series(np.arange(5),dtype=np.float32) - result = algos.match(s, [2,4]) + s = pd.Series(np.arange(5), dtype=np.float32) + result = algos.match(s, [2, 4]) expected = np.array([-1, -1, 0, -1, 1]) self.assert_numpy_array_equal(result, expected) - result = Series(algos.match(s, [2,4], np.nan)) + result = Series(algos.match(s, [2, 4], np.nan)) expected = Series(np.array([np.nan, np.nan, 0, np.nan, 1])) - tm.assert_series_equal(result,expected) + tm.assert_series_equal(result, expected) def test_strings(self): values = ['foo', 'bar', 'baz'] @@ -47,7 +47,8 @@ def test_strings(self): result = Series(algos.match(to_match, values, np.nan)) expected = Series(np.array([1, 0, np.nan, 0, 1, 2, np.nan])) - tm.assert_series_equal(result,expected) + tm.assert_series_equal(result, expected) + class TestFactorize(tm.TestCase): _multiprocess_can_split_ = True @@ -60,31 +61,42 @@ def test_warn(self): def test_basic(self): - labels, uniques = algos.factorize(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c']) - # self.assert_numpy_array_equal(labels, np.array([ 0, 1, 1, 0, 0, 2, 2, 2],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array(['a','b','c'], dtype=object)) + labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', + 'c']) + self.assert_numpy_array_equal( + uniques, np.array(['a', 'b', 'c'], dtype=object)) labels, uniques = algos.factorize(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'], sort=True) - self.assert_numpy_array_equal(labels, np.array([ 0, 1, 1, 0, 0, 2, 2, 2],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array(['a','b','c'], dtype=object)) + self.assert_numpy_array_equal(labels, np.array( + [0, 1, 1, 0, 0, 2, 2, 2], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + ['a', 'b', 'c'], dtype=object)) labels, uniques = algos.factorize(list(reversed(range(5)))) - self.assert_numpy_array_equal(labels, np.array([0, 1, 2, 3, 4], dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([ 4, 3, 2, 1, 0],dtype=np.int64)) + self.assert_numpy_array_equal(labels, np.array( + [0, 1, 2, 3, 4], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [4, 3, 2, 1, 0], dtype=np.int64)) labels, uniques = algos.factorize(list(reversed(range(5))), sort=True) - self.assert_numpy_array_equal(labels, np.array([ 4, 3, 2, 1, 0],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([0, 1, 2, 3, 4], dtype=np.int64)) + self.assert_numpy_array_equal(labels, np.array( + [4, 3, 2, 1, 0], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [0, 1, 2, 3, 4], dtype=np.int64)) labels, uniques = algos.factorize(list(reversed(np.arange(5.)))) - self.assert_numpy_array_equal(labels, np.array([0., 1., 2., 3., 4.], dtype=np.float64)) - self.assert_numpy_array_equal(uniques, np.array([ 4, 3, 2, 1, 0],dtype=np.int64)) - - labels, uniques = algos.factorize(list(reversed(np.arange(5.))), sort=True) - self.assert_numpy_array_equal(labels, np.array([ 4, 3, 2, 1, 0],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([0., 1., 2., 3., 4.], dtype=np.float64)) + self.assert_numpy_array_equal(labels, np.array( + [0., 1., 2., 3., 4.], dtype=np.float64)) + self.assert_numpy_array_equal(uniques, np.array( + [4, 3, 2, 1, 0], dtype=np.int64)) + + labels, uniques = algos.factorize( + list(reversed(np.arange(5.))), sort=True) + self.assert_numpy_array_equal(labels, np.array( + [4, 3, 2, 1, 0], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [0., 1., 2., 3., 4.], dtype=np.float64)) def test_mixed(self): @@ -92,39 +104,49 @@ def test_mixed(self): x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf]) labels, uniques = algos.factorize(x) - self.assert_numpy_array_equal(labels, np.array([ 0, 0, -1, 1, 2, 3],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array(['A', 'B', 3.14, np.inf], dtype=object)) + self.assert_numpy_array_equal(labels, np.array( + [0, 0, -1, 1, 2, 3], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + ['A', 'B', 3.14, np.inf], dtype=object)) labels, uniques = algos.factorize(x, sort=True) - self.assert_numpy_array_equal(labels, np.array([ 2, 2, -1, 3, 0, 1],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([3.14, np.inf, 'A', 'B'], dtype=object)) + self.assert_numpy_array_equal(labels, np.array( + [2, 2, -1, 3, 0, 1], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [3.14, np.inf, 'A', 'B'], dtype=object)) def test_datelike(self): # M8 v1 = pd.Timestamp('20130101 09:00:00.00004') v2 = pd.Timestamp('20130101') - x = Series([v1,v1,v1,v2,v2,v1]) + x = Series([v1, v1, v1, v2, v2, v1]) labels, uniques = algos.factorize(x) - self.assert_numpy_array_equal(labels, np.array([ 0,0,0,1,1,0],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([v1.value,v2.value],dtype='M8[ns]')) + self.assert_numpy_array_equal(labels, np.array( + [0, 0, 0, 1, 1, 0], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [v1.value, v2.value], dtype='M8[ns]')) labels, uniques = algos.factorize(x, sort=True) - self.assert_numpy_array_equal(labels, np.array([ 1,1,1,0,0,1],dtype=np.int64)) - self.assert_numpy_array_equal(uniques, np.array([v2.value,v1.value],dtype='M8[ns]')) + self.assert_numpy_array_equal(labels, np.array( + [1, 1, 1, 0, 0, 1], dtype=np.int64)) + self.assert_numpy_array_equal(uniques, np.array( + [v2.value, v1.value], dtype='M8[ns]')) # period - v1 = pd.Period('201302',freq='M') - v2 = pd.Period('201303',freq='M') - x = Series([v1,v1,v1,v2,v2,v1]) + v1 = pd.Period('201302', freq='M') + v2 = pd.Period('201303', freq='M') + x = Series([v1, v1, v1, v2, v2, v1]) # periods are not 'sorted' as they are converted back into an index labels, uniques = algos.factorize(x) - self.assert_numpy_array_equal(labels, np.array([ 0,0,0,1,1,0],dtype=np.int64)) + self.assert_numpy_array_equal(labels, np.array( + [0, 0, 0, 1, 1, 0], dtype=np.int64)) self.assert_numpy_array_equal(uniques, pd.PeriodIndex([v1, v2])) - labels, uniques = algos.factorize(x,sort=True) - self.assert_numpy_array_equal(labels, np.array([ 0,0,0,1,1,0],dtype=np.int64)) + labels, uniques = algos.factorize(x, sort=True) + self.assert_numpy_array_equal(labels, np.array( + [0, 0, 0, 1, 1, 0], dtype=np.int64)) self.assert_numpy_array_equal(uniques, pd.PeriodIndex([v1, v2])) def test_factorize_nan(self): @@ -137,15 +159,20 @@ def test_factorize_nan(self): ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel) expected = np.array([0, 1, 0, na_sentinel], dtype='int32') self.assertEqual(len(set(key)), len(set(expected))) - self.assertTrue(np.array_equal(pd.isnull(key), expected == na_sentinel)) + self.assertTrue(np.array_equal( + pd.isnull(key), expected == na_sentinel)) # nan still maps to na_sentinel when sort=False key = np.array([0, np.nan, 1], dtype='O') na_sentinel = -1 - ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) - expected = np.array([ 2, -1, 0], dtype='int32') + + # TODO(wesm): unused? + ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa + + expected = np.array([2, -1, 0], dtype='int32') self.assertEqual(len(set(key)), len(set(expected))) - self.assertTrue(np.array_equal(pd.isnull(key), expected == na_sentinel)) + self.assertTrue( + np.array_equal(pd.isnull(key), expected == na_sentinel)) def test_vector_resize(self): # Test for memory errors after internal vector @@ -161,14 +188,15 @@ def _test_vector_resize(htable, uniques, dtype, nvals): test_cases = [ (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'), - (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64'), - (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64')] + (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64'), + (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64')] for (tbl, vect, dtype) in test_cases: # resizing to empty is a special case _test_vector_resize(tbl(), vect(), dtype, 0) _test_vector_resize(tbl(), vect(), dtype, 10) + class TestIndexer(tm.TestCase): _multiprocess_can_split_ = True @@ -180,15 +208,15 @@ def test_outer_join_indexer(self): ('object', algos.algos.outer_join_indexer_object)] for dtype, indexer in typemap: - left = np.arange(3, dtype = dtype) - right = np.arange(2,5, dtype = dtype) - empty = np.array([], dtype = dtype) + left = np.arange(3, dtype=dtype) + right = np.arange(2, 5, dtype=dtype) + empty = np.array([], dtype=dtype) result, lindexer, rindexer = indexer(left, right) tm.assertIsInstance(result, np.ndarray) tm.assertIsInstance(lindexer, np.ndarray) tm.assertIsInstance(rindexer, np.ndarray) - tm.assert_numpy_array_equal(result, np.arange(5, dtype = dtype)) + tm.assert_numpy_array_equal(result, np.arange(5, dtype=dtype)) tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2, -1, -1])) tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, 0, 1, 2])) @@ -202,6 +230,7 @@ def test_outer_join_indexer(self): tm.assert_numpy_array_equal(lindexer, np.array([0, 1, 2])) tm.assert_numpy_array_equal(rindexer, np.array([-1, -1, -1])) + class TestUnique(tm.TestCase): _multiprocess_can_split_ = True @@ -224,8 +253,8 @@ def test_object_refcount_bug(self): def test_on_index_object(self): - mindex = pd.MultiIndex.from_arrays([np.arange(5).repeat(5), - np.tile(np.arange(5), 5)]) + mindex = pd.MultiIndex.from_arrays([np.arange(5).repeat(5), np.tile( + np.arange(5), 5)]) expected = mindex.values expected.sort() @@ -239,7 +268,8 @@ def test_on_index_object(self): def test_datetime64_dtype_array_returned(self): # GH 9431 expected = np.array(['2015-01-03T00:00:00.000000000+0000', - '2015-01-01T00:00:00.000000000+0000'], dtype='M8[ns]') + '2015-01-01T00:00:00.000000000+0000'], + dtype='M8[ns]') dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000', '2015-01-01T00:00:00.000000000+0000', @@ -258,7 +288,6 @@ def test_datetime64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) - def test_timedelta64_dtype_array_returned(self): # GH 9431 expected = np.array([31200, 45678, 10000], dtype='m8[ns]') @@ -278,70 +307,70 @@ def test_timedelta64_dtype_array_returned(self): tm.assert_numpy_array_equal(result, expected) self.assertEqual(result.dtype, expected.dtype) + class TestIsin(tm.TestCase): _multiprocess_can_split_ = True def test_invalid(self): - self.assertRaises(TypeError, lambda : algos.isin(1,1)) - self.assertRaises(TypeError, lambda : algos.isin(1,[1])) - self.assertRaises(TypeError, lambda : algos.isin([1],1)) + self.assertRaises(TypeError, lambda: algos.isin(1, 1)) + self.assertRaises(TypeError, lambda: algos.isin(1, [1])) + self.assertRaises(TypeError, lambda: algos.isin([1], 1)) def test_basic(self): - result = algos.isin([1,2],[1]) - expected = np.array([True,False]) + result = algos.isin([1, 2], [1]) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(np.array([1,2]),[1]) - expected = np.array([True,False]) + result = algos.isin(np.array([1, 2]), [1]) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1,2]),[1]) - expected = np.array([True,False]) + result = algos.isin(pd.Series([1, 2]), [1]) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series([1,2]),pd.Series([1])) - expected = np.array([True,False]) + result = algos.isin(pd.Series([1, 2]), pd.Series([1])) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(['a','b'],['a']) - expected = np.array([True,False]) + result = algos.isin(['a', 'b'], ['a']) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(pd.Series(['a','b']),pd.Series(['a'])) - expected = np.array([True,False]) + result = algos.isin(pd.Series(['a', 'b']), pd.Series(['a'])) + expected = np.array([True, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(['a','b'],[1]) - expected = np.array([False,False]) + result = algos.isin(['a', 'b'], [1]) + expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) - arr = pd.date_range('20130101',periods=3).values - result = algos.isin(arr,[arr[0]]) - expected = np.array([True,False,False]) + arr = pd.date_range('20130101', periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) - result = algos.isin(arr,arr[0:2]) - expected = np.array([True,True,False]) + result = algos.isin(arr, arr[0:2]) + expected = np.array([True, True, False]) tm.assert_numpy_array_equal(result, expected) - arr = pd.timedelta_range('1 day',periods=3).values - result = algos.isin(arr,[arr[0]]) - expected = np.array([True,False,False]) + arr = pd.timedelta_range('1 day', periods=3).values + result = algos.isin(arr, [arr[0]]) + expected = np.array([True, False, False]) tm.assert_numpy_array_equal(result, expected) - - def test_large(self): - s = pd.date_range('20000101',periods=2000000,freq='s').values - result = algos.isin(s,s[0:2]) - expected = np.zeros(len(s),dtype=bool) + s = pd.date_range('20000101', periods=2000000, freq='s').values + result = algos.isin(s, s[0:2]) + expected = np.zeros(len(s), dtype=bool) expected[0] = True expected[1] = True tm.assert_numpy_array_equal(result, expected) + class TestValueCounts(tm.TestCase): _multiprocess_can_split_ = True @@ -354,14 +383,10 @@ def test_value_counts(self): tm.assertIsInstance(factor, Categorical) result = algos.value_counts(factor) - cats = ['(-1.194, -0.535]', - '(-0.535, 0.121]', - '(0.121, 0.777]', - '(0.777, 1.433]' - ] + cats = ['(-1.194, -0.535]', '(-0.535, 0.121]', '(0.121, 0.777]', + '(0.777, 1.433]'] expected_index = CategoricalIndex(cats, cats, ordered=True) - expected = Series([1, 1, 1, 1], - index=expected_index) + expected = Series([1, 1, 1, 1], index=expected_index) tm.assert_series_equal(result.sort_index(), expected.sort_index()) def test_value_counts_bins(self): @@ -385,7 +410,8 @@ def test_value_counts_dtypes(self): result = algos.value_counts(Series([1, 1., '1'])) # object self.assertEqual(len(result), 2) - self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), ['1', 1]) + self.assertRaises(TypeError, lambda s: algos.value_counts(s, bins=1), + ['1', 1]) def test_value_counts_nat(self): td = Series([np.timedelta64(10000), pd.NaT], dtype='timedelta64[ns]') @@ -404,7 +430,8 @@ def test_value_counts_nat(self): def test_categorical(self): s = Series(pd.Categorical(list('aaabbc'))) result = s.value_counts() - expected = pd.Series([3, 2, 1], index=pd.CategoricalIndex(['a', 'b', 'c'])) + expected = pd.Series([3, 2, 1], + index=pd.CategoricalIndex(['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) # preserve order? @@ -414,44 +441,41 @@ def test_categorical(self): tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_nans(self): - s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) + s = Series(pd.Categorical(list('aaaaabbbcc'))) # 4,3,2,1 (nan) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], - index=pd.CategoricalIndex(['a', 'b', 'c'], - categories=['a', 'b', 'c'])) + expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + ['a', 'b', 'c'], categories=['a', 'b', 'c'])) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) - expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( - ['a', 'b', 'c', np.nan])) + expected = pd.Series([ + 4, 3, 2, 1 + ], index=pd.CategoricalIndex(['a', 'b', 'c', np.nan])) tm.assert_series_equal(result, expected, check_index_type=True) # out of order - s = Series(pd.Categorical(list('aaaaabbbcc'), - ordered=True, categories=['b', 'a', 'c'])) + s = Series(pd.Categorical( + list('aaaaabbbcc'), ordered=True, categories=['b', 'a', 'c'])) s.iloc[1] = np.nan result = s.value_counts() - expected = pd.Series([4, 3, 2], - index=pd.CategoricalIndex(['a', 'b', 'c'], - categories=['b', 'a', 'c'], - ordered=True)) + expected = pd.Series([4, 3, 2], index=pd.CategoricalIndex( + ['a', 'b', 'c'], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) result = s.value_counts(dropna=False) expected = pd.Series([4, 3, 2, 1], index=pd.CategoricalIndex( - ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) + ['a', 'b', 'c', np.nan], categories=['b', 'a', 'c'], ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) def test_categorical_zeroes(self): # keep the `d` category with 0 - s = Series(pd.Categorical(list('bbbaac'), categories=list('abcd'), - ordered=True)) + s = Series(pd.Categorical( + list('bbbaac'), categories=list('abcd'), ordered=True)) result = s.value_counts() expected = Series([3, 2, 1, 0], index=pd.Categorical( ['b', 'a', 'c', 'd'], categories=list('abcd'), ordered=True)) tm.assert_series_equal(result, expected, check_index_type=True) - def test_dropna(self): # https://github.com/pydata/pandas/issues/9443#issuecomment-73719328 @@ -529,8 +553,7 @@ def test_group_var_generic_2d_all_finite(self): values = 10 * prng.rand(10, 2).astype(self.dtype) labels = np.tile(np.arange(5), (2, )).astype('int64') - expected_out = np.std( - values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 + expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2 expected_counts = counts + 2 self.algo(out, counts, values, labels) @@ -546,10 +569,10 @@ def test_group_var_generic_2d_some_nan(self): values[:, 1] = np.nan labels = np.tile(np.arange(5), (2, )).astype('int64') - expected_out = np.vstack([ - values[:, 0].reshape(5, 2, order='F').std(ddof=1, axis=1) ** 2, - np.nan * np.ones(5) - ]).T + expected_out = np.vstack([values[:, 0] + .reshape(5, 2, order='F') + .std(ddof=1, axis=1) ** 2, + np.nan * np.ones(5)]).T expected_counts = counts + 2 self.algo(out, counts, values, labels) @@ -560,7 +583,7 @@ def test_group_var_constant(self): # Regression test from GH 10448. out = np.array([[np.nan]], dtype=self.dtype) - counts = np.array([0],dtype='int64') + counts = np.array([0], dtype='int64') values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype) labels = np.zeros(3, dtype='int64') @@ -584,7 +607,7 @@ def test_group_var_large_inputs(self): prng = RandomState(1234) out = np.array([[np.nan]], dtype=self.dtype) - counts = np.array([0],dtype='int64') + counts = np.array([0], dtype='int64') values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype) values.shape = (10 ** 6, 1) labels = np.zeros(10 ** 6, dtype='int64') @@ -611,6 +634,7 @@ def test_quantile(): expected = algos.quantile(s.values, [0, .25, .5, .75, 1.]) tm.assert_almost_equal(result, expected) + def test_unique_label_indices(): from pandas.hashtable import unique_label_indices @@ -622,10 +646,11 @@ def test_unique_label_indices(): tm.assert_numpy_array_equal(left, right) a[np.random.choice(len(a), 10)] = -1 - left= unique_label_indices(a) + left = unique_label_indices(a) right = np.unique(a, return_index=True)[1][1:] tm.assert_numpy_array_equal(left, right) + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 741e3eecc96a0..10a5b9dbefe02 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -11,23 +11,23 @@ import pandas.compat as compat import pandas.core.common as com import pandas.util.testing as tm -from pandas import (Series, Index, DatetimeIndex, - TimedeltaIndex, PeriodIndex, Timedelta) +from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, + Timedelta) from pandas.compat import u, StringIO -from pandas.core.base import (FrozenList, FrozenNDArray, - PandasDelegate, NoNewAttributesMixin) +from pandas.core.base import (FrozenList, FrozenNDArray, PandasDelegate, + NoNewAttributesMixin) from pandas.tseries.base import DatetimeIndexOpsMixin -from pandas.util.testing import (assertRaisesRegexp, - assertIsInstance) +from pandas.util.testing import (assertRaisesRegexp, assertIsInstance) class CheckStringMixin(object): + def test_string_methods_dont_fail(self): repr(self.container) str(self.container) bytes(self.container) if not compat.PY3: - unicode(self.container) + unicode(self.container) # noqa def test_tricky_container(self): if not hasattr(self, 'unicode_container'): @@ -36,30 +36,35 @@ def test_tricky_container(self): str(self.unicode_container) bytes(self.unicode_container) if not compat.PY3: - unicode(self.unicode_container) + unicode(self.unicode_container) # noqa class CheckImmutable(object): mutable_regex = re.compile('does not support mutable operations') def check_mutable_error(self, *args, **kwargs): - # pass whatever functions you normally would to assertRaises (after the Exception kind) + # pass whatever functions you normally would to assertRaises (after the + # Exception kind) assertRaisesRegexp(TypeError, self.mutable_regex, *args, **kwargs) def test_no_mutable_funcs(self): - def setitem(): self.container[0] = 5 + def setitem(): + self.container[0] = 5 self.check_mutable_error(setitem) - def setslice(): self.container[1:2] = 3 + def setslice(): + self.container[1:2] = 3 self.check_mutable_error(setslice) - def delitem(): del self.container[0] + def delitem(): + del self.container[0] self.check_mutable_error(delitem) - def delslice(): del self.container[0:3] + def delslice(): + del self.container[0:3] self.check_mutable_error(delslice) mutable_methods = getattr(self, "mutable_methods", []) @@ -116,14 +121,15 @@ def test_shallow_copying(self): original = self.container.copy() assertIsInstance(self.container.view(), FrozenNDArray) self.assertFalse(isinstance( - self.container.view(np.ndarray), FrozenNDArray - )) + self.container.view(np.ndarray), FrozenNDArray)) self.assertIsNot(self.container.view(), self.container) self.assert_numpy_array_equal(self.container, original) # shallow copy should be the same too assertIsInstance(self.container._shallow_copy(), FrozenNDArray) + # setting should not be allowed - def testit(container): container[0] = 16 + def testit(container): + container[0] = 16 self.check_mutable_error(testit, self.container) @@ -164,8 +170,10 @@ def bar(self, *args, **kwargs): pass class Delegate(PandasDelegate): + def __init__(self, obj): self.obj = obj + Delegate._add_delegate_accessors(delegate=Delegator, accessors=Delegator._properties, typ='property') @@ -177,12 +185,17 @@ def __init__(self, obj): def f(): delegate.foo + self.assertRaises(TypeError, f) + def f(): delegate.foo = 5 + self.assertRaises(TypeError, f) + def f(): delegate.foo() + self.assertRaises(TypeError, f) @@ -191,32 +204,36 @@ class Ops(tm.TestCase): def _allow_na_ops(self, obj): """Whether to skip test cases including NaN""" if (isinstance(obj, Index) and - (obj.is_boolean() or not obj._can_hold_na)): + (obj.is_boolean() or not obj._can_hold_na)): # don't test boolean / int64 index return False return True def setUp(self): - self.bool_index = tm.makeBoolIndex(10, name='a') - self.int_index = tm.makeIntIndex(10, name='a') - self.float_index = tm.makeFloatIndex(10, name='a') - self.dt_index = tm.makeDateIndex(10, name='a') - self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern') - self.period_index = tm.makePeriodIndex(10, name='a') - self.string_index = tm.makeStringIndex(10, name='a') - self.unicode_index = tm.makeUnicodeIndex(10, name='a') + self.bool_index = tm.makeBoolIndex(10, name='a') + self.int_index = tm.makeIntIndex(10, name='a') + self.float_index = tm.makeFloatIndex(10, name='a') + self.dt_index = tm.makeDateIndex(10, name='a') + self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize( + tz='US/Eastern') + self.period_index = tm.makePeriodIndex(10, name='a') + self.string_index = tm.makeStringIndex(10, name='a') + self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) - self.int_series = Series(arr, index=self.int_index, name='a') - self.float_series = Series(arr, index=self.float_index, name='a') - self.dt_series = Series(arr, index=self.dt_index, name='a') - self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) + self.int_series = Series(arr, index=self.int_index, name='a') + self.float_series = Series(arr, index=self.float_index, name='a') + self.dt_series = Series(arr, index=self.dt_index, name='a') + self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') - types = ['bool','int','float','dt', 'dt_tz', 'period','string', 'unicode'] - fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ] - self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ] + types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', + 'unicode'] + fmts = ["{0}_{1}".format(t, f) + for t in types for f in ['index', 'series']] + self.objs = [getattr(self, f) + for f in fmts if getattr(self, f, None) is not None] def check_ops_properties(self, props, filter=None, ignore_failures=False): for op in props: @@ -230,36 +247,39 @@ def check_ops_properties(self, props, filter=None, ignore_failures=False): try: if isinstance(o, Series): - expected = Series(getattr(o.index,op), index=o.index, name='a') + expected = Series( + getattr(o.index, op), index=o.index, name='a') else: expected = getattr(o, op) except (AttributeError): if ignore_failures: continue - result = getattr(o,op) + result = getattr(o, op) # these couuld be series, arrays or scalars - if isinstance(result,Series) and isinstance(expected,Series): - tm.assert_series_equal(result,expected) - elif isinstance(result,Index) and isinstance(expected,Index): - tm.assert_index_equal(result,expected) - elif isinstance(result,np.ndarray) and isinstance(expected,np.ndarray): - self.assert_numpy_array_equal(result,expected) + if isinstance(result, Series) and isinstance(expected, Series): + tm.assert_series_equal(result, expected) + elif isinstance(result, Index) and isinstance(expected, Index): + tm.assert_index_equal(result, expected) + elif isinstance(result, np.ndarray) and isinstance(expected, + np.ndarray): + self.assert_numpy_array_equal(result, expected) else: self.assertEqual(result, expected) - # freq raises AttributeError on an Int64Index because its not defined - # we mostly care about Series hwere anyhow + # freq raises AttributeError on an Int64Index because its not + # defined we mostly care about Series hwere anyhow if not ignore_failures: for o in self.not_valid_objs: - # an object that is datetimelike will raise a TypeError, otherwise - # an AttributeError + # an object that is datetimelike will raise a TypeError, + # otherwise an AttributeError if issubclass(type(o), DatetimeIndexOpsMixin): - self.assertRaises(TypeError, lambda : getattr(o,op)) + self.assertRaises(TypeError, lambda: getattr(o, op)) else: - self.assertRaises(AttributeError, lambda : getattr(o,op)) + self.assertRaises(AttributeError, + lambda: getattr(o, op)) def test_binary_ops_docs(self): from pandas import DataFrame, Panel @@ -270,24 +290,28 @@ def test_binary_ops_docs(self): 'pow': '**', 'truediv': '/', 'floordiv': '//'} - for op_name in ['add', 'sub', 'mul', 'mod', 'pow', 'truediv', 'floordiv']: + for op_name in ['add', 'sub', 'mul', 'mod', 'pow', 'truediv', + 'floordiv']: for klass in [Series, DataFrame, Panel]: operand1 = klass.__name__.lower() operand2 = 'other' op = op_map[op_name] expected_str = ' '.join([operand1, op, operand2]) - self.assertTrue(expected_str in getattr(klass, op_name).__doc__) + self.assertTrue(expected_str in getattr(klass, + op_name).__doc__) # reverse version of the binary ops expected_str = ' '.join([operand2, op, operand1]) - self.assertTrue(expected_str in getattr(klass, 'r' + op_name).__doc__) + self.assertTrue(expected_str in getattr(klass, 'r' + + op_name).__doc__) + class TestIndexOps(Ops): def setUp(self): super(TestIndexOps, self).setUp() - self.is_valid_objs = [ o for o in self.objs if o._allow_index_ops ] - self.not_valid_objs = [ o for o in self.objs if not o._allow_index_ops ] + self.is_valid_objs = [o for o in self.objs if o._allow_index_ops] + self.not_valid_objs = [o for o in self.objs if not o._allow_index_ops] def test_none_comparison(self): @@ -299,12 +323,12 @@ def test_none_comparison(self): o[0] = np.nan # noinspection PyComparisonWithNone - result = o == None + result = o == None # noqa self.assertFalse(result.iat[0]) self.assertFalse(result.iat[1]) # noinspection PyComparisonWithNone - result = o != None + result = o != None # noqa self.assertTrue(result.iat[0]) self.assertTrue(result.iat[1]) @@ -314,9 +338,9 @@ def test_none_comparison(self): # this fails for numpy < 1.9 # and oddly for *some* platforms - #result = None != o - #self.assertTrue(result.iat[0]) - #self.assertTrue(result.iat[1]) + # result = None != o # noqa + # self.assertTrue(result.iat[0]) + # self.assertTrue(result.iat[1]) result = None > o self.assertFalse(result.iat[0]) @@ -326,14 +350,13 @@ def test_none_comparison(self): self.assertFalse(result.iat[0]) self.assertFalse(result.iat[1]) - def test_ndarray_compat_properties(self): for o in self.objs: # check that we work - for p in ['shape', 'dtype', 'flags', 'T', - 'strides', 'itemsize', 'nbytes']: + for p in ['shape', 'dtype', 'flags', 'T', 'strides', 'itemsize', + 'nbytes']: self.assertIsNotNone(getattr(o, p, None)) self.assertTrue(hasattr(o, 'base')) @@ -352,23 +375,25 @@ def test_ndarray_compat_properties(self): self.assertEqual(Series([1]).item(), 1) def test_ops(self): - for op in ['max','min']: + for op in ['max', 'min']: for o in self.objs: - result = getattr(o,op)() + result = getattr(o, op)() if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o.values, op)(), freq=o.freq) + expected = pd.Period(ordinal=getattr(o.values, op)(), + freq=o.freq) try: self.assertEqual(result, expected) except TypeError: - # comparing tz-aware series with np.array results in TypeError + # comparing tz-aware series with np.array results in + # TypeError expected = expected.astype('M8[ns]').astype('int64') self.assertEqual(result.value, expected) def test_nanops(self): # GH 7261 - for op in ['max','min']: + for op in ['max', 'min']: for klass in [Index, Series]: obj = klass([np.nan, 2.0]) @@ -389,25 +414,26 @@ def test_nanops(self): self.assertEqual(getattr(obj, op)(), datetime(2011, 11, 1)) # argmin/max - obj = Index(np.arange(5,dtype='int64')) - self.assertEqual(obj.argmin(),0) - self.assertEqual(obj.argmax(),4) + obj = Index(np.arange(5, dtype='int64')) + self.assertEqual(obj.argmin(), 0) + self.assertEqual(obj.argmax(), 4) obj = Index([np.nan, 1, np.nan, 2]) - self.assertEqual(obj.argmin(),1) - self.assertEqual(obj.argmax(),3) + self.assertEqual(obj.argmin(), 1) + self.assertEqual(obj.argmax(), 3) obj = Index([np.nan]) - self.assertEqual(obj.argmin(),-1) - self.assertEqual(obj.argmax(),-1) + self.assertEqual(obj.argmin(), -1) + self.assertEqual(obj.argmax(), -1) - obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011,11,2),pd.NaT]) - self.assertEqual(obj.argmin(),1) - self.assertEqual(obj.argmax(),2) + obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), + pd.NaT]) + self.assertEqual(obj.argmin(), 1) + self.assertEqual(obj.argmax(), 2) obj = Index([pd.NaT]) - self.assertEqual(obj.argmin(),-1) - self.assertEqual(obj.argmax(),-1) + self.assertEqual(obj.argmin(), -1) + self.assertEqual(obj.argmax(), -1) def test_value_counts_unique_nunique(self): for o in self.objs: @@ -447,9 +473,13 @@ def test_value_counts_unique_nunique(self): else: expected_index = pd.Index(values[::-1]) idx = o.index.repeat(range(1, len(o) + 1)) - o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a') + o = klass( + np.repeat(values, range(1, + len(o) + 1)), index=idx, name='a') - expected_s = Series(range(10, 0, -1), index=expected_index, dtype='int64', name='a') + expected_s = Series( + range(10, 0, - + 1), index=expected_index, dtype='int64', name='a') result = o.value_counts() tm.assert_series_equal(result, expected_s) @@ -484,28 +514,36 @@ def test_value_counts_unique_nunique(self): o = o.copy() o[0:2] = pd.tslib.iNaT values = o.values - elif o.values.dtype == 'datetime64[ns]' or isinstance(o, PeriodIndex): + elif o.values.dtype == 'datetime64[ns]' or isinstance( + o, PeriodIndex): values[0:2] = pd.tslib.iNaT else: values[0:2] = null_obj - # create repeated values, 'n'th element is repeated by n+1 times + # create repeated values, 'n'th element is repeated by n+1 + # times if isinstance(o, PeriodIndex): - # freq must be specified because repeat makes freq ambiguous + # freq must be specified because repeat makes freq + # ambiguous # resets name from Index expected_index = pd.Index(o, name=None) # attach name to klass - o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq, name='a') + o = klass( + np.repeat(values, range( + 1, len(o) + 1)), freq=o.freq, name='a') elif isinstance(o, Index): expected_index = pd.Index(values, name=None) - o = klass(np.repeat(values, range(1, len(o) + 1)), name='a') + o = klass( + np.repeat(values, range(1, len(o) + 1)), name='a') else: expected_index = pd.Index(values, name=None) idx = np.repeat(o.index.values, range(1, len(o) + 1)) - o = klass(np.repeat(values, range(1, len(o) + 1)), index=idx, name='a') + o = klass( + np.repeat(values, range( + 1, len(o) + 1)), index=idx, name='a') - expected_s_na = Series(list(range(10, 2, -1)) +[3], + expected_s_na = Series(list(range(10, 2, -1)) + [3], index=expected_index[9:0:-1], dtype='int64', name='a') expected_s = Series(list(range(10, 2, -1)), @@ -543,7 +581,8 @@ def test_value_counts_inferred(self): self.assert_numpy_array_equal(s.unique(), np.unique(s_values)) self.assertEqual(s.nunique(), 4) - # don't sort, have to sort after the fact as not sorting is platform-dep + # don't sort, have to sort after the fact as not sorting is + # platform-dep hist = s.value_counts(sort=False).sort_values() expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values() tm.assert_series_equal(hist, expected) @@ -559,7 +598,8 @@ def test_value_counts_inferred(self): tm.assert_series_equal(hist, expected) # bins - self.assertRaises(TypeError, lambda bins: s.value_counts(bins=bins), 1) + self.assertRaises(TypeError, + lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) @@ -573,45 +613,60 @@ def test_value_counts_inferred(self): self.assertEqual(s1.nunique(), 3) res4 = s1.value_counts(bins=4) - exp4 = Series({0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1}, index=[0.998, 2.5, 1.5, 2.0]) + exp4 = Series({0.998: 2, + 1.5: 1, + 2.0: 0, + 2.5: 1}, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) - exp4n = Series({0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0]) + exp4n = Series( + {0.998: 0.5, + 1.5: 0.25, + 2.0: 0.0, + 2.5: 0.25}, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4n, exp4n) # handle NA's properly - s_values = ['a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'] + s_values = ['a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', + 'b'] s = klass(s_values) expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) - self.assert_numpy_array_equal(s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O')) + self.assert_numpy_array_equal(s.unique(), np.array( + ['a', 'b', np.nan, 'd'], dtype='O')) self.assertEqual(s.nunique(), 3) s = klass({}) expected = Series([], dtype=np.int64) - tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + tm.assert_series_equal(s.value_counts(), expected, + check_index_type=False) self.assert_numpy_array_equal(s.unique(), np.array([])) self.assertEqual(s.nunique(), 0) # GH 3002, datetime64[ns] # don't test names though - txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', - 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM']) + txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM', + 'xxyyzz20100101EGG', 'xxyyww20090101EGG', + 'foofoo20080909PIE', 'foofoo20080909GUM']) f = StringIO(txt) - df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], + df = pd.read_fwf(f, widths=[6, 8, 3], + names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy()) s.name = None - idx = pd.to_datetime(['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', - '2009-01-01 00:00:00X']) + idx = pd.to_datetime( + ['2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', + '2009-01-01 00:00:00X']) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) - expected = np.array(['2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', - '2008-09-09 00:00:00Z'], dtype='datetime64[ns]') + expected = np.array(['2010-01-01 00:00:00Z', + '2009-01-01 00:00:00Z', + '2008-09-09 00:00:00Z'], + dtype='datetime64[ns]') if isinstance(s, DatetimeIndex): expected = DatetimeIndex(expected) self.assertTrue(s.unique().equals(expected)) @@ -637,7 +692,8 @@ def test_value_counts_inferred(self): # numpy_array_equal cannot compare pd.NaT self.assert_numpy_array_equal(unique[:3], expected) - self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') == pd.tslib.iNaT) + self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') == + pd.tslib.iNaT) self.assertEqual(s.nunique(), 3) self.assertEqual(s.nunique(dropna=False), 4) @@ -664,10 +720,10 @@ def test_value_counts_inferred(self): def test_factorize(self): for o in self.objs: - if isinstance(o,Index) and o.is_boolean(): - exp_arr = np.array([0,1] + [0] * 8) + if isinstance(o, Index) and o.is_boolean(): + exp_arr = np.array([0, 1] + [0] * 8) exp_uniques = o - exp_uniques = Index([False,True]) + exp_uniques = Index([False, True]) else: exp_arr = np.array(range(len(o))) exp_uniques = o @@ -683,7 +739,7 @@ def test_factorize(self): for o in self.objs: # don't test boolean - if isinstance(o,Index) and o.is_boolean(): + if isinstance(o, Index) and o.is_boolean(): continue # sort by value, and create duplicates @@ -710,7 +766,8 @@ def test_factorize(self): self.assert_numpy_array_equal(labels, exp_arr) if isinstance(o, Series): - expected = Index(np.concatenate([o.values[5:10], o.values[:5]])) + expected = Index(np.concatenate([o.values[5:10], o.values[:5] + ])) self.assert_numpy_array_equal(uniques, expected) else: expected = o[5:].append(o[:5]) @@ -725,7 +782,7 @@ def test_duplicated_drop_duplicates(self): # special case if original.is_boolean(): result = original.drop_duplicates() - expected = Index([False,True], name='a') + expected = Index([False, True], name='a') tm.assert_index_equal(result, expected) continue @@ -743,7 +800,8 @@ def test_duplicated_drop_duplicates(self): # create repeated values, 3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] - expected = np.array([False] * len(original) + [True, True], dtype=bool) + expected = np.array([False] * len(original) + [True, True], + dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) @@ -780,8 +838,9 @@ def test_duplicated_drop_duplicates(self): result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) - with tm.assertRaisesRegexp(TypeError, - "drop_duplicates\(\) got an unexpected keyword argument"): + with tm.assertRaisesRegexp( + TypeError, "drop_duplicates\(\) got an unexpected " + "keyword argument"): idx.drop_duplicates(inplace=True) else: @@ -812,7 +871,8 @@ def test_duplicated_drop_duplicates(self): # deprecate take_last with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(s.duplicated(take_last=True), expected) + tm.assert_series_equal( + s.duplicated(take_last=True), expected) with tm.assert_produces_warning(FutureWarning): tm.assert_series_equal(s.drop_duplicates(take_last=True), s[~np.array(base)]) @@ -863,13 +923,15 @@ def get_fill_value(obj): fill_value = get_fill_value(o) # special assign to the numpy array - if o.values.dtype == 'datetime64[ns]' or isinstance(o, PeriodIndex): + if o.values.dtype == 'datetime64[ns]' or isinstance( + o, PeriodIndex): values[0:2] = pd.tslib.iNaT else: values[0:2] = null_obj if isinstance(o, PeriodIndex): - # freq must be specified because repeat makes freq ambiguous + # freq must be specified because repeat makes freq + # ambiguous expected = [fill_value.ordinal] * 2 + list(values[2:]) expected = klass(ordinal=expected, freq=o.freq) o = klass(ordinal=values, freq=o.freq) @@ -891,9 +953,8 @@ def test_memory_usage(self): res = o.memory_usage() res_deep = o.memory_usage(deep=True) - if (com.is_object_dtype(o) or - (isinstance(o, Series) and - com.is_object_dtype(o.index))): + if (com.is_object_dtype(o) or (isinstance(o, Series) and + com.is_object_dtype(o.index))): # if there are objects, only deep will pick them up self.assertTrue(res_deep > res) else: @@ -913,6 +974,7 @@ def test_memory_usage(self): class TestFloat64HashTable(tm.TestCase): + def test_lookup_nan(self): from pandas.hashtable import Float64HashTable xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) @@ -932,10 +994,12 @@ class T(NoNewAttributesMixin): t.a = "test" self.assertEqual(t.a, "test") t._freeze() - #self.assertTrue("__frozen" not in dir(t)) + # self.assertTrue("__frozen" not in dir(t)) self.assertIs(getattr(t, "__frozen"), True) + def f(): t.b = "test" + self.assertRaises(AttributeError, f) self.assertFalse(hasattr(t, "b")) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7e09b2e13a3c1..8a9827b9d5533 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -12,18 +12,22 @@ import pandas.compat as compat import pandas.core.common as com import pandas.util.testing as tm -from pandas import (Categorical, Index, Series, DataFrame, - PeriodIndex, Timestamp, CategoricalIndex) +from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex, + Timestamp, CategoricalIndex) from pandas.compat import range, lrange, u, PY3 from pandas.core.config import option_context +# GH 12066 +# flake8: noqa + class TestCategorical(tm.TestCase): _multiprocess_can_split_ = True def setUp(self): self.factor = Categorical.from_array(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c'], ordered=True) + 'a', 'c', 'c', 'c'], + ordered=True) def test_getitem(self): self.assertEqual(self.factor[0], 'a') @@ -56,7 +60,7 @@ def test_setitem(self): # boolean c = self.factor.copy() - indexer = np.zeros(len(c),dtype='bool') + indexer = np.zeros(len(c), dtype='bool') indexer[0] = True indexer[-1] = True c[indexer] = 'c' @@ -70,7 +74,8 @@ def test_setitem_listlike(self): # GH 9469 # properly coerce the input indexers np.random.seed(1) - c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8)).add_categories([-1000]) + c = Categorical(np.random.randint(0, 5, size=150000).astype( + np.int8)).add_categories([-1000]) indexer = np.array([100000]).astype(np.int64) c[indexer] = -1000 @@ -87,11 +92,15 @@ def test_constructor_unsortable(self): self.assertFalse(factor.ordered) if compat.PY3: - self.assertRaises(TypeError, lambda : Categorical.from_array(arr, ordered=True)) + self.assertRaises( + TypeError, lambda: Categorical.from_array(arr, ordered=True)) else: - # this however will raise as cannot be sorted (on PY3 or older numpies) + # this however will raise as cannot be sorted (on PY3 or older + # numpies) if LooseVersion(np.__version__) < "1.10": - self.assertRaises(TypeError, lambda : Categorical.from_array(arr, ordered=True)) + self.assertRaises( + TypeError, + lambda: Categorical.from_array(arr, ordered=True)) else: Categorical.from_array(arr, ordered=True) @@ -99,9 +108,9 @@ def test_is_equal_dtype(self): # test dtype comparisons between cats - c1 = Categorical(list('aabca'),categories=list('abc'),ordered=False) - c2 = Categorical(list('aabca'),categories=list('cab'),ordered=False) - c3 = Categorical(list('aabca'),categories=list('cab'),ordered=True) + c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False) + c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False) + c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True) self.assertTrue(c1.is_dtype_equal(c1)) self.assertTrue(c2.is_dtype_equal(c2)) self.assertTrue(c3.is_dtype_equal(c3)) @@ -110,29 +119,35 @@ def test_is_equal_dtype(self): self.assertFalse(c1.is_dtype_equal(Index(list('aabca')))) self.assertFalse(c1.is_dtype_equal(c1.astype(object))) self.assertTrue(c1.is_dtype_equal(CategoricalIndex(c1))) - self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1,categories=list('cab')))) - self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1,ordered=True))) + self.assertFalse(c1.is_dtype_equal( + CategoricalIndex(c1, categories=list('cab')))) + self.assertFalse(c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))) def test_constructor(self): exp_arr = np.array(["a", "b", "c", "a", "b", "c"]) c1 = Categorical(exp_arr) self.assert_numpy_array_equal(c1.__array__(), exp_arr) - c2 = Categorical(exp_arr, categories=["a","b","c"]) + c2 = Categorical(exp_arr, categories=["a", "b", "c"]) self.assert_numpy_array_equal(c2.__array__(), exp_arr) - c2 = Categorical(exp_arr, categories=["c","b","a"]) + c2 = Categorical(exp_arr, categories=["c", "b", "a"]) self.assert_numpy_array_equal(c2.__array__(), exp_arr) # categories must be unique def f(): - Categorical([1,2], [1,2,2]) + Categorical([1, 2], [1, 2, 2]) + self.assertRaises(ValueError, f) + def f(): - Categorical(["a","b"], ["a","b","b"]) + Categorical(["a", "b"], ["a", "b", "b"]) + self.assertRaises(ValueError, f) + def f(): with tm.assert_produces_warning(FutureWarning): - Categorical([1,2], [1,2,np.nan, np.nan]) + Categorical([1, 2], [1, 2, np.nan, np.nan]) + self.assertRaises(ValueError, f) # The default should be unordered @@ -144,25 +159,25 @@ def f(): c2 = Categorical(c1) self.assertTrue(c1.equals(c2)) - c1 = Categorical(["a", "b", "c", "a"], categories=["a","b","c","d"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(c1) self.assertTrue(c1.equals(c2)) - c1 = Categorical(["a", "b", "c", "a"], categories=["a","c","b"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1) self.assertTrue(c1.equals(c2)) - c1 = Categorical(["a", "b", "c", "a"], categories=["a","c","b"]) - c2 = Categorical(c1, categories=["a","b","c"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) + c2 = Categorical(c1, categories=["a", "b", "c"]) self.assert_numpy_array_equal(c1.__array__(), c2.__array__()) - self.assert_numpy_array_equal(c2.categories, np.array(["a","b","c"])) + self.assert_numpy_array_equal(c2.categories, np.array(["a", "b", "c"])) # Series of dtype category - c1 = Categorical(["a", "b", "c", "a"], categories=["a","b","c","d"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(c1)) self.assertTrue(c1.equals(c2)) - c1 = Categorical(["a", "b", "c", "a"], categories=["a","c","b"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(Series(c1)) self.assertTrue(c1.equals(c2)) @@ -171,43 +186,50 @@ def f(): c2 = Categorical(Series(["a", "b", "c", "a"])) self.assertTrue(c1.equals(c2)) - c1 = Categorical(["a", "b", "c", "a"], categories=["a","b","c","d"]) - c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a","b","c","d"]) + c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + c2 = Categorical( + Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) self.assertTrue(c1.equals(c2)) # This should result in integer categories, not float! - cat = pd.Categorical([1,2,3,np.nan], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) self.assertTrue(com.is_integer_dtype(cat.categories)) # https://github.com/pydata/pandas/issues/3678 - cat = pd.Categorical([np.nan,1, 2, 3]) + cat = pd.Categorical([np.nan, 1, 2, 3]) self.assertTrue(com.is_integer_dtype(cat.categories)) # this should result in floats - cat = pd.Categorical([np.nan, 1, 2., 3 ]) + cat = pd.Categorical([np.nan, 1, 2., 3]) self.assertTrue(com.is_float_dtype(cat.categories)) - cat = pd.Categorical([np.nan, 1., 2., 3. ]) + cat = pd.Categorical([np.nan, 1., 2., 3.]) self.assertTrue(com.is_float_dtype(cat.categories)) # Deprecating NaNs in categoires (GH #10748) - # preserve int as far as possible by converting to object if NaN is in categories + # preserve int as far as possible by converting to object if NaN is in + # categories with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1, 2, 3], categories=[np.nan, 1, 2, 3]) + cat = pd.Categorical([np.nan, 1, 2, 3], + categories=[np.nan, 1, 2, 3]) self.assertTrue(com.is_object_dtype(cat.categories)) - # This doesn't work -> this would probably need some kind of "remember the original type" - # feature to try to cast the array interface result to... - #vals = np.asarray(cat[cat.notnull()]) - #self.assertTrue(com.is_integer_dtype(vals)) + + # This doesn't work -> this would probably need some kind of "remember + # the original type" feature to try to cast the array interface result + # to... + + # vals = np.asarray(cat[cat.notnull()]) + # self.assertTrue(com.is_integer_dtype(vals)) with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan,"a", "b", "c"], categories=[np.nan,"a", "b", "c"]) + cat = pd.Categorical([np.nan, "a", "b", "c"], + categories=[np.nan, "a", "b", "c"]) self.assertTrue(com.is_object_dtype(cat.categories)) # but don't do it for floats with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical([np.nan, 1., 2., 3.], categories=[np.nan, 1., 2., 3.]) + cat = pd.Categorical([np.nan, 1., 2., 3.], + categories=[np.nan, 1., 2., 3.]) self.assertTrue(com.is_float_dtype(cat.categories)) - # corner cases cat = pd.Categorical([1]) self.assertTrue(len(cat.categories) == 1) @@ -239,35 +261,39 @@ def f(): # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN with tm.assert_produces_warning(RuntimeWarning): - c_old = Categorical([0,1,2,0,1,2], categories=["a","b","c"]) + c_old = Categorical([0, 1, 2, 0, 1, 2], + categories=["a", "b", "c"]) # noqa with tm.assert_produces_warning(RuntimeWarning): - c_old = Categorical([0,1,2,0,1,2], categories=[3,4,5]) + c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa + categories=[3, 4, 5]) - # the next one are from the old docs, but unfortunately these don't trigger :-( + # the next one are from the old docs, but unfortunately these don't + # trigger :-( with tm.assert_produces_warning(None): - c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) - cat = Categorical([1,2], categories=[1,2,3]) + c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa + cat = Categorical([1, 2], categories=[1, 2, 3]) # this is a legitimate constructor with tm.assert_produces_warning(None): - c = Categorical(np.array([],dtype='int64'),categories=[3,2,1],ordered=True) + c = Categorical(np.array([], dtype='int64'), # noqa + categories=[3, 2, 1], ordered=True) def test_constructor_with_index(self): - - ci = CategoricalIndex(list('aabbca'),categories=list('cab')) + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) self.assertTrue(ci.values.equals(Categorical(ci))) - ci = CategoricalIndex(list('aabbca'),categories=list('cab')) - self.assertTrue(ci.values.equals(Categorical(ci.astype(object),categories=ci.categories))) + ci = CategoricalIndex(list('aabbca'), categories=list('cab')) + self.assertTrue(ci.values.equals(Categorical( + ci.astype(object), categories=ci.categories))) def test_constructor_with_generator(self): - # This was raising an Error in isnull(single_val).any() because isnull returned a scalar - # for a generator - from pandas.compat import range as xrange + # This was raising an Error in isnull(single_val).any() because isnull + # returned a scalar for a generator + xrange = range - exp = Categorical([0,1,2]) - cat = Categorical((x for x in [0,1,2])) + exp = Categorical([0, 1, 2]) + cat = Categorical((x for x in [0, 1, 2])) self.assertTrue(cat.equals(exp)) cat = Categorical(xrange(3)) self.assertTrue(cat.equals(exp)) @@ -277,42 +303,44 @@ def test_constructor_with_generator(self): MultiIndex.from_product([range(5), ['a', 'b', 'c']]) # check that categories accept generators and sequences - cat = pd.Categorical([0,1,2], categories=(x for x in [0,1,2])) + cat = pd.Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) self.assertTrue(cat.equals(exp)) - cat = pd.Categorical([0,1,2], categories=xrange(3)) + cat = pd.Categorical([0, 1, 2], categories=xrange(3)) self.assertTrue(cat.equals(exp)) - def test_from_codes(self): # too few categories def f(): - Categorical.from_codes([1,2], [1,2]) + Categorical.from_codes([1, 2], [1, 2]) + self.assertRaises(ValueError, f) # no int codes def f(): - Categorical.from_codes(["a"], [1,2]) + Categorical.from_codes(["a"], [1, 2]) + self.assertRaises(ValueError, f) # no unique categories def f(): - Categorical.from_codes([0,1,2], ["a","a","b"]) + Categorical.from_codes([0, 1, 2], ["a", "a", "b"]) + self.assertRaises(ValueError, f) # too negative def f(): - Categorical.from_codes([-2,1,2], ["a","b","c"]) - self.assertRaises(ValueError, f) + Categorical.from_codes([-2, 1, 2], ["a", "b", "c"]) + self.assertRaises(ValueError, f) - exp = Categorical(["a","b","c"], ordered=False) - res = Categorical.from_codes([0,1,2], ["a","b","c"]) + exp = Categorical(["a", "b", "c"], ordered=False) + res = Categorical.from_codes([0, 1, 2], ["a", "b", "c"]) self.assertTrue(exp.equals(res)) # Not available in earlier numpy versions if hasattr(np.random, "choice"): - codes = np.random.choice([0,1], 5, p=[0.9,0.1]) + codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) pd.Categorical.from_codes(codes, categories=["train", "test"]) def test_comparisons(self): @@ -353,10 +381,13 @@ def test_comparisons(self): self.assert_numpy_array_equal(result, expected) # comparisons with categoricals - cat_rev = pd.Categorical(["a","b","c"], categories=["c","b","a"], ordered=True) - cat_rev_base = pd.Categorical(["b","b","b"], categories=["c","b","a"], ordered=True) - cat = pd.Categorical(["a","b","c"], ordered=True) - cat_base = pd.Categorical(["b","b","b"], categories=cat.categories, ordered=True) + cat_rev = pd.Categorical(["a", "b", "c"], categories=["c", "b", "a"], + ordered=True) + cat_rev_base = pd.Categorical( + ["b", "b", "b"], categories=["c", "b", "a"], ordered=True) + cat = pd.Categorical(["a", "b", "c"], ordered=True) + cat_base = pd.Categorical(["b", "b", "b"], categories=cat.categories, + ordered=True) # comparisons need to take categories ordering into account res_rev = cat_rev > cat_rev_base @@ -374,30 +405,36 @@ def test_comparisons(self): # Only categories with same categories can be compared def f(): cat > cat_rev + self.assertRaises(TypeError, f) - cat_rev_base2 = pd.Categorical(["b","b","b"], categories=["c","b","a","d"]) + cat_rev_base2 = pd.Categorical( + ["b", "b", "b"], categories=["c", "b", "a", "d"]) + def f(): cat_rev > cat_rev_base2 + self.assertRaises(TypeError, f) # Only categories with same ordering information can be compared cat_unorderd = cat.set_ordered(False) self.assertFalse((cat > cat).any()) + def f(): cat > cat_unorderd + self.assertRaises(TypeError, f) # comparison (in both directions) with Series will raise - s = Series(["b","b","b"]) + s = Series(["b", "b", "b"]) self.assertRaises(TypeError, lambda: cat > s) self.assertRaises(TypeError, lambda: cat_rev > s) self.assertRaises(TypeError, lambda: s < cat) self.assertRaises(TypeError, lambda: s < cat_rev) - # comparison with numpy.array will raise in both direction, but only on newer - # numpy versions - a = np.array(["b","b","b"]) + # comparison with numpy.array will raise in both direction, but only on + # newer numpy versions + a = np.array(["b", "b", "b"]) self.assertRaises(TypeError, lambda: cat > a) self.assertRaises(TypeError, lambda: cat_rev > a) @@ -407,13 +444,14 @@ def f(): self.assertRaises(TypeError, lambda: a < cat) self.assertRaises(TypeError, lambda: a < cat_rev) - # Make sure that unequal comparison take the categories order in account - cat_rev = pd.Categorical(list("abc"), categories=list("cba"), ordered=True) + # Make sure that unequal comparison take the categories order in + # account + cat_rev = pd.Categorical( + list("abc"), categories=list("cba"), ordered=True) exp = np.array([True, False, False]) res = cat_rev > "b" self.assert_numpy_array_equal(res, exp) - def test_na_flags_int_categories(self): # #1457 @@ -435,55 +473,63 @@ def test_describe(self): # string type desc = self.factor.describe() expected = DataFrame({'counts': [3, 2, 3], - 'freqs': [3/8., 2/8., 3/8.]}, - index=pd.CategoricalIndex(['a', 'b', 'c'], name='categories')) + 'freqs': [3 / 8., 2 / 8., 3 / 8.]}, + index=pd.CategoricalIndex(['a', 'b', 'c'], + name='categories')) tm.assert_frame_equal(desc, expected) # check unused categories cat = self.factor.copy() - cat.set_categories(["a","b","c","d"], inplace=True) + cat.set_categories(["a", "b", "c", "d"], inplace=True) desc = cat.describe() expected = DataFrame({'counts': [3, 2, 3, 0], - 'freqs': [3/8., 2/8., 3/8., 0]}, - index=pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='categories')) + 'freqs': [3 / 8., 2 / 8., 3 / 8., 0]}, + index=pd.CategoricalIndex(['a', 'b', 'c', 'd'], + name='categories')) tm.assert_frame_equal(desc, expected) # check an integer one - desc = Categorical([1,2,3,1,2,3,3,2,1,1,1]).describe() + desc = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]).describe() expected = DataFrame({'counts': [5, 3, 3], - 'freqs': [5/11., 3/11., 3/11.]}, - index=pd.CategoricalIndex([1, 2, 3], name='categories')) + 'freqs': [5 / 11., 3 / 11., 3 / 11.]}, + index=pd.CategoricalIndex([1, 2, 3], + name='categories')) tm.assert_frame_equal(desc, expected) # https://github.com/pydata/pandas/issues/3678 # describe should work with NaN - cat = pd.Categorical([np.nan,1, 2, 2]) + cat = pd.Categorical([np.nan, 1, 2, 2]) desc = cat.describe() expected = DataFrame({'counts': [1, 2, 1], - 'freqs': [1/4., 2/4., 1/4.]}, - index=pd.CategoricalIndex([1, 2, np.nan], categories=[1, 2], + 'freqs': [1 / 4., 2 / 4., 1 / 4.]}, + index=pd.CategoricalIndex([1, 2, np.nan], + categories=[1, 2], name='categories')) tm.assert_frame_equal(desc, expected) # NA as a category with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c", np.nan], categories=["b", "a", "c", np.nan]) + cat = pd.Categorical(["a", "c", "c", np.nan], + categories=["b", "a", "c", np.nan]) result = cat.describe() - expected = DataFrame([[0,0],[1,0.25],[2,0.5],[1,0.25]], - columns=['counts','freqs'], - index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], name='categories')) - tm.assert_frame_equal(result,expected) + expected = DataFrame([[0, 0], [1, 0.25], [2, 0.5], [1, 0.25]], + columns=['counts', 'freqs'], + index=pd.CategoricalIndex(['b', 'a', 'c', np.nan], + name='categories')) + tm.assert_frame_equal(result, expected) # NA as an unused category with tm.assert_produces_warning(FutureWarning): - cat = pd.Categorical(["a", "c", "c"], categories=["b", "a", "c", np.nan]) + cat = pd.Categorical(["a", "c", "c"], + categories=["b", "a", "c", np.nan]) result = cat.describe() - exp_idx = pd.CategoricalIndex(['b', 'a', 'c', np.nan], name='categories') - expected = DataFrame([[0, 0], [1, 1/3.], [2, 2/3.], [0, 0]], + exp_idx = pd.CategoricalIndex( + ['b', 'a', 'c', np.nan], name='categories') + expected = DataFrame([[0, 0], [1, 1 / 3.], [2, 2 / 3.], [0, 0]], columns=['counts', 'freqs'], index=exp_idx) - tm.assert_frame_equal(result,expected) + tm.assert_frame_equal(result, expected) def test_print(self): expected = ["[a, b, b, a, a, c, c, c]", @@ -493,9 +539,9 @@ def test_print(self): self.assertEqual(actual, expected) def test_big_print(self): - factor = Categorical([0,1,2,0,1,2]*100, ['a', 'b', 'c'], name='cat', fastpath=True) - expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", - "Length: 600", + factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ['a', 'b', 'c'], + name='cat', fastpath=True) + expected = ["[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", "Categories (3, object): [a, b, c]"] expected = "\n".join(expected) @@ -504,14 +550,14 @@ def test_big_print(self): self.assertEqual(actual, expected) def test_empty_print(self): - factor = Categorical([], ["a","b","c"]) + factor = Categorical([], ["a", "b", "c"]) expected = ("[], Categories (3, object): [a, b, c]") # hack because array_repr changed in numpy > 1.6.x actual = repr(factor) self.assertEqual(actual, expected) self.assertEqual(expected, actual) - factor = Categorical([], ["a","b","c"], ordered=True) + factor = Categorical([], ["a", "b", "c"], ordered=True) expected = ("[], Categories (3, object): [a < b < c]") actual = repr(factor) self.assertEqual(expected, actual) @@ -522,9 +568,9 @@ def test_empty_print(self): def test_print_none_width(self): # GH10087 - a = pd.Series(pd.Categorical([1,2,3,4])) + a = pd.Series(pd.Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") with option_context("display.width", None): self.assertEqual(exp, repr(a)) @@ -533,27 +579,35 @@ def test_unicode_print(self): if PY3: _rep = repr else: - _rep = unicode + _rep = unicode # noqa c = pd.Categorical(['aaaaa', 'bb', 'cccc'] * 20) - expected = u"""[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] + expected = u"""\ +[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] Length: 60 Categories (3, object): [aaaaa, bb, cccc]""" + self.assertEqual(_rep(c), expected) - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) - expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] + * 20) + expected = u"""\ +[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + self.assertEqual(_rep(c), expected) - # unicode option should not affect to Categorical, as it doesn't care the repr width + # unicode option should not affect to Categorical, as it doesn't care + # the repr width with option_context('display.unicode.east_asian_width', True): - c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) + c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] + * 20) expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa + self.assertEqual(_rep(c), expected) def test_periodindex(self): @@ -562,7 +616,7 @@ def test_periodindex(self): cat1 = Categorical.from_array(idx1) str(cat1) - exp_arr = np.array([0, 0, 1, 1, 2, 2],dtype='int64') + exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype='int64') exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') self.assert_numpy_array_equal(cat1._codes, exp_arr) self.assertTrue(cat1.categories.equals(exp_idx)) @@ -571,7 +625,7 @@ def test_periodindex(self): '2014-03', '2014-01'], freq='M') cat2 = Categorical.from_array(idx2, ordered=True) str(cat2) - exp_arr = np.array([2, 2, 1, 0, 2, 0],dtype='int64') + exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype='int64') exp_idx2 = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M') self.assert_numpy_array_equal(cat2._codes, exp_arr) self.assertTrue(cat2.categories.equals(exp_idx2)) @@ -579,57 +633,63 @@ def test_periodindex(self): idx3 = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09', '2013-08', '2013-07', '2013-05'], freq='M') cat3 = Categorical.from_array(idx3, ordered=True) - exp_arr = np.array([6, 5, 4, 3, 2, 1, 0],dtype='int64') + exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype='int64') exp_idx = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09', '2013-10', '2013-11', '2013-12'], freq='M') self.assert_numpy_array_equal(cat3._codes, exp_arr) self.assertTrue(cat3.categories.equals(exp_idx)) def test_categories_assigments(self): - s = pd.Categorical(["a","b","c","a"]) - exp = np.array([1,2,3,1]) - s.categories = [1,2,3] + s = pd.Categorical(["a", "b", "c", "a"]) + exp = np.array([1, 2, 3, 1]) + s.categories = [1, 2, 3] self.assert_numpy_array_equal(s.__array__(), exp) - self.assert_numpy_array_equal(s.categories, np.array([1,2,3])) + self.assert_numpy_array_equal(s.categories, np.array([1, 2, 3])) + # lengthen def f(): - s.categories = [1,2,3,4] + s.categories = [1, 2, 3, 4] + self.assertRaises(ValueError, f) + # shorten def f(): - s.categories = [1,2] + s.categories = [1, 2] + self.assertRaises(ValueError, f) def test_construction_with_ordered(self): # GH 9347, 9190 - cat = Categorical([0,1,2]) + cat = Categorical([0, 1, 2]) self.assertFalse(cat.ordered) - cat = Categorical([0,1,2],ordered=False) + cat = Categorical([0, 1, 2], ordered=False) self.assertFalse(cat.ordered) - cat = Categorical([0,1,2],ordered=True) + cat = Categorical([0, 1, 2], ordered=True) self.assertTrue(cat.ordered) def test_ordered_api(self): # GH 9347 - cat1 = pd.Categorical(["a","c","b"], ordered=False) - self.assertTrue(cat1.categories.equals(Index(['a','b','c']))) + cat1 = pd.Categorical(["a", "c", "b"], ordered=False) + self.assertTrue(cat1.categories.equals(Index(['a', 'b', 'c']))) self.assertFalse(cat1.ordered) - cat2 = pd.Categorical(["a","c","b"], categories=['b','c','a'], ordered=False) - self.assertTrue(cat2.categories.equals(Index(['b','c','a']))) + cat2 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], + ordered=False) + self.assertTrue(cat2.categories.equals(Index(['b', 'c', 'a']))) self.assertFalse(cat2.ordered) - cat3 = pd.Categorical(["a","c","b"], ordered=True) - self.assertTrue(cat3.categories.equals(Index(['a','b','c']))) + cat3 = pd.Categorical(["a", "c", "b"], ordered=True) + self.assertTrue(cat3.categories.equals(Index(['a', 'b', 'c']))) self.assertTrue(cat3.ordered) - cat4 = pd.Categorical(["a","c","b"], categories=['b','c','a'], ordered=True) - self.assertTrue(cat4.categories.equals(Index(['b','c','a']))) + cat4 = pd.Categorical(["a", "c", "b"], categories=['b', 'c', 'a'], + ordered=True) + self.assertTrue(cat4.categories.equals(Index(['b', 'c', 'a']))) self.assertTrue(cat4.ordered) def test_set_ordered(self): - cat = Categorical(["a","b","c","a"], ordered=True) + cat = Categorical(["a", "b", "c", "a"], ordered=True) cat2 = cat.as_unordered() self.assertFalse(cat2.ordered) cat2 = cat.as_ordered() @@ -655,123 +715,142 @@ def test_set_ordered(self): self.assertTrue(cat.ordered) def test_set_categories(self): - cat = Categorical(["a","b","c","a"], ordered=True) - exp_categories = np.array(["c","b","a"]) - exp_values = np.array(["a","b","c","a"]) + cat = Categorical(["a", "b", "c", "a"], ordered=True) + exp_categories = np.array(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"]) - res = cat.set_categories(["c","b","a"], inplace=True) + res = cat.set_categories(["c", "b", "a"], inplace=True) self.assert_numpy_array_equal(cat.categories, exp_categories) self.assert_numpy_array_equal(cat.__array__(), exp_values) self.assertIsNone(res) - res = cat.set_categories(["a","b","c"]) + res = cat.set_categories(["a", "b", "c"]) # cat must be the same as before self.assert_numpy_array_equal(cat.categories, exp_categories) self.assert_numpy_array_equal(cat.__array__(), exp_values) # only res is changed - exp_categories_back = np.array(["a","b","c"]) + exp_categories_back = np.array(["a", "b", "c"]) self.assert_numpy_array_equal(res.categories, exp_categories_back) self.assert_numpy_array_equal(res.__array__(), exp_values) - # not all "old" included in "new" -> all not included ones are now np.nan - cat = Categorical(["a","b","c","a"], ordered=True) + # not all "old" included in "new" -> all not included ones are now + # np.nan + cat = Categorical(["a", "b", "c", "a"], ordered=True) res = cat.set_categories(["a"]) - self.assert_numpy_array_equal(res.codes, np.array([0,-1,-1,0])) + self.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0])) # still not all "old" in "new" - res = cat.set_categories(["a","b","d"]) - self.assert_numpy_array_equal(res.codes, np.array([0,1,-1,0])) - self.assert_numpy_array_equal(res.categories, np.array(["a","b","d"])) + res = cat.set_categories(["a", "b", "d"]) + self.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0])) + self.assert_numpy_array_equal(res.categories, + np.array(["a", "b", "d"])) # all "old" included in "new" - cat = cat.set_categories(["a","b","c","d"]) - exp_categories = np.array(["a","b","c","d"]) + cat = cat.set_categories(["a", "b", "c", "d"]) + exp_categories = np.array(["a", "b", "c", "d"]) self.assert_numpy_array_equal(cat.categories, exp_categories) # internals... - c = Categorical([1,2,3,4,1], categories=[1,2,3,4], ordered=True) - self.assert_numpy_array_equal(c._codes, np.array([0,1,2,3,0])) - self.assert_numpy_array_equal(c.categories , np.array([1,2,3,4] )) - self.assert_numpy_array_equal(c.get_values(), np.array([1,2,3,4,1] )) - c = c.set_categories([4,3,2,1]) # all "pointers" to '4' must be changed from 3 to 0,... - self.assert_numpy_array_equal(c._codes, np.array([3,2,1,0,3])) # positions are changed - self.assert_numpy_array_equal(c.categories, np.array([4,3,2,1])) # categories are now in new order - self.assert_numpy_array_equal(c.get_values(), np.array([1,2,3,4,1])) # output is the same + c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0])) + self.assert_numpy_array_equal(c.categories, np.array([1, 2, 3, 4])) + self.assert_numpy_array_equal(c.get_values(), + np.array([1, 2, 3, 4, 1])) + c = c.set_categories( + [4, 3, 2, 1 + ]) # all "pointers" to '4' must be changed from 3 to 0,... + self.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3]) + ) # positions are changed + self.assert_numpy_array_equal(c.categories, np.array([4, 3, 2, 1]) + ) # categories are now in new order + self.assert_numpy_array_equal(c.get_values(), np.array([1, 2, 3, 4, 1]) + ) # output is the same self.assertTrue(c.min(), 4) self.assertTrue(c.max(), 1) # set_categories should set the ordering if specified - c2 = c.set_categories([4,3,2,1],ordered=False) + c2 = c.set_categories([4, 3, 2, 1], ordered=False) self.assertFalse(c2.ordered) self.assert_numpy_array_equal(c.get_values(), c2.get_values()) # set_categories should pass thru the ordering - c2 = c.set_ordered(False).set_categories([4,3,2,1]) + c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) self.assertFalse(c2.ordered) self.assert_numpy_array_equal(c.get_values(), c2.get_values()) def test_rename_categories(self): - cat = pd.Categorical(["a","b","c","a"]) + cat = pd.Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed - res = cat.rename_categories([1,2,3]) - self.assert_numpy_array_equal(res.__array__(), np.array([1,2,3,1])) - self.assert_numpy_array_equal(res.categories, np.array([1,2,3])) - self.assert_numpy_array_equal(cat.__array__(), np.array(["a","b","c","a"])) - self.assert_numpy_array_equal(cat.categories, np.array(["a","b","c"])) - res = cat.rename_categories([1,2,3], inplace=True) + res = cat.rename_categories([1, 2, 3]) + self.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1])) + self.assert_numpy_array_equal(res.categories, np.array([1, 2, 3])) + self.assert_numpy_array_equal(cat.__array__(), + np.array(["a", "b", "c", "a"])) + self.assert_numpy_array_equal(cat.categories, + np.array(["a", "b", "c"])) + res = cat.rename_categories([1, 2, 3], inplace=True) # and now inplace self.assertIsNone(res) - self.assert_numpy_array_equal(cat.__array__(), np.array([1,2,3,1])) - self.assert_numpy_array_equal(cat.categories, np.array([1,2,3])) + self.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1])) + self.assert_numpy_array_equal(cat.categories, np.array([1, 2, 3])) # lengthen def f(): - cat.rename_categories([1,2,3,4]) + cat.rename_categories([1, 2, 3, 4]) + self.assertRaises(ValueError, f) + # shorten def f(): - cat.rename_categories([1,2]) + cat.rename_categories([1, 2]) + self.assertRaises(ValueError, f) def test_reorder_categories(self): - cat = Categorical(["a","b","c","a"], ordered=True) + cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() - new = Categorical(["a","b","c","a"], categories=["c","b","a"], ordered=True) + new = Categorical(["a", "b", "c", "a"], categories=["c", "b", "a"], + ordered=True) # first inplace == False - res = cat.reorder_categories(["c","b","a"]) + res = cat.reorder_categories(["c", "b", "a"]) # cat must be the same as before self.assert_categorical_equal(cat, old) # only res is changed self.assert_categorical_equal(res, new) # inplace == True - res = cat.reorder_categories(["c","b","a"], inplace=True) + res = cat.reorder_categories(["c", "b", "a"], inplace=True) self.assertIsNone(res) self.assert_categorical_equal(cat, new) # not all "old" included in "new" - cat = Categorical(["a","b","c","a"], ordered=True) + cat = Categorical(["a", "b", "c", "a"], ordered=True) + def f(): cat.reorder_categories(["a"]) + self.assertRaises(ValueError, f) # still not all "old" in "new" def f(): - cat.reorder_categories(["a","b","d"]) + cat.reorder_categories(["a", "b", "d"]) + self.assertRaises(ValueError, f) # all "old" included in "new", but too long def f(): - cat.reorder_categories(["a","b","c","d"]) + cat.reorder_categories(["a", "b", "c", "d"]) + self.assertRaises(ValueError, f) def test_add_categories(self): - cat = Categorical(["a","b","c","a"], ordered=True) + cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() - new = Categorical(["a","b","c","a"], categories=["a","b","c","d"], ordered=True) + new = Categorical(["a", "b", "c", "a"], + categories=["a", "b", "c", "d"], ordered=True) # first inplace == False res = cat.add_categories("d") @@ -790,11 +869,13 @@ def test_add_categories(self): # new is in old categories def f(): cat.add_categories(["d"]) + self.assertRaises(ValueError, f) # GH 9927 cat = Categorical(list("abc"), ordered=True) - expected = Categorical(list("abc"), categories=list("abcde"), ordered=True) + expected = Categorical( + list("abc"), categories=list("abcde"), ordered=True) # test with Series, np.array, index, list res = cat.add_categories(Series(["d", "e"])) self.assert_categorical_equal(res, expected) @@ -806,9 +887,10 @@ def f(): self.assert_categorical_equal(res, expected) def test_remove_categories(self): - cat = Categorical(["a","b","c","a"], ordered=True) + cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() - new = Categorical(["a","b",np.nan,"a"], categories=["a","b"], ordered=True) + new = Categorical(["a", "b", np.nan, "a"], categories=["a", "b"], + ordered=True) # first inplace == False res = cat.remove_categories("c") @@ -827,12 +909,14 @@ def test_remove_categories(self): # removal is not in categories def f(): cat.remove_categories(["c"]) + self.assertRaises(ValueError, f) def test_remove_unused_categories(self): - c = Categorical(["a","b","c","d","a"], categories=["a","b","c","d","e"]) - exp_categories_all = np.array(["a","b","c","d","e"]) - exp_categories_dropped = np.array(["a","b","c","d"]) + c = Categorical(["a", "b", "c", "d", "a"], + categories=["a", "b", "c", "d", "e"]) + exp_categories_all = np.array(["a", "b", "c", "d", "e"]) + exp_categories_dropped = np.array(["a", "b", "c", "d"]) self.assert_numpy_array_equal(c.categories, exp_categories_all) @@ -845,16 +929,18 @@ def test_remove_unused_categories(self): self.assertIsNone(res) # with NaN values (GH11599) - c = Categorical(["a","b","c",np.nan], categories=["a","b","c","d","e"]) + c = Categorical(["a", "b", "c", np.nan], + categories=["a", "b", "c", "d", "e"]) res = c.remove_unused_categories() - self.assert_numpy_array_equal(res.categories, np.array(["a","b","c"])) + self.assert_numpy_array_equal(res.categories, + np.array(["a", "b", "c"])) self.assert_numpy_array_equal(c.categories, exp_categories_all) val = ['F', np.nan, 'D', 'B', 'D', 'F', np.nan] cat = pd.Categorical(values=val, categories=list('ABCDEFG')) out = cat.remove_unused_categories() self.assert_numpy_array_equal(out.categories, ['B', 'D', 'F']) - self.assert_numpy_array_equal(out.codes, [ 2, -1, 1, 0, 1, 2, -1]) + self.assert_numpy_array_equal(out.codes, [2, -1, 1, 0, 1, 2, -1]) self.assertEqual(out.get_values().tolist(), val) alpha = list('abcdefghijklmnopqrstuvwxyz') @@ -868,51 +954,62 @@ def test_remove_unused_categories(self): def test_nan_handling(self): # Nans are represented as -1 in codes - c = Categorical(["a","b",np.nan,"a"]) - self.assert_numpy_array_equal(c.categories , np.array(["a","b"])) - self.assert_numpy_array_equal(c._codes , np.array([0,1,-1,0])) + c = Categorical(["a", "b", np.nan, "a"]) + self.assert_numpy_array_equal(c.categories, np.array(["a", "b"])) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0])) c[1] = np.nan - self.assert_numpy_array_equal(c.categories , np.array(["a","b"])) - self.assert_numpy_array_equal(c._codes , np.array([0,-1,-1,0])) + self.assert_numpy_array_equal(c.categories, np.array(["a", "b"])) + self.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0])) - # If categories have nan included, the code should point to that instead + # If categories have nan included, the code should point to that + # instead with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan]) - self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan], - dtype=np.object_)) - self.assert_numpy_array_equal(c._codes, np.array([0,1,2,0])) + c = Categorical(["a", "b", np.nan, "a"], + categories=["a", "b", np.nan]) + self.assert_numpy_array_equal(c.categories, + np.array(["a", "b", np.nan], + dtype=np.object_)) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 0])) c[1] = np.nan - self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan], - dtype=np.object_)) - self.assert_numpy_array_equal(c._codes, np.array([0,2,2,0])) + self.assert_numpy_array_equal(c.categories, + np.array(["a", "b", np.nan], + dtype=np.object_)) + self.assert_numpy_array_equal(c._codes, np.array([0, 2, 2, 0])) # Changing categories should also make the replaced category np.nan - c = Categorical(["a","b","c","a"]) + c = Categorical(["a", "b", "c", "a"]) with tm.assert_produces_warning(FutureWarning): - c.categories = ["a","b",np.nan] - self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan], - dtype=np.object_)) - self.assert_numpy_array_equal(c._codes, np.array([0,1,2,0])) - - # Adding nan to categories should make assigned nan point to the category! - c = Categorical(["a","b",np.nan,"a"]) - self.assert_numpy_array_equal(c.categories , np.array(["a","b"])) - self.assert_numpy_array_equal(c._codes , np.array([0,1,-1,0])) + c.categories = ["a", "b", np.nan] # noqa + + self.assert_numpy_array_equal(c.categories, + np.array(["a", "b", np.nan], + dtype=np.object_)) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 0])) + + # Adding nan to categories should make assigned nan point to the + # category! + c = Categorical(["a", "b", np.nan, "a"]) + self.assert_numpy_array_equal(c.categories, np.array(["a", "b"])) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0])) with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a","b",np.nan], rename=True, inplace=True) - self.assert_numpy_array_equal(c.categories, np.array(["a","b",np.nan], - dtype=np.object_)) - self.assert_numpy_array_equal(c._codes, np.array([0,1,-1,0])) + c.set_categories(["a", "b", np.nan], rename=True, inplace=True) + + self.assert_numpy_array_equal(c.categories, + np.array(["a", "b", np.nan], + dtype=np.object_)) + self.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0])) c[1] = np.nan - self.assert_numpy_array_equal(c.categories , np.array(["a","b",np.nan], - dtype=np.object_)) - self.assert_numpy_array_equal(c._codes, np.array([0,2,-1,0])) + self.assert_numpy_array_equal(c.categories, + np.array(["a", "b", np.nan], + dtype=np.object_)) + self.assert_numpy_array_equal(c._codes, np.array([0, 2, -1, 0])) # Remove null categories (GH 10156) cases = [ ([1.0, 2.0, np.nan], [1.0, 2.0]), (['a', 'b', None], ['a', 'b']), - ([pd.Timestamp('2012-05-01'), pd.NaT], [pd.Timestamp('2012-05-01')]) + ([pd.Timestamp('2012-05-01'), pd.NaT], + [pd.Timestamp('2012-05-01')]) ] null_values = [np.nan, None, pd.NaT] @@ -933,25 +1030,25 @@ def test_nan_handling(self): def f(): with tm.assert_produces_warning(FutureWarning): Categorical([], categories=nulls) - self.assertRaises(ValueError, f) + self.assertRaises(ValueError, f) def test_isnull(self): exp = np.array([False, False, True]) - c = Categorical(["a","b",np.nan]) + c = Categorical(["a", "b", np.nan]) res = c.isnull() self.assert_numpy_array_equal(res, exp) with tm.assert_produces_warning(FutureWarning): - c = Categorical(["a","b",np.nan], categories=["a","b",np.nan]) + c = Categorical(["a", "b", np.nan], categories=["a", "b", np.nan]) res = c.isnull() self.assert_numpy_array_equal(res, exp) # test both nan in categories and as -1 exp = np.array([True, False, True]) - c = Categorical(["a","b",np.nan]) + c = Categorical(["a", "b", np.nan]) with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a","b",np.nan], rename=True, inplace=True) + c.set_categories(["a", "b", np.nan], rename=True, inplace=True) c[0] = np.nan res = c.isnull() self.assert_numpy_array_equal(res, exp) @@ -959,48 +1056,53 @@ def test_isnull(self): def test_codes_immutable(self): # Codes should be read only - c = Categorical(["a","b","c","a", np.nan]) - exp = np.array([0,1,2,0,-1],dtype='int8') + c = Categorical(["a", "b", "c", "a", np.nan]) + exp = np.array([0, 1, 2, 0, -1], dtype='int8') self.assert_numpy_array_equal(c.codes, exp) # Assignments to codes should raise def f(): - c.codes = np.array([0,1,2,0,1],dtype='int8') + c.codes = np.array([0, 1, 2, 0, 1], dtype='int8') + self.assertRaises(ValueError, f) # changes in the codes array should raise # np 1.6.1 raises RuntimeError rather than ValueError - codes= c.codes + codes = c.codes + def f(): codes[4] = 1 + self.assertRaises(ValueError, f) - # But even after getting the codes, the original array should still be writeable! + # But even after getting the codes, the original array should still be + # writeable! c[4] = "a" - exp = np.array([0,1,2,0,0],dtype='int8') + exp = np.array([0, 1, 2, 0, 0], dtype='int8') self.assert_numpy_array_equal(c.codes, exp) c._codes[4] = 2 - exp = np.array([0,1,2,0, 2],dtype='int8') + exp = np.array([0, 1, 2, 0, 2], dtype='int8') self.assert_numpy_array_equal(c.codes, exp) - def test_min_max(self): # unordered cats have no min/max - cat = Categorical(["a","b","c","d"], ordered=False) - self.assertRaises(TypeError, lambda : cat.min()) - self.assertRaises(TypeError, lambda : cat.max()) - cat = Categorical(["a","b","c","d"], ordered=True) + cat = Categorical(["a", "b", "c", "d"], ordered=False) + self.assertRaises(TypeError, lambda: cat.min()) + self.assertRaises(TypeError, lambda: cat.max()) + cat = Categorical(["a", "b", "c", "d"], ordered=True) _min = cat.min() _max = cat.max() self.assertEqual(_min, "a") self.assertEqual(_max, "d") - cat = Categorical(["a","b","c","d"], categories=['d','c','b','a'], ordered=True) + cat = Categorical(["a", "b", "c", "d"], + categories=['d', 'c', 'b', 'a'], ordered=True) _min = cat.min() _max = cat.max() self.assertEqual(_min, "d") self.assertEqual(_max, "a") - cat = Categorical([np.nan,"b","c",np.nan], categories=['d','c','b','a'], ordered=True) + cat = Categorical([np.nan, "b", "c", np.nan], + categories=['d', 'c', 'b', 'a'], ordered=True) _min = cat.min() _max = cat.max() self.assertTrue(np.isnan(_min)) @@ -1011,7 +1113,8 @@ def test_min_max(self): _max = cat.max(numeric_only=True) self.assertEqual(_max, "b") - cat = Categorical([np.nan,1,2,np.nan], categories=[5,4,3,2,1], ordered=True) + cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], + ordered=True) _min = cat.min() _max = cat.max() self.assertTrue(np.isnan(_min)) @@ -1034,18 +1137,22 @@ def test_unique(self): self.assert_numpy_array_equal(res, exp) tm.assert_categorical_equal(res, Categorical(exp)) - cat = Categorical(["c", "a", "b", "a", "a"], categories=["a", "b", "c"]) + cat = Categorical(["c", "a", "b", "a", "a"], + categories=["a", "b", "c"]) exp = np.asarray(["c", "a", "b"]) res = cat.unique() self.assert_numpy_array_equal(res, exp) - tm.assert_categorical_equal(res, Categorical(exp, categories=['c', 'a', 'b'])) + tm.assert_categorical_equal(res, Categorical( + exp, categories=['c', 'a', 'b'])) # nan must be removed - cat = Categorical(["b", np.nan, "b", np.nan, "a"], categories=["a", "b", "c"]) + cat = Categorical(["b", np.nan, "b", np.nan, "a"], + categories=["a", "b", "c"]) res = cat.unique() exp = np.asarray(["b", np.nan, "a"], dtype=object) self.assert_numpy_array_equal(res, exp) - tm.assert_categorical_equal(res, Categorical(["b", np.nan, "a"], categories=["b", "a"])) + tm.assert_categorical_equal(res, Categorical( + ["b", np.nan, "a"], categories=["b", "a"])) def test_unique_ordered(self): # keep categories order when ordered=True @@ -1056,21 +1163,24 @@ def test_unique_ordered(self): self.assert_numpy_array_equal(res, exp) tm.assert_categorical_equal(res, exp_cat) - cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'], ordered=True) + cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'], + ordered=True) res = cat.unique() exp = np.asarray(['c', 'b', 'a']) exp_cat = Categorical(exp, categories=['a', 'b', 'c'], ordered=True) self.assert_numpy_array_equal(res, exp) tm.assert_categorical_equal(res, exp_cat) - cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'], ordered=True) + cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'], + ordered=True) res = cat.unique() exp = np.asarray(['b', 'a']) exp_cat = Categorical(exp, categories=['a', 'b'], ordered=True) self.assert_numpy_array_equal(res, exp) tm.assert_categorical_equal(res, exp_cat) - cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'], ordered=True) + cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'], + ordered=True) res = cat.unique() exp = np.asarray(['b', np.nan, 'a'], dtype=object) exp_cat = Categorical(exp, categories=['a', 'b'], ordered=True) @@ -1078,111 +1188,117 @@ def test_unique_ordered(self): tm.assert_categorical_equal(res, exp_cat) def test_mode(self): - s = Categorical([1,1,2,4,5,5,5], categories=[5,4,3,2,1], ordered=True) + s = Categorical([1, 1, 2, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1], + ordered=True) res = s.mode() - exp = Categorical([5], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([5], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) - s = Categorical([1,1,1,4,5,5,5], categories=[5,4,3,2,1], ordered=True) + s = Categorical([1, 1, 1, 4, 5, 5, 5], categories=[5, 4, 3, 2, 1], + ordered=True) res = s.mode() - exp = Categorical([5,1], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([5, 1], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) - s = Categorical([1,2,3,4,5], categories=[5,4,3,2,1], ordered=True) + s = Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], + ordered=True) res = s.mode() - exp = Categorical([], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) # NaN should not become the mode! - s = Categorical([np.nan,np.nan,np.nan,4,5], categories=[5,4,3,2,1], ordered=True) + s = Categorical([np.nan, np.nan, np.nan, 4, 5], + categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) - s = Categorical([np.nan,np.nan,np.nan,4,5,4], categories=[5,4,3,2,1], ordered=True) + s = Categorical([np.nan, np.nan, np.nan, 4, 5, 4], + categories=[5, 4, 3, 2, 1], ordered=True) res = s.mode() - exp = Categorical([4], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) - s = Categorical([np.nan,np.nan,4,5,4], categories=[5,4,3,2,1], ordered=True) + s = Categorical([np.nan, np.nan, 4, 5, 4], categories=[5, 4, 3, 2, 1], + ordered=True) res = s.mode() - exp = Categorical([4], categories=[5,4,3,2,1], ordered=True) + exp = Categorical([4], categories=[5, 4, 3, 2, 1], ordered=True) self.assertTrue(res.equals(exp)) - def test_sort(self): # unordered cats are sortable - cat = Categorical(["a","b","b","a"], ordered=False) + cat = Categorical(["a", "b", "b", "a"], ordered=False) cat.sort_values() cat.sort() - cat = Categorical(["a","c","b","d"], ordered=True) + cat = Categorical(["a", "c", "b", "d"], ordered=True) # sort_values res = cat.sort_values() - exp = np.array(["a","b","c","d"],dtype=object) + exp = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) - cat = Categorical(["a","c","b","d"], categories=["a","b","c","d"], ordered=True) + cat = Categorical(["a", "c", "b", "d"], + categories=["a", "b", "c", "d"], ordered=True) res = cat.sort_values() - exp = np.array(["a","b","c","d"],dtype=object) + exp = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) res = cat.sort_values(ascending=False) - exp = np.array(["d","c","b","a"],dtype=object) + exp = np.array(["d", "c", "b", "a"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp) # sort (inplace order) cat1 = cat.copy() cat1.sort() - exp = np.array(["a","b","c","d"],dtype=object) + exp = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(cat1.__array__(), exp) def test_slicing_directly(self): - cat = Categorical(["a","b","c","d","a","b","c"]) + cat = Categorical(["a", "b", "c", "d", "a", "b", "c"]) sliced = cat[3] tm.assert_equal(sliced, "d") sliced = cat[3:5] - expected = Categorical(["d","a"], categories=['a', 'b', 'c', 'd']) + expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd']) self.assert_numpy_array_equal(sliced._codes, expected._codes) tm.assert_index_equal(sliced.categories, expected.categories) def test_set_item_nan(self): - cat = pd.Categorical([1,2,3]) - exp = pd.Categorical([1,np.nan,3], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3]) + exp = pd.Categorical([1, np.nan, 3], categories=[1, 2, 3]) cat[1] = np.nan self.assertTrue(cat.equals(exp)) # if nan in categories, the proper code should be set! - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1,2,3, np.nan], rename=True, inplace=True) + cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) cat[1] = np.nan - exp = np.array([0,3,2,-1]) + exp = np.array([0, 3, 2, -1]) self.assert_numpy_array_equal(cat.codes, exp) - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1,2,3, np.nan], rename=True, inplace=True) + cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) cat[1:3] = np.nan - exp = np.array([0,3,3,-1]) + exp = np.array([0, 3, 3, -1]) self.assert_numpy_array_equal(cat.codes, exp) - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1,2,3, np.nan], rename=True, inplace=True) + cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) cat[1:3] = [np.nan, 1] - exp = np.array([0,3,0,-1]) + exp = np.array([0, 3, 0, -1]) self.assert_numpy_array_equal(cat.codes, exp) - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1,2,3, np.nan], rename=True, inplace=True) + cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) cat[1:3] = [np.nan, np.nan] - exp = np.array([0,3,3,-1]) + exp = np.array([0, 3, 3, -1]) self.assert_numpy_array_equal(cat.codes, exp) - cat = pd.Categorical([1,2, np.nan, 3], categories=[1,2,3]) + cat = pd.Categorical([1, 2, np.nan, 3], categories=[1, 2, 3]) with tm.assert_produces_warning(FutureWarning): - cat.set_categories([1,2,3, np.nan], rename=True, inplace=True) + cat.set_categories([1, 2, 3, np.nan], rename=True, inplace=True) cat[pd.isnull(cat)] = np.nan - exp = np.array([0,1,3,2]) + exp = np.array([0, 1, 3, 2]) self.assert_numpy_array_equal(cat.codes, exp) def test_shift(self): @@ -1198,7 +1314,7 @@ def test_shift(self): # shift back sn2 = cat.shift(-2) xp2 = pd.Categorical(['c', 'd', 'a', np.nan, np.nan], - categories=['a', 'b', 'c', 'd']) + categories=['a', 'b', 'c', 'd']) self.assert_categorical_equal(sn2, xp2) self.assert_categorical_equal(cat[2:], sn2[:-2]) @@ -1206,16 +1322,16 @@ def test_shift(self): self.assert_categorical_equal(cat, cat.shift(0)) def test_nbytes(self): - cat = pd.Categorical([1,2,3]) + cat = pd.Categorical([1, 2, 3]) exp = cat._codes.nbytes + cat._categories.values.nbytes self.assertEqual(cat.nbytes, exp) def test_memory_usage(self): - cat = pd.Categorical([1,2,3]) + cat = pd.Categorical([1, 2, 3]) self.assertEqual(cat.nbytes, cat.memory_usage()) self.assertEqual(cat.nbytes, cat.memory_usage(deep=True)) - cat = pd.Categorical(['foo','foo','bar']) + cat = pd.Categorical(['foo', 'foo', 'bar']) self.assertEqual(cat.nbytes, cat.memory_usage()) self.assertTrue(cat.memory_usage(deep=True) > cat.nbytes) @@ -1226,10 +1342,8 @@ def test_memory_usage(self): def test_searchsorted(self): # https://github.com/pydata/pandas/issues/8420 - s1 = pd.Series(['apple', 'bread', 'bread', 'cheese', - 'milk']) - s2 = pd.Series(['apple', 'bread', 'bread', 'cheese', - 'milk', 'donuts']) + s1 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk']) + s2 = pd.Series(['apple', 'bread', 'bread', 'cheese', 'milk', 'donuts']) c1 = pd.Categorical(s1, ordered=True) c2 = pd.Categorical(s2, ordered=True) @@ -1241,7 +1355,8 @@ def test_searchsorted(self): self.assert_numpy_array_equal(res, chk) # Scalar version of single item array - # Categorical return np.array like pd.Series, but different from np.array.searchsorted() + # Categorical return np.array like pd.Series, but different from + # np.array.searchsorted() res = c1.searchsorted('bread') chk = s1.searchsorted('bread') exp = np.array([1]) @@ -1258,20 +1373,24 @@ def test_searchsorted(self): # Searching for a value that is not present, to the right res = c1.searchsorted(['bread', 'eggs'], side='right') chk = s1.searchsorted(['bread', 'eggs'], side='right') - exp = np.array([3, 4]) # eggs before milk + exp = np.array([3, 4]) # eggs before milk self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) # As above, but with a sorter array to reorder an unsorted array - res = c2.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4]) - chk = s2.searchsorted(['bread', 'eggs'], side='right', sorter=[0, 1, 2, 3, 5, 4]) - exp = np.array([3, 5]) # eggs after donuts, after switching milk and donuts + res = c2.searchsorted(['bread', 'eggs'], side='right', + sorter=[0, 1, 2, 3, 5, 4]) + chk = s2.searchsorted(['bread', 'eggs'], side='right', + sorter=[0, 1, 2, 3, 5, 4]) + exp = np.array([3, 5] + ) # eggs after donuts, after switching milk and donuts self.assert_numpy_array_equal(res, exp) self.assert_numpy_array_equal(res, chk) def test_deprecated_labels(self): - # TODO: labels is deprecated and should be removed in 0.18 or 2017, whatever is earlier - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + # TODO: labels is deprecated and should be removed in 0.18 or 2017, + # whatever is earlier + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) exp = cat.codes with tm.assert_produces_warning(FutureWarning): res = cat.labels @@ -1279,14 +1398,15 @@ def test_deprecated_labels(self): self.assertFalse(LooseVersion(pd.__version__) >= '0.18') def test_deprecated_levels(self): - # TODO: levels is deprecated and should be removed in 0.18 or 2017, whatever is earlier - cat = pd.Categorical([1,2,3, np.nan], categories=[1,2,3]) + # TODO: levels is deprecated and should be removed in 0.18 or 2017, + # whatever is earlier + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) exp = cat.categories with tm.assert_produces_warning(FutureWarning): res = cat.levels self.assert_numpy_array_equal(res, exp) with tm.assert_produces_warning(FutureWarning): - res = pd.Categorical([1,2,3, np.nan], levels=[1,2,3]) + res = pd.Categorical([1, 2, 3, np.nan], levels=[1, 2, 3]) self.assert_numpy_array_equal(res.categories, exp) self.assertFalse(LooseVersion(pd.__version__) >= '0.18') @@ -1295,13 +1415,14 @@ def test_removed_names_produces_warning(self): # 10482 with tm.assert_produces_warning(UserWarning): - Categorical([0,1], name="a") + Categorical([0, 1], name="a") with tm.assert_produces_warning(UserWarning): - Categorical.from_codes([1,2], ["a","b","c"], name="a") + Categorical.from_codes([1, 2], ["a", "b", "c"], name="a") def test_datetime_categorical_comparison(self): - dt_cat = pd.Categorical(pd.date_range('2014-01-01', periods=3), ordered=True) + dt_cat = pd.Categorical( + pd.date_range('2014-01-01', periods=3), ordered=True) self.assert_numpy_array_equal(dt_cat > dt_cat[0], [False, True, True]) self.assert_numpy_array_equal(dt_cat[0] < dt_cat, [False, True, True]) @@ -1312,9 +1433,9 @@ def test_reflected_comparison_with_scalars(self): self.assert_numpy_array_equal(cat[0] < cat, [False, True, True]) def test_comparison_with_unknown_scalars(self): - # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 and following - # comparisons with scalars not in categories should raise for unequal comps, but not for - # equal/not equal + # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal cat = pd.Categorical([1, 2, 3], ordered=True) self.assertRaises(TypeError, lambda: cat < 4) @@ -1322,176 +1443,183 @@ def test_comparison_with_unknown_scalars(self): self.assertRaises(TypeError, lambda: 4 < cat) self.assertRaises(TypeError, lambda: 4 > cat) - self.assert_numpy_array_equal(cat == 4 , [False, False, False]) - self.assert_numpy_array_equal(cat != 4 , [True, True, True]) + self.assert_numpy_array_equal(cat == 4, [False, False, False]) + self.assert_numpy_array_equal(cat != 4, [True, True, True]) class TestCategoricalAsBlock(tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.factor = Categorical.from_array(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c']) + self.factor = Categorical.from_array(['a', 'b', 'b', 'a', 'a', 'c', + 'c', 'c']) df = DataFrame({'value': np.random.randint(0, 10000, 100)}) - labels = [ "{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500) ] + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] df = df.sort_values(by=['value'], ascending=True) - df['value_group'] = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) + df['value_group'] = pd.cut(df.value, range(0, 10500, 500), right=False, + labels=labels) self.cat = df def test_dtypes(self): - # GH8143 - index = ['cat','obj','num'] + index = ['cat', 'obj', 'num'] cat = pd.Categorical(['a', 'b', 'c']) obj = pd.Series(['a', 'b', 'c']) num = pd.Series([1, 2, 3]) df = pd.concat([pd.Series(cat), obj, num], axis=1, keys=index) result = df.dtypes == 'object' - expected = Series([False,True,False],index=index) + expected = Series([False, True, False], index=index) tm.assert_series_equal(result, expected) result = df.dtypes == 'int64' - expected = Series([False,False,True],index=index) + expected = Series([False, False, True], index=index) tm.assert_series_equal(result, expected) result = df.dtypes == 'category' - expected = Series([True,False,False],index=index) + expected = Series([True, False, False], index=index) tm.assert_series_equal(result, expected) def test_codes_dtypes(self): # GH 8453 - result = Categorical(['foo','bar','baz']) + result = Categorical(['foo', 'bar', 'baz']) self.assertTrue(result.codes.dtype == 'int8') - result = Categorical(['foo%05d' % i for i in range(400) ]) + result = Categorical(['foo%05d' % i for i in range(400)]) self.assertTrue(result.codes.dtype == 'int16') - result = Categorical(['foo%05d' % i for i in range(40000) ]) + result = Categorical(['foo%05d' % i for i in range(40000)]) self.assertTrue(result.codes.dtype == 'int32') # adding cats - result = Categorical(['foo','bar','baz']) + result = Categorical(['foo', 'bar', 'baz']) self.assertTrue(result.codes.dtype == 'int8') - result = result.add_categories(['foo%05d' % i for i in range(400) ]) + result = result.add_categories(['foo%05d' % i for i in range(400)]) self.assertTrue(result.codes.dtype == 'int16') # removing cats - result = result.remove_categories(['foo%05d' % i for i in range(300) ]) + result = result.remove_categories(['foo%05d' % i for i in range(300)]) self.assertTrue(result.codes.dtype == 'int8') def test_basic(self): # test basic creation / coercion of categoricals s = Series(self.factor, name='A') - self.assertEqual(s.dtype,'category') - self.assertEqual(len(s),len(self.factor)) + self.assertEqual(s.dtype, 'category') + self.assertEqual(len(s), len(self.factor)) str(s.values) str(s) # in a frame - df = DataFrame({'A' : self.factor }) + df = DataFrame({'A': self.factor}) result = df['A'] - tm.assert_series_equal(result,s) - result = df.iloc[:,0] - tm.assert_series_equal(result,s) - self.assertEqual(len(df),len(self.factor)) + tm.assert_series_equal(result, s) + result = df.iloc[:, 0] + tm.assert_series_equal(result, s) + self.assertEqual(len(df), len(self.factor)) str(df.values) str(df) - df = DataFrame({'A' : s }) + df = DataFrame({'A': s}) result = df['A'] - tm.assert_series_equal(result,s) - self.assertEqual(len(df),len(self.factor)) + tm.assert_series_equal(result, s) + self.assertEqual(len(df), len(self.factor)) str(df.values) str(df) # multiples - df = DataFrame({'A' : s, 'B' : s, 'C' : 1}) + df = DataFrame({'A': s, 'B': s, 'C': 1}) result1 = df['A'] result2 = df['B'] tm.assert_series_equal(result1, s) tm.assert_series_equal(result2, s, check_names=False) self.assertEqual(result2.name, 'B') - self.assertEqual(len(df),len(self.factor)) + self.assertEqual(len(df), len(self.factor)) str(df.values) str(df) # GH8623 - x = pd.DataFrame([[1,'John P. Doe'],[2,'Jane Dove'],[1,'John P. Doe']], - columns=['person_id','person_name']) - x['person_name'] = pd.Categorical(x.person_name) # doing this breaks transform + x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. Doe']], + columns=['person_id', 'person_name']) + x['person_name'] = pd.Categorical(x.person_name + ) # doing this breaks transform expected = x.iloc[0].person_name result = x.person_name.iloc[0] - self.assertEqual(result,expected) + self.assertEqual(result, expected) result = x.person_name[0] - self.assertEqual(result,expected) + self.assertEqual(result, expected) result = x.person_name.loc[0] - self.assertEqual(result,expected) + self.assertEqual(result, expected) def test_creation_astype(self): - l = ["a","b","c","a"] + l = ["a", "b", "c", "a"] s = pd.Series(l) exp = pd.Series(Categorical(l)) res = s.astype('category') tm.assert_series_equal(res, exp) - l = [1,2,3,1] + l = [1, 2, 3, 1] s = pd.Series(l) exp = pd.Series(Categorical(l)) res = s.astype('category') tm.assert_series_equal(res, exp) - df = pd.DataFrame({"cats":[1,2,3,4,5,6], "vals":[1,2,3,4,5,6]}) - cats = Categorical([1,2,3,4,5,6]) - exp_df = pd.DataFrame({"cats":cats, "vals":[1,2,3,4,5,6]}) - df["cats"] = df["cats"].astype("category") + df = pd.DataFrame({"cats": [1, 2, 3, 4, 5, 6], + "vals": [1, 2, 3, 4, 5, 6]}) + cats = Categorical([1, 2, 3, 4, 5, 6]) + exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") tm.assert_frame_equal(exp_df, df) - df = pd.DataFrame({"cats":['a', 'b', 'b', 'a', 'a', 'd'], "vals":[1,2,3,4,5,6]}) + df = pd.DataFrame({"cats": ['a', 'b', 'b', 'a', 'a', 'd'], + "vals": [1, 2, 3, 4, 5, 6]}) cats = Categorical(['a', 'b', 'b', 'a', 'a', 'd']) - exp_df = pd.DataFrame({"cats":cats, "vals":[1,2,3,4,5,6]}) - df["cats"] = df["cats"].astype("category") + exp_df = pd.DataFrame({"cats": cats, "vals": [1, 2, 3, 4, 5, 6]}) + df["cats"] = df["cats"].astype("category") tm.assert_frame_equal(exp_df, df) # with keywords - l = ["a","b","c","a"] + l = ["a", "b", "c", "a"] s = pd.Series(l) exp = pd.Series(Categorical(l, ordered=True)) res = s.astype('category', ordered=True) tm.assert_series_equal(res, exp) - exp = pd.Series(Categorical(l, categories=list('abcdef'), ordered=True)) + exp = pd.Series(Categorical( + l, categories=list('abcdef'), ordered=True)) res = s.astype('category', categories=list('abcdef'), ordered=True) tm.assert_series_equal(res, exp) def test_construction_series(self): - l = [1,2,3,1] + l = [1, 2, 3, 1] exp = Series(l).astype('category') - res = Series(l,dtype='category') + res = Series(l, dtype='category') tm.assert_series_equal(res, exp) - l = ["a","b","c","a"] + l = ["a", "b", "c", "a"] exp = Series(l).astype('category') - res = Series(l,dtype='category') + res = Series(l, dtype='category') tm.assert_series_equal(res, exp) # insert into frame with different index # GH 8076 index = pd.date_range('20000101', periods=3) - expected = Series(Categorical(values=[np.nan,np.nan,np.nan],categories=['a', 'b', 'c'])) + expected = Series(Categorical(values=[np.nan, np.nan, np.nan], + categories=['a', 'b', 'c'])) expected.index = index expected = DataFrame({'x': expected}) - df = DataFrame({'x': Series(['a', 'b', 'c'],dtype='category')}, index=index) + df = DataFrame( + {'x': Series(['a', 'b', 'c'], dtype='category')}, index=index) tm.assert_frame_equal(df, expected) def test_construction_frame(self): @@ -1499,7 +1627,7 @@ def test_construction_frame(self): # GH8626 # dict creation - df = DataFrame({ 'A' : list('abc') }, dtype='category') + df = DataFrame({'A': list('abc')}, dtype='category') expected = Series(list('abc'), dtype='category', name='A') tm.assert_series_equal(df['A'], expected) @@ -1519,25 +1647,31 @@ def test_construction_frame(self): # ndim != 1 df = DataFrame([pd.Categorical(list('abc'))]) - expected = DataFrame({ 0 : Series(list('abc'),dtype='category')}) - tm.assert_frame_equal(df,expected) + expected = DataFrame({0: Series(list('abc'), dtype='category')}) + tm.assert_frame_equal(df, expected) - df = DataFrame([pd.Categorical(list('abc')),pd.Categorical(list('abd'))]) - expected = DataFrame({ 0 : Series(list('abc'),dtype='category'), - 1 : Series(list('abd'),dtype='category')},columns=[0,1]) - tm.assert_frame_equal(df,expected) + df = DataFrame([pd.Categorical(list('abc')), pd.Categorical(list( + 'abd'))]) + expected = DataFrame({0: Series(list('abc'), dtype='category'), + 1: Series(list('abd'), dtype='category')}, + columns=[0, 1]) + tm.assert_frame_equal(df, expected) # mixed - df = DataFrame([pd.Categorical(list('abc')),list('def')]) - expected = DataFrame({ 0 : Series(list('abc'),dtype='category'), - 1 : list('def')},columns=[0,1]) - tm.assert_frame_equal(df,expected) + df = DataFrame([pd.Categorical(list('abc')), list('def')]) + expected = DataFrame({0: Series(list('abc'), dtype='category'), + 1: list('def')}, columns=[0, 1]) + tm.assert_frame_equal(df, expected) # invalid (shape) - self.assertRaises(ValueError, lambda : DataFrame([pd.Categorical(list('abc')),pd.Categorical(list('abdefg'))])) + self.assertRaises( + ValueError, + lambda: DataFrame([pd.Categorical(list('abc')), + pd.Categorical(list('abdefg'))])) # ndim > 1 - self.assertRaises(NotImplementedError, lambda : pd.Categorical(np.array([list('abcd')]))) + self.assertRaises(NotImplementedError, + lambda: pd.Categorical(np.array([list('abcd')]))) def test_reshaping(self): @@ -1547,12 +1681,12 @@ def test_reshaping(self): df['category'] = df['str'].astype('category') result = df['category'].unstack() - c = Categorical(['foo']*len(p.major_axis)) - expected = DataFrame({'A' : c.copy(), - 'B' : c.copy(), - 'C' : c.copy(), - 'D' : c.copy()}, - columns=Index(list('ABCD'),name='minor'), + c = Categorical(['foo'] * len(p.major_axis)) + expected = DataFrame({'A': c.copy(), + 'B': c.copy(), + 'C': c.copy(), + 'D': c.copy()}, + columns=Index(list('ABCD'), name='minor'), index=p.major_axis.set_names('major')) tm.assert_frame_equal(result, expected) @@ -1561,91 +1695,94 @@ def test_reindex(self): index = pd.date_range('20000101', periods=3) # reindexing to an invalid Categorical - s = Series(['a', 'b', 'c'],dtype='category') + s = Series(['a', 'b', 'c'], dtype='category') result = s.reindex(index) - expected = Series(Categorical(values=[np.nan,np.nan,np.nan],categories=['a', 'b', 'c'])) + expected = Series(Categorical(values=[np.nan, np.nan, np.nan], + categories=['a', 'b', 'c'])) expected.index = index tm.assert_series_equal(result, expected) # partial reindexing - expected = Series(Categorical(values=['b','c'],categories=['a', 'b', 'c'])) - expected.index = [1,2] - result = s.reindex([1,2]) + expected = Series(Categorical(values=['b', 'c'], categories=['a', 'b', + 'c'])) + expected.index = [1, 2] + result = s.reindex([1, 2]) tm.assert_series_equal(result, expected) - expected = Series(Categorical(values=['c',np.nan],categories=['a', 'b', 'c'])) - expected.index = [2,3] - result = s.reindex([2,3]) + expected = Series(Categorical( + values=['c', np.nan], categories=['a', 'b', 'c'])) + expected.index = [2, 3] + result = s.reindex([2, 3]) tm.assert_series_equal(result, expected) - - def test_sideeffects_free(self): - - # Passing a categorical to a Series and then changing values in either the series or the - # categorical should not change the values in the other one, IF you specify copy! - cat = Categorical(["a","b","c","a"]) - s = pd.Series(cat, copy=True) + # Passing a categorical to a Series and then changing values in either + # the series or the categorical should not change the values in the + # other one, IF you specify copy! + cat = Categorical(["a", "b", "c", "a"]) + s = pd.Series(cat, copy=True) self.assertFalse(s.cat is cat) - s.cat.categories = [1,2,3] - exp_s = np.array([1,2,3,1]) - exp_cat = np.array(["a","b","c","a"]) + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1]) + exp_cat = np.array(["a", "b", "c", "a"]) self.assert_numpy_array_equal(s.__array__(), exp_s) self.assert_numpy_array_equal(cat.__array__(), exp_cat) # setting s[0] = 2 - exp_s2 = np.array([2,2,3,1]) + exp_s2 = np.array([2, 2, 3, 1]) self.assert_numpy_array_equal(s.__array__(), exp_s2) self.assert_numpy_array_equal(cat.__array__(), exp_cat) # however, copy is False by default # so this WILL change values - cat = Categorical(["a","b","c","a"]) - s = pd.Series(cat) + cat = Categorical(["a", "b", "c", "a"]) + s = pd.Series(cat) self.assertTrue(s.values is cat) - s.cat.categories = [1,2,3] - exp_s = np.array([1,2,3,1]) + s.cat.categories = [1, 2, 3] + exp_s = np.array([1, 2, 3, 1]) self.assert_numpy_array_equal(s.__array__(), exp_s) self.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 - exp_s2 = np.array([2,2,3,1]) + exp_s2 = np.array([2, 2, 3, 1]) self.assert_numpy_array_equal(s.__array__(), exp_s2) self.assert_numpy_array_equal(cat.__array__(), exp_s2) def test_nan_handling(self): # Nans are represented as -1 in labels - s = Series(Categorical(["a","b",np.nan,"a"])) - self.assert_numpy_array_equal(s.cat.categories, np.array(["a","b"])) - self.assert_numpy_array_equal(s.values.codes, np.array([0,1,-1,0])) + s = Series(Categorical(["a", "b", np.nan, "a"])) + self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"])) + self.assert_numpy_array_equal(s.values.codes, np.array([0, 1, -1, 0])) - # If categories have nan included, the label should point to that instead + # If categories have nan included, the label should point to that + # instead with tm.assert_produces_warning(FutureWarning): - s2 = Series(Categorical(["a","b",np.nan,"a"], categories=["a","b",np.nan])) - self.assert_numpy_array_equal(s2.cat.categories, - np.array(["a","b",np.nan], dtype=np.object_)) - self.assert_numpy_array_equal(s2.values.codes, np.array([0,1,2,0])) + s2 = Series(Categorical( + ["a", "b", np.nan, "a"], categories=["a", "b", np.nan])) + self.assert_numpy_array_equal(s2.cat.categories, np.array( + ["a", "b", np.nan], dtype=np.object_)) + self.assert_numpy_array_equal(s2.values.codes, np.array([0, 1, 2, 0])) # Changing categories should also make the replaced category np.nan - s3 = Series(Categorical(["a","b","c","a"])) + s3 = Series(Categorical(["a", "b", "c", "a"])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - s3.cat.categories = ["a","b",np.nan] - self.assert_numpy_array_equal(s3.cat.categories, - np.array(["a","b",np.nan], dtype=np.object_)) - self.assert_numpy_array_equal(s3.values.codes, np.array([0,1,2,0])) + s3.cat.categories = ["a", "b", np.nan] + self.assert_numpy_array_equal(s3.cat.categories, np.array( + ["a", "b", np.nan], dtype=np.object_)) + self.assert_numpy_array_equal(s3.values.codes, np.array([0, 1, 2, 0])) def test_cat_accessor(self): - s = Series(Categorical(["a","b",np.nan,"a"])) - self.assert_numpy_array_equal(s.cat.categories, np.array(["a","b"])) + s = Series(Categorical(["a", "b", np.nan, "a"])) + self.assert_numpy_array_equal(s.cat.categories, np.array(["a", "b"])) self.assertEqual(s.cat.ordered, False) - exp = Categorical(["a","b",np.nan,"a"], categories=["b","a"]) + exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) s.cat.set_categories(["b", "a"], inplace=True) self.assertTrue(s.values.equals(exp)) res = s.cat.set_categories(["b", "a"]) self.assertTrue(res.values.equals(exp)) - exp = Categorical(["a","b",np.nan,"a"], categories=["b","a"]) + exp = Categorical(["a", "b", np.nan, "a"], categories=["b", "a"]) s[:] = "a" s = s.cat.remove_unused_categories() self.assert_numpy_array_equal(s.cat.categories, np.array(["a"])) @@ -1654,13 +1791,14 @@ def test_sequence_like(self): # GH 7839 # make sure can iterate - df = DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) df['grade'] = Categorical(df['raw_grade']) # basic sequencing testing result = list(df.grade.values) expected = np.array(df.grade.values).tolist() - tm.assert_almost_equal(result,expected) + tm.assert_almost_equal(result, expected) # iteration for t in df.itertuples(index=False): @@ -1675,24 +1813,27 @@ def test_sequence_like(self): def test_series_delegations(self): # invalid accessor - self.assertRaises(AttributeError, lambda : Series([1,2,3]).cat) - tm.assertRaisesRegexp(AttributeError, - r"Can only use .cat accessor with a 'category' dtype", - lambda : Series([1,2,3]).cat) - self.assertRaises(AttributeError, lambda : Series(['a','b','c']).cat) - self.assertRaises(AttributeError, lambda : Series(np.arange(5.)).cat) - self.assertRaises(AttributeError, lambda : Series([Timestamp('20130101')]).cat) - - # Series should delegate calls to '.categories', '.codes', '.ordered' and the - # methods '.set_categories()' 'drop_unused_categories()' to the categorical - s = Series(Categorical(["a","b","c","a"], ordered=True)) - exp_categories = np.array(["a","b","c"]) + self.assertRaises(AttributeError, lambda: Series([1, 2, 3]).cat) + tm.assertRaisesRegexp( + AttributeError, + r"Can only use .cat accessor with a 'category' dtype", + lambda: Series([1, 2, 3]).cat) + self.assertRaises(AttributeError, lambda: Series(['a', 'b', 'c']).cat) + self.assertRaises(AttributeError, lambda: Series(np.arange(5.)).cat) + self.assertRaises(AttributeError, + lambda: Series([Timestamp('20130101')]).cat) + + # Series should delegate calls to '.categories', '.codes', '.ordered' + # and the methods '.set_categories()' 'drop_unused_categories()' to the + # categorical + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = np.array(["a", "b", "c"]) self.assert_numpy_array_equal(s.cat.categories, exp_categories) - s.cat.categories = [1,2,3] - exp_categories = np.array([1,2,3]) + s.cat.categories = [1, 2, 3] + exp_categories = np.array([1, 2, 3]) self.assert_numpy_array_equal(s.cat.categories, exp_categories) - exp_codes = Series([0,1,2,0],dtype='int8') + exp_codes = Series([0, 1, 2, 0], dtype='int8') tm.assert_series_equal(s.cat.codes, exp_codes) self.assertEqual(s.cat.ordered, True) @@ -1702,39 +1843,44 @@ def test_series_delegations(self): self.assertEqual(s.cat.ordered, True) # reorder - s = Series(Categorical(["a","b","c","a"], ordered=True)) - exp_categories = np.array(["c","b","a"]) - exp_values = np.array(["a","b","c","a"]) - s = s.cat.set_categories(["c","b","a"]) + s = Series(Categorical(["a", "b", "c", "a"], ordered=True)) + exp_categories = np.array(["c", "b", "a"]) + exp_values = np.array(["a", "b", "c", "a"]) + s = s.cat.set_categories(["c", "b", "a"]) self.assert_numpy_array_equal(s.cat.categories, exp_categories) self.assert_numpy_array_equal(s.values.__array__(), exp_values) self.assert_numpy_array_equal(s.__array__(), exp_values) # remove unused categories - s = Series(Categorical(["a","b","b","a"], categories=["a","b","c"])) - exp_categories = np.array(["a","b"]) - exp_values = np.array(["a","b","b","a"]) + s = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c" + ])) + exp_categories = np.array(["a", "b"]) + exp_values = np.array(["a", "b", "b", "a"]) s = s.cat.remove_unused_categories() self.assert_numpy_array_equal(s.cat.categories, exp_categories) self.assert_numpy_array_equal(s.values.__array__(), exp_values) self.assert_numpy_array_equal(s.__array__(), exp_values) - # This method is likely to be confused, so test that it raises an error on wrong inputs: + # This method is likely to be confused, so test that it raises an error + # on wrong inputs: def f(): - s.set_categories([4,3,2,1]) + s.set_categories([4, 3, 2, 1]) + self.assertRaises(Exception, f) # right: s.cat.set_categories([4,3,2,1]) def test_series_functions_no_warnings(self): df = pd.DataFrame({'value': np.random.randint(0, 100, 20)}) - labels = [ "{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] + labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)] with tm.assert_produces_warning(False): - df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels) + df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, + labels=labels) def test_assignment_to_dataframe(self): # assignment - df = DataFrame({'value': np.array(np.random.randint(0, 10000, 100),dtype='int32')}) - labels = [ "{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500) ] + df = DataFrame({'value': np.array( + np.random.randint(0, 10000, 100), dtype='int32')}) + labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)] df = df.sort_values(by=['value'], ascending=True) s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels) @@ -1743,16 +1889,18 @@ def test_assignment_to_dataframe(self): str(df) result = df.dtypes - expected = Series([np.dtype('int32'), com.CategoricalDtype()],index=['value','D']) - tm.assert_series_equal(result,expected) + expected = Series( + [np.dtype('int32'), com.CategoricalDtype()], index=['value', 'D']) + tm.assert_series_equal(result, expected) df['E'] = s str(df) result = df.dtypes - expected = Series([np.dtype('int32'), com.CategoricalDtype(), com.CategoricalDtype()], - index=['value','D','E']) - tm.assert_series_equal(result,expected) + expected = Series([np.dtype('int32'), com.CategoricalDtype(), + com.CategoricalDtype()], + index=['value', 'D', 'E']) + tm.assert_series_equal(result, expected) result1 = df['D'] result2 = df['E'] @@ -1762,122 +1910,164 @@ def test_assignment_to_dataframe(self): s.name = 'E' self.assertTrue(result2.sort_index().equals(s.sort_index())) - cat = pd.Categorical([1,2,3,10], categories=[1,2,3,4,10]) + cat = pd.Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10]) df = pd.DataFrame(pd.Series(cat)) def test_describe(self): # Categoricals should not show up together with numerical columns result = self.cat.describe() - self.assertEqual(len(result.columns),1) - + self.assertEqual(len(result.columns), 1) - # In a frame, describe() for the cat should be the same as for string arrays (count, unique, - # top, freq) + # In a frame, describe() for the cat should be the same as for string + # arrays (count, unique, top, freq) - cat = Categorical(["a","b","b","b"], categories=['a','b','c'], ordered=True) + cat = Categorical(["a", "b", "b", "b"], categories=['a', 'b', 'c'], + ordered=True) s = Series(cat) result = s.describe() - expected = Series([4,2,"b",3],index=['count','unique','top', 'freq']) - tm.assert_series_equal(result,expected) + expected = Series([4, 2, "b", 3], + index=['count', 'unique', 'top', 'freq']) + tm.assert_series_equal(result, expected) - cat = pd.Series(pd.Categorical(["a","b","c","c"])) - df3 = pd.DataFrame({"cat":cat, "s":["a","b","c","c"]}) + cat = pd.Series(pd.Categorical(["a", "b", "c", "c"])) + df3 = pd.DataFrame({"cat": cat, "s": ["a", "b", "c", "c"]}) res = df3.describe() self.assert_numpy_array_equal(res["cat"].values, res["s"].values) def test_repr(self): - a = pd.Series(pd.Categorical([1,2,3,4])) + a = pd.Series(pd.Categorical([1, 2, 3, 4])) exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") self.assertEqual(exp, a.__unicode__()) - a = pd.Series(pd.Categorical(["a","b"] *25)) - exp = u("0 a\n1 b\n" + " ..\n" + - "48 a\n49 b\n" + + a = pd.Series(pd.Categorical(["a", "b"] * 25)) + exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + "dtype: category\nCategories (2, object): [a, b]") with option_context("display.max_rows", 5): self.assertEqual(exp, repr(a)) levs = list("abcdefghijklmnopqrstuvwxyz") - a = pd.Series(pd.Categorical(["a","b"], categories=levs, ordered=True)) - exp = u("0 a\n1 b\n" + - "dtype: category\n" + a = pd.Series(pd.Categorical( + ["a", "b"], categories=levs, ordered=True)) + exp = u("0 a\n1 b\n" + "dtype: category\n" "Categories (26, object): [a < b < c < d ... w < x < y < z]") - self.assertEqual(exp,a.__unicode__()) + self.assertEqual(exp, a.__unicode__()) def test_categorical_repr(self): - c = pd.Categorical([1, 2 ,3]) + c = pd.Categorical([1, 2, 3]) exp = """[1, 2, 3] Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(c), exp) - c = pd.Categorical([1, 2 ,3, 1, 2 ,3], categories=[1, 2, 3]) + c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]) exp = """[1, 2, 3, 1, 2, 3] Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(c), exp) c = pd.Categorical([1, 2, 3, 4, 5] * 10) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1, 2, 3, 4, 5]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(np.arange(20)) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_ordered(self): - c = pd.Categorical([1, 2 ,3], ordered=True) + c = pd.Categorical([1, 2, 3], ordered=True) exp = """[1, 2, 3] Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(c), exp) - c = pd.Categorical([1, 2 ,3, 1, 2 ,3], categories=[1, 2, 3], ordered=True) + c = pd.Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], + ordered=True) exp = """[1, 2, 3, 1, 2, 3] Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(c), exp) c = pd.Categorical([1, 2, 3, 4, 5] * 10, ordered=True) exp = """[1, 2, 3, 4, 5, ..., 1, 2, 3, 4, 5] Length: 50 Categories (5, int64): [1 < 2 < 3 < 4 < 5]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(np.arange(20), ordered=True) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_datetime(self): idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, - 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + + # TODO(wesm): exceeding 80 characters in the console is not good + # behavior + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]""") self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] -Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, - 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + exp = ( + "[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, " + "2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]\n" + "Categories (5, datetime64[ns]): [2011-01-01 09:00:00, " + "2011-01-01 10:00:00, 2011-01-01 11:00:00,\n" + " 2011-01-01 12:00:00, " + "2011-01-01 13:00:00]") + self.assertEqual(repr(c), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') c = pd.Categorical(idx) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n 2011-01-01 13:00:00-05:00]""" + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]") + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) - exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] -Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, - 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, - 2011-01-01 13:00:00-05:00]""" + exp = ( + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, " + "2011-01-01 13:00:00-05:00, 2011-01-01 09:00:00-05:00, " + "2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, " + "2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]\n" + "Categories (5, datetime64[ns, US/Eastern]): " + "[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,\n" + " " + "2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,\n" + " " + "2011-01-01 13:00:00-05:00]") + self.assertEqual(repr(c), exp) def test_categorical_repr_datetime_ordered(self): @@ -1885,21 +2075,25 @@ def test_categorical_repr_datetime_ordered(self): c = pd.Categorical(idx, ordered=True) exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00, 2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00] Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < - 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa + self.assertEqual(repr(c), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') c = pd.Categorical(idx, ordered=True) exp = """[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00] Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < - 2011-01-01 13:00:00-05:00]""" + 2011-01-01 13:00:00-05:00]""" # noqa + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) @@ -1907,6 +2101,7 @@ def test_categorical_repr_datetime_ordered(self): Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_period(self): @@ -1915,23 +2110,27 @@ def test_categorical_repr_period(self): exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_period_ordered(self): @@ -1940,23 +2139,27 @@ def test_categorical_repr_period_ordered(self): exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" + self.assertEqual(repr(c), exp) idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_timedelta(self): @@ -1964,11 +2167,13 @@ def test_categorical_repr_timedelta(self): c = pd.Categorical(idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + self.assertEqual(repr(c), exp) idx = pd.timedelta_range('1 hours', periods=20) @@ -1978,6 +2183,7 @@ def test_categorical_repr_timedelta(self): Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) @@ -1986,6 +2192,7 @@ def test_categorical_repr_timedelta(self): Categories (20, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) def test_categorical_repr_timedelta_ordered(self): @@ -1993,11 +2200,13 @@ def test_categorical_repr_timedelta_ordered(self): c = pd.Categorical(idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(c), exp) idx = pd.timedelta_range('1 hours', periods=20) @@ -2007,6 +2216,7 @@ def test_categorical_repr_timedelta_ordered(self): Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) @@ -2015,15 +2225,17 @@ def test_categorical_repr_timedelta_ordered(self): Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" + self.assertEqual(repr(c), exp) def test_categorical_series_repr(self): - s = pd.Series(pd.Categorical([1, 2 ,3])) + s = pd.Series(pd.Categorical([1, 2, 3])) exp = """0 1 1 2 2 3 dtype: category Categories (3, int64): [1, 2, 3]""" + self.assertEqual(repr(s), exp) s = pd.Series(pd.Categorical(np.arange(10))) @@ -2039,15 +2251,17 @@ def test_categorical_series_repr(self): 9 9 dtype: category Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_ordered(self): - s = pd.Series(pd.Categorical([1, 2 ,3], ordered=True)) + s = pd.Series(pd.Categorical([1, 2, 3], ordered=True)) exp = """0 1 1 2 2 3 dtype: category Categories (3, int64): [1 < 2 < 3]""" + self.assertEqual(repr(s), exp) s = pd.Series(pd.Categorical(np.arange(10), ordered=True)) @@ -2063,6 +2277,7 @@ def test_categorical_series_repr_ordered(self): 9 9 dtype: category Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_datetime(self): @@ -2076,9 +2291,11 @@ def test_categorical_series_repr_datetime(self): dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" + self.assertEqual(repr(s), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') s = pd.Series(pd.Categorical(idx)) exp = """0 2011-01-01 09:00:00-05:00 1 2011-01-01 10:00:00-05:00 @@ -2089,6 +2306,7 @@ def test_categorical_series_repr_datetime(self): Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_datetime_ordered(self): @@ -2102,9 +2320,11 @@ def test_categorical_series_repr_datetime_ordered(self): dtype: category Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 < 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" + self.assertEqual(repr(s), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') s = pd.Series(pd.Categorical(idx, ordered=True)) exp = """0 2011-01-01 09:00:00-05:00 1 2011-01-01 10:00:00-05:00 @@ -2115,6 +2335,7 @@ def test_categorical_series_repr_datetime_ordered(self): Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 < 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 < 2011-01-01 13:00:00-05:00]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_period(self): @@ -2128,6 +2349,7 @@ def test_categorical_series_repr_period(self): dtype: category Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]""" + self.assertEqual(repr(s), exp) idx = pd.period_range('2011-01', freq='M', periods=5) @@ -2139,6 +2361,7 @@ def test_categorical_series_repr_period(self): 4 2011-05 dtype: category Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_period_ordered(self): @@ -2152,6 +2375,7 @@ def test_categorical_series_repr_period_ordered(self): dtype: category Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < 2011-01-01 13:00]""" + self.assertEqual(repr(s), exp) idx = pd.period_range('2011-01', freq='M', periods=5) @@ -2163,6 +2387,7 @@ def test_categorical_series_repr_period_ordered(self): 4 2011-05 dtype: category Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_timedelta(self): @@ -2175,6 +2400,7 @@ def test_categorical_series_repr_timedelta(self): 4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" + self.assertEqual(repr(s), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2193,6 +2419,7 @@ def test_categorical_series_repr_timedelta(self): Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00]""" + self.assertEqual(repr(s), exp) def test_categorical_series_repr_timedelta_ordered(self): @@ -2205,6 +2432,7 @@ def test_categorical_series_repr_timedelta_ordered(self): 4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" + self.assertEqual(repr(s), exp) idx = pd.timedelta_range('1 hours', periods=10) @@ -2223,10 +2451,11 @@ def test_categorical_series_repr_timedelta_ordered(self): Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 < 8 days 01:00:00 < 9 days 01:00:00]""" + self.assertEqual(repr(s), exp) def test_categorical_index_repr(self): - idx = pd.CategoricalIndex(pd.Categorical([1, 2 ,3])) + idx = pd.CategoricalIndex(pd.Categorical([1, 2, 3])) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" self.assertEqual(repr(idx), exp) @@ -2235,7 +2464,7 @@ def test_categorical_index_repr(self): self.assertEqual(repr(i), exp) def test_categorical_index_repr_ordered(self): - i = pd.CategoricalIndex(pd.Categorical([1, 2 ,3], ordered=True)) + i = pd.CategoricalIndex(pd.Categorical([1, 2, 3], ordered=True)) exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" self.assertEqual(repr(i), exp) @@ -2250,14 +2479,17 @@ def test_categorical_index_repr_datetime(self): '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') i = pd.CategoricalIndex(pd.Categorical(idx)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) def test_categorical_index_repr_datetime_ordered(self): @@ -2267,14 +2499,17 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 11:00:00', '2011-01-01 12:00:00', '2011-01-01 13:00:00'], categories=[2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00, 2011-01-01 12:00:00, 2011-01-01 13:00:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) - idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + idx = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') i = pd.CategoricalIndex(pd.Categorical(idx, ordered=True)) exp = """CategoricalIndex(['2011-01-01 09:00:00-05:00', '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) i = pd.CategoricalIndex(pd.Categorical(idx.append(idx), ordered=True)) @@ -2284,6 +2519,7 @@ def test_categorical_index_repr_datetime_ordered(self): '2011-01-01 10:00:00-05:00', '2011-01-01 11:00:00-05:00', '2011-01-01 12:00:00-05:00', '2011-01-01 13:00:00-05:00'], categories=[2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00, 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00, 2011-01-01 13:00:00-05:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) def test_categorical_index_repr_period(self): @@ -2308,6 +2544,7 @@ def test_categorical_index_repr_period(self): exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) i = pd.CategoricalIndex(pd.Categorical(idx.append(idx))) @@ -2316,6 +2553,7 @@ def test_categorical_index_repr_period(self): '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) @@ -2329,6 +2567,7 @@ def test_categorical_index_repr_period_ordered(self): exp = """CategoricalIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00', '2011-01-01 12:00', '2011-01-01 13:00'], categories=[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) idx = pd.period_range('2011-01', freq='M', periods=5) @@ -2349,6 +2588,7 @@ def test_categorical_index_repr_timedelta(self): '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=False, dtype='category')""" + self.assertEqual(repr(i), exp) def test_categorical_index_repr_timedelta_ordered(self): @@ -2364,11 +2604,13 @@ def test_categorical_index_repr_timedelta_ordered(self): '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" + self.assertEqual(repr(i), exp) def test_categorical_frame(self): # normal DataFrame - dt = pd.date_range('2011-01-01 09:00', freq='H', periods=5, tz='US/Eastern') + dt = pd.date_range('2011-01-01 09:00', freq='H', periods=5, + tz='US/Eastern') p = pd.period_range('2011-01', freq='M', periods=5) df = pd.DataFrame({'dt': dt, 'p': p}) exp = """ dt p @@ -2385,12 +2627,13 @@ def test_info(self): # make sure it works n = 2500 - df = DataFrame({ 'int64' : np.random.randint(100,size=n) }) - df['category'] = Series(np.array(list('abcdefghij')).take(np.random.randint(0,10,size=n))).astype('category') + df = DataFrame({'int64': np.random.randint(100, size=n)}) + df['category'] = Series(np.array(list('abcdefghij')).take( + np.random.randint(0, 10, size=n))).astype('category') df.isnull() df.info() - df2 = df[df['category']=='d'] + df2 = df[df['category'] == 'd'] df2.info() def test_groupby_sort(self): @@ -2398,7 +2641,7 @@ def test_groupby_sort(self): # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby # This should result in a properly sorted Series so that the plot # has a sorted x axis - #self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') + # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar') res = self.cat.groupby(['value_group'])['value_group'].count() exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))] @@ -2407,56 +2650,68 @@ def test_groupby_sort(self): def test_min_max(self): # unordered cats have no min/max - cat = Series(Categorical(["a","b","c","d"], ordered=False)) - self.assertRaises(TypeError, lambda : cat.min()) - self.assertRaises(TypeError, lambda : cat.max()) + cat = Series(Categorical(["a", "b", "c", "d"], ordered=False)) + self.assertRaises(TypeError, lambda: cat.min()) + self.assertRaises(TypeError, lambda: cat.max()) - cat = Series(Categorical(["a","b","c","d"], ordered=True)) + cat = Series(Categorical(["a", "b", "c", "d"], ordered=True)) _min = cat.min() _max = cat.max() self.assertEqual(_min, "a") self.assertEqual(_max, "d") - cat = Series(Categorical(["a","b","c","d"], categories=['d','c','b','a'], ordered=True)) + cat = Series(Categorical(["a", "b", "c", "d"], categories=[ + 'd', 'c', 'b', 'a'], ordered=True)) _min = cat.min() _max = cat.max() self.assertEqual(_min, "d") self.assertEqual(_max, "a") - cat = Series(Categorical([np.nan,"b","c",np.nan], categories=['d','c','b','a'], ordered=True)) + cat = Series(Categorical( + [np.nan, "b", "c", np.nan], categories=['d', 'c', 'b', 'a' + ], ordered=True)) _min = cat.min() _max = cat.max() self.assertTrue(np.isnan(_min)) self.assertEqual(_max, "b") - cat = Series(Categorical([np.nan,1,2,np.nan], categories=[5,4,3,2,1], ordered=True)) + cat = Series(Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True)) _min = cat.min() _max = cat.max() self.assertTrue(np.isnan(_min)) self.assertEqual(_max, 1) def test_mode(self): - s = Series(Categorical([1,1,2,4,5,5,5], categories=[5,4,3,2,1], ordered=True)) + s = Series(Categorical([1, 1, 2, 4, 5, 5, 5], + categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([5], categories=[5,4,3,2,1], ordered=True)) + exp = Series(Categorical([5], categories=[ + 5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) - s = Series(Categorical([1,1,1,4,5,5,5], categories=[5,4,3,2,1], ordered=True)) + s = Series(Categorical([1, 1, 1, 4, 5, 5, 5], + categories=[5, 4, 3, 2, 1], ordered=True)) res = s.mode() - exp = Series(Categorical([5,1], categories=[5,4,3,2,1], ordered=True)) + exp = Series(Categorical([5, 1], categories=[ + 5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) - s = Series(Categorical([1,2,3,4,5], categories=[5,4,3,2,1], ordered=True)) + s = Series(Categorical([1, 2, 3, 4, 5], categories=[5, 4, 3, 2, 1], + ordered=True)) res = s.mode() - exp = Series(Categorical([], categories=[5,4,3,2,1], ordered=True)) + exp = Series(Categorical([], categories=[5, 4, 3, 2, 1], ordered=True)) tm.assert_series_equal(res, exp) def test_value_counts(self): - s = pd.Series(pd.Categorical(["a","b","c","c","c","b"], categories=["c","a","b","d"])) + s = pd.Series(pd.Categorical( + ["a", "b", "c", "c", "c", "b"], categories=["c", "a", "b", "d"])) res = s.value_counts(sort=False) - exp = Series([3,1,2,0], index=pd.CategoricalIndex(["c","a","b","d"])) + exp = Series([3, 1, 2, 0], + index=pd.CategoricalIndex(["c", "a", "b", "d"])) tm.assert_series_equal(res, exp) res = s.value_counts(sort=True) - exp = Series([3,2,1,0], index=pd.CategoricalIndex(["c","b","a","d"])) + exp = Series([3, 2, 1, 0], + index=pd.CategoricalIndex(["c", "b", "a", "d"])) tm.assert_series_equal(res, exp) def test_value_counts_with_nan(self): @@ -2481,40 +2736,50 @@ def test_value_counts_with_nan(self): # category, it should be last. tm.assert_series_equal( s.value_counts(dropna=False, sort=False), - pd.Series([2, 1, 3], index=pd.CategoricalIndex(["a", "b", np.nan]))) + pd.Series([2, 1, 3], + index=pd.CategoricalIndex(["a", "b", np.nan]))) with tm.assert_produces_warning(FutureWarning): - s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", np.nan])) + s = pd.Series(pd.Categorical( + ["a", "b", "a"], categories=["a", "b", np.nan])) tm.assert_series_equal( s.value_counts(dropna=True), pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([2, 1, 0], index=pd.CategoricalIndex(["a", "b", np.nan]))) + pd.Series([2, 1, 0], + index=pd.CategoricalIndex(["a", "b", np.nan]))) with tm.assert_produces_warning(FutureWarning): - s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None], - categories=["a", "b", np.nan])) + s = pd.Series(pd.Categorical( + ["a", "b", None, "a", None, None], categories=["a", "b", np.nan + ])) tm.assert_series_equal( s.value_counts(dropna=True), pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))) tm.assert_series_equal( s.value_counts(dropna=False), - pd.Series([3, 2, 1], index=pd.CategoricalIndex([np.nan, "a", "b"]))) + pd.Series([3, 2, 1], + index=pd.CategoricalIndex([np.nan, "a", "b"]))) def test_groupby(self): - cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], categories=["a","b","c","d"], ordered=True) - data = DataFrame({"a":[1,1,1,2,2,2,3,4,5], "b":cats}) + cats = Categorical( + ["a", "a", "a", "b", "b", "b", "c", "c", "c" + ], categories=["a", "b", "c", "d"], ordered=True) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) - expected = DataFrame({'a': Series([1, 2, 4, np.nan], - index=pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='b'))}) + expected = DataFrame({'a': Series( + [1, 2, 4, np.nan], index=pd.CategoricalIndex( + ['a', 'b', 'c', 'd'], name='b'))}) result = data.groupby("b").mean() tm.assert_frame_equal(result, expected) - raw_cat1 = Categorical(["a","a","b","b"], categories=["a","b","z"], ordered=True) - raw_cat2 = Categorical(["c","d","c","d"], categories=["c","d","y"], ordered=True) - df = DataFrame({"A":raw_cat1,"B":raw_cat2, "values":[1,2,3,4]}) + raw_cat1 = Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + raw_cat2 = Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) # single grouper gb = df.groupby("A") @@ -2524,60 +2789,63 @@ def test_groupby(self): tm.assert_frame_equal(result, expected) # multiple groupers - gb = df.groupby(['A','B']) - expected = DataFrame({ 'values' : Series([1,2,np.nan,3,4,np.nan,np.nan,np.nan,np.nan], - index=pd.MultiIndex.from_product([['a','b','z'],['c','d','y']],names=['A','B'])) }) + gb = df.groupby(['A', 'B']) + expected = DataFrame({'values': Series( + [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan + ], index=pd.MultiIndex.from_product( + [['a', 'b', 'z'], ['c', 'd', 'y']], names=['A', 'B']))}) result = gb.sum() tm.assert_frame_equal(result, expected) # multiple groupers with a non-cat df = df.copy() - df['C'] = ['foo','bar']*2 - gb = df.groupby(['A','B','C']) - expected = DataFrame({ 'values' : - Series(np.nan,index=pd.MultiIndex.from_product([['a','b','z'], - ['c','d','y'], - ['foo','bar']], - names=['A','B','C'])) - }).sortlevel() - expected.iloc[[1,2,7,8],0] = [1,2,3,4] + df['C'] = ['foo', 'bar'] * 2 + gb = df.groupby(['A', 'B', 'C']) + expected = DataFrame({'values': Series( + np.nan, index=pd.MultiIndex.from_product( + [['a', 'b', 'z'], ['c', 'd', 'y'], ['foo', 'bar'] + ], names=['A', 'B', 'C']))}).sortlevel() + expected.iloc[[1, 2, 7, 8], 0] = [1, 2, 3, 4] result = gb.sum() tm.assert_frame_equal(result, expected) # GH 8623 - x=pd.DataFrame([[1,'John P. Doe'],[2,'Jane Dove'],[1,'John P. Doe']], - columns=['person_id','person_name']) + x = pd.DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'], + [1, 'John P. Doe']], + columns=['person_id', 'person_name']) x['person_name'] = pd.Categorical(x.person_name) g = x.groupby(['person_id']) - result = g.transform(lambda x:x) + result = g.transform(lambda x: x) tm.assert_frame_equal(result, x[['person_name']]) result = x.drop_duplicates('person_name') - expected = x.iloc[[0,1]] + expected = x.iloc[[0, 1]] tm.assert_frame_equal(result, expected) def f(x): return x.drop_duplicates('person_name').iloc[0] result = g.apply(f) - expected = x.iloc[[0,1]].copy() - expected.index = Index([1,2],name='person_id') + expected = x.iloc[[0, 1]].copy() + expected.index = Index([1, 2], name='person_id') expected['person_name'] = expected['person_name'].astype('object') tm.assert_frame_equal(result, expected) # GH 9921 # Monotonic df = DataFrame({"a": [5, 15, 25]}) - c = pd.cut(df.a, bins=[0,10,20,30,40]) + c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) result = df.a.groupby(c).transform(sum) tm.assert_series_equal(result, df['a'], check_names=False) self.assertTrue(result.name is None) - tm.assert_series_equal(df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal(df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.max(xs)), df[['a']]) # Filter tm.assert_series_equal(df.a.groupby(c).filter(np.all), df['a']) @@ -2585,45 +2853,53 @@ def f(x): # Non-monotonic df = DataFrame({"a": [5, 15, 25, -5]}) - c = pd.cut(df.a, bins=[-10, 0,10,20,30,40]) + c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) result = df.a.groupby(c).transform(sum) tm.assert_series_equal(result, df['a'], check_names=False) self.assertTrue(result.name is None) - tm.assert_series_equal(df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) + tm.assert_series_equal( + df.a.groupby(c).transform(lambda xs: np.sum(xs)), df['a']) tm.assert_frame_equal(df.groupby(c).transform(sum), df[['a']]) - tm.assert_frame_equal(df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) + tm.assert_frame_equal( + df.groupby(c).transform(lambda xs: np.sum(xs)), df[['a']]) # GH 9603 df = pd.DataFrame({'a': [1, 0, 0, 0]}) c = pd.cut(df.a, [0, 1, 2, 3, 4]) result = df.groupby(c).apply(len) - expected = pd.Series([1, 0, 0, 0], index=pd.CategoricalIndex(c.values.categories)) + expected = pd.Series([1, 0, 0, 0], + index=pd.CategoricalIndex(c.values.categories)) expected.index.name = 'a' tm.assert_series_equal(result, expected) def test_pivot_table(self): - raw_cat1 = Categorical(["a","a","b","b"], categories=["a","b","z"], ordered=True) - raw_cat2 = Categorical(["c","d","c","d"], categories=["c","d","y"], ordered=True) - df = DataFrame({"A":raw_cat1,"B":raw_cat2, "values":[1,2,3,4]}) + raw_cat1 = Categorical(["a", "a", "b", "b"], + categories=["a", "b", "z"], ordered=True) + raw_cat2 = Categorical(["c", "d", "c", "d"], + categories=["c", "d", "y"], ordered=True) + df = DataFrame({"A": raw_cat1, "B": raw_cat2, "values": [1, 2, 3, 4]}) result = pd.pivot_table(df, values='values', index=['A', 'B']) - expected = Series([1,2,np.nan,3,4,np.nan,np.nan,np.nan,np.nan], - index=pd.MultiIndex.from_product([['a','b','z'],['c','d','y']],names=['A','B']), + expected = Series([1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan], + index=pd.MultiIndex.from_product( + [['a', 'b', 'z'], ['c', 'd', 'y']], + names=['A', 'B']), name='values') tm.assert_series_equal(result, expected) def test_count(self): - s = Series(Categorical([np.nan,1,2,np.nan], categories=[5,4,3,2,1], ordered=True)) + s = Series(Categorical([np.nan, 1, 2, np.nan], + categories=[5, 4, 3, 2, 1], ordered=True)) result = s.count() self.assertEqual(result, 2) def test_sort(self): - c = Categorical(["a","b","b","a"], ordered=False) + c = Categorical(["a", "b", "b", "a"], ordered=False) cat = Series(c) # 9816 deprecated @@ -2631,28 +2907,36 @@ def test_sort(self): c.order() # sort in the categories order - expected = Series(Categorical(["a","a","b","b"], ordered=False),index=[0,3,1,2]) + expected = Series( + Categorical(["a", "a", "b", "b"], + ordered=False), index=[0, 3, 1, 2]) result = cat.sort_values() tm.assert_series_equal(result, expected) - cat = Series(Categorical(["a","c","b","d"], ordered=True)) + cat = Series(Categorical(["a", "c", "b", "d"], ordered=True)) res = cat.sort_values() - exp = np.array(["a","b","c","d"]) + exp = np.array(["a", "b", "c", "d"]) self.assert_numpy_array_equal(res.__array__(), exp) - cat = Series(Categorical(["a","c","b","d"], categories=["a","b","c","d"], ordered=True)) + cat = Series(Categorical(["a", "c", "b", "d"], categories=[ + "a", "b", "c", "d"], ordered=True)) res = cat.sort_values() - exp = np.array(["a","b","c","d"]) + exp = np.array(["a", "b", "c", "d"]) self.assert_numpy_array_equal(res.__array__(), exp) res = cat.sort_values(ascending=False) - exp = np.array(["d","c","b","a"]) + exp = np.array(["d", "c", "b", "a"]) self.assert_numpy_array_equal(res.__array__(), exp) - raw_cat1 = Categorical(["a","b","c","d"], categories=["a","b","c","d"], ordered=False) - raw_cat2 = Categorical(["a","b","c","d"], categories=["d","c","b","a"], ordered=True) - s = ["a","b","c","d"] - df = DataFrame({"unsort":raw_cat1,"sort":raw_cat2, "string":s, "values":[1,2,3,4]}) + raw_cat1 = Categorical(["a", "b", "c", "d"], + categories=["a", "b", "c", "d"], ordered=False) + raw_cat2 = Categorical(["a", "b", "c", "d"], + categories=["d", "c", "b", "a"], ordered=True) + s = ["a", "b", "c", "d"] + df = DataFrame({"unsort": raw_cat1, + "sort": raw_cat2, + "string": s, + "values": [1, 2, 3, 4]}) # Cats must be sorted in a dataframe res = df.sort_values(by=["string"], ascending=False) @@ -2671,78 +2955,79 @@ def test_sort(self): # multi-columns sort # GH 7848 - df = DataFrame({"id":[6,5,4,3,2,1], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = DataFrame({"id": [6, 5, 4, 3, 2, 1], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) df["grade"] = pd.Categorical(df["raw_grade"], ordered=True) df['grade'] = df['grade'].cat.set_categories(['b', 'e', 'a']) # sorts 'grade' according to the order of the categories result = df.sort_values(by=['grade']) - expected = df.iloc[[1,2,5,0,3,4]] - tm.assert_frame_equal(result,expected) + expected = df.iloc[[1, 2, 5, 0, 3, 4]] + tm.assert_frame_equal(result, expected) # multi result = df.sort_values(by=['grade', 'id']) - expected = df.iloc[[2,1,5,4,3,0]] - tm.assert_frame_equal(result,expected) + expected = df.iloc[[2, 1, 5, 4, 3, 0]] + tm.assert_frame_equal(result, expected) # reverse - cat = Categorical(["a","c","c","b","d"], ordered=True) + cat = Categorical(["a", "c", "c", "b", "d"], ordered=True) res = cat.sort_values(ascending=False) - exp_val = np.array(["d","c", "c", "b","a"],dtype=object) - exp_categories = np.array(["a","b","c","d"],dtype=object) + exp_val = np.array(["d", "c", "c", "b", "a"], dtype=object) + exp_categories = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) # some NaN positions - cat = Categorical(["a","c","b","d", np.nan], ordered=True) + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) res = cat.sort_values(ascending=False, na_position='last') - exp_val = np.array(["d","c","b","a", np.nan],dtype=object) - exp_categories = np.array(["a","b","c","d"],dtype=object) + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) - cat = Categorical(["a","c","b","d", np.nan], ordered=True) + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) res = cat.sort_values(ascending=False, na_position='first') - exp_val = np.array([np.nan, "d","c","b","a"],dtype=object) - exp_categories = np.array(["a","b","c","d"],dtype=object) + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) - cat = Categorical(["a","c","b","d", np.nan], ordered=True) + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) res = cat.sort_values(ascending=False, na_position='first') - exp_val = np.array([np.nan, "d","c","b","a"],dtype=object) - exp_categories = np.array(["a","b","c","d"],dtype=object) + exp_val = np.array([np.nan, "d", "c", "b", "a"], dtype=object) + exp_categories = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) - cat = Categorical(["a","c","b","d", np.nan], ordered=True) + cat = Categorical(["a", "c", "b", "d", np.nan], ordered=True) res = cat.sort_values(ascending=False, na_position='last') - exp_val = np.array(["d","c","b","a",np.nan],dtype=object) - exp_categories = np.array(["a","b","c","d"],dtype=object) + exp_val = np.array(["d", "c", "b", "a", np.nan], dtype=object) + exp_categories = np.array(["a", "b", "c", "d"], dtype=object) self.assert_numpy_array_equal(res.__array__(), exp_val) self.assert_numpy_array_equal(res.categories, exp_categories) def test_slicing(self): - cat = Series(Categorical([1,2,3,4])) + cat = Series(Categorical([1, 2, 3, 4])) reversed = cat[::-1] - exp = np.array([4,3,2,1]) + exp = np.array([4, 3, 2, 1]) self.assert_numpy_array_equal(reversed.__array__(), exp) - df = DataFrame({'value': (np.arange(100)+1).astype('int64')}) - df['D'] = pd.cut(df.value, bins=[0,25,50,75,100]) + df = DataFrame({'value': (np.arange(100) + 1).astype('int64')}) + df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100]) - expected = Series([11,'(0, 25]'], index=['value','D'], name=10) + expected = Series([11, '(0, 25]'], index=['value', 'D'], name=10) result = df.iloc[10] tm.assert_series_equal(result, expected) - expected = DataFrame({'value': np.arange(11,21).astype('int64')}, - index=np.arange(10,20).astype('int64')) - expected['D'] = pd.cut(expected.value, bins=[0,25,50,75,100]) + expected = DataFrame({'value': np.arange(11, 21).astype('int64')}, + index=np.arange(10, 20).astype('int64')) + expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100]) result = df.iloc[10:20] tm.assert_frame_equal(result, expected) - expected = Series([9,'(0, 25]'],index=['value', 'D'], name=8) + expected = Series([9, '(0, 25]'], index=['value', 'D'], name=8) result = df.loc[8] tm.assert_series_equal(result, expected) @@ -2755,107 +3040,109 @@ def test_slicing_and_getting_ops(self): # - returning a row # - returning a single value - cats = pd.Categorical(["a","c","b","c","c","c","c"], categories=["a","b","c"]) - idx = pd.Index(["h","i","j","k","l","m","n"]) - values= [1,2,3,4,5,6,7] - df = pd.DataFrame({"cats":cats,"values":values}, index=idx) + cats = pd.Categorical( + ["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"]) + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 2, 3, 4, 5, 6, 7] + df = pd.DataFrame({"cats": cats, "values": values}, index=idx) # the expected values - cats2 = pd.Categorical(["b","c"], categories=["a","b","c"]) - idx2 = pd.Index(["j","k"]) - values2= [3,4] + cats2 = pd.Categorical(["b", "c"], categories=["a", "b", "c"]) + idx2 = pd.Index(["j", "k"]) + values2 = [3, 4] # 2:4,: | "j":"k",: - exp_df = pd.DataFrame({"cats":cats2,"values":values2}, index=idx2) + exp_df = pd.DataFrame({"cats": cats2, "values": values2}, index=idx2) # :,"cats" | :,0 - exp_col = pd.Series(cats,index=idx,name='cats') + exp_col = pd.Series(cats, index=idx, name='cats') # "j",: | 2,: - exp_row = pd.Series(["b",3], index=["cats","values"], dtype="object", name="j") + exp_row = pd.Series(["b", 3], index=["cats", "values"], dtype="object", + name="j") # "j","cats | 2,0 exp_val = "b" # iloc # frame - res_df = df.iloc[2:4,:] + res_df = df.iloc[2:4, :] tm.assert_frame_equal(res_df, exp_df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) # row - res_row = df.iloc[2,:] + res_row = df.iloc[2, :] tm.assert_series_equal(res_row, exp_row) tm.assertIsInstance(res_row["cats"], compat.string_types) # col - res_col = df.iloc[:,0] + res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) self.assertTrue(com.is_categorical_dtype(res_col)) # single value - res_val = df.iloc[2,0] + res_val = df.iloc[2, 0] self.assertEqual(res_val, exp_val) # loc # frame - res_df = df.loc["j":"k",:] + res_df = df.loc["j":"k", :] tm.assert_frame_equal(res_df, exp_df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) # row - res_row = df.loc["j",:] + res_row = df.loc["j", :] tm.assert_series_equal(res_row, exp_row) tm.assertIsInstance(res_row["cats"], compat.string_types) # col - res_col = df.loc[:,"cats"] + res_col = df.loc[:, "cats"] tm.assert_series_equal(res_col, exp_col) self.assertTrue(com.is_categorical_dtype(res_col)) # single value - res_val = df.loc["j","cats"] + res_val = df.loc["j", "cats"] self.assertEqual(res_val, exp_val) # ix # frame - #res_df = df.ix["j":"k",[0,1]] # doesn't work? - res_df = df.ix["j":"k",:] + # res_df = df.ix["j":"k",[0,1]] # doesn't work? + res_df = df.ix["j":"k", :] tm.assert_frame_equal(res_df, exp_df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) # row - res_row = df.ix["j",:] + res_row = df.ix["j", :] tm.assert_series_equal(res_row, exp_row) tm.assertIsInstance(res_row["cats"], compat.string_types) # col - res_col = df.ix[:,"cats"] + res_col = df.ix[:, "cats"] tm.assert_series_equal(res_col, exp_col) self.assertTrue(com.is_categorical_dtype(res_col)) # single value - res_val = df.ix["j",0] + res_val = df.ix["j", 0] self.assertEqual(res_val, exp_val) # iat - res_val = df.iat[2,0] + res_val = df.iat[2, 0] self.assertEqual(res_val, exp_val) # at - res_val = df.at["j","cats"] + res_val = df.at["j", "cats"] self.assertEqual(res_val, exp_val) # fancy indexing exp_fancy = df.iloc[[2]] res_fancy = df[df["cats"] == "b"] - tm.assert_frame_equal(res_fancy,exp_fancy) + tm.assert_frame_equal(res_fancy, exp_fancy) res_fancy = df[df["values"] == 3] - tm.assert_frame_equal(res_fancy,exp_fancy) + tm.assert_frame_equal(res_fancy, exp_fancy) # get_value - res_val = df.get_value("j","cats") + res_val = df.get_value("j", "cats") self.assertEqual(res_val, exp_val) # i : int, slice, or sequence of integers @@ -2863,372 +3150,430 @@ def test_slicing_and_getting_ops(self): tm.assert_series_equal(res_row, exp_row) tm.assertIsInstance(res_row["cats"], compat.string_types) - res_df = df.iloc[slice(2,4)] + res_df = df.iloc[slice(2, 4)] tm.assert_frame_equal(res_df, exp_df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) - res_df = df.iloc[[2,3]] + res_df = df.iloc[[2, 3]] tm.assert_frame_equal(res_df, exp_df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) - res_col = df.iloc[:,0] + res_col = df.iloc[:, 0] tm.assert_series_equal(res_col, exp_col) self.assertTrue(com.is_categorical_dtype(res_col)) - res_df = df.iloc[:,slice(0,2)] + res_df = df.iloc[:, slice(0, 2)] tm.assert_frame_equal(res_df, df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) - res_df = df.iloc[:,[0,1]] + res_df = df.iloc[:, [0, 1]] tm.assert_frame_equal(res_df, df) self.assertTrue(com.is_categorical_dtype(res_df["cats"])) def test_slicing_doc_examples(self): - #GH 7918 - cats = Categorical(["a","b","b","b","c","c","c"], categories=["a","b","c"]) - idx = Index(["h","i","j","k","l","m","n",]) - values= [1,2,2,2,3,4,5] - df = DataFrame({"cats":cats,"values":values}, index=idx) - - result = df.iloc[2:4,:] - expected = DataFrame({"cats":Categorical(['b','b'],categories=['a','b','c']),"values":[2,2]}, index=['j','k']) + # GH 7918 + cats = Categorical( + ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c"]) + idx = Index(["h", "i", "j", "k", "l", "m", "n", ]) + values = [1, 2, 2, 2, 3, 4, 5] + df = DataFrame({"cats": cats, "values": values}, index=idx) + + result = df.iloc[2:4, :] + expected = DataFrame( + {"cats": Categorical( + ['b', 'b'], categories=['a', 'b', 'c']), + "values": [2, 2]}, index=['j', 'k']) tm.assert_frame_equal(result, expected) - result = df.iloc[2:4,:].dtypes - expected = Series(['category','int64'],['cats','values']) + result = df.iloc[2:4, :].dtypes + expected = Series(['category', 'int64'], ['cats', 'values']) tm.assert_series_equal(result, expected) - result = df.loc["h":"j","cats"] - expected = Series(Categorical(['a','b','b'], - categories=['a','b','c']), index=['h','i','j'], name='cats') + result = df.loc["h":"j", "cats"] + expected = Series(Categorical(['a', 'b', 'b'], + categories=['a', 'b', 'c']), + index=['h', 'i', 'j'], name='cats') tm.assert_series_equal(result, expected) - result = df.ix["h":"j",0:1] - expected = DataFrame({'cats' : Series(Categorical(['a','b','b'],categories=['a','b','c']),index=['h','i','j']) }) + result = df.ix["h":"j", 0:1] + expected = DataFrame({'cats': Series( + Categorical( + ['a', 'b', 'b'], categories=['a', 'b', 'c']), index=['h', 'i', + 'j'])}) tm.assert_frame_equal(result, expected) def test_assigning_ops(self): - # systematically test the assigning operations: # for all slicing ops: # for value in categories and value not in categories: + # - assign a single value -> exp_single_cats_value + # - assign a complete row (mixed values) -> exp_single_row - # - assign multiple rows (mixed values) (-> array) -> exp_multi_row - # - assign a part of a column with dtype == categorical -> exp_parts_cats_col - # - assign a part of a column with dtype != categorical -> exp_parts_cats_col - cats = pd.Categorical(["a","a","a","a","a","a","a"], categories=["a","b"]) - idx = pd.Index(["h","i","j","k","l","m","n"]) - values = [1,1,1,1,1,1,1] - orig = pd.DataFrame({"cats":cats,"values":values}, index=idx) + # assign multiple rows (mixed values) (-> array) -> exp_multi_row - ### the expected values - # changed single row - cats1 = pd.Categorical(["a","a","b","a","a","a","a"], categories=["a","b"]) - idx1 = pd.Index(["h","i","j","k","l","m","n"]) - values1 = [1,1,2,1,1,1,1] - exp_single_row = pd.DataFrame({"cats":cats1,"values":values1}, index=idx1) + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col + + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col - #changed multiple rows - cats2 = pd.Categorical(["a","a","b","b","a","a","a"], categories=["a","b"]) - idx2 = pd.Index(["h","i","j","k","l","m","n"]) - values2 = [1,1,2,2,1,1,1] - exp_multi_row = pd.DataFrame({"cats":cats2,"values":values2}, index=idx2) + cats = pd.Categorical( + ["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"]) + idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values = [1, 1, 1, 1, 1, 1, 1] + orig = pd.DataFrame({"cats": cats, "values": values}, index=idx) + + # the expected values + # changed single row + cats1 = pd.Categorical( + ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx1 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values1 = [1, 1, 2, 1, 1, 1, 1] + exp_single_row = pd.DataFrame( + {"cats": cats1, + "values": values1}, index=idx1) + + # changed multiple rows + cats2 = pd.Categorical( + ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx2 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values2 = [1, 1, 2, 2, 1, 1, 1] + exp_multi_row = pd.DataFrame( + {"cats": cats2, + "values": values2}, index=idx2) # changed part of the cats column - cats3 = pd.Categorical(["a","a","b","b","a","a","a"], categories=["a","b"]) - idx3 = pd.Index(["h","i","j","k","l","m","n"]) - values3 = [1,1,1,1,1,1,1] - exp_parts_cats_col = pd.DataFrame({"cats":cats3,"values":values3}, index=idx3) + cats3 = pd.Categorical( + ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"]) + idx3 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values3 = [1, 1, 1, 1, 1, 1, 1] + exp_parts_cats_col = pd.DataFrame( + {"cats": cats3, + "values": values3}, index=idx3) # changed single value in cats col - cats4 = pd.Categorical(["a","a","b","a","a","a","a"], categories=["a","b"]) - idx4 = pd.Index(["h","i","j","k","l","m","n"]) - values4 = [1,1,1,1,1,1,1] - exp_single_cats_value = pd.DataFrame({"cats":cats4,"values":values4}, index=idx4) - - #### iloc ##### - ################ + cats4 = pd.Categorical( + ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"]) + idx4 = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + values4 = [1, 1, 1, 1, 1, 1, 1] + exp_single_cats_value = pd.DataFrame( + {"cats": cats4, + "values": values4}, index=idx4) + + # iloc + # ############### # - assign a single value -> exp_single_cats_value df = orig.copy() - df.iloc[2,0] = "b" + df.iloc[2, 0] = "b" tm.assert_frame_equal(df, exp_single_cats_value) - df = orig.copy() - df.iloc[df.index == "j",0] = "b" + df.iloc[df.index == "j", 0] = "b" tm.assert_frame_equal(df, exp_single_cats_value) - # - assign a single value not in the current categories set def f(): df = orig.copy() - df.iloc[2,0] = "c" + df.iloc[2, 0] = "c" + self.assertRaises(ValueError, f) # - assign a complete row (mixed values) -> exp_single_row df = orig.copy() - df.iloc[2,:] = ["b",2] + df.iloc[2, :] = ["b", 2] tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set def f(): df = orig.copy() - df.iloc[2,:] = ["c",2] + df.iloc[2, :] = ["c", 2] + self.assertRaises(ValueError, f) # - assign multiple rows (mixed values) -> exp_multi_row df = orig.copy() - df.iloc[2:4,:] = [["b",2],["b",2]] + df.iloc[2:4, :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) def f(): df = orig.copy() - df.iloc[2:4,:] = [["c",2],["c",2]] + df.iloc[2:4, :] = [["c", 2], ["c", 2]] + self.assertRaises(ValueError, f) - # - assign a part of a column with dtype == categorical -> exp_parts_cats_col + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col df = orig.copy() - df.iloc[2:4,0] = pd.Categorical(["b","b"], categories=["a","b"]) + df.iloc[2:4, 0] = pd.Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.iloc[2:4,0] = pd.Categorical(["b","b"], categories=["a","b","c"]) + df.iloc[2:4, 0] = pd.Categorical( + ["b", "b"], categories=["a", "b", "c"]) with tm.assertRaises(ValueError): # different values df = orig.copy() - df.iloc[2:4,0] = pd.Categorical(["c","c"], categories=["a","b","c"]) + df.iloc[2:4, 0] = pd.Categorical( + ["c", "c"], categories=["a", "b", "c"]) - # - assign a part of a column with dtype != categorical -> exp_parts_cats_col + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col df = orig.copy() - df.iloc[2:4,0] = ["b","b"] + df.iloc[2:4, 0] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): - df.iloc[2:4,0] = ["c","c"] + df.iloc[2:4, 0] = ["c", "c"] - #### loc ##### - ################ + # loc + # ############## # - assign a single value -> exp_single_cats_value df = orig.copy() - df.loc["j","cats"] = "b" + df.loc["j", "cats"] = "b" tm.assert_frame_equal(df, exp_single_cats_value) df = orig.copy() - df.loc[df.index == "j","cats"] = "b" + df.loc[df.index == "j", "cats"] = "b" tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set def f(): df = orig.copy() - df.loc["j","cats"] = "c" + df.loc["j", "cats"] = "c" + self.assertRaises(ValueError, f) # - assign a complete row (mixed values) -> exp_single_row df = orig.copy() - df.loc["j",:] = ["b",2] + df.loc["j", :] = ["b", 2] tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set def f(): df = orig.copy() - df.loc["j",:] = ["c",2] + df.loc["j", :] = ["c", 2] + self.assertRaises(ValueError, f) # - assign multiple rows (mixed values) -> exp_multi_row df = orig.copy() - df.loc["j":"k",:] = [["b",2],["b",2]] + df.loc["j":"k", :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) def f(): df = orig.copy() - df.loc["j":"k",:] = [["c",2],["c",2]] + df.loc["j":"k", :] = [["c", 2], ["c", 2]] + self.assertRaises(ValueError, f) - # - assign a part of a column with dtype == categorical -> exp_parts_cats_col + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col df = orig.copy() - df.loc["j":"k","cats"] = pd.Categorical(["b","b"], categories=["a","b"]) + df.loc["j":"k", "cats"] = pd.Categorical( + ["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.loc["j":"k","cats"] = pd.Categorical(["b","b"], categories=["a","b","c"]) + df.loc["j":"k", "cats"] = pd.Categorical( + ["b", "b"], categories=["a", "b", "c"]) with tm.assertRaises(ValueError): # different values df = orig.copy() - df.loc["j":"k","cats"] = pd.Categorical(["c","c"], categories=["a","b","c"]) + df.loc["j":"k", "cats"] = pd.Categorical( + ["c", "c"], categories=["a", "b", "c"]) - # - assign a part of a column with dtype != categorical -> exp_parts_cats_col + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col df = orig.copy() - df.loc["j":"k","cats"] = ["b","b"] + df.loc["j":"k", "cats"] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): - df.loc["j":"k","cats"] = ["c","c"] + df.loc["j":"k", "cats"] = ["c", "c"] - #### ix ##### - ################ + # ix + # ############## # - assign a single value -> exp_single_cats_value df = orig.copy() - df.ix["j",0] = "b" + df.ix["j", 0] = "b" tm.assert_frame_equal(df, exp_single_cats_value) df = orig.copy() - df.ix[df.index == "j",0] = "b" + df.ix[df.index == "j", 0] = "b" tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set def f(): df = orig.copy() - df.ix["j",0] = "c" + df.ix["j", 0] = "c" + self.assertRaises(ValueError, f) # - assign a complete row (mixed values) -> exp_single_row df = orig.copy() - df.ix["j",:] = ["b",2] + df.ix["j", :] = ["b", 2] tm.assert_frame_equal(df, exp_single_row) # - assign a complete row (mixed values) not in categories set def f(): df = orig.copy() - df.ix["j",:] = ["c",2] + df.ix["j", :] = ["c", 2] + self.assertRaises(ValueError, f) # - assign multiple rows (mixed values) -> exp_multi_row df = orig.copy() - df.ix["j":"k",:] = [["b",2],["b",2]] + df.ix["j":"k", :] = [["b", 2], ["b", 2]] tm.assert_frame_equal(df, exp_multi_row) def f(): df = orig.copy() - df.ix["j":"k",:] = [["c",2],["c",2]] + df.ix["j":"k", :] = [["c", 2], ["c", 2]] + self.assertRaises(ValueError, f) - # - assign a part of a column with dtype == categorical -> exp_parts_cats_col + # assign a part of a column with dtype == categorical -> + # exp_parts_cats_col df = orig.copy() - df.ix["j":"k",0] = pd.Categorical(["b","b"], categories=["a","b"]) + df.ix["j":"k", 0] = pd.Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): # different categories -> not sure if this should fail or pass df = orig.copy() - df.ix["j":"k",0] = pd.Categorical(["b","b"], categories=["a","b","c"]) + df.ix["j":"k", 0] = pd.Categorical( + ["b", "b"], categories=["a", "b", "c"]) with tm.assertRaises(ValueError): # different values df = orig.copy() - df.ix["j":"k",0] = pd.Categorical(["c","c"], categories=["a","b","c"]) + df.ix["j":"k", 0] = pd.Categorical( + ["c", "c"], categories=["a", "b", "c"]) - # - assign a part of a column with dtype != categorical -> exp_parts_cats_col + # assign a part of a column with dtype != categorical -> + # exp_parts_cats_col df = orig.copy() - df.ix["j":"k",0] = ["b","b"] + df.ix["j":"k", 0] = ["b", "b"] tm.assert_frame_equal(df, exp_parts_cats_col) with tm.assertRaises(ValueError): - df.ix["j":"k",0] = ["c","c"] + df.ix["j":"k", 0] = ["c", "c"] # iat df = orig.copy() - df.iat[2,0] = "b" + df.iat[2, 0] = "b" tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set def f(): df = orig.copy() - df.iat[2,0] = "c" + df.iat[2, 0] = "c" + self.assertRaises(ValueError, f) # at # - assign a single value -> exp_single_cats_value df = orig.copy() - df.at["j","cats"] = "b" + df.at["j", "cats"] = "b" tm.assert_frame_equal(df, exp_single_cats_value) # - assign a single value not in the current categories set def f(): df = orig.copy() - df.at["j","cats"] = "c" + df.at["j", "cats"] = "c" + self.assertRaises(ValueError, f) # fancy indexing - catsf = pd.Categorical(["a","a","c","c","a","a","a"], categories=["a","b","c"]) - idxf = pd.Index(["h","i","j","k","l","m","n"]) - valuesf = [1,1,3,3,1,1,1] - df = pd.DataFrame({"cats":catsf,"values":valuesf}, index=idxf) + catsf = pd.Categorical( + ["a", "a", "c", "c", "a", "a", "a"], categories=["a", "b", "c"]) + idxf = pd.Index(["h", "i", "j", "k", "l", "m", "n"]) + valuesf = [1, 1, 3, 3, 1, 1, 1] + df = pd.DataFrame({"cats": catsf, "values": valuesf}, index=idxf) exp_fancy = exp_multi_row.copy() - exp_fancy["cats"].cat.set_categories(["a","b","c"], inplace=True) + exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True) - df[df["cats"] == "c"] = ["b",2] + df[df["cats"] == "c"] = ["b", 2] tm.assert_frame_equal(df, exp_multi_row) # set_value df = orig.copy() - df.set_value("j","cats", "b") + df.set_value("j", "cats", "b") tm.assert_frame_equal(df, exp_single_cats_value) def f(): df = orig.copy() - df.set_value("j","cats", "c") + df.set_value("j", "cats", "c") + self.assertRaises(ValueError, f) - # Assigning a Category to parts of a int/... column uses the values of the Catgorical - df = pd.DataFrame({"a":[1,1,1,1,1], "b":["a","a","a","a","a"]}) - exp = pd.DataFrame({"a":[1,"b","b",1,1], "b":["a","a","b","b","a"]}) - df.loc[1:2,"a"] = pd.Categorical(["b","b"], categories=["a","b"]) - df.loc[2:3,"b"] = pd.Categorical(["b","b"], categories=["a","b"]) + # Assigning a Category to parts of a int/... column uses the values of + # the Catgorical + df = pd.DataFrame({"a": [1, 1, 1, 1, 1], + "b": ["a", "a", "a", "a", "a"]}) + exp = pd.DataFrame({"a": [1, "b", "b", 1, 1], + "b": ["a", "a", "b", "b", "a"]}) + df.loc[1:2, "a"] = pd.Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = pd.Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) - ######### Series ########## - orig = Series(pd.Categorical(["b","b"], categories=["a","b"])) + # Series + orig = Series(pd.Categorical(["b", "b"], categories=["a", "b"])) s = orig.copy() s[:] = "a" - exp = Series(pd.Categorical(["a","a"], categories=["a","b"])) + exp = Series(pd.Categorical(["a", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[1] = "a" - exp = Series(pd.Categorical(["b","a"], categories=["a","b"])) + exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[s.index > 0] = "a" - exp = Series(pd.Categorical(["b","a"], categories=["a","b"])) + exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s[[False, True]] = "a" - exp = Series(pd.Categorical(["b","a"], categories=["a","b"])) + exp = Series(pd.Categorical(["b", "a"], categories=["a", "b"])) tm.assert_series_equal(s, exp) s = orig.copy() s.index = ["x", "y"] s["y"] = "a" - exp = Series(pd.Categorical(["b","a"], categories=["a","b"]), index=["x", "y"]) + exp = Series( + pd.Categorical(["b", "a"], + categories=["a", "b"]), index=["x", "y"]) tm.assert_series_equal(s, exp) # ensure that one can set something to np.nan - s = Series(Categorical([1,2,3])) - exp = Series(Categorical([1,np.nan,3])) + s = Series(Categorical([1, 2, 3])) + exp = Series(Categorical([1, np.nan, 3])) s[1] = np.nan tm.assert_series_equal(s, exp) - def test_comparisons(self): tests_data = [(list("abc"), list("cba"), list("bbb")), - ([1,2,3], [3,2,1], [2,2,2])] - for data , reverse, base in tests_data: - cat_rev = pd.Series(pd.Categorical(data, categories=reverse, ordered=True)) - cat_rev_base = pd.Series(pd.Categorical(base, categories=reverse, ordered=True)) + ([1, 2, 3], [3, 2, 1], [2, 2, 2])] + for data, reverse, base in tests_data: + cat_rev = pd.Series(pd.Categorical(data, categories=reverse, + ordered=True)) + cat_rev_base = pd.Series(pd.Categorical(base, categories=reverse, + ordered=True)) cat = pd.Series(pd.Categorical(data, ordered=True)) - cat_base = pd.Series(pd.Categorical(base, categories=cat.cat.categories, ordered=True)) + cat_base = pd.Series(pd.Categorical( + base, categories=cat.cat.categories, ordered=True)) s = Series(base) a = np.array(base) @@ -3260,10 +3605,11 @@ def test_comparisons(self): # Only categories with same categories can be compared def f(): cat > cat_rev + self.assertRaises(TypeError, f) - # categorical cannot be compared to Series or numpy array, and also not the other way - # around + # categorical cannot be compared to Series or numpy array, and also + # not the other way around self.assertRaises(TypeError, lambda: cat > s) self.assertRaises(TypeError, lambda: cat_rev > s) self.assertRaises(TypeError, lambda: cat > a) @@ -3277,17 +3623,21 @@ def f(): # unequal comparison should raise for unordered cats cat = Series(Categorical(list("abc"))) + def f(): cat > "b" + self.assertRaises(TypeError, f) cat = Series(Categorical(list("abc"), ordered=False)) + def f(): cat > "b" + self.assertRaises(TypeError, f) - # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 and following - # comparisons with scalars not in categories should raise for unequal comps, but not for - # equal/not equal + # https://github.com/pydata/pandas/issues/9836#issuecomment-92123057 + # and following comparisons with scalars not in categories should raise + # for unequal comps, but not for equal/not equal cat = Series(Categorical(list("abc"), ordered=True)) self.assertRaises(TypeError, lambda: cat < "d") @@ -3295,12 +3645,11 @@ def f(): self.assertRaises(TypeError, lambda: "d" < cat) self.assertRaises(TypeError, lambda: "d" > cat) - self.assert_series_equal(cat == "d" , Series([False, False, False])) - self.assert_series_equal(cat != "d" , Series([True, True, True])) - + self.assert_series_equal(cat == "d", Series([False, False, False])) + self.assert_series_equal(cat != "d", Series([True, True, True])) # And test NaN handling... - cat = Series(Categorical(["a","b","c", np.nan])) + cat = Series(Categorical(["a", "b", "c", np.nan])) exp = Series([True, True, True, False]) res = (cat == cat) tm.assert_series_equal(res, exp) @@ -3309,47 +3658,47 @@ def test_cat_equality(self): # GH 8938 # allow equality comparisons - a = Series(list('abc'),dtype="category") - b = Series(list('abc'),dtype="object") - c = Series(['a','b','cc'],dtype="object") - d = Series(list('acb'),dtype="object") + a = Series(list('abc'), dtype="category") + b = Series(list('abc'), dtype="object") + c = Series(['a', 'b', 'cc'], dtype="object") + d = Series(list('acb'), dtype="object") e = Categorical(list('abc')) f = Categorical(list('acb')) # vs scalar - self.assertFalse((a=='a').all()) - self.assertTrue(((a!='a') == ~(a=='a')).all()) + self.assertFalse((a == 'a').all()) + self.assertTrue(((a != 'a') == ~(a == 'a')).all()) - self.assertFalse(('a'==a).all()) - self.assertTrue((a=='a')[0]) - self.assertTrue(('a'==a)[0]) - self.assertFalse(('a'!=a)[0]) + self.assertFalse(('a' == a).all()) + self.assertTrue((a == 'a')[0]) + self.assertTrue(('a' == a)[0]) + self.assertFalse(('a' != a)[0]) # vs list-like - self.assertTrue((a==a).all()) - self.assertFalse((a!=a).all()) + self.assertTrue((a == a).all()) + self.assertFalse((a != a).all()) - self.assertTrue((a==list(a)).all()) - self.assertTrue((a==b).all()) - self.assertTrue((b==a).all()) - self.assertTrue(((~(a==b))==(a!=b)).all()) - self.assertTrue(((~(b==a))==(b!=a)).all()) + self.assertTrue((a == list(a)).all()) + self.assertTrue((a == b).all()) + self.assertTrue((b == a).all()) + self.assertTrue(((~(a == b)) == (a != b)).all()) + self.assertTrue(((~(b == a)) == (b != a)).all()) - self.assertFalse((a==c).all()) - self.assertFalse((c==a).all()) - self.assertFalse((a==d).all()) - self.assertFalse((d==a).all()) + self.assertFalse((a == c).all()) + self.assertFalse((c == a).all()) + self.assertFalse((a == d).all()) + self.assertFalse((d == a).all()) # vs a cat-like - self.assertTrue((a==e).all()) - self.assertTrue((e==a).all()) - self.assertFalse((a==f).all()) - self.assertFalse((f==a).all()) + self.assertTrue((a == e).all()) + self.assertTrue((e == a).all()) + self.assertFalse((a == f).all()) + self.assertFalse((f == a).all()) - self.assertTrue(((~(a==e)==(a!=e)).all())) - self.assertTrue(((~(e==a)==(e!=a)).all())) - self.assertTrue(((~(a==f)==(a!=f)).all())) - self.assertTrue(((~(f==a)==(f!=a)).all())) + self.assertTrue(((~(a == e) == (a != e)).all())) + self.assertTrue(((~(e == a) == (e != a)).all())) + self.assertTrue(((~(a == f) == (a != f)).all())) + self.assertTrue(((~(f == a) == (f != a)).all())) # non-equality is not comparable self.assertRaises(TypeError, lambda: a < b) @@ -3358,109 +3707,147 @@ def test_cat_equality(self): self.assertRaises(TypeError, lambda: b > a) def test_concat(self): - cat = pd.Categorical(["a","b"], categories=["a","b"]) - vals = [1,2] - df = pd.DataFrame({"cats":cat, "vals":vals}) - cat2 = pd.Categorical(["a","b","a","b"], categories=["a","b"]) - vals2 = [1,2,1,2] - exp = pd.DataFrame({"cats":cat2, "vals":vals2}, index=pd.Index([0, 1, 0, 1])) - - res = pd.concat([df,df]) + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + vals = [1, 2] + df = pd.DataFrame({"cats": cat, "vals": vals}) + cat2 = pd.Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + vals2 = [1, 2, 1, 2] + exp = pd.DataFrame({"cats": cat2, + "vals": vals2}, index=pd.Index([0, 1, 0, 1])) + + res = pd.concat([df, df]) tm.assert_frame_equal(exp, res) - # Concat should raise if the two categoricals do not have the same categories - cat3 = pd.Categorical(["a","b"], categories=["a","b","c"]) - vals3 = [1,2] - df_wrong_categories = pd.DataFrame({"cats":cat3, "vals":vals3}) + # Concat should raise if the two categoricals do not have the same + # categories + cat3 = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) + vals3 = [1, 2] + df_wrong_categories = pd.DataFrame({"cats": cat3, "vals": vals3}) def f(): - pd.concat([df,df_wrong_categories]) + pd.concat([df, df_wrong_categories]) + self.assertRaises(ValueError, f) # GH 7864 # make sure ordering is preserverd - df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']}) + df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], + "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']}) df["grade"] = pd.Categorical(df["raw_grade"]) df['grade'].cat.set_categories(['e', 'a', 'b']) df1 = df[0:3] df2 = df[3:] - self.assert_numpy_array_equal(df['grade'].cat.categories, df1['grade'].cat.categories) - self.assert_numpy_array_equal(df['grade'].cat.categories, df2['grade'].cat.categories) + self.assert_numpy_array_equal(df['grade'].cat.categories, + df1['grade'].cat.categories) + self.assert_numpy_array_equal(df['grade'].cat.categories, + df2['grade'].cat.categories) dfx = pd.concat([df1, df2]) dfx['grade'].cat.categories - self.assert_numpy_array_equal(df['grade'].cat.categories, dfx['grade'].cat.categories) + self.assert_numpy_array_equal(df['grade'].cat.categories, + dfx['grade'].cat.categories) def test_concat_preserve(self): # GH 8641 # series concat not preserving category dtype - s = Series(list('abc'),dtype='category') - s2 = Series(list('abd'),dtype='category') + s = Series(list('abc'), dtype='category') + s2 = Series(list('abd'), dtype='category') def f(): - pd.concat([s,s2]) + pd.concat([s, s2]) + self.assertRaises(ValueError, f) - result = pd.concat([s,s],ignore_index=True) + result = pd.concat([s, s], ignore_index=True) expected = Series(list('abcabc')).astype('category') tm.assert_series_equal(result, expected) - result = pd.concat([s,s]) - expected = Series(list('abcabc'),index=[0,1,2,0,1,2]).astype('category') + result = pd.concat([s, s]) + expected = Series( + list('abcabc'), index=[0, 1, 2, 0, 1, 2]).astype('category') tm.assert_series_equal(result, expected) - a = Series(np.arange(6,dtype='int64')) + a = Series(np.arange(6, dtype='int64')) b = Series(list('aabbca')) - df2 = DataFrame({'A' : a, 'B' : b.astype('category',categories=list('cab')) }) - result = pd.concat([df2,df2]) - expected = DataFrame({'A' : pd.concat([a,a]), 'B' : pd.concat([b,b]).astype('category',categories=list('cab')) }) + df2 = DataFrame({'A': a, + 'B': b.astype('category', categories=list('cab'))}) + result = pd.concat([df2, df2]) + expected = DataFrame({'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype( + 'category', categories=list('cab'))}) tm.assert_frame_equal(result, expected) def test_categorical_index_preserver(self): - a = Series(np.arange(6,dtype='int64')) + a = Series(np.arange(6, dtype='int64')) b = Series(list('aabbca')) - df2 = DataFrame({'A' : a, 'B' : b.astype('category',categories=list('cab')) }).set_index('B') - result = pd.concat([df2,df2]) - expected = DataFrame({'A' : pd.concat([a,a]), 'B' : pd.concat([b,b]).astype('category',categories=list('cab')) }).set_index('B') + df2 = DataFrame({'A': a, + 'B': b.astype('category', categories=list( + 'cab'))}).set_index('B') + result = pd.concat([df2, df2]) + expected = DataFrame({'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype( + 'category', categories=list( + 'cab'))}).set_index('B') tm.assert_frame_equal(result, expected) # wrong catgories - df3 = DataFrame({'A' : a, 'B' : b.astype('category',categories=list('abc')) }).set_index('B') - self.assertRaises(TypeError, lambda : pd.concat([df2,df3])) + df3 = DataFrame({'A': a, + 'B': b.astype('category', categories=list( + 'abc'))}).set_index('B') + self.assertRaises(TypeError, lambda: pd.concat([df2, df3])) def test_append(self): - cat = pd.Categorical(["a","b"], categories=["a","b"]) - vals = [1,2] - df = pd.DataFrame({"cats":cat, "vals":vals}) - cat2 = pd.Categorical(["a","b","a","b"], categories=["a","b"]) - vals2 = [1,2,1,2] - exp = pd.DataFrame({"cats":cat2, "vals":vals2}, index=pd.Index([0, 1, 0, 1])) + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + vals = [1, 2] + df = pd.DataFrame({"cats": cat, "vals": vals}) + cat2 = pd.Categorical(["a", "b", "a", "b"], categories=["a", "b"]) + vals2 = [1, 2, 1, 2] + exp = pd.DataFrame({"cats": cat2, + "vals": vals2}, index=pd.Index([0, 1, 0, 1])) res = df.append(df) tm.assert_frame_equal(exp, res) - # Concat should raise if the two categoricals do not have the same categories - cat3 = pd.Categorical(["a","b"], categories=["a","b","c"]) - vals3 = [1,2] - df_wrong_categories = pd.DataFrame({"cats":cat3, "vals":vals3}) + # Concat should raise if the two categoricals do not have the same + # categories + cat3 = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) + vals3 = [1, 2] + df_wrong_categories = pd.DataFrame({"cats": cat3, "vals": vals3}) def f(): df.append(df_wrong_categories) + self.assertRaises(ValueError, f) def test_merge(self): # GH 9426 - right = DataFrame({'c': {0: 'a', 1: 'b', 2: 'c', 3: 'd', 4: 'e'}, - 'd': {0: 'null', 1: 'null', 2: 'null', 3: 'null', 4: 'null'}}) - left = DataFrame({'a': {0: 'f', 1: 'f', 2: 'f', 3: 'f', 4: 'f'}, - 'b': {0: 'g', 1: 'g', 2: 'g', 3: 'g', 4: 'g'}}) + right = DataFrame({'c': {0: 'a', + 1: 'b', + 2: 'c', + 3: 'd', + 4: 'e'}, + 'd': {0: 'null', + 1: 'null', + 2: 'null', + 3: 'null', + 4: 'null'}}) + left = DataFrame({'a': {0: 'f', + 1: 'f', + 2: 'f', + 3: 'f', + 4: 'f'}, + 'b': {0: 'g', + 1: 'g', + 2: 'g', + 3: 'g', + 4: 'g'}}) df = pd.merge(left, right, how='left', left_on='b', right_on='c') # object-object @@ -3487,33 +3874,34 @@ def test_merge(self): tm.assert_frame_equal(result, expected) def test_repeat(self): - #GH10183 - cat = pd.Categorical(["a","b"], categories=["a","b"]) - exp = pd.Categorical(["a", "a", "b", "b"], categories=["a","b"]) + # GH10183 + cat = pd.Categorical(["a", "b"], categories=["a", "b"]) + exp = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b"]) res = cat.repeat(2) self.assert_categorical_equal(res, exp) def test_na_actions(self): - cat = pd.Categorical([1,2,3,np.nan], categories=[1,2,3]) - vals = ["a","b",np.nan,"d"] - df = pd.DataFrame({"cats":cat, "vals":vals}) - cat2 = pd.Categorical([1,2,3,3], categories=[1,2,3]) - vals2 = ["a","b","b","d"] - df_exp_fill = pd.DataFrame({"cats":cat2, "vals":vals2}) - cat3 = pd.Categorical([1,2,3], categories=[1,2,3]) - vals3 = ["a","b",np.nan] - df_exp_drop_cats = pd.DataFrame({"cats":cat3, "vals":vals3}) - cat4 = pd.Categorical([1,2], categories=[1,2,3]) - vals4 = ["a","b"] - df_exp_drop_all = pd.DataFrame({"cats":cat4, "vals":vals4}) + cat = pd.Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) + vals = ["a", "b", np.nan, "d"] + df = pd.DataFrame({"cats": cat, "vals": vals}) + cat2 = pd.Categorical([1, 2, 3, 3], categories=[1, 2, 3]) + vals2 = ["a", "b", "b", "d"] + df_exp_fill = pd.DataFrame({"cats": cat2, "vals": vals2}) + cat3 = pd.Categorical([1, 2, 3], categories=[1, 2, 3]) + vals3 = ["a", "b", np.nan] + df_exp_drop_cats = pd.DataFrame({"cats": cat3, "vals": vals3}) + cat4 = pd.Categorical([1, 2], categories=[1, 2, 3]) + vals4 = ["a", "b"] + df_exp_drop_all = pd.DataFrame({"cats": cat4, "vals": vals4}) # fillna - res = df.fillna(value={"cats":3, "vals":"b"}) + res = df.fillna(value={"cats": 3, "vals": "b"}) tm.assert_frame_equal(res, df_exp_fill) def f(): - df.fillna(value={"cats":4, "vals":"c"}) + df.fillna(value={"cats": 4, "vals": "c"}) + self.assertRaises(ValueError, f) res = df.fillna(method='pad') @@ -3525,72 +3913,77 @@ def f(): res = df.dropna() tm.assert_frame_equal(res, df_exp_drop_all) - # make sure that fillna takes both missing values and NA categories into account - c = Categorical(["a","b",np.nan]) + # make sure that fillna takes both missing values and NA categories + # into account + c = Categorical(["a", "b", np.nan]) with tm.assert_produces_warning(FutureWarning): - c.set_categories(["a","b",np.nan], rename=True, inplace=True) + c.set_categories(["a", "b", np.nan], rename=True, inplace=True) c[0] = np.nan - df = pd.DataFrame({"cats":c, "vals":[1,2,3]}) - df_exp = pd.DataFrame({"cats": Categorical(["a","b","a"]), "vals": [1,2,3]}) + df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]}) + df_exp = pd.DataFrame({"cats": Categorical(["a", "b", "a"]), + "vals": [1, 2, 3]}) res = df.fillna("a") tm.assert_frame_equal(res, df_exp) - def test_astype_to_other(self): s = self.cat['value_group'] expected = s - tm.assert_series_equal(s.astype('category'),expected) - tm.assert_series_equal(s.astype(com.CategoricalDtype()),expected) - self.assertRaises(ValueError, lambda : s.astype('float64')) + tm.assert_series_equal(s.astype('category'), expected) + tm.assert_series_equal(s.astype(com.CategoricalDtype()), expected) + self.assertRaises(ValueError, lambda: s.astype('float64')) cat = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])) exp = Series(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) tm.assert_series_equal(cat.astype('str'), exp) s2 = Series(Categorical.from_array(['1', '2', '3', '4'])) - exp2 = Series([1,2,3,4]).astype(int) - tm.assert_series_equal(s2.astype('int') , exp2) + exp2 = Series([1, 2, 3, 4]).astype(int) + tm.assert_series_equal(s2.astype('int'), exp2) - # object don't sort correctly, so just compare that we have the same values - def cmp(a,b): - tm.assert_almost_equal(np.sort(np.unique(a)),np.sort(np.unique(b))) - expected = Series(np.array(s.values),name='value_group') - cmp(s.astype('object'),expected) - cmp(s.astype(np.object_),expected) + # object don't sort correctly, so just compare that we have the same + # values + def cmp(a, b): + tm.assert_almost_equal( + np.sort(np.unique(a)), np.sort(np.unique(b))) + + expected = Series(np.array(s.values), name='value_group') + cmp(s.astype('object'), expected) + cmp(s.astype(np.object_), expected) # array conversion - tm.assert_almost_equal(np.array(s),np.array(s.values)) + tm.assert_almost_equal(np.array(s), np.array(s.values)) # valid conversion for valid in [lambda x: x.astype('category'), lambda x: x.astype(com.CategoricalDtype()), lambda x: x.astype('object').astype('category'), - lambda x: x.astype('object').astype(com.CategoricalDtype())]: + lambda x: x.astype('object').astype( + com.CategoricalDtype()) + ]: result = valid(s) - tm.assert_series_equal(result,s) + tm.assert_series_equal(result, s) # invalid conversion (these are NOT a dtype) for invalid in [lambda x: x.astype(pd.Categorical), lambda x: x.astype('object').astype(pd.Categorical)]: - self.assertRaises(TypeError, lambda : invalid(s)) - + self.assertRaises(TypeError, lambda: invalid(s)) def test_astype_categorical(self): cat = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) - tm.assert_categorical_equal(cat,cat.astype('category')) - tm.assert_almost_equal(np.array(cat),cat.astype('object')) + tm.assert_categorical_equal(cat, cat.astype('category')) + tm.assert_almost_equal(np.array(cat), cat.astype('object')) - self.assertRaises(ValueError, lambda : cat.astype(float)) + self.assertRaises(ValueError, lambda: cat.astype(float)) def test_to_records(self): # GH8626 # dict creation - df = DataFrame({ 'A' : list('abc') }, dtype='category') + df = DataFrame({'A': list('abc')}, dtype='category') expected = Series(list('abc'), dtype='category', name='A') tm.assert_series_equal(df['A'], expected) @@ -3609,40 +4002,44 @@ def test_to_records(self): def test_numeric_like_ops(self): # numeric ops should not succeed - for op in ['__add__','__sub__','__mul__','__truediv__']: - self.assertRaises(TypeError, lambda : getattr(self.cat,op)(self.cat)) + for op in ['__add__', '__sub__', '__mul__', '__truediv__']: + self.assertRaises(TypeError, + lambda: getattr(self.cat, op)(self.cat)) - # reduction ops should not succeed (unless specifically defined, e.g. min/max) + # reduction ops should not succeed (unless specifically defined, e.g. + # min/max) s = self.cat['value_group'] - for op in ['kurt','skew','var','std','mean','sum','median']: - self.assertRaises(TypeError, lambda : getattr(s,op)(numeric_only=False)) + for op in ['kurt', 'skew', 'var', 'std', 'mean', 'sum', 'median']: + self.assertRaises(TypeError, + lambda: getattr(s, op)(numeric_only=False)) # mad technically works because it takes always the numeric data # numpy ops - s = pd.Series(pd.Categorical([1,2,3,4])) - self.assertRaises(TypeError, lambda : np.sum(s)) + s = pd.Series(pd.Categorical([1, 2, 3, 4])) + self.assertRaises(TypeError, lambda: np.sum(s)) # numeric ops on a Series - for op in ['__add__','__sub__','__mul__','__truediv__']: - self.assertRaises(TypeError, lambda : getattr(s,op)(2)) + for op in ['__add__', '__sub__', '__mul__', '__truediv__']: + self.assertRaises(TypeError, lambda: getattr(s, op)(2)) # invalid ufunc - self.assertRaises(TypeError, lambda : np.log(s)) + self.assertRaises(TypeError, lambda: np.log(s)) def test_cat_tab_completition(self): - # test the tab completion display - ok_for_cat = ['categories','codes','ordered','set_categories', - 'add_categories', 'remove_categories', 'rename_categories', - 'reorder_categories', 'remove_unused_categories', - 'as_ordered', 'as_unordered'] + # test the tab completion display + ok_for_cat = ['categories', 'codes', 'ordered', 'set_categories', + 'add_categories', 'remove_categories', + 'rename_categories', 'reorder_categories', + 'remove_unused_categories', 'as_ordered', 'as_unordered'] + def get_dir(s): - results = [ r for r in s.cat.__dir__() if not r.startswith('_') ] + results = [r for r in s.cat.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(list('aabbcde')).astype('category') results = get_dir(s) - tm.assert_almost_equal(results,list(sorted(set(ok_for_cat)))) + tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) def test_cat_accessor_api(self): # GH 9322 @@ -3659,7 +4056,8 @@ def test_cat_accessor_api(self): def test_cat_accessor_no_new_attributes(self): # https://github.com/pydata/pandas/issues/10673 c = Series(list('aabbcde')).astype('category') - with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"): + with tm.assertRaisesRegexp(AttributeError, + "You cannot add any new attribute"): c.cat.xlabel = "a" def test_str_accessor_api_for_categorical(self): @@ -3684,22 +4082,22 @@ def test_str_accessor_api_for_categorical(self): ('findall', ("a",), {}), ('index', (" ",), {}), ('ljust', (10,), {}), - ('match', ("a"), {}), # deprecated... + ('match', ("a"), {}), # deprecated... ('normalize', ("NFC",), {}), ('pad', (10,), {}), - ('partition', (" ",), {"expand": False}), # not default - ('partition', (" ",), {"expand": True}), # default + ('partition', (" ",), {"expand": False}), # not default + ('partition', (" ",), {"expand": True}), # default ('repeat', (3,), {}), ('replace', ("a", "z"), {}), ('rfind', ("a",), {}), ('rindex', (" ",), {}), ('rjust', (10,), {}), - ('rpartition', (" ",), {"expand": False}), # not default - ('rpartition', (" ",), {"expand": True}), # default - ('slice', (0,1), {}), - ('slice_replace', (0,1,"z"), {}), - ('split', (" ",), {"expand":False}), #default - ('split', (" ",), {"expand":True}), # not default + ('rpartition', (" ",), {"expand": False}), # not default + ('rpartition', (" ",), {"expand": True}), # default + ('slice', (0, 1), {}), + ('slice_replace', (0, 1, "z"), {}), + ('split', (" ",), {"expand": False}), # default + ('split', (" ",), {"expand": True}), # not default ('startswith', ("a",), {}), ('wrap', (2,), {}), ('zfill', (10,), {}) @@ -3712,14 +4110,14 @@ def test_str_accessor_api_for_categorical(self): # * `translate` has different interfaces for py2 vs. py3 _ignore_names = ["get", "join", "translate"] - str_func_names = [f for f in dir(s.str) if not (f.startswith("_") or - f in _special_func_names or - f in _ignore_names)] + str_func_names = [f + for f in dir(s.str) + if not (f.startswith("_") or f in _special_func_names + or f in _ignore_names)] func_defs = [(f, (), {}) for f in str_func_names] func_defs.extend(special_func_defs) - for func, args, kwargs in func_defs: res = getattr(c.str, func)(*args, **kwargs) exp = getattr(s.str, func)(*args, **kwargs) @@ -3729,8 +4127,9 @@ def test_str_accessor_api_for_categorical(self): else: tm.assert_series_equal(res, exp) - invalid = Series([1,2,3]).astype('category') - with tm.assertRaisesRegexp(AttributeError, "Can only use .str accessor with string"): + invalid = Series([1, 2, 3]).astype('category') + with tm.assertRaisesRegexp(AttributeError, + "Can only use .str accessor with string"): invalid.str self.assertFalse(hasattr(invalid, 'str')) @@ -3747,7 +4146,7 @@ def test_dt_accessor_api_for_categorical(self): s_pr = Series(period_range('1/1/2015', freq='D', periods=5)) c_pr = s_pr.astype("category") - s_tdr = Series(timedelta_range('1 days','10 days')) + s_tdr = Series(timedelta_range('1 days', '10 days')) c_tdr = s_tdr.astype("category") test_data = [ @@ -3771,10 +4170,10 @@ def test_dt_accessor_api_for_categorical(self): _ignore_names = ['tz_localize'] for name, attr_names, s, c in test_data: - func_names = [f for f in dir(s.dt) if not (f.startswith("_") or - f in attr_names or - f in _special_func_names or - f in _ignore_names)] + func_names = [f + for f in dir(s.dt) + if not (f.startswith("_") or f in attr_names or f in + _special_func_names or f in _ignore_names)] func_defs = [(f, (), {}) for f in func_names] for f_def in special_func_defs: @@ -3807,8 +4206,9 @@ def test_dt_accessor_api_for_categorical(self): else: tm.assert_numpy_array_equal(res, exp) - invalid = Series([1,2,3]).astype('category') - with tm.assertRaisesRegexp(AttributeError, "Can only use .dt accessor with datetimelike"): + invalid = Series([1, 2, 3]).astype('category') + with tm.assertRaisesRegexp( + AttributeError, "Can only use .dt accessor with datetimelike"): invalid.dt self.assertFalse(hasattr(invalid, 'str')) @@ -3852,25 +4252,31 @@ def test_pickle_v0_15_2(self): def test_concat_categorical(self): # See GH 10177 - df1 = pd.DataFrame(np.arange(18, dtype='int64').reshape(6, 3), columns=["a", "b", "c"]) + df1 = pd.DataFrame( + np.arange(18, dtype='int64').reshape(6, + 3), columns=["a", "b", "c"]) - df2 = pd.DataFrame(np.arange(14, dtype='int64').reshape(7, 2), columns=["a", "c"]) - df2['h'] = pd.Series(pd.Categorical(["one", "one", "two", "one", "two", "two", "one"])) + df2 = pd.DataFrame( + np.arange(14, dtype='int64').reshape(7, 2), columns=["a", "c"]) + df2['h'] = pd.Series(pd.Categorical(["one", "one", "two", "one", "two", + "two", "one"])) df_concat = pd.concat((df1, df2), axis=0).reset_index(drop=True) - df_expected = pd.DataFrame({'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], - 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13]}) - df_expected['h'] = pd.Series(pd.Categorical([None, None, None, None, None, None, - "one", "one", "two", "one", "two", "two", "one"])) + df_expected = pd.DataFrame( + {'a': [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12], + 'b': [1, 4, 7, 10, 13, 16, np.nan, np.nan, np.nan, np.nan, np.nan, + np.nan, np.nan], + 'c': [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13]}) + df_expected['h'] = pd.Series(pd.Categorical( + [None, None, None, None, None, None, "one", "one", "two", "one", + "two", "two", "one"])) tm.assert_frame_equal(df_expected, df_concat) - if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], # '--with-coverage', '--cover-package=pandas.core'] - exit=False) + exit=False) diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index a22d8f11c9a75..3fd8ee5879ff8 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -8,9 +8,8 @@ import numpy as np import pandas as pd from pandas.tslib import iNaT, NaT -from pandas import (Series, DataFrame, date_range, - DatetimeIndex, TimedeltaIndex, - Timestamp, Float64Index) +from pandas import (Series, DataFrame, date_range, DatetimeIndex, + TimedeltaIndex, Timestamp, Float64Index) from pandas import compat from pandas.compat import range, long, lrange, lmap, u from pandas.core.common import notnull, isnull, array_equivalent @@ -33,17 +32,18 @@ def test_mut_exclusive(): def test_is_sequence(): is_seq = com.is_sequence - assert(is_seq((1, 2))) - assert(is_seq([1, 2])) - assert(not is_seq("abcd")) - assert(not is_seq(u("abcd"))) - assert(not is_seq(np.int64)) + assert (is_seq((1, 2))) + assert (is_seq([1, 2])) + assert (not is_seq("abcd")) + assert (not is_seq(u("abcd"))) + assert (not is_seq(np.int64)) class A(object): + def __getitem__(self): return 1 - assert(not is_seq(A())) + assert (not is_seq(A())) def test_get_callable_name(): @@ -52,13 +52,15 @@ def test_get_callable_name(): def fn(x): return x + lambda_ = lambda x: x part1 = partial(fn) part2 = partial(part1) class somecall(object): + def __call__(self): - return x + return x # noqa assert getname(fn) == 'fn' assert getname(lambda_) @@ -67,7 +69,8 @@ def __call__(self): assert getname(somecall()) == 'somecall' assert getname(1) is None -#Issue 10859 + +# Issue 10859 class TestABCClasses(tm.TestCase): tuples = [[1, 2, 2], ['red', 'blue', 'red']] multi_index = pd.MultiIndex.from_arrays(tuples, names=('number', 'color')) @@ -88,7 +91,8 @@ def test_abc_types(self): self.assertIsInstance(self.datetime_index, com.ABCDatetimeIndex) self.assertIsInstance(self.timedelta_index, com.ABCTimedeltaIndex) self.assertIsInstance(self.period_index, com.ABCPeriodIndex) - self.assertIsInstance(self.categorical_df.index, com.ABCCategoricalIndex) + self.assertIsInstance(self.categorical_df.index, + com.ABCCategoricalIndex) self.assertIsInstance(pd.Index(['a', 'b', 'c']), com.ABCIndexClass) self.assertIsInstance(pd.Int64Index([1, 2, 3]), com.ABCIndexClass) self.assertIsInstance(pd.Series([1, 2, 3]), com.ABCSeries) @@ -103,12 +107,11 @@ def test_abc_types(self): class TestInferDtype(tm.TestCase): def test_infer_dtype_from_scalar(self): - # Test that _infer_dtype_from_scalar is returning correct dtype for int and float. + # Test that _infer_dtype_from_scalar is returning correct dtype for int + # and float. - for dtypec in [ np.uint8, np.int8, - np.uint16, np.int16, - np.uint32, np.int32, - np.uint64, np.int64 ]: + for dtypec in [np.uint8, np.int8, np.uint16, np.int16, np.uint32, + np.int32, np.uint64, np.int64]: data = dtypec(12) dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, type(data)) @@ -117,7 +120,7 @@ def test_infer_dtype_from_scalar(self): dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, np.int64) - for dtypec in [ np.float16, np.float32, np.float64 ]: + for dtypec in [np.float16, np.float32, np.float64]: data = dtypec(12) dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, dtypec) @@ -126,36 +129,31 @@ def test_infer_dtype_from_scalar(self): dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, np.float64) - for data in [ True, False ]: + for data in [True, False]: dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, np.bool_) - for data in [ np.complex64(1), np.complex128(1) ]: + for data in [np.complex64(1), np.complex128(1)]: dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, np.complex_) import datetime - for data in [ np.datetime64(1,'ns'), - pd.Timestamp(1), - datetime.datetime(2000,1,1,0,0) - ]: + for data in [np.datetime64(1, 'ns'), pd.Timestamp(1), + datetime.datetime(2000, 1, 1, 0, 0)]: dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, 'M8[ns]') - for data in [ np.timedelta64(1,'ns'), - pd.Timedelta(1), - datetime.timedelta(1) - ]: + for data in [np.timedelta64(1, 'ns'), pd.Timedelta(1), + datetime.timedelta(1)]: dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, 'm8[ns]') - for data in [ datetime.date(2000,1,1), - pd.Timestamp(1,tz='US/Eastern'), - 'foo' - ]: + for data in [datetime.date(2000, 1, 1), + pd.Timestamp(1, tz='US/Eastern'), 'foo']: dtype, val = com._infer_dtype_from_scalar(data) self.assertEqual(dtype, np.object_) + def test_notnull(): assert notnull(1.) assert not notnull(None) @@ -178,9 +176,11 @@ def test_notnull(): assert result.sum() == 2 with cf.option_context("mode.use_inf_as_null", False): - for s in [tm.makeFloatSeries(),tm.makeStringSeries(), - tm.makeObjectSeries(),tm.makeTimeSeries(),tm.makePeriodSeries()]: - assert(isinstance(isnull(s), Series)) + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) + def test_isnull(): assert not isnull(1.) @@ -190,52 +190,58 @@ def test_isnull(): assert not isnull(-np.inf) # series - for s in [tm.makeFloatSeries(),tm.makeStringSeries(), - tm.makeObjectSeries(),tm.makeTimeSeries(),tm.makePeriodSeries()]: - assert(isinstance(isnull(s), Series)) + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries(), tm.makeTimeSeries(), + tm.makePeriodSeries()]: + assert (isinstance(isnull(s), Series)) # frame - for df in [tm.makeTimeDataFrame(),tm.makePeriodFrame(),tm.makeMixedDataFrame()]: + for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), + tm.makeMixedDataFrame()]: result = isnull(df) expected = df.apply(isnull) tm.assert_frame_equal(result, expected) # panel - for p in [ tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) ]: + for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel()) + ]: result = isnull(p) expected = p.apply(isnull) tm.assert_panel_equal(result, expected) # panel 4d - for p in [ tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D()) ]: + for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: result = isnull(p) expected = p.apply(isnull) tm.assert_panel4d_equal(result, expected) + def test_isnull_lists(): result = isnull([[False]]) exp = np.array([[False]]) - assert(np.array_equal(result, exp)) + assert (np.array_equal(result, exp)) result = isnull([[1], [2]]) exp = np.array([[False], [False]]) - assert(np.array_equal(result, exp)) + assert (np.array_equal(result, exp)) # list of strings / unicode result = isnull(['foo', 'bar']) - assert(not result.any()) + assert (not result.any()) result = isnull([u('foo'), u('bar')]) - assert(not result.any()) + assert (not result.any()) + def test_isnull_nat(): result = isnull([NaT]) exp = np.array([True]) - assert(np.array_equal(result, exp)) + assert (np.array_equal(result, exp)) result = isnull(np.array([NaT], dtype=object)) exp = np.array([True]) - assert(np.array_equal(result, exp)) + assert (np.array_equal(result, exp)) + def test_isnull_numpy_nat(): arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), @@ -244,31 +250,33 @@ def test_isnull_numpy_nat(): expected = np.array([True] * 4) tm.assert_numpy_array_equal(result, expected) + def test_isnull_datetime(): assert (not isnull(datetime.now())) assert notnull(datetime.now()) idx = date_range('1/1/1990', periods=20) - assert(notnull(idx).all()) + assert (notnull(idx).all()) idx = np.asarray(idx) idx[0] = iNaT idx = DatetimeIndex(idx) mask = isnull(idx) - assert(mask[0]) - assert(not mask[1:].any()) + assert (mask[0]) + assert (not mask[1:].any()) # GH 9129 pidx = idx.to_period(freq='M') mask = isnull(pidx) - assert(mask[0]) - assert(not mask[1:].any()) + assert (mask[0]) + assert (not mask[1:].any()) mask = isnull(pidx[1:]) - assert(not mask.any()) + assert (not mask.any()) class TestIsNull(tm.TestCase): + def test_0d_array(self): self.assertTrue(isnull(np.array(np.nan))) self.assertFalse(isnull(np.array(0.0))) @@ -298,25 +306,27 @@ def test_downcast_conv(): # conversions - expected = np.array([1,2]) - for dtype in [np.float64,object,np.int64]: - arr = np.array([1.0,2.0],dtype=dtype) - result = com._possibly_downcast_to_dtype(arr,'infer') + expected = np.array([1, 2]) + for dtype in [np.float64, object, np.int64]: + arr = np.array([1.0, 2.0], dtype=dtype) + result = com._possibly_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected) - expected = np.array([1.0,2.0,np.nan]) - for dtype in [np.float64,object]: - arr = np.array([1.0,2.0,np.nan],dtype=dtype) - result = com._possibly_downcast_to_dtype(arr,'infer') + expected = np.array([1.0, 2.0, np.nan]) + for dtype in [np.float64, object]: + arr = np.array([1.0, 2.0, np.nan], dtype=dtype) + result = com._possibly_downcast_to_dtype(arr, 'infer') tm.assert_almost_equal(result, expected) # empties - for dtype in [np.int32,np.float64,np.float32,np.bool_,np.int64,object]: - arr = np.array([],dtype=dtype) - result = com._possibly_downcast_to_dtype(arr,'int64') - tm.assert_almost_equal(result, np.array([],dtype=np.int64)) + for dtype in [np.int32, np.float64, np.float32, np.bool_, np.int64, object + ]: + arr = np.array([], dtype=dtype) + result = com._possibly_downcast_to_dtype(arr, 'int64') + tm.assert_almost_equal(result, np.array([], dtype=np.int64)) assert result.dtype == np.int64 + def test_array_equivalent(): assert array_equivalent(np.array([np.nan, np.nan]), np.array([np.nan, np.nan])) @@ -326,70 +336,76 @@ def test_array_equivalent(): np.array([np.nan, None], dtype='object')) assert array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'), np.array([np.nan, 1 + 1j], dtype='complex')) - assert not array_equivalent(np.array([np.nan, 1 + 1j], dtype='complex'), - np.array([np.nan, 1 + 2j], dtype='complex')) - assert not array_equivalent(np.array([np.nan, 1, np.nan]), - np.array([np.nan, 2, np.nan])) - assert not array_equivalent(np.array(['a', 'b', 'c', 'd']), - np.array(['e', 'e'])) + assert not array_equivalent( + np.array([np.nan, 1 + 1j], dtype='complex'), np.array( + [np.nan, 1 + 2j], dtype='complex')) + assert not array_equivalent( + np.array([np.nan, 1, np.nan]), np.array([np.nan, 2, np.nan])) + assert not array_equivalent( + np.array(['a', 'b', 'c', 'd']), np.array(['e', 'e'])) assert array_equivalent(Float64Index([0, np.nan]), Float64Index([0, np.nan])) - assert not array_equivalent(Float64Index([0, np.nan]), - Float64Index([1, np.nan])) + assert not array_equivalent( + Float64Index([0, np.nan]), Float64Index([1, np.nan])) assert array_equivalent(DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan])) - assert not array_equivalent(DatetimeIndex([0, np.nan]), - DatetimeIndex([1, np.nan])) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex([1, np.nan])) assert array_equivalent(TimedeltaIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) - assert not array_equivalent(TimedeltaIndex([0, np.nan]), - TimedeltaIndex([1, np.nan])) + assert not array_equivalent( + TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex([0, np.nan], tz='US/Eastern')) - assert not array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), - DatetimeIndex([1, np.nan], tz='US/Eastern')) - assert not array_equivalent(DatetimeIndex([0, np.nan]), - DatetimeIndex([0, np.nan], tz='US/Eastern')) - assert not array_equivalent(DatetimeIndex([0, np.nan], tz='CET'), - DatetimeIndex([0, np.nan], tz='US/Eastern')) - assert not array_equivalent(DatetimeIndex([0, np.nan]), - TimedeltaIndex([0, np.nan])) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( + [1, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) def test_datetimeindex_from_empty_datetime64_array(): for unit in ['ms', 'us', 'ns']: idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) - assert(len(idx) == 0) + assert (len(idx) == 0) def test_nan_to_nat_conversions(): df = DataFrame(dict({ - 'A' : np.asarray(lrange(10),dtype='float64'), - 'B' : Timestamp('20010101') })) - df.iloc[3:6,:] = np.nan - result = df.loc[4,'B'].value - assert(result == iNaT) + 'A': np.asarray( + lrange(10), dtype='float64'), + 'B': Timestamp('20010101') + })) + df.iloc[3:6, :] = np.nan + result = df.loc[4, 'B'].value + assert (result == iNaT) s = df['B'].copy() - s._data = s._data.setitem(indexer=tuple([slice(8,9)]),value=np.nan) - assert(isnull(s[8])) + s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) + assert (isnull(s[8])) # numpy < 1.7.0 is wrong from distutils.version import LooseVersion if LooseVersion(np.__version__) >= '1.7.0': - assert(s[8].value == np.datetime64('NaT').astype(np.int64)) + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) def test_any_none(): - assert(com._any_none(1, 2, 3, None)) - assert(not com._any_none(1, 2, 3, 4)) + assert (com._any_none(1, 2, 3, None)) + assert (not com._any_none(1, 2, 3, 4)) def test_all_not_none(): - assert(com._all_not_none(1, 2, 3, 4)) - assert(not com._all_not_none(1, 2, 3, None)) - assert(not com._all_not_none(None, None, None, None)) + assert (com._all_not_none(1, 2, 3, 4)) + assert (not com._all_not_none(1, 2, 3, None)) + assert (not com._all_not_none(None, None, None, None)) def test_repr_binary_type(): @@ -408,23 +424,18 @@ def test_repr_binary_type(): def test_adjoin(): - data = [['a', 'b', 'c'], - ['dd', 'ee', 'ff'], - ['ggg', 'hhh', 'iii']] + data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] expected = 'a dd ggg\nb ee hhh\nc ff iii' adjoined = com.adjoin(2, *data) - assert(adjoined == expected) - + assert (adjoined == expected) class TestFormattBase(tm.TestCase): def test_adjoin(self): - data = [['a', 'b', 'c'], - ['dd', 'ee', 'ff'], - ['ggg', 'hhh', 'iii']] + data = [['a', 'b', 'c'], ['dd', 'ee', 'ff'], ['ggg', 'hhh', 'iii']] expected = 'a dd ggg\nb ee hhh\nc ff iii' adjoined = com.adjoin(2, *data) @@ -432,9 +443,7 @@ def test_adjoin(self): self.assertEqual(adjoined, expected) def test_adjoin_unicode(self): - data = [[u'あ', 'b', 'c'], - ['dd', u'ええ', 'ff'], - ['ggg', 'hhh', u'いいい']] + data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', 'hhh', u'いいい']] expected = u'あ dd ggg\nb ええ hhh\nc ff いいい' adjoined = com.adjoin(2, *data) self.assertEqual(adjoined, expected) @@ -444,6 +453,7 @@ def test_adjoin_unicode(self): expected = u"""あ dd ggg b ええ hhh c ff いいい""" + adjoined = adj.adjoin(2, *data) self.assertEqual(adjoined, expected) cols = adjoined.split('\n') @@ -454,6 +464,7 @@ def test_adjoin_unicode(self): expected = u"""あ dd ggg b ええ hhh c ff いいい""" + adjoined = adj.adjoin(7, *data) self.assertEqual(adjoined, expected) cols = adjoined.split('\n') @@ -494,7 +505,6 @@ def test_east_asian_len(self): self.assertEqual(adj.len(u'パンダpanda'), 11) self.assertEqual(adj.len(u'パンダpanda'), 10) - def test_ambiguous_width(self): adj = fmt.EastAsianTextAdjustment() self.assertEqual(adj.len(u'¡¡ab'), 4) @@ -503,8 +513,7 @@ def test_ambiguous_width(self): adj = fmt.EastAsianTextAdjustment() self.assertEqual(adj.len(u'¡¡ab'), 6) - data = [[u'あ', 'b', 'c'], - ['dd', u'ええ', 'ff'], + data = [[u'あ', 'b', 'c'], ['dd', u'ええ', 'ff'], ['ggg', u'¡¡ab', u'いいい']] expected = u'あ dd ggg \nb ええ ¡¡ab\nc ff いいい' adjoined = adj.adjoin(2, *data) @@ -513,13 +522,11 @@ def test_ambiguous_width(self): def test_iterpairs(): data = [1, 2, 3, 4] - expected = [(1, 2), - (2, 3), - (3, 4)] + expected = [(1, 2), (2, 3), (3, 4)] result = list(com.iterpairs(data)) - assert(result == expected) + assert (result == expected) def test_split_ranges(): @@ -556,12 +563,12 @@ def test_indent(): s = 'a b c\nd e f' result = com.indent(s, spaces=6) - assert(result == ' a b c\n d e f') + assert (result == ' a b c\n d e f') def test_banner(): ban = com.banner('hi') - assert(ban == ('%s\nhi\n%s' % ('=' * 80, '=' * 80))) + assert (ban == ('%s\nhi\n%s' % ('=' * 80, '=' * 80))) def test_map_indices_py(): @@ -570,7 +577,7 @@ def test_map_indices_py(): result = com.map_indices_py(data) - assert(result == expected) + assert (result == expected) def test_union(): @@ -579,7 +586,7 @@ def test_union(): union = sorted(com.union(a, b)) - assert((a + b) == union) + assert ((a + b) == union) def test_difference(): @@ -588,7 +595,7 @@ def test_difference(): inter = sorted(com.difference(b, a)) - assert([4, 5, 6] == inter) + assert ([4, 5, 6] == inter) def test_intersection(): @@ -597,7 +604,7 @@ def test_intersection(): inter = sorted(com.intersection(a, b)) - assert(a == inter) + assert (a == inter) def test_groupby(): @@ -613,7 +620,7 @@ def test_groupby(): def test_is_list_like(): - passes = ([], [1], (1,), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), + passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), Series([]), Series(['a']).str) fails = (1, '2', object()) @@ -623,9 +630,10 @@ def test_is_list_like(): for f in fails: assert not com.is_list_like(f) + def test_is_named_tuple(): - passes = (collections.namedtuple('Test',list('abc'))(1,2,3),) - fails = ((1,2,3), 'a', Series({'pi':3.14})) + passes = (collections.namedtuple('Test', list('abc'))(1, 2, 3), ) + fails = ((1, 2, 3), 'a', Series({'pi': 3.14})) for p in passes: assert com.is_named_tuple(p) @@ -633,6 +641,7 @@ def test_is_named_tuple(): for f in fails: assert not com.is_named_tuple(f) + def test_is_hashable(): # all new-style classes are hashable by default @@ -643,18 +652,19 @@ class UnhashableClass1(object): __hash__ = None class UnhashableClass2(object): + def __hash__(self): raise TypeError("Not hashable") - hashable = ( - 1, 3.14, np.float64(3.14), 'a', tuple(), (1,), HashableClass(), - ) - not_hashable = ( - [], UnhashableClass1(), - ) - abc_hashable_not_really_hashable = ( - ([],), UnhashableClass2(), - ) + hashable = (1, + 3.14, + np.float64(3.14), + 'a', + tuple(), + (1, ), + HashableClass(), ) + not_hashable = ([], UnhashableClass1(), ) + abc_hashable_not_really_hashable = (([], ), UnhashableClass2(), ) for i in hashable: assert com.is_hashable(i) @@ -671,8 +681,10 @@ def __hash__(self): # old-style classes in Python 2 don't appear hashable to # collections.Hashable but also seem to support hash() by default if compat.PY2: + class OldStyleClass(): pass + c = OldStyleClass() assert not isinstance(c, collections.Hashable) assert com.is_hashable(c) @@ -682,11 +694,11 @@ class OldStyleClass(): def test_ensure_int32(): values = np.arange(10, dtype=np.int32) result = com._ensure_int32(values) - assert(result.dtype == np.int32) + assert (result.dtype == np.int32) values = np.arange(10, dtype=np.int64) result = com._ensure_int32(values) - assert(result.dtype == np.int32) + assert (result.dtype == np.int32) def test_ensure_platform_int(): @@ -697,17 +709,17 @@ def test_ensure_platform_int(): # int64 x = Int64Index([1, 2, 3], dtype='int64') - assert(x.dtype == np.int64) + assert (x.dtype == np.int64) pi = com._ensure_platform_int(x) - assert(pi.dtype == np.int_) + assert (pi.dtype == np.int_) # int32 x = Int64Index([1, 2, 3], dtype='int32') - assert(x.dtype == np.int32) + assert (x.dtype == np.int32) pi = com._ensure_platform_int(x) - assert(pi.dtype == np.int_) + assert (pi.dtype == np.int_) # TODO: fix this broken test @@ -737,8 +749,8 @@ def test_is_re(): def test_is_recompilable(): - passes = (r'a', u('x'), r'asdf', re.compile('adsf'), - u(r'\u2233\s*'), re.compile(r'')) + passes = (r'a', u('x'), r'asdf', re.compile('adsf'), u(r'\u2233\s*'), + re.compile(r'')) fails = 1, [], object() for p in passes: @@ -747,6 +759,7 @@ def test_is_recompilable(): for f in fails: assert not com.is_re_compilable(f) + def test_random_state(): import numpy.random as npr # Check with seed @@ -755,7 +768,8 @@ def test_random_state(): # Check with random state object state2 = npr.RandomState(10) - assert_equal(com._random_state(state2).uniform(), npr.RandomState(10).uniform()) + assert_equal( + com._random_state(state2).uniform(), npr.RandomState(10).uniform()) # check with no arg random state assert isinstance(com._random_state(), npr.RandomState) @@ -770,23 +784,27 @@ def test_random_state(): def test_maybe_match_name(): - matched = com._maybe_match_name(Series([1], name='x'), Series([2], name='x')) - assert(matched == 'x') + matched = com._maybe_match_name( + Series([1], name='x'), Series( + [2], name='x')) + assert (matched == 'x') - matched = com._maybe_match_name(Series([1], name='x'), Series([2], name='y')) - assert(matched is None) + matched = com._maybe_match_name( + Series([1], name='x'), Series( + [2], name='y')) + assert (matched is None) matched = com._maybe_match_name(Series([1]), Series([2], name='x')) - assert(matched is None) + assert (matched is None) matched = com._maybe_match_name(Series([1], name='x'), Series([2])) - assert(matched is None) + assert (matched is None) matched = com._maybe_match_name(Series([1], name='x'), [2]) - assert(matched == 'x') + assert (matched == 'x') matched = com._maybe_match_name([1], Series([2], name='y')) - assert(matched == 'y') + assert (matched == 'y') class TestTake(tm.TestCase): @@ -843,15 +861,15 @@ def _test_dtype(dtype, fill_value, out_dtype): indexer = [2, 1, 0, -1] result = com.take_1d(data, indexer, fill_value=fill_value) - assert((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) - assert(result[3] == fill_value) - assert(result.dtype == out_dtype) + assert ((result[[0, 1, 2]] == data[[2, 1, 0]]).all()) + assert (result[3] == fill_value) + assert (result.dtype == out_dtype) indexer = [2, 1, 0, 1] result = com.take_1d(data, indexer, fill_value=fill_value) - assert((result[[0, 1, 2, 3]] == data[indexer]).all()) - assert(result.dtype == dtype) + assert ((result[[0, 1, 2, 3]] == data[indexer]).all()) + assert (result.dtype == dtype) _test_dtype(np.int8, np.int16(127), np.int8) _test_dtype(np.int8, np.int16(128), np.int16) @@ -934,24 +952,24 @@ def _test_dtype(dtype, fill_value, out_dtype): indexer = [2, 1, 0, -1] result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) - assert((result[3, :] == fill_value).all()) - assert(result.dtype == out_dtype) + assert ((result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()) + assert ((result[3, :] == fill_value).all()) + assert (result.dtype == out_dtype) result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) - assert((result[:, 3] == fill_value).all()) - assert(result.dtype == out_dtype) + assert ((result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()) + assert ((result[:, 3] == fill_value).all()) + assert (result.dtype == out_dtype) indexer = [2, 1, 0, 1] result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) - assert(result.dtype == dtype) + assert ((result[[0, 1, 2, 3], :] == data[indexer, :]).all()) + assert (result.dtype == dtype) result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) - assert(result.dtype == dtype) + assert ((result[:, [0, 1, 2, 3]] == data[:, indexer]).all()) + assert (result.dtype == dtype) _test_dtype(np.int8, np.int16(127), np.int8) _test_dtype(np.int8, np.int16(128), np.int16) @@ -1038,33 +1056,33 @@ def _test_dtype(dtype, fill_value, out_dtype): indexer = [2, 1, 0, -1] result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) - assert((result[3, :, :] == fill_value).all()) - assert(result.dtype == out_dtype) + assert ((result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()) + assert ((result[3, :, :] == fill_value).all()) + assert (result.dtype == out_dtype) result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) - assert((result[:, 3, :] == fill_value).all()) - assert(result.dtype == out_dtype) + assert ((result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()) + assert ((result[:, 3, :] == fill_value).all()) + assert (result.dtype == out_dtype) result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) - assert((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) - assert((result[:, :, 3] == fill_value).all()) - assert(result.dtype == out_dtype) + assert ((result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()) + assert ((result[:, :, 3] == fill_value).all()) + assert (result.dtype == out_dtype) indexer = [2, 1, 0, 1] result = com.take_nd(data, indexer, axis=0, fill_value=fill_value) - assert((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) - assert(result.dtype == dtype) + assert ((result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()) + assert (result.dtype == dtype) result = com.take_nd(data, indexer, axis=1, fill_value=fill_value) - assert((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) - assert(result.dtype == dtype) + assert ((result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()) + assert (result.dtype == dtype) result = com.take_nd(data, indexer, axis=2, fill_value=fill_value) - assert((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) - assert(result.dtype == dtype) + assert ((result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()) + assert (result.dtype == dtype) _test_dtype(np.int8, np.int16(127), np.int8) _test_dtype(np.int8, np.int16(128), np.int16) @@ -1126,9 +1144,7 @@ def test_1d_bool(self): self.assertEqual(result.dtype, np.object_) def test_2d_bool(self): - arr = np.array([[0, 1, 0], - [1, 0, 1], - [0, 1, 1]], dtype=bool) + arr = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 1]], dtype=bool) result = com.take_nd(arr, [0, 2, 2, 1]) expected = arr.take([0, 2, 2, 1], axis=0) @@ -1155,7 +1171,7 @@ def test_2d_float32(self): expected[[2, 4], :] = np.nan tm.assert_almost_equal(result, expected) - #### this now accepts a float32! # test with float64 out buffer + # this now accepts a float32! # test with float64 out buffer out = np.empty((len(indexer), arr.shape[1]), dtype='float32') com.take_nd(arr, indexer, out=out) # it works! @@ -1171,7 +1187,8 @@ def test_2d_float32(self): def test_2d_datetime64(self): # 2005/01/01 - 2006/01/01 - arr = np.random.randint(long(11045376), long(11360736), (5, 3))*100000000000 + arr = np.random.randint( + long(11045376), long(11360736), (5, 3)) * 100000000000 arr = arr.view(dtype='datetime64[ns]') indexer = [0, 2, -1, 1, -1] @@ -1245,6 +1262,7 @@ def test_maybe_convert_string_to_array(self): tm.assert_numpy_array_equal(result, np.array(['x', 2], dtype=object)) self.assertTrue(result.dtype == object) + def test_possibly_convert_objects_copy(): values = np.array([1, 2]) @@ -1254,7 +1272,7 @@ def test_possibly_convert_objects_copy(): out = convert._possibly_convert_objects(values, copy=True) assert_true(values is not out) - values = np.array(['apply','banana']) + values = np.array(['apply', 'banana']) out = convert._possibly_convert_objects(values, copy=False) assert_true(values is out) @@ -1267,9 +1285,9 @@ def test_dict_compat(): np.datetime64('2015-03-15'): 2} data_unchanged = {1: 2, 3: 4, 5: 6} expected = {Timestamp('1990-3-15'): 1, Timestamp('2015-03-15'): 2} - assert(com._dict_compat(data_datetime64) == expected) - assert(com._dict_compat(expected) == expected) - assert(com._dict_compat(data_unchanged) == data_unchanged) + assert (com._dict_compat(data_datetime64) == expected) + assert (com._dict_compat(expected) == expected) + assert (com._dict_compat(data_unchanged) == data_unchanged) if __name__ == '__main__': diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py index 13596bd35bb62..2ea95b4e0b300 100644 --- a/pandas/tests/test_compat.py +++ b/pandas/tests/test_compat.py @@ -3,18 +3,16 @@ Testing that functions from compat work as expected """ -from pandas.compat import ( - range, zip, map, filter, - lrange, lzip, lmap, lfilter, - builtins -) -import unittest -import nose +from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap, + lfilter, builtins) import pandas.util.testing as tm + class TestBuiltinIterators(tm.TestCase): + def check_result(self, actual, expected, lengths): - for (iter_res, list_res), exp, length in zip(actual, expected, lengths): + for (iter_res, list_res), exp, length in zip(actual, expected, + lengths): self.assertNotIsInstance(iter_res, list) tm.assertIsInstance(list_res, list) iter_res = list(iter_res) @@ -47,7 +45,6 @@ def test_map(self): lengths = 10, self.check_result(actual, expected, lengths) - def test_filter(self): func = lambda x: x lst = list(builtins.range(10)) @@ -64,8 +61,3 @@ def test_zip(self): expected = list(builtins.zip(*lst)), lengths = 10, self.check_result(actual, expected, lengths) - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - # '--with-coverage', '--cover-package=pandas.core'], - exit=False) diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index 0e286e93160b8..693b1d0ec71de 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -3,7 +3,6 @@ import pandas as pd import unittest import warnings -import nose class TestConfig(unittest.TestCase): @@ -39,11 +38,11 @@ def test_api(self): self.assertTrue(hasattr(pd, 'describe_option')) def test_is_one_of_factory(self): - v = self.cf.is_one_of_factory([None,12]) + v = self.cf.is_one_of_factory([None, 12]) v(12) v(None) - self.assertRaises(ValueError,v,1.1) + self.assertRaises(ValueError, v, 1.1) def test_register_option(self): self.cf.register_option('a', 1, 'doc') @@ -117,7 +116,7 @@ def test_describe_option(self): # current value is reported self.assertFalse( 'bar' in self.cf.describe_option('l', _print_desc=False)) - self.cf.set_option("l","bar") + self.cf.set_option("l", "bar") self.assertTrue( 'bar' in self.cf.describe_option('l', _print_desc=False)) @@ -168,7 +167,6 @@ def test_set_option(self): self.assertRaises(KeyError, self.cf.set_option, 'no.such.key', None) - def test_set_option_empty_args(self): self.assertRaises(ValueError, self.cf.set_option) @@ -244,9 +242,8 @@ def test_reset_option_all(self): self.assertEqual(self.cf.get_option('b.c'), 'hullo') def test_deprecate_option(self): - import sys - self.cf.deprecate_option( - 'foo') # we can deprecate non-existent options + # we can deprecate non-existent options + self.cf.deprecate_option('foo') self.assertTrue(self.cf._is_deprecated('foo')) with warnings.catch_warnings(record=True) as w: diff --git a/pandas/tests/test_dtypes.py b/pandas/tests/test_dtypes.py index 4403465576848..943e7c92d988b 100644 --- a/pandas/tests/test_dtypes.py +++ b/pandas/tests/test_dtypes.py @@ -4,14 +4,16 @@ import nose import numpy as np from pandas import Series, Categorical, date_range -import pandas.core.common as com -from pandas.core.common import (CategoricalDtype, is_categorical_dtype, is_categorical, - DatetimeTZDtype, is_datetime64tz_dtype, is_datetimetz, - is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype) +from pandas.core.common import (CategoricalDtype, is_categorical_dtype, + is_categorical, DatetimeTZDtype, + is_datetime64tz_dtype, is_datetimetz, + is_dtype_equal, is_datetime64_ns_dtype, + is_datetime64_dtype) import pandas.util.testing as tm _multiprocess_can_split_ = True + class Base(object): def test_hash(self): @@ -25,6 +27,7 @@ def test_numpy_informed(self): # np.dtype doesn't know about our new dtype def f(): np.dtype(self.dtype) + self.assertRaises(TypeError, f) self.assertNotEqual(self.dtype, np.str_) @@ -34,6 +37,7 @@ def test_pickle(self): result = self.round_trip_pickle(self.dtype) self.assertEqual(result, self.dtype) + class TestCategoricalDtype(Base, tm.TestCase): def setUp(self): @@ -47,7 +51,8 @@ def test_equality(self): def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises(TypeError, lambda : CategoricalDtype.construct_from_string('foo')) + self.assertRaises( + TypeError, lambda: CategoricalDtype.construct_from_string('foo')) def test_is_dtype(self): self.assertTrue(CategoricalDtype.is_dtype(self.dtype)) @@ -60,10 +65,10 @@ def test_basic(self): self.assertTrue(is_categorical_dtype(self.dtype)) - factor = Categorical.from_array(['a', 'b', 'b', 'a', - 'a', 'c', 'c', 'c']) + factor = Categorical.from_array(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c' + ]) - s = Series(factor,name='A') + s = Series(factor, name='A') # dtypes self.assertTrue(is_categorical_dtype(s.dtype)) @@ -75,13 +80,15 @@ def test_basic(self): self.assertFalse(is_categorical(np.dtype('float64'))) self.assertFalse(is_categorical(1.0)) + class TestDatetimeTZDtype(Base, tm.TestCase): def setUp(self): - self.dtype = DatetimeTZDtype('ns','US/Eastern') + self.dtype = DatetimeTZDtype('ns', 'US/Eastern') def test_construction(self): - self.assertRaises(ValueError, lambda : DatetimeTZDtype('ms','US/Eastern')) + self.assertRaises(ValueError, + lambda: DatetimeTZDtype('ms', 'US/Eastern')) def test_subclass(self): a = DatetimeTZDtype('datetime64[ns, US/Eastern]') @@ -99,33 +106,41 @@ def test_compat(self): def test_construction_from_string(self): result = DatetimeTZDtype('datetime64[ns, US/Eastern]') self.assertTrue(is_dtype_equal(self.dtype, result)) - result = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]') + result = DatetimeTZDtype.construct_from_string( + 'datetime64[ns, US/Eastern]') self.assertTrue(is_dtype_equal(self.dtype, result)) - self.assertRaises(TypeError, lambda : DatetimeTZDtype.construct_from_string('foo')) + self.assertRaises(TypeError, + lambda: DatetimeTZDtype.construct_from_string('foo')) def test_is_dtype(self): self.assertTrue(DatetimeTZDtype.is_dtype(self.dtype)) self.assertTrue(DatetimeTZDtype.is_dtype('datetime64[ns, US/Eastern]')) self.assertFalse(DatetimeTZDtype.is_dtype('foo')) - self.assertTrue(DatetimeTZDtype.is_dtype(DatetimeTZDtype('ns','US/Pacific'))) + self.assertTrue(DatetimeTZDtype.is_dtype(DatetimeTZDtype( + 'ns', 'US/Pacific'))) self.assertFalse(DatetimeTZDtype.is_dtype(np.float64)) def test_equality(self): - self.assertTrue(is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]')) - self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype('ns','US/Eastern'))) + self.assertTrue(is_dtype_equal(self.dtype, + 'datetime64[ns, US/Eastern]')) + self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype( + 'ns', 'US/Eastern'))) self.assertFalse(is_dtype_equal(self.dtype, 'foo')) - self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns','CET'))) - self.assertFalse(is_dtype_equal(DatetimeTZDtype('ns','US/Eastern'), DatetimeTZDtype('ns','US/Pacific'))) + self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns', + 'CET'))) + self.assertFalse(is_dtype_equal( + DatetimeTZDtype('ns', 'US/Eastern'), DatetimeTZDtype( + 'ns', 'US/Pacific'))) # numpy compat - self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"),"datetime64[ns]")) + self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]")) def test_basic(self): self.assertTrue(is_datetime64tz_dtype(self.dtype)) - dr = date_range('20130101',periods=3,tz='US/Eastern') - s = Series(dr,name='A') + dr = date_range('20130101', periods=3, tz='US/Eastern') + s = Series(dr, name='A') # dtypes self.assertTrue(is_datetime64tz_dtype(s.dtype)) @@ -159,8 +174,6 @@ def test_parser(self): ) - - if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 3bd76dfb9da61..688f074e31a42 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -13,37 +13,45 @@ from pandas.core.api import DataFrame, Panel from pandas.computation import expressions as expr from pandas import compat - from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_panel4d_equal) +import pandas.core.common as com import pandas.util.testing as tm from numpy.testing.decorators import slow - if not expr._USE_NUMEXPR: try: - import numexpr + import numexpr # noqa except ImportError: msg = "don't have" else: msg = "not using" raise nose.SkipTest("{0} numexpr".format(msg)) -_frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') -_frame2 = DataFrame(randn(100, 4), columns = list('ABCD'), dtype='float64') -_mixed = DataFrame({ 'A' : _frame['A'].copy(), 'B' : _frame['B'].astype('float32'), 'C' : _frame['C'].astype('int64'), 'D' : _frame['D'].astype('int32') }) -_mixed2 = DataFrame({ 'A' : _frame2['A'].copy(), 'B' : _frame2['B'].astype('float32'), 'C' : _frame2['C'].astype('int64'), 'D' : _frame2['D'].astype('int32') }) -_integer = DataFrame(np.random.randint(1, 100, size=(10001, 4)), columns = list('ABCD'), dtype='int64') +_frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64') +_frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64') +_mixed = DataFrame({'A': _frame['A'].copy(), + 'B': _frame['B'].astype('float32'), + 'C': _frame['C'].astype('int64'), + 'D': _frame['D'].astype('int32')}) +_mixed2 = DataFrame({'A': _frame2['A'].copy(), + 'B': _frame2['B'].astype('float32'), + 'C': _frame2['C'].astype('int64'), + 'D': _frame2['D'].astype('int32')}) +_integer = DataFrame( + np.random.randint(1, 100, + size=(10001, 4)), columns=list('ABCD'), dtype='int64') _integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)), columns=list('ABCD'), dtype='int64') -_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=(_frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) +_frame_panel = Panel(dict(ItemA=_frame.copy(), ItemB=( + _frame.copy() + 3), ItemC=_frame.copy(), ItemD=_frame.copy())) _frame2_panel = Panel(dict(ItemA=_frame2.copy(), ItemB=(_frame2.copy() + 3), ItemC=_frame2.copy(), ItemD=_frame2.copy())) -_integer_panel = Panel(dict(ItemA=_integer, - ItemB=(_integer + 34).astype('int64'))) -_integer2_panel = Panel(dict(ItemA=_integer2, - ItemB=(_integer2 + 34).astype('int64'))) +_integer_panel = Panel(dict(ItemA=_integer, ItemB=(_integer + 34).astype( + 'int64'))) +_integer2_panel = Panel(dict(ItemA=_integer2, ItemB=(_integer2 + 34).astype( + 'int64'))) _mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3))) _mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3))) @@ -54,9 +62,9 @@ class TestExpressions(tm.TestCase): def setUp(self): - self.frame = _frame.copy() + self.frame = _frame.copy() self.frame2 = _frame2.copy() - self.mixed = _mixed.copy() + self.mixed = _mixed.copy() self.mixed2 = _mixed2.copy() self.integer = _integer.copy() self._MIN_ELEMENTS = expr._MIN_ELEMENTS @@ -97,13 +105,13 @@ def run_arithmetic_test(self, df, other, assert_func, check_dtype=False, def test_integer_arithmetic(self): self.run_arithmetic_test(self.integer, self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:,0], self.integer.iloc[:, 0], - assert_series_equal, check_dtype=True) + self.run_arithmetic_test(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal, + check_dtype=True) @nose.tools.nottest - def run_binary_test(self, df, other, assert_func, - test_flex=False, numexpr_ops=set(['gt', 'lt', 'ge', - 'le', 'eq', 'ne'])): + def run_binary_test(self, df, other, assert_func, test_flex=False, + numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])): """ tests solely that the result is the same whether or not numexpr is enabled. Need to test whether the function does the correct thing @@ -159,10 +167,10 @@ def run_series(self, ser, other, binary_comp=None, **kwargs): # series doesn't uses vec_compare instead of numexpr... # if binary_comp is None: # binary_comp = other + 1 - # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=False, - # **kwargs) - # self.run_binary_test(ser, binary_comp, assert_frame_equal, test_flex=True, - # **kwargs) + # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # test_flex=False, **kwargs) + # self.run_binary_test(ser, binary_comp, assert_frame_equal, + # test_flex=True, **kwargs) def run_panel(self, panel, other, binary_comp=None, run_binary=True, assert_func=assert_panel_equal, **kwargs): @@ -231,51 +239,60 @@ def test_mixed_arithmetic(self): def test_integer_with_zeros(self): self.integer *= np.random.randint(0, 2, size=np.shape(self.integer)) - self.run_arithmetic_test(self.integer, self.integer, assert_frame_equal) - self.run_arithmetic_test(self.integer.iloc[:, 0], self.integer.iloc[:, 0], - assert_series_equal) + self.run_arithmetic_test(self.integer, self.integer, + assert_frame_equal) + self.run_arithmetic_test(self.integer.iloc[:, 0], + self.integer.iloc[:, 0], assert_series_equal) def test_invalid(self): # no op - result = expr._can_use_numexpr(operator.add, None, self.frame, self.frame, 'evaluate') + result = expr._can_use_numexpr(operator.add, None, self.frame, + self.frame, 'evaluate') self.assertFalse(result) # mixed - result = expr._can_use_numexpr(operator.add, '+', self.mixed, self.frame, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.mixed, + self.frame, 'evaluate') self.assertFalse(result) # min elements - result = expr._can_use_numexpr(operator.add, '+', self.frame2, self.frame2, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.frame2, + self.frame2, 'evaluate') self.assertFalse(result) # ok, we only check on first part of expression - result = expr._can_use_numexpr(operator.add, '+', self.frame, self.frame2, 'evaluate') + result = expr._can_use_numexpr(operator.add, '+', self.frame, + self.frame2, 'evaluate') self.assertTrue(result) def test_binary_ops(self): - def testit(): - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: + for f, f2 in [(self.frame, self.frame2), + (self.mixed, self.mixed2)]: - for op, op_str in [('add','+'),('sub','-'),('mul','*'),('div','/'),('pow','**')]: + for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'), + ('div', '/'), ('pow', '**')]: if op == 'div': op = getattr(operator, 'truediv', None) else: op = getattr(operator, op, None) if op is not None: - result = expr._can_use_numexpr(op, op_str, f, f, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f, f, + 'evaluate') self.assertNotEqual(result, f._is_mixed_type) - result = expr.evaluate(op, op_str, f, f, use_numexpr=True) - expected = expr.evaluate(op, op_str, f, f, use_numexpr=False) - tm.assert_numpy_array_equal(result,expected.values) + result = expr.evaluate(op, op_str, f, f, + use_numexpr=True) + expected = expr.evaluate(op, op_str, f, f, + use_numexpr=False) + tm.assert_numpy_array_equal(result, expected.values) - result = expr._can_use_numexpr(op, op_str, f2, f2, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f2, f2, + 'evaluate') self.assertFalse(result) - expr.set_use_numexpr(False) testit() expr.set_use_numexpr(True) @@ -285,10 +302,9 @@ def testit(): testit() def test_boolean_ops(self): - - def testit(): - for f, f2 in [ (self.frame, self.frame2), (self.mixed, self.mixed2) ]: + for f, f2 in [(self.frame, self.frame2), + (self.mixed, self.mixed2)]: f11 = f f12 = f + 1 @@ -296,18 +312,23 @@ def testit(): f21 = f2 f22 = f2 + 1 - for op, op_str in [('gt','>'),('lt','<'),('ge','>='),('le','<='),('eq','=='),('ne','!=')]: + for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='), + ('le', '<='), ('eq', '=='), ('ne', '!=')]: - op = getattr(operator,op) + op = getattr(operator, op) - result = expr._can_use_numexpr(op, op_str, f11, f12, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f11, f12, + 'evaluate') self.assertNotEqual(result, f11._is_mixed_type) - result = expr.evaluate(op, op_str, f11, f12, use_numexpr=True) - expected = expr.evaluate(op, op_str, f11, f12, use_numexpr=False) - tm.assert_numpy_array_equal(result,expected.values) + result = expr.evaluate(op, op_str, f11, f12, + use_numexpr=True) + expected = expr.evaluate(op, op_str, f11, f12, + use_numexpr=False) + tm.assert_numpy_array_equal(result, expected.values) - result = expr._can_use_numexpr(op, op_str, f21, f22, 'evaluate') + result = expr._can_use_numexpr(op, op_str, f21, f22, + 'evaluate') self.assertFalse(result) expr.set_use_numexpr(False) @@ -319,18 +340,16 @@ def testit(): testit() def test_where(self): - def testit(): - for f in [ self.frame, self.frame2, self.mixed, self.mixed2 ]: - + for f in [self.frame, self.frame2, self.mixed, self.mixed2]: - for cond in [ True, False ]: + for cond in [True, False]: - c = np.empty(f.shape,dtype=np.bool_) + c = np.empty(f.shape, dtype=np.bool_) c.fill(cond) - result = expr.where(c, f.values, f.values+1) - expected = np.where(c, f.values, f.values+1) - tm.assert_numpy_array_equal(result,expected) + result = expr.where(c, f.values, f.values + 1) + expected = np.where(c, f.values, f.values + 1) + tm.assert_numpy_array_equal(result, expected) expr.set_use_numexpr(False) testit() diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index a73b459459321..b7691033dfc83 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,4 +1,8 @@ # -*- coding: utf-8 -*- + +# TODO(wesm): lots of issues making flake8 hard +# flake8: noqa + from __future__ import print_function from distutils.version import LooseVersion import re @@ -32,8 +36,8 @@ import pandas.core.common as com from pandas.util.terminal import get_terminal_size import pandas as pd -from pandas.core.config import (set_option, get_option, - option_context, reset_option) +from pandas.core.config import (set_option, get_option, option_context, + reset_option) from datetime import datetime import nose @@ -45,44 +49,54 @@ def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth + def has_info_repr(df): r = repr(df) c1 = r.split('\n')[0].startswith(", 2. Index, 3. Columns, 4. dtype, 5. memory usage, 6. trailing newline + nv = len(r.split( + '\n')) == 6 # 1. , 2. Index, 3. Columns, 4. dtype, 5. memory usage, 6. trailing newline return has_info and nv + def has_horizontally_truncated_repr(df): - try: # Check header row + try: # Check header row fst_line = np.array(repr(df).splitlines()[0].split()) - cand_col = np.where(fst_line=='...')[0][0] + cand_col = np.where(fst_line == '...')[0][0] except: return False # Make sure each row has this ... in the same place r = repr(df) - for ix,l in enumerate(r.splitlines()): + for ix, l in enumerate(r.splitlines()): if not r.split()[cand_col] == '...': return False return True + def has_vertically_truncated_repr(df): r = repr(df) only_dot_row = False for row in r.splitlines(): - if re.match('^[\.\ ]+$',row): + if re.match('^[\.\ ]+$', row): only_dot_row = True return only_dot_row + def has_truncated_repr(df): - return has_horizontally_truncated_repr(df) or has_vertically_truncated_repr(df) + return has_horizontally_truncated_repr( + df) or has_vertically_truncated_repr(df) + def has_doubly_truncated_repr(df): - return has_horizontally_truncated_repr(df) and has_vertically_truncated_repr(df) + return has_horizontally_truncated_repr( + df) and has_vertically_truncated_repr(df) + def has_expanded_repr(df): r = repr(df) @@ -91,13 +105,13 @@ def has_expanded_repr(df): return True return False + class TestDataFrameFormatting(tm.TestCase): _multiprocess_can_split_ = True def setUp(self): self.warn_filters = warnings.filters - warnings.filterwarnings('ignore', - category=FutureWarning, + warnings.filterwarnings('ignore', category=FutureWarning, module=".*format") self.frame = _frame.copy() @@ -130,20 +144,22 @@ def test_eng_float_formatter(self): def test_show_null_counts(self): - df = DataFrame(1,columns=range(10),index=range(10)) - df.iloc[1,1] = np.nan + df = DataFrame(1, columns=range(10), index=range(10)) + df.iloc[1, 1] = np.nan def check(null_counts, result): buf = StringIO() df.info(buf=buf, null_counts=null_counts) self.assertTrue(('non-null' in buf.getvalue()) is result) - with option_context('display.max_info_rows',20,'display.max_info_columns',20): + with option_context('display.max_info_rows', 20, + 'display.max_info_columns', 20): check(None, True) check(True, True) check(False, False) - with option_context('display.max_info_rows',5,'display.max_info_columns',5): + with option_context('display.max_info_rows', 5, + 'display.max_info_columns', 5): check(None, False) check(True, False) check(False, False) @@ -159,8 +175,9 @@ def test_repr_truncation(self): max_len = 20 with option_context("display.max_colwidth", max_len): df = DataFrame({'A': np.random.randn(10), - 'B': [tm.rands(np.random.randint(max_len - 1, - max_len + 1)) for i in range(10)]}) + 'B': [tm.rands(np.random.randint( + max_len - 1, max_len + 1)) for i in range(10) + ]}) r = repr(df) r = r[r.find('\n') + 1:] @@ -179,34 +196,35 @@ def test_repr_truncation(self): self.assertNotIn('...', repr(df)) def test_repr_chop_threshold(self): - df = DataFrame([[0.1, 0.5],[0.5, -0.1]]) - pd.reset_option("display.chop_threshold") # default None + df = DataFrame([[0.1, 0.5], [0.5, -0.1]]) + pd.reset_option("display.chop_threshold") # default None self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') - with option_context("display.chop_threshold", 0.2 ): + with option_context("display.chop_threshold", 0.2): self.assertEqual(repr(df), ' 0 1\n0 0.0 0.5\n1 0.5 0.0') - with option_context("display.chop_threshold", 0.6 ): + with option_context("display.chop_threshold", 0.6): self.assertEqual(repr(df), ' 0 1\n0 0 0\n1 0 0') - with option_context("display.chop_threshold", None ): - self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') + with option_context("display.chop_threshold", None): + self.assertEqual(repr(df), ' 0 1\n0 0.1 0.5\n1 0.5 -0.1') def test_repr_obeys_max_seq_limit(self): - with option_context("display.max_seq_items",2000): + with option_context("display.max_seq_items", 2000): self.assertTrue(len(com.pprint_thing(lrange(1000))) > 1000) - with option_context("display.max_seq_items",5): + with option_context("display.max_seq_items", 5): self.assertTrue(len(com.pprint_thing(lrange(1000))) < 100) def test_repr_set(self): self.assertEqual(com.pprint_thing(set([1])), '{1}') def test_repr_is_valid_construction_code(self): - # for the case of Index, where the repr is traditional rather then stylized - idx = Index(['a','b']) - res = eval("pd."+repr(idx)) - tm.assert_series_equal(Series(res),Series(idx)) + # for the case of Index, where the repr is traditional rather then + # stylized + idx = Index(['a', 'b']) + res = eval("pd." + repr(idx)) + tm.assert_series_equal(Series(res), Series(idx)) def test_repr_should_return_str(self): # http://docs.python.org/py3k/reference/datamodel.html#object.__repr__ @@ -215,10 +233,8 @@ def test_repr_should_return_str(self): # (str on py2.x, str (unicode) on py3) - data = [8, 5, 3, 5] - index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), - u("\u03c6")] + index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] cols = [u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) self.assertTrue(type(df.__repr__()) == str) # both py2 / 3 @@ -234,8 +250,7 @@ def test_expand_frame_repr(self): df_tall = DataFrame('hello', lrange(30), lrange(5)) with option_context('mode.sim_interactive', True): - with option_context('display.max_columns', 10, - 'display.width',20, + with option_context('display.max_columns', 10, 'display.width', 20, 'display.max_rows', 20, 'display.show_dimensions', True): with option_context('display.expand_frame_repr', True): @@ -259,10 +274,8 @@ def test_repr_non_interactive(self): # result of terminal auto size detection df = DataFrame('hello', lrange(1000), lrange(5)) - with option_context('mode.sim_interactive', False, - 'display.width', 0, - 'display.height', 0, - 'display.max_rows',5000): + with option_context('mode.sim_interactive', False, 'display.width', 0, + 'display.height', 0, 'display.max_rows', 5000): self.assertFalse(has_truncated_repr(df)) self.assertFalse(has_expanded_repr(df)) @@ -301,9 +314,8 @@ def mkframe(n): self.assertTrue(has_vertically_truncated_repr(df10)) # width=None in terminal, auto detection - with option_context('display.max_columns', 100, - 'display.max_rows', term_width * 20, - 'display.width', None): + with option_context('display.max_columns', 100, 'display.max_rows', + term_width * 20, 'display.width', None): df = mkframe((term_width // 7) - 2) self.assertFalse(has_expanded_repr(df)) df = mkframe((term_width // 7) + 2) @@ -312,18 +324,23 @@ def mkframe(n): def test_str_max_colwidth(self): # GH 7856 - df = pd.DataFrame([{'a': 'foo', 'b': 'bar', + df = pd.DataFrame([{'a': 'foo', + 'b': 'bar', 'c': 'uncomfortably long line with lots of stuff', - 'd': 1}, - {'a': 'foo', 'b': 'bar', 'c': 'stuff', 'd': 1}]) + 'd': 1}, {'a': 'foo', + 'b': 'bar', + 'c': 'stuff', + 'd': 1}]) df.set_index(['a', 'b', 'c']) - self.assertTrue(str(df) == ' a b c d\n' - '0 foo bar uncomfortably long line with lots of stuff 1\n' - '1 foo bar stuff 1') + self.assertTrue( + str(df) == + ' a b c d\n' + '0 foo bar uncomfortably long line with lots of stuff 1\n' + '1 foo bar stuff 1') with option_context('max_colwidth', 20): self.assertTrue(str(df) == ' a b c d\n' - '0 foo bar uncomfortably lo... 1\n' - '1 foo bar stuff 1') + '0 foo bar uncomfortably lo... 1\n' + '1 foo bar stuff 1') def test_auto_detect(self): term_width, term_height = get_terminal_size() @@ -332,29 +349,28 @@ def test_auto_detect(self): index = range(10) df = DataFrame(index=index, columns=cols) with option_context('mode.sim_interactive', True): - with option_context('max_rows',None): - with option_context('max_columns',None): + with option_context('max_rows', None): + with option_context('max_columns', None): # Wrap around with None self.assertTrue(has_expanded_repr(df)) - with option_context('max_rows',0): - with option_context('max_columns',0): + with option_context('max_rows', 0): + with option_context('max_columns', 0): # Truncate with auto detection. self.assertTrue(has_horizontally_truncated_repr(df)) index = range(int(term_height * fac)) df = DataFrame(index=index, columns=cols) - with option_context('max_rows',0): - with option_context('max_columns',None): + with option_context('max_rows', 0): + with option_context('max_columns', None): # Wrap around with None self.assertTrue(has_expanded_repr(df)) # Truncate vertically self.assertTrue(has_vertically_truncated_repr(df)) - with option_context('max_rows',None): - with option_context('max_columns',0): + with option_context('max_rows', None): + with option_context('max_columns', 0): self.assertTrue(has_horizontally_truncated_repr(df)) - def test_to_string_repr_unicode(self): buf = StringIO() @@ -379,7 +395,7 @@ def test_to_string_repr_unicode(self): self.assertEqual(len(line), line_len) # it works even if sys.stdin in None - _stdin= sys.stdin + _stdin = sys.stdin try: sys.stdin = None repr(df) @@ -436,10 +452,8 @@ def test_to_string_with_formatters(self): def test_to_string_with_formatters_unicode(self): df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) - result = df.to_string(formatters={u('c/\u03c3'): - lambda x: '%s' % x}) - self.assertEqual(result, u(' c/\u03c3\n') + - '0 1\n1 2\n2 3') + result = df.to_string(formatters={u('c/\u03c3'): lambda x: '%s' % x}) + self.assertEqual(result, u(' c/\u03c3\n') + '0 1\n1 2\n2 3') def test_east_asian_unicode_frame(self): if PY3: @@ -499,7 +513,8 @@ def test_east_asian_unicode_frame(self): 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) expected = (u" a b\nおおおお \nあ あああああ あ\n" - u"い い いいい\nうう う う\nえ えええ ええええええ") + u"い い いいい\nうう う う\nえ えええ ええええええ" + ) self.assertEqual(_rep(df), expected) # all @@ -511,8 +526,8 @@ def test_east_asian_unicode_frame(self): self.assertEqual(_rep(df), expected) # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), - (u'おおお', u'かかかか'), (u'き', u'くく')]) + idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( + u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) expected = (u" a b\nあ いい あああああ あ\n" @@ -533,7 +548,7 @@ def test_east_asian_unicode_frame(self): u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) - df.index = [u'あああ', u'いいいい', u'う', 'aaa'] + df.index = [u'あああ', u'いいいい', u'う', 'aaa'] expected = (u" a ... ああああ\nあああ あああああ ... さ\n" u".. ... ... ...\naaa えええ ... せ\n" u"\n[4 rows x 4 columns]") @@ -544,8 +559,8 @@ def test_east_asian_unicode_frame(self): # mid col df = DataFrame({'a': [u'あ', u'いいい', u'う', u'ええええええ'], - 'b': [1, 222, 33333, 4]}, - index=['a', 'bb', 'c', 'ddd']) + 'b': [1, 222, 33333, 4]}, + index=['a', 'bb', 'c', 'ddd']) expected = (u" a b\na あ 1\n" u"bb いいい 222\nc う 33333\n" u"ddd ええええええ 4") @@ -553,8 +568,8 @@ def test_east_asian_unicode_frame(self): # last col df = DataFrame({'a': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=['a', 'bb', 'c', 'ddd']) expected = (u" a b\na 1 あ\n" u"bb 222 いいい\nc 33333 う\n" u"ddd 4 ええええええ") @@ -562,17 +577,18 @@ def test_east_asian_unicode_frame(self): # all col df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=['a', 'bb', 'c', 'ddd']) expected = (u" a b\na あああああ あ\n" u"bb い いいい\nc う う\n" - u"ddd えええ ええええええ""") + u"ddd えええ ええええええ" + "") self.assertEqual(_rep(df), expected) # column name df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, - index=['a', 'bb', 'c', 'ddd']) + 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + index=['a', 'bb', 'c', 'ddd']) expected = (u" b あああああ\na あ 1\n" u"bb いいい 222\nc う 33333\n" u"ddd ええええええ 4") @@ -593,7 +609,8 @@ def test_east_asian_unicode_frame(self): index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお')) expected = (u" a b\nおおおお \n" u"あ あああああ あ\nい い いいい\n" - u"うう う う\nえ えええ ええええええ") + u"うう う う\nえ えええ ええええええ" + ) self.assertEqual(_rep(df), expected) # all @@ -606,8 +623,8 @@ def test_east_asian_unicode_frame(self): self.assertEqual(_rep(df), expected) # MultiIndex - idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), - (u'おおお', u'かかかか'), (u'き', u'くく')]) + idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'), ( + u'おおお', u'かかかか'), (u'き', u'くく')]) df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx) expected = (u" a b\nあ いい あああああ あ\n" @@ -616,7 +633,8 @@ def test_east_asian_unicode_frame(self): self.assertEqual(_rep(df), expected) # truncate - with option_context('display.max_rows', 3, 'display.max_columns', 3): + with option_context('display.max_rows', 3, 'display.max_columns', + 3): df = pd.DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'], 'b': [u'あ', u'いいい', u'う', u'ええええええ'], @@ -629,7 +647,7 @@ def test_east_asian_unicode_frame(self): u"\n[4 rows x 4 columns]") self.assertEqual(_rep(df), expected) - df.index = [u'あああ', u'いいいい', u'う', 'aaa'] + df.index = [u'あああ', u'いいいい', u'う', 'aaa'] expected = (u" a ... ああああ\nあああ あああああ ... さ\n" u"... ... ... ...\naaa えええ ... せ\n" u"\n[4 rows x 4 columns]") @@ -637,8 +655,8 @@ def test_east_asian_unicode_frame(self): # ambiguous unicode df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, - index=['a', 'bb', 'c', '¡¡¡']) + 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, + index=['a', 'bb', 'c', '¡¡¡']) expected = (u" b あああああ\na あ 1\n" u"bb いいい 222\nc ¡¡ 33333\n" u"¡¡¡ ええええええ 4") @@ -671,37 +689,44 @@ def test_to_string_with_col_space(self): self.assertEqual(len(with_header_row1), len(no_header)) def test_to_string_truncate_indices(self): - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - for column in [ tm.makeStringIndex ]: - for h in [10,20]: - for w in [10,20]: - with option_context("display.expand_frame_repr",False): + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, + tm.makeDateIndex, tm.makePeriodIndex]: + for column in [tm.makeStringIndex]: + for h in [10, 20]: + for w in [10, 20]: + with option_context("display.expand_frame_repr", + False): df = DataFrame(index=index(h), columns=column(w)) with option_context("display.max_rows", 15): if h == 20: - self.assertTrue(has_vertically_truncated_repr(df)) + self.assertTrue( + has_vertically_truncated_repr(df)) else: - self.assertFalse(has_vertically_truncated_repr(df)) + self.assertFalse( + has_vertically_truncated_repr(df)) with option_context("display.max_columns", 15): if w == 20: - self.assertTrue(has_horizontally_truncated_repr(df)) + self.assertTrue( + has_horizontally_truncated_repr(df)) else: - self.assertFalse(has_horizontally_truncated_repr(df)) - with option_context("display.max_rows", 15,"display.max_columns", 15): + self.assertFalse( + has_horizontally_truncated_repr(df)) + with option_context("display.max_rows", 15, + "display.max_columns", 15): if h == 20 and w == 20: - self.assertTrue(has_doubly_truncated_repr(df)) + self.assertTrue(has_doubly_truncated_repr( + df)) else: - self.assertFalse(has_doubly_truncated_repr(df)) + self.assertFalse(has_doubly_truncated_repr( + df)) def test_to_string_truncate_multilevel(self): arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays,columns=arrays) - with option_context("display.max_rows", 7,"display.max_columns", 7): + df = DataFrame(index=arrays, columns=arrays) + with option_context("display.max_rows", 7, "display.max_columns", 7): self.assertTrue(has_doubly_truncated_repr(df)) - def test_to_html_with_col_space(self): def check_with_width(df, col_space): import re @@ -740,8 +765,8 @@ def test_to_html_escaped(self): test_dict = {'co", b: ""}, - 'co>l2':{a: "", - b: ""}} + 'co>l2': {a: "", + b: ""}} rs = DataFrame(test_dict).to_html() xp = """ @@ -764,6 +789,7 @@ def test_to_html_escaped(self):
""" + self.assertEqual(xp, rs) def test_to_html_escape_disabled(self): @@ -796,6 +822,7 @@ def test_to_html_escape_disabled(self): """ + self.assertEqual(xp, rs) def test_to_html_multiindex_index_false(self): @@ -804,8 +831,8 @@ def test_to_html_multiindex_index_false(self): 'a': range(2), 'b': range(3, 5), 'c': range(5, 7), - 'd': range(3, 5)} - ) + 'd': range(3, 5) + }) df.columns = MultiIndex.from_product([['a', 'b'], ['c', 'd']]) result = df.to_html(index=False) expected = """\ @@ -837,6 +864,7 @@ def test_to_html_multiindex_index_false(self): """ + self.assertEqual(result, expected) df.index = Index(df.index.values, name='idx') @@ -846,7 +874,7 @@ def test_to_html_multiindex_index_false(self): def test_to_html_multiindex_sparsify_false_multi_sparse(self): with option_context('display.multi_sparse', False): index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) + names=['foo', None]) df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) @@ -894,6 +922,7 @@ def test_to_html_multiindex_sparsify_false_multi_sparse(self): """ + self.assertEqual(result, expected) df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], @@ -949,11 +978,12 @@ def test_to_html_multiindex_sparsify_false_multi_sparse(self): """ + self.assertEqual(result, expected) def test_to_html_multiindex_sparsify(self): index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], - names=['foo', None]) + names=['foo', None]) df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index) @@ -998,10 +1028,11 @@ def test_to_html_multiindex_sparsify(self): """ + self.assertEqual(result, expected) - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=index[::2], index=index) + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=index[::2], + index=index) result = df.to_html() expected = """\ @@ -1051,13 +1082,14 @@ def test_to_html_multiindex_sparsify(self): """ + self.assertEqual(result, expected) def test_to_html_index_formatter(self): - df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=['foo', None], index=lrange(4)) + df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=['foo', None], + index=lrange(4)) - f = lambda x: 'abcd'[x] + f = lambda x: 'abcd' [x] result = df.to_html(formatters={'__index__': f}) expected = """\ @@ -1091,22 +1123,24 @@ def test_to_html_index_formatter(self):
""" + self.assertEqual(result, expected) def test_to_html_regression_GH6098(self): df = DataFrame({u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], - u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')], - 'données1': np.random.randn(5), - 'données2': np.random.randn(5)}) + u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), + u('1er')], + 'données1': np.random.randn(5), + 'données2': np.random.randn(5)}) # it works df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() def test_to_html_truncate(self): raise nose.SkipTest("unreliable on travis") - index = pd.DatetimeIndex(start='20010101',freq='D',periods=20) - df = DataFrame(index=index,columns=range(20)) - fmt.set_option('display.max_rows',8) - fmt.set_option('display.max_columns',4) + index = pd.DatetimeIndex(start='20010101', freq='D', periods=20) + df = DataFrame(index=index, columns=range(20)) + fmt.set_option('display.max_rows', 8) + fmt.set_option('display.max_columns', 4) result = df._repr_html_() expected = '''\ @@ -1206,9 +1240,9 @@ def test_to_html_truncate_multi_index(self): raise nose.SkipTest("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays,columns=arrays) - fmt.set_option('display.max_rows',7) - fmt.set_option('display.max_columns',7) + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) result = df._repr_html_() expected = '''\ @@ -1323,10 +1357,10 @@ def test_to_html_truncate_multi_index_sparse_off(self): raise nose.SkipTest("unreliable on travis") arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - df = DataFrame(index=arrays,columns=arrays) - fmt.set_option('display.max_rows',7) - fmt.set_option('display.max_columns',7) - fmt.set_option('display.multi_sparse',False) + df = DataFrame(index=arrays, columns=arrays) + fmt.set_option('display.max_rows', 7) + fmt.set_option('display.max_columns', 7) + fmt.set_option('display.multi_sparse', False) result = df._repr_html_() expected = '''\ @@ -1430,8 +1464,6 @@ def test_to_html_truncate_multi_index_sparse_off(self): expected = expected.decode('utf-8') self.assertEqual(result, expected) - - def test_nonunicode_nonascii_alignment(self): df = DataFrame([["aa\xc3\xa4\xc3\xa4", 1], ["bbbb", 2]]) rep_str = df.to_string() @@ -1468,31 +1500,30 @@ def test_pprint_thing(self): if PY3: raise nose.SkipTest("doesn't work on Python 3") - self.assertEqual(pp_t('a') , u('a')) - self.assertEqual(pp_t(u('a')) , u('a')) - self.assertEqual(pp_t(None) , 'None') - self.assertEqual(pp_t(u('\u05d0'), quote_strings=True), - u("u'\u05d0'")) - self.assertEqual(pp_t(u('\u05d0'), quote_strings=False), - u('\u05d0')) + self.assertEqual(pp_t('a'), u('a')) + self.assertEqual(pp_t(u('a')), u('a')) + self.assertEqual(pp_t(None), 'None') + self.assertEqual(pp_t(u('\u05d0'), quote_strings=True), u("u'\u05d0'")) + self.assertEqual(pp_t(u('\u05d0'), quote_strings=False), u('\u05d0')) self.assertEqual(pp_t((u('\u05d0'), - u('\u05d1')), quote_strings=True), - u("(u'\u05d0', u'\u05d1')")) + u('\u05d1')), quote_strings=True), + u("(u'\u05d0', u'\u05d1')")) self.assertEqual(pp_t((u('\u05d0'), (u('\u05d1'), - u('\u05d2'))), - quote_strings=True), - u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) + u('\u05d2'))), + quote_strings=True), + u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) self.assertEqual(pp_t(('foo', u('\u05d0'), (u('\u05d0'), - u('\u05d0'))), - quote_strings=True), - u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) + u('\u05d0'))), + quote_strings=True), + u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) # escape embedded tabs in string # GH #2038 - self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t",))) + self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t", ))) def test_wide_repr(self): - with option_context('mode.sim_interactive', True, 'display.show_dimensions', True): + with option_context('mode.sim_interactive', True, + 'display.show_dimensions', True): max_cols = get_option('display.max_columns') df = DataFrame(tm.rands_array(25, size=(10, max_cols - 1))) set_option('display.expand_frame_repr', False) @@ -1562,10 +1593,9 @@ def test_wide_repr_multiindex(self): def test_wide_repr_multiindex_cols(self): with option_context('mode.sim_interactive', True): max_cols = get_option('display.max_columns') - midx = MultiIndex.from_arrays( - tm.rands_array(5, size=(2, 10))) - mcols = MultiIndex.from_arrays( - tm.rands_array(3, size=(2, max_cols - 1))) + midx = MultiIndex.from_arrays(tm.rands_array(5, size=(2, 10))) + mcols = MultiIndex.from_arrays(tm.rands_array(3, size=(2, max_cols + - 1))) df = DataFrame(tm.rands_array(25, (10, max_cols - 1)), index=midx, columns=mcols) df.index.names = ['Level 0', 'Level 1'] @@ -1599,8 +1629,8 @@ def test_wide_repr_unicode(self): def test_wide_repr_wide_long_columns(self): with option_context('mode.sim_interactive', True): - df = DataFrame( - {'a': ['a' * 30, 'b' * 30], 'b': ['c' * 70, 'd' * 80]}) + df = DataFrame({'a': ['a' * 30, 'b' * 30], + 'b': ['c' * 70, 'd' * 80]}) result = repr(df) self.assertTrue('ccccc' in result) @@ -1608,58 +1638,78 @@ def test_wide_repr_wide_long_columns(self): def test_long_series(self): n = 1000 - s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in range(n)], dtype='int64') + s = Series( + np.random.randint(-50, 50, n), + index=['s%04d' % x for x in range(n)], dtype='int64') import re str_rep = str(s) - nmatches = len(re.findall('dtype',str_rep)) + nmatches = len(re.findall('dtype', str_rep)) self.assertEqual(nmatches, 1) def test_index_with_nan(self): # GH 2850 - df = DataFrame({'id1': {0: '1a3', 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, - 'id3': {0: '78d', 1: '79d'}, 'value': {0: 123, 1: 64}}) + df = DataFrame({'id1': {0: '1a3', + 1: '9h4'}, + 'id2': {0: np.nan, + 1: 'd67'}, + 'id3': {0: '78d', + 1: '79d'}, + 'value': {0: 123, + 1: 64}}) # multi-index y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + expected = u( + ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # index y = df.set_index('id2') result = y.to_string() - expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') + expected = u( + ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') self.assertEqual(result, expected) # with append (this failed in 0.12) y = df.set_index(['id1', 'id2']).set_index('id3', append=True) result = y.to_string() - expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + expected = u( + ' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') self.assertEqual(result, expected) # all-nan in mi df2 = df.copy() - df2.ix[:,'id2'] = np.nan + df2.ix[:, 'id2'] = np.nan y = df2.set_index('id2') result = y.to_string() - expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') + expected = u( + ' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') self.assertEqual(result, expected) # partial nan in mi df2 = df.copy() - df2.ix[:,'id2'] = np.nan - y = df2.set_index(['id2','id3']) + df2.ix[:, 'id2'] = np.nan + y = df2.set_index(['id2', 'id3']) result = y.to_string() - expected = u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') + expected = u( + ' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') self.assertEqual(result, expected) - df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, - 'id3': {0: np.nan, 1: '79d'}, 'value': {0: 123, 1: 64}}) + df = DataFrame({'id1': {0: np.nan, + 1: '9h4'}, + 'id2': {0: np.nan, + 1: 'd67'}, + 'id3': {0: np.nan, + 1: '79d'}, + 'value': {0: 123, + 1: 64}}) - y = df.set_index(['id1','id2','id3']) + y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u(' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') + expected = u( + ' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') self.assertEqual(result, expected) def test_to_string(self): @@ -1671,8 +1721,8 @@ def test_to_string(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20,'A'] = nan - biggie.loc[:20,'B'] = nan + biggie.loc[:20, 'A'] = nan + biggie.loc[:20, 'B'] = nan s = biggie.to_string() buf = StringIO() @@ -1692,8 +1742,8 @@ def test_to_string(self): header=None, sep=' ') tm.assert_series_equal(recons['B'], biggie['B']) self.assertEqual(recons['A'].count(), biggie['A'].count()) - self.assertTrue((np.abs(recons['A'].dropna() - - biggie['A'].dropna()) < 0.1).all()) + self.assertTrue((np.abs(recons['A'].dropna() - biggie['A'].dropna()) < + 0.1).all()) # expected = ['B', 'A'] # self.assertEqual(header, expected) @@ -1707,15 +1757,13 @@ def test_to_string(self): formatters={'A': lambda x: '%.1f' % x}) biggie.to_string(columns=['B', 'A'], float_format=str) - biggie.to_string(columns=['B', 'A'], col_space=12, - float_format=str) + biggie.to_string(columns=['B', 'A'], col_space=12, float_format=str) frame = DataFrame(index=np.arange(200)) frame.to_string() def test_to_string_no_header(self): - df = DataFrame({'x': [1, 2, 3], - 'y': [4, 5, 6]}) + df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) df_s = df.to_string(header=False) expected = "0 1 4\n1 2 5\n2 3 6" @@ -1723,8 +1771,7 @@ def test_to_string_no_header(self): self.assertEqual(df_s, expected) def test_to_string_no_index(self): - df = DataFrame({'x': [1, 2, 3], - 'y': [4, 5, 6]}) + df = DataFrame({'x': [1, 2, 3], 'y': [4, 5, 6]}) df_s = df.to_string(index=False) expected = "x y\n1 4\n2 5\n3 6" @@ -1733,11 +1780,11 @@ def test_to_string_no_index(self): def test_to_string_float_formatting(self): self.reset_display_options() - fmt.set_option('display.precision', 5, 'display.column_space', - 12, 'display.notebook_repr_html', False) + fmt.set_option('display.precision', 5, 'display.column_space', 12, + 'display.notebook_repr_html', False) - df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6, - 1.7e+8, 1.253456, np.pi, -1e6]}) + df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6, 1.7e+8, + 1.253456, np.pi, -1e6]}) df_s = df.to_string() @@ -1758,9 +1805,7 @@ def test_to_string_float_formatting(self): df = DataFrame({'x': [3234, 0.253]}) df_s = df.to_string() - expected = (' x\n' - '0 3234.000\n' - '1 0.253') + expected = (' x\n' '0 3234.000\n' '1 0.253') self.assertEqual(df_s, expected) self.reset_display_options() @@ -1800,10 +1845,7 @@ def test_to_string_small_float_values(self): # but not all exactly zero df = df * 0 result = df.to_string() - expected = (' 0\n' - '0 0\n' - '1 0\n' - '2 -0') + expected = (' 0\n' '0 0\n' '1 0\n' '2 -0') def test_to_string_float_index(self): index = Index([1.5, 2, 3, 4, 5]) @@ -1819,9 +1861,7 @@ def test_to_string_float_index(self): self.assertEqual(result, expected) def test_to_string_ascii_error(self): - data = [('0 ', - u(' .gitignore '), - u(' 5 '), + data = [('0 ', u(' .gitignore '), u(' 5 '), ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' '\xa2\xe2\x80\xa2\xe2\x80\xa2')] df = DataFrame(data) @@ -1834,17 +1874,13 @@ def test_to_string_int_formatting(self): self.assertTrue(issubclass(df['x'].dtype.type, np.integer)) output = df.to_string() - expected = (' x\n' - '0 -15\n' - '1 20\n' - '2 25\n' - '3 -35') + expected = (' x\n' '0 -15\n' '1 20\n' '2 25\n' '3 -35') self.assertEqual(output, expected) def test_to_string_index_formatter(self): df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)]) - rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]}) + rs = df.to_string(formatters={'__index__': lambda x: 'abc' [x]}) xp = """\ 0 1 2 3 4 @@ -1852,15 +1888,14 @@ def test_to_string_index_formatter(self): b 5 6 7 8 9 c 10 11 12 13 14\ """ + self.assertEqual(rs, xp) def test_to_string_left_justify_cols(self): self.reset_display_options() df = DataFrame({'x': [3234, 0.253]}) df_s = df.to_string(justify='left') - expected = (' x \n' - '0 3234.000\n' - '1 0.253') + expected = (' x \n' '0 3234.000\n' '1 0.253') self.assertEqual(df_s, expected) def test_to_string_format_na(self): @@ -1897,20 +1932,24 @@ def test_to_string_line_width(self): def test_show_dimensions(self): df = DataFrame(123, lrange(10, 15), lrange(30)) - with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', - 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', True): + with option_context('display.max_rows', 10, 'display.max_columns', 40, + 'display.width', 500, 'display.expand_frame_repr', + 'info', 'display.show_dimensions', True): self.assertTrue('5 rows' in str(df)) self.assertTrue('5 rows' in df._repr_html_()) - with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', - 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', False): + with option_context('display.max_rows', 10, 'display.max_columns', 40, + 'display.width', 500, 'display.expand_frame_repr', + 'info', 'display.show_dimensions', False): self.assertFalse('5 rows' in str(df)) self.assertFalse('5 rows' in df._repr_html_()) - with option_context('display.max_rows', 2, 'display.max_columns', 2, 'display.width', - 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): + with option_context('display.max_rows', 2, 'display.max_columns', 2, + 'display.width', 500, 'display.expand_frame_repr', + 'info', 'display.show_dimensions', 'truncate'): self.assertTrue('5 rows' in str(df)) self.assertTrue('5 rows' in df._repr_html_()) - with option_context('display.max_rows', 10, 'display.max_columns', 40, 'display.width', - 500, 'display.expand_frame_repr', 'info', 'display.show_dimensions', 'truncate'): + with option_context('display.max_rows', 10, 'display.max_columns', 40, + 'display.width', 500, 'display.expand_frame_repr', + 'info', 'display.show_dimensions', 'truncate'): self.assertFalse('5 rows' in str(df)) self.assertFalse('5 rows' in df._repr_html_()) @@ -1920,8 +1959,8 @@ def test_to_html(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20,'A'] = nan - biggie.loc[:20,'B'] = nan + biggie.loc[:20, 'A'] = nan + biggie.loc[:20, 'B'] = nan s = biggie.to_html() buf = StringIO() @@ -1936,8 +1975,7 @@ def test_to_html(self): formatters={'A': lambda x: '%.1f' % x}) biggie.to_html(columns=['B', 'A'], float_format=str) - biggie.to_html(columns=['B', 'A'], col_space=12, - float_format=str) + biggie.to_html(columns=['B', 'A'], col_space=12, float_format=str) frame = DataFrame(index=np.arange(200)) frame.to_html() @@ -1947,8 +1985,8 @@ def test_to_html_filename(self): 'B': tm.makeStringIndex(200)}, index=lrange(200)) - biggie.loc[:20,'A'] = nan - biggie.loc[:20,'B'] = nan + biggie.loc[:20, 'A'] = nan + biggie.loc[:20, 'B'] = nan with tm.ensure_clean('test.html') as path: biggie.to_html(path) with open(path, 'r') as f: @@ -1973,8 +2011,8 @@ def test_to_html_columns_arg(self): def test_to_html_multiindex(self): columns = MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), - np.mod(lrange(4), 2))), - names=['CL0', 'CL1']) + np.mod(lrange(4), 2))), + names=['CL0', 'CL1']) df = DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='left') expected = ('\n' @@ -2012,8 +2050,9 @@ def test_to_html_multiindex(self): self.assertEqual(result, expected) - columns = MultiIndex.from_tuples(list(zip(range(4), - np.mod(lrange(4), 2)))) + columns = MultiIndex.from_tuples(list(zip( + range(4), np.mod( + lrange(4), 2)))) df = DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='right') @@ -2056,9 +2095,9 @@ def test_to_html_multiindex(self): def test_to_html_justify(self): df = DataFrame({'A': [6, 30000, 2], - 'B': [1, 2, 70000], - 'C': [223442, 0, 1]}, - columns=['A', 'B', 'C']) + 'B': [1, 2, 70000], + 'C': [223442, 0, 1]}, + columns=['A', 'B', 'C']) result = df.to_html(justify='left') expected = ('
\n' ' \n' @@ -2128,10 +2167,10 @@ def test_to_html_justify(self): def test_to_html_index(self): index = ['foo', 'bar', 'baz'] df = DataFrame({'A': [1, 2, 3], - 'B': [1.2, 3.4, 5.6], - 'C': ['one', 'two', np.NaN]}, - columns=['A', 'B', 'C'], - index=index) + 'B': [1.2, 3.4, 5.6], + 'C': ['one', 'two', np.NaN]}, + columns=['A', 'B', 'C'], + index=index) expected_with_index = ('
\n' ' \n' ' \n' @@ -2354,17 +2393,17 @@ def test_repr_html_wide(self): def test_repr_html_wide_multiindex_cols(self): max_cols = get_option('display.max_columns') - mcols = MultiIndex.from_product([np.arange(max_cols//2), - ['foo', 'bar']], - names=['first', 'second']) + mcols = MultiIndex.from_product([np.arange(max_cols // 2), + ['foo', 'bar']], + names=['first', 'second']) df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) reg_repr = df._repr_html_() assert '...' not in reg_repr - mcols = MultiIndex.from_product((np.arange(1+(max_cols//2)), - ['foo', 'bar']), - names=['first', 'second']) + mcols = MultiIndex.from_product((np.arange(1 + (max_cols // 2)), + ['foo', 'bar']), + names=['first', 'second']) df = DataFrame(tm.rands_array(25, size=(10, len(mcols))), columns=mcols) wide_repr = df._repr_html_() @@ -2373,13 +2412,13 @@ def test_repr_html_wide_multiindex_cols(self): def test_repr_html_long(self): max_rows = get_option('display.max_rows') h = max_rows - 1 - df = DataFrame({'A':np.arange(1,1+h), 'B':np.arange(41, 41+h)}) + df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) reg_repr = df._repr_html_() assert '..' not in reg_repr assert str(41 + max_rows // 2) in reg_repr h = max_rows + 1 - df = DataFrame({'A':np.arange(1,1+h), 'B':np.arange(41, 41+h)}) + df = DataFrame({'A': np.arange(1, 1 + h), 'B': np.arange(41, 41 + h)}) long_repr = df._repr_html_() assert '..' in long_repr assert str(41 + max_rows // 2) not in long_repr @@ -2389,13 +2428,17 @@ def test_repr_html_long(self): def test_repr_html_float(self): max_rows = get_option('display.max_rows') h = max_rows - 1 - df = DataFrame({'idx':np.linspace(-10,10,h), 'A':np.arange(1,1+h), 'B': np.arange(41, 41+h) }).set_index('idx') + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') reg_repr = df._repr_html_() assert '..' not in reg_repr assert str(40 + h) in reg_repr h = max_rows + 1 - df = DataFrame({'idx':np.linspace(-10,10,h), 'A':np.arange(1,1+h), 'B': np.arange(41, 41+h) }).set_index('idx') + df = DataFrame({'idx': np.linspace(-10, 10, h), + 'A': np.arange(1, 1 + h), + 'B': np.arange(41, 41 + h)}).set_index('idx') long_repr = df._repr_html_() assert '..' in long_repr assert '31' not in long_repr @@ -2404,18 +2447,18 @@ def test_repr_html_float(self): def test_repr_html_long_multiindex(self): max_rows = get_option('display.max_rows') - max_L1 = max_rows//2 + max_L1 = max_rows // 2 tuples = list(itertools.product(np.arange(max_L1), ['foo', 'bar'])) idx = MultiIndex.from_tuples(tuples, names=['first', 'second']) - df = DataFrame(np.random.randn(max_L1*2, 2), index=idx, + df = DataFrame(np.random.randn(max_L1 * 2, 2), index=idx, columns=['A', 'B']) reg_repr = df._repr_html_() assert '...' not in reg_repr - tuples = list(itertools.product(np.arange(max_L1+1), ['foo', 'bar'])) + tuples = list(itertools.product(np.arange(max_L1 + 1), ['foo', 'bar'])) idx = MultiIndex.from_tuples(tuples, names=['first', 'second']) - df = DataFrame(np.random.randn((max_L1+1)*2, 2), index=idx, + df = DataFrame(np.random.randn((max_L1 + 1) * 2, 2), index=idx, columns=['A', 'B']) long_repr = df._repr_html_() assert '...' in long_repr @@ -2424,27 +2467,27 @@ def test_repr_html_long_and_wide(self): max_cols = get_option('display.max_columns') max_rows = get_option('display.max_rows') - h, w = max_rows-1, max_cols-1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) - assert '...' not in df._repr_html_() + h, w = max_rows - 1, max_cols - 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) + assert '...' not in df._repr_html_() - h, w = max_rows+1, max_cols+1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) - assert '...' in df._repr_html_() + h, w = max_rows + 1, max_cols + 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) + assert '...' in df._repr_html_() def test_info_repr(self): max_rows = get_option('display.max_rows') max_cols = get_option('display.max_columns') # Long - h, w = max_rows+1, max_cols-1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) + h, w = max_rows + 1, max_cols - 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) assert has_vertically_truncated_repr(df) with option_context('display.large_repr', 'info'): assert has_info_repr(df) # Wide - h, w = max_rows-1, max_cols+1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) + h, w = max_rows - 1, max_cols + 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) assert has_horizontally_truncated_repr(df) with option_context('display.large_repr', 'info'): assert has_info_repr(df) @@ -2469,24 +2512,23 @@ def test_info_repr_html(self): max_rows = get_option('display.max_rows') max_cols = get_option('display.max_columns') # Long - h, w = max_rows+1, max_cols-1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) + h, w = max_rows + 1, max_cols - 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) assert r'<class' not in df._repr_html_() with option_context('display.large_repr', 'info'): assert r'<class' in df._repr_html_() # Wide - h, w = max_rows-1, max_cols+1 - df = DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) + h, w = max_rows - 1, max_cols + 1 + df = DataFrame(dict((k, np.arange(1, 1 + h)) for k in np.arange(w))) assert ' 1e8 and something that normally formats to having length > display.precision + 6 + # need both a number > 1e8 and something that normally formats to + # having length > display.precision + 6 df = pd.DataFrame(dict(x=[12345.6789])) self.assertEqual(str(df), ' x\n0 12345.6789') df = pd.DataFrame(dict(x=[2e8])) self.assertEqual(str(df), ' x\n0 200000000') df = pd.DataFrame(dict(x=[12345.6789, 2e8])) - self.assertEqual(str(df), ' x\n0 1.2346e+04\n1 2.0000e+08') + self.assertEqual( + str(df), ' x\n0 1.2346e+04\n1 2.0000e+08') class TestRepr_timedelta64(tm.TestCase): @@ -3806,7 +3819,8 @@ def test_none(self): self.assertEqual(drepr(delta_1s), "0 days 00:00:01") self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + self.assertEqual( + drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") def test_even_day(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -3821,7 +3835,8 @@ def test_even_day(self): self.assertEqual(drepr(delta_1s), "0 days 00:00:01") self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + self.assertEqual( + drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") def test_sub_day(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -3836,7 +3851,8 @@ def test_sub_day(self): self.assertEqual(drepr(delta_1s), "00:00:01") self.assertEqual(drepr(delta_500ms), "00:00:00.500000") self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + self.assertEqual( + drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") def test_long(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -3851,7 +3867,8 @@ def test_long(self): self.assertEqual(drepr(delta_1s), "0 days 00:00:01") self.assertEqual(drepr(delta_500ms), "0 days 00:00:00.500000") self.assertEqual(drepr(delta_1d + delta_1s), "1 days 00:00:01") - self.assertEqual(drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") + self.assertEqual( + drepr(delta_1d + delta_500ms), "1 days 00:00:00.500000") def test_all(self): delta_1d = pd.to_timedelta(1, unit='D') @@ -3863,53 +3880,55 @@ def test_all(self): self.assertEqual(drepr(delta_0d), "0 days 00:00:00.000000000") self.assertEqual(drepr(delta_1ns), "0 days 00:00:00.000000001") + class TestTimedelta64Formatter(tm.TestCase): def test_days(self): x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') - result = fmt.Timedelta64Formatter(x,box=True).get_result() + result = fmt.Timedelta64Formatter(x, box=True).get_result() self.assertEqual(result[0].strip(), "'0 days'") self.assertEqual(result[1].strip(), "'1 days'") - result = fmt.Timedelta64Formatter(x[1:2],box=True).get_result() + result = fmt.Timedelta64Formatter(x[1:2], box=True).get_result() self.assertEqual(result[0].strip(), "'1 days'") - result = fmt.Timedelta64Formatter(x,box=False).get_result() + result = fmt.Timedelta64Formatter(x, box=False).get_result() self.assertEqual(result[0].strip(), "0 days") self.assertEqual(result[1].strip(), "1 days") - result = fmt.Timedelta64Formatter(x[1:2],box=False).get_result() + result = fmt.Timedelta64Formatter(x[1:2], box=False).get_result() self.assertEqual(result[0].strip(), "1 days") def test_days_neg(self): x = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='D') - result = fmt.Timedelta64Formatter(-x,box=True).get_result() + result = fmt.Timedelta64Formatter(-x, box=True).get_result() self.assertEqual(result[0].strip(), "'0 days'") self.assertEqual(result[1].strip(), "'-1 days'") def test_subdays(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') - result = fmt.Timedelta64Formatter(y,box=True).get_result() + result = fmt.Timedelta64Formatter(y, box=True).get_result() self.assertEqual(result[0].strip(), "'00:00:00'") self.assertEqual(result[1].strip(), "'00:00:01'") def test_subdays_neg(self): y = pd.to_timedelta(list(range(5)) + [pd.NaT], unit='s') - result = fmt.Timedelta64Formatter(-y,box=True).get_result() + result = fmt.Timedelta64Formatter(-y, box=True).get_result() self.assertEqual(result[0].strip(), "'00:00:00'") self.assertEqual(result[1].strip(), "'-1 days +23:59:59'") def test_zero(self): x = pd.to_timedelta(list(range(1)) + [pd.NaT], unit='D') - result = fmt.Timedelta64Formatter(x,box=True).get_result() + result = fmt.Timedelta64Formatter(x, box=True).get_result() self.assertEqual(result[0].strip(), "'0 days'") x = pd.to_timedelta(list(range(1)), unit='D') - result = fmt.Timedelta64Formatter(x,box=True).get_result() + result = fmt.Timedelta64Formatter(x, box=True).get_result() self.assertEqual(result[0].strip(), "'0 days'") class TestDatetime64Formatter(tm.TestCase): + def test_mixed(self): x = Series([datetime(2013, 1, 1), datetime(2013, 1, 1, 12), pd.NaT]) result = fmt.Datetime64Formatter(x).get_result() @@ -3931,42 +3950,44 @@ def test_dates_display(self): # 10170 # make sure that we are consistently display date formatting - x = Series(date_range('20130101 09:00:00',periods=5,freq='D')) + x = Series(date_range('20130101 09:00:00', periods=5, freq='D')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-05 09:00:00") - x = Series(date_range('20130101 09:00:00',periods=5,freq='s')) + x = Series(date_range('20130101 09:00:00', periods=5, freq='s')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() self.assertEqual(result[0].strip(), "2013-01-01 09:00:00") self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-01 09:00:04") - x = Series(date_range('20130101 09:00:00',periods=5,freq='ms')) + x = Series(date_range('20130101 09:00:00', periods=5, freq='ms')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000") self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.004") - x = Series(date_range('20130101 09:00:00',periods=5,freq='us')) + x = Series(date_range('20130101 09:00:00', periods=5, freq='us')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000") self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000004") - x = Series(date_range('20130101 09:00:00',periods=5,freq='N')) + x = Series(date_range('20130101 09:00:00', periods=5, freq='N')) x.iloc[1] = np.nan result = fmt.Datetime64Formatter(x).get_result() self.assertEqual(result[0].strip(), "2013-01-01 09:00:00.000000000") self.assertEqual(result[1].strip(), "NaT") self.assertEqual(result[4].strip(), "2013-01-01 09:00:00.000000004") + class TestNaTFormatting(tm.TestCase): + def test_repr(self): self.assertEqual(repr(pd.NaT), "NaT") @@ -3975,6 +3996,7 @@ def test_str(self): class TestDatetimeIndexFormat(tm.TestCase): + def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), pd.NaT]).format() self.assertEqual(formatted[0], "2003-01-01 12:00:00") @@ -3986,31 +4008,37 @@ def test_date(self): self.assertEqual(formatted[1], "NaT") def test_date_tz(self): - formatted = pd.to_datetime([datetime(2013,1,1)], utc=True).format() + formatted = pd.to_datetime([datetime(2013, 1, 1)], utc=True).format() self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") - formatted = pd.to_datetime([datetime(2013,1,1), pd.NaT], utc=True).format() + formatted = pd.to_datetime( + [datetime(2013, 1, 1), pd.NaT], utc=True).format() self.assertEqual(formatted[0], "2013-01-01 00:00:00+00:00") def test_date_explict_date_format(self): - formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format(date_format="%m-%d-%Y", na_rep="UT") + formatted = pd.to_datetime([datetime(2003, 2, 1), pd.NaT]).format( + date_format="%m-%d-%Y", na_rep="UT") self.assertEqual(formatted[0], "02-01-2003") self.assertEqual(formatted[1], "UT") class TestDatetimeIndexUnicode(tm.TestCase): + def test_dates(self): - text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1)])) + text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1) + ])) self.assertTrue("['2013-01-01'," in text) self.assertTrue(", '2014-01-01']" in text) def test_mixed(self): - text = str(pd.to_datetime([datetime(2013,1,1), datetime(2014,1,1,12), datetime(2014,1,1)])) + text = str(pd.to_datetime([datetime(2013, 1, 1), datetime( + 2014, 1, 1, 12), datetime(2014, 1, 1)])) self.assertTrue("'2013-01-01 00:00:00'," in text) self.assertTrue("'2014-01-01 00:00:00']" in text) class TestStringRepTimestamp(tm.TestCase): + def test_no_tz(self): dt_date = datetime(2013, 1, 2) self.assertEqual(str(dt_date), str(Timestamp(dt_date))) @@ -4055,6 +4083,7 @@ def test_tz_dateutil(self): dt_datetime_us = datetime(2013, 1, 2, 12, 1, 3, 45, tzinfo=utc) self.assertEqual(str(dt_datetime_us), str(Timestamp(dt_datetime_us))) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 37cb38454f74e..3754155cca0a3 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -1,36 +1,33 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=E1101,W0612 -from datetime import datetime, timedelta import nose import numpy as np from numpy import nan import pandas as pd -from pandas import (Index, Series, DataFrame, Panel, - isnull, notnull, date_range, period_range) -from pandas.core.index import Index, MultiIndex +from pandas import (Index, Series, DataFrame, Panel, isnull, + date_range, period_range) +from pandas.core.index import MultiIndex import pandas.core.common as com -from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long +from pandas.compat import range, zip from pandas import compat from pandas.util.testing import (assert_series_equal, assert_frame_equal, assert_panel_equal, - assert_almost_equal, - assert_equal, - ensure_clean) + assert_equal) import pandas.util.testing as tm def _skip_if_no_pchip(): try: - from scipy.interpolate import pchip_interpolate + from scipy.interpolate import pchip_interpolate # noqa except ImportError: raise nose.SkipTest('scipy.interpolate.pchip missing') -#------------------------------------------------------------------------------ +# ---------------------------------------------------------------------- # Generic types test cases @@ -54,7 +51,7 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): if value is specified use that if its a scalar if value is an array, repeat it as needed """ - if isinstance(shape,int): + if isinstance(shape, int): shape = tuple([shape] * self._ndim) if value is not None: if np.isscalar(value): @@ -62,39 +59,39 @@ def _construct(self, shape, value=None, dtype=None, **kwargs): arr = None # remove the info axis - kwargs.pop(self._typ._info_axis_name,None) + kwargs.pop(self._typ._info_axis_name, None) else: - arr = np.empty(shape,dtype=dtype) + arr = np.empty(shape, dtype=dtype) arr.fill(value) else: fshape = np.prod(shape) arr = value.ravel() - new_shape = fshape/arr.shape[0] + new_shape = fshape / arr.shape[0] if fshape % arr.shape[0] != 0: raise Exception("invalid value passed in _construct") - arr = np.repeat(arr,new_shape).reshape(shape) + arr = np.repeat(arr, new_shape).reshape(shape) else: arr = np.random.randn(*shape) - return self._typ(arr,dtype=dtype,**kwargs) + return self._typ(arr, dtype=dtype, **kwargs) def _compare(self, result, expected): - self._comparator(result,expected) + self._comparator(result, expected) def test_rename(self): # single axis for axis in self._axes(): - kwargs = { axis : list('ABCD') } - obj = self._construct(4,**kwargs) + kwargs = {axis: list('ABCD')} + obj = self._construct(4, **kwargs) # no values passed - #self.assertRaises(Exception, o.rename(str.lower)) + # self.assertRaises(Exception, o.rename(str.lower)) # rename a single axis - result = obj.rename(**{ axis : str.lower }) + result = obj.rename(**{axis: str.lower}) expected = obj.copy() - setattr(expected,axis,list('abcd')) + setattr(expected, axis, list('abcd')) self._compare(result, expected) # multiple axes at once @@ -102,27 +99,28 @@ def test_rename(self): def test_get_numeric_data(self): n = 4 - kwargs = { } + kwargs = {} for i in range(self._ndim): kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) # get the numeric data - o = self._construct(n,**kwargs) + o = self._construct(n, **kwargs) result = o._get_numeric_data() self._compare(result, o) # non-inclusion result = o._get_bool_data() - expected = self._construct(n,value='empty',**kwargs) - self._compare(result,expected) + expected = self._construct(n, value='empty', **kwargs) + self._compare(result, expected) # get the bool data - arr = np.array([True,True,False,True]) - o = self._construct(n,value=arr,**kwargs) + arr = np.array([True, True, False, True]) + o = self._construct(n, value=arr, **kwargs) result = o._get_numeric_data() self._compare(result, o) - # _get_numeric_data is includes _get_bool_data, so can't test for non-inclusion + # _get_numeric_data is includes _get_bool_data, so can't test for + # non-inclusion def test_get_default(self): @@ -133,7 +131,7 @@ def test_get_default(self): for data, index in ((d0, d1), (d1, d0)): s = Series(data, index=index) - for i,d in zip(index, data): + for i, d in zip(index, data): self.assertEqual(s.get(i), d) self.assertEqual(s.get(i, d), d) self.assertEqual(s.get(i, "z"), d) @@ -146,45 +144,47 @@ def test_nonzero(self): # GH 4633 # look at the boolean/nonzero behavior for objects obj = self._construct(shape=4) - self.assertRaises(ValueError, lambda : bool(obj == 0)) - self.assertRaises(ValueError, lambda : bool(obj == 1)) - self.assertRaises(ValueError, lambda : bool(obj)) + self.assertRaises(ValueError, lambda: bool(obj == 0)) + self.assertRaises(ValueError, lambda: bool(obj == 1)) + self.assertRaises(ValueError, lambda: bool(obj)) - obj = self._construct(shape=4,value=1) - self.assertRaises(ValueError, lambda : bool(obj == 0)) - self.assertRaises(ValueError, lambda : bool(obj == 1)) - self.assertRaises(ValueError, lambda : bool(obj)) + obj = self._construct(shape=4, value=1) + self.assertRaises(ValueError, lambda: bool(obj == 0)) + self.assertRaises(ValueError, lambda: bool(obj == 1)) + self.assertRaises(ValueError, lambda: bool(obj)) - obj = self._construct(shape=4,value=np.nan) - self.assertRaises(ValueError, lambda : bool(obj == 0)) - self.assertRaises(ValueError, lambda : bool(obj == 1)) - self.assertRaises(ValueError, lambda : bool(obj)) + obj = self._construct(shape=4, value=np.nan) + self.assertRaises(ValueError, lambda: bool(obj == 0)) + self.assertRaises(ValueError, lambda: bool(obj == 1)) + self.assertRaises(ValueError, lambda: bool(obj)) # empty obj = self._construct(shape=0) - self.assertRaises(ValueError, lambda : bool(obj)) + self.assertRaises(ValueError, lambda: bool(obj)) # invalid behaviors - obj1 = self._construct(shape=4,value=1) - obj2 = self._construct(shape=4,value=1) + obj1 = self._construct(shape=4, value=1) + obj2 = self._construct(shape=4, value=1) def f(): if obj1: com.pprint_thing("this works and shouldn't") + self.assertRaises(ValueError, f) - self.assertRaises(ValueError, lambda : obj1 and obj2) - self.assertRaises(ValueError, lambda : obj1 or obj2) - self.assertRaises(ValueError, lambda : not obj1) + self.assertRaises(ValueError, lambda: obj1 and obj2) + self.assertRaises(ValueError, lambda: obj1 or obj2) + self.assertRaises(ValueError, lambda: not obj1) def test_numpy_1_7_compat_numeric_methods(self): # GH 4435 # numpy in 1.7 tries to pass addtional arguments to pandas functions o = self._construct(shape=4) - for op in ['min','max','max','var','std','prod','sum','cumsum','cumprod', - 'median','skew','kurt','compound','cummax','cummin','all','any']: - f = getattr(np,op,None) + for op in ['min', 'max', 'max', 'var', 'std', 'prod', 'sum', 'cumsum', + 'cumprod', 'median', 'skew', 'kurt', 'compound', 'cummax', + 'cummin', 'all', 'any']: + f = getattr(np, op, None) if f is not None: f(o) @@ -221,7 +221,9 @@ def test_constructor_compound_dtypes(self): def f(dtype): return self._construct(shape=3, dtype=dtype) - self.assertRaises(NotImplementedError, f, [("A","datetime64[h]"), ("B","str"), ("C","int32")]) + self.assertRaises(NotImplementedError, f, [("A", "datetime64[h]"), + ("B", "str"), + ("C", "int32")]) # these work (though results may be unexpected) f('int64') @@ -230,11 +232,11 @@ def f(dtype): def check_metadata(self, x, y=None): for m in x._metadata: - v = getattr(x,m,None) + v = getattr(x, m, None) if y is None: self.assertIsNone(v) else: - self.assertEqual(v, getattr(y,m,None)) + self.assertEqual(v, getattr(y, m, None)) def test_metadata_propagation(self): # check that the metadata matches up on the resulting ops @@ -250,28 +252,27 @@ def test_metadata_propagation(self): # this, though it actually does work) # can remove all of these try: except: blocks on the actual operations - # ---------- # preserving # ---------- # simple ops with scalars - for op in [ '__add__','__sub__','__truediv__','__mul__' ]: - result = getattr(o,op)(1) - self.check_metadata(o,result) + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: + result = getattr(o, op)(1) + self.check_metadata(o, result) # ops with like - for op in [ '__add__','__sub__','__truediv__','__mul__' ]: + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: try: - result = getattr(o,op)(o) - self.check_metadata(o,result) + result = getattr(o, op)(o) + self.check_metadata(o, result) except (ValueError, AttributeError): pass # simple boolean - for op in [ '__eq__','__le__', '__ge__' ]: - v1 = getattr(o,op)(o) - self.check_metadata(o,v1) + for op in ['__eq__', '__le__', '__ge__']: + v1 = getattr(o, op)(o) + self.check_metadata(o, v1) try: self.check_metadata(o, v1 & v1) @@ -286,7 +287,7 @@ def test_metadata_propagation(self): # combine_first try: result = o.combine_first(o2) - self.check_metadata(o,result) + self.check_metadata(o, result) except (AttributeError): pass @@ -302,12 +303,12 @@ def test_metadata_propagation(self): pass # simple boolean - for op in [ '__eq__','__le__', '__ge__' ]: + for op in ['__eq__', '__le__', '__ge__']: # this is a name matching op - v1 = getattr(o,op)(o) + v1 = getattr(o, op)(o) - v2 = getattr(o,op)(o2) + v2 = getattr(o, op)(o2) self.check_metadata(v2) try: @@ -326,17 +327,18 @@ def test_head_tail(self): o = self._construct(shape=10) # check all index types - for index in [ tm.makeFloatIndex, tm.makeIntIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex, + tm.makeUnicodeIndex, tm.makeDateIndex, + tm.makePeriodIndex]: axis = o._get_axis_name(0) - setattr(o,axis,index(len(getattr(o,axis)))) + setattr(o, axis, index(len(getattr(o, axis)))) # Panel + dims try: o.head() except (NotImplementedError): - raise nose.SkipTest('not implemented on {0}'.format(o.__class__.__name__)) + raise nose.SkipTest('not implemented on {0}'.format( + o.__class__.__name__)) self._compare(o.head(), o.iloc[:5]) self._compare(o.tail(), o.iloc[-5:]) @@ -346,8 +348,8 @@ def test_head_tail(self): self._compare(o.tail(0), o.iloc[0:0]) # bounded - self._compare(o.head(len(o)+1), o) - self._compare(o.tail(len(o)+1), o) + self._compare(o.head(len(o) + 1), o) + self._compare(o.tail(len(o) + 1), o) # neg index self._compare(o.head(-3), o.head(7)) @@ -362,18 +364,24 @@ def test_sample(self): # Check behavior of random_state argument ### - # Check for stability when receives seed or random state -- run 10 times. + # Check for stability when receives seed or random state -- run 10 + # times. for test in range(10): - seed = np.random.randint(0,100) - self._compare(o.sample(n=4, random_state=seed), o.sample(n=4, random_state=seed)) - self._compare(o.sample(frac=0.7,random_state=seed), o.sample(frac=0.7, random_state=seed)) - - self._compare(o.sample(n=4, random_state=np.random.RandomState(test)), - o.sample(n=4, random_state=np.random.RandomState(test))) - - self._compare(o.sample(frac=0.7,random_state=np.random.RandomState(test)), - o.sample(frac=0.7, random_state=np.random.RandomState(test))) - + seed = np.random.randint(0, 100) + self._compare( + o.sample(n=4, random_state=seed), o.sample(n=4, + random_state=seed)) + self._compare( + o.sample(frac=0.7, random_state=seed), o.sample( + frac=0.7, random_state=seed)) + + self._compare( + o.sample(n=4, random_state=np.random.RandomState(test)), + o.sample(n=4, random_state=np.random.RandomState(test))) + + self._compare( + o.sample(frac=0.7, random_state=np.random.RandomState(test)), + o.sample(frac=0.7, random_state=np.random.RandomState(test))) # Check for error when random_state argument invalid. with tm.assertRaises(ValueError): @@ -395,7 +403,7 @@ def test_sample(self): # Make sure float values of `n` give error with tm.assertRaises(ValueError): - o.sample(n= 3.2) + o.sample(n=3.2) # Check lengths are right self.assertTrue(len(o.sample(n=4) == 4)) @@ -408,51 +416,50 @@ def test_sample(self): # Weight length must be right with tm.assertRaises(ValueError): - o.sample(n=3, weights=[0,1]) + o.sample(n=3, weights=[0, 1]) with tm.assertRaises(ValueError): - bad_weights = [0.5]*11 + bad_weights = [0.5] * 11 o.sample(n=3, weights=bad_weights) with tm.assertRaises(ValueError): - bad_weight_series = Series([0,0,0.2]) + bad_weight_series = Series([0, 0, 0.2]) o.sample(n=4, weights=bad_weight_series) # Check won't accept negative weights with tm.assertRaises(ValueError): - bad_weights = [-0.1]*10 + bad_weights = [-0.1] * 10 o.sample(n=3, weights=bad_weights) # Check inf and -inf throw errors: with tm.assertRaises(ValueError): - weights_with_inf = [0.1]*10 + weights_with_inf = [0.1] * 10 weights_with_inf[0] = np.inf o.sample(n=3, weights=weights_with_inf) with tm.assertRaises(ValueError): - weights_with_ninf = [0.1]*10 - weights_with_ninf[0] = -np.inf + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf o.sample(n=3, weights=weights_with_ninf) # All zeros raises errors - zero_weights = [0]*10 + zero_weights = [0] * 10 with tm.assertRaises(ValueError): o.sample(n=3, weights=zero_weights) # All missing weights - nan_weights = [np.nan]*10 + nan_weights = [np.nan] * 10 with tm.assertRaises(ValueError): o.sample(n=3, weights=nan_weights) - # A few dataframe test with degenerate weights. - easy_weight_list = [0]*10 + easy_weight_list = [0] * 10 easy_weight_list[5] = 1 - df = pd.DataFrame({'col1':range(10,20), - 'col2':range(20,30), - 'colString': ['a']*10, - 'easyweights':easy_weight_list}) + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10, + 'easyweights': easy_weight_list}) sample1 = df.sample(n=1, weights='easyweights') assert_frame_equal(sample1, df.iloc[5:6]) @@ -462,47 +469,52 @@ def test_sample(self): with tm.assertRaises(ValueError): s.sample(n=3, weights='weight_column') - panel = pd.Panel(items = [0,1,2], major_axis = [2,3,4], minor_axis = [3,4,5]) + panel = pd.Panel(items=[0, 1, 2], major_axis=[2, 3, 4], + minor_axis=[3, 4, 5]) with tm.assertRaises(ValueError): panel.sample(n=1, weights='weight_column') with tm.assertRaises(ValueError): - df.sample(n=1, weights='weight_column', axis = 1) + df.sample(n=1, weights='weight_column', axis=1) # Check weighting key error with tm.assertRaises(KeyError): df.sample(n=3, weights='not_a_real_column_name') - # Check np.nan are replaced by zeros. - weights_with_nan = [np.nan]*10 + # Check np.nan are replaced by zeros. + weights_with_nan = [np.nan] * 10 weights_with_nan[5] = 0.5 - self._compare(o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) + self._compare( + o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) # Check None are also replaced by zeros. - weights_with_None = [None]*10 + weights_with_None = [None] * 10 weights_with_None[5] = 0.5 - self._compare(o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) + self._compare( + o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) # Check that re-normalizes weights that don't sum to one. - weights_less_than_1 = [0]*10 + weights_less_than_1 = [0] * 10 weights_less_than_1[0] = 0.5 - tm.assert_frame_equal(df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) - + tm.assert_frame_equal( + df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) ### # Test axis argument ### # Test axis argument - df = pd.DataFrame({'col1':range(10), 'col2':['a']*10}) - second_column_weight = [0,1] - assert_frame_equal(df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) + df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10}) + second_column_weight = [0, 1] + assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) # Different axis arg types - assert_frame_equal(df.sample(n=1, axis='columns', weights=second_column_weight), + assert_frame_equal(df.sample(n=1, axis='columns', + weights=second_column_weight), df[['col2']]) - weight = [0]*10 + weight = [0] * 10 weight[5] = 0.5 assert_frame_equal(df.sample(n=1, axis='rows', weights=weight), df.iloc[5:6]) @@ -522,56 +534,62 @@ def test_sample(self): # Test weight length compared to correct axis with tm.assertRaises(ValueError): - df.sample(n=1, axis=1, weights=[0.5]*10) + df.sample(n=1, axis=1, weights=[0.5] * 10) # Check weights with axis = 1 - easy_weight_list = [0]*3 + easy_weight_list = [0] * 3 easy_weight_list[2] = 1 - df = pd.DataFrame({'col1':range(10,20), - 'col2':range(20,30), - 'colString': ['a']*10}) + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10}) sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) assert_frame_equal(sample1, df[['colString']]) # Test default axes - p = pd.Panel(items = ['a','b','c'], major_axis=[2,4,6], minor_axis=[1,3,5]) - assert_panel_equal(p.sample(n=3, random_state=42), p.sample(n=3, axis=1, random_state=42)) - assert_frame_equal(df.sample(n=3, random_state=42), df.sample(n=3, axis=0, random_state=42)) + p = pd.Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], + minor_axis=[1, 3, 5]) + assert_panel_equal( + p.sample(n=3, random_state=42), p.sample(n=3, axis=1, + random_state=42)) + assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, + random_state=42)) # Test that function aligns weights with frame - df = DataFrame({'col1':[5,6,7], 'col2':['a','b','c'], }, index = [9,5,3]) - s = Series([1,0,0], index=[3,5,9]) + df = DataFrame( + {'col1': [5, 6, 7], + 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3]) + s = Series([1, 0, 0], index=[3, 5, 9]) assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) # Weights have index values to be dropped because not in # sampled DataFrame - s2 = Series([0.001,0,10000], index=[3,5,10]) + s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) # Weights have empty values to be filed with zeros - s3 = Series([0.01,0], index=[3,5]) + s3 = Series([0.01, 0], index=[3, 5]) assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) # No overlap in weight and sampled DataFrame indices - s4 = Series([1,0], index=[1,2]) + s4 = Series([1, 0], index=[1, 2]) with tm.assertRaises(ValueError): df.sample(1, weights=s4) - def test_size_compat(self): # GH8846 # size property should be defined o = self._construct(shape=10) self.assertTrue(o.size == np.prod(o.shape)) - self.assertTrue(o.size == 10**len(o.axes)) + self.assertTrue(o.size == 10 ** len(o.axes)) def test_split_compat(self): # xref GH8846 o = self._construct(shape=10) - self.assertTrue(len(np.array_split(o,5)) == 5) - self.assertTrue(len(np.array_split(o,2)) == 2) + self.assertTrue(len(np.array_split(o, 5)) == 5) + self.assertTrue(len(np.array_split(o, 2)) == 2) def test_unexpected_keyword(self): # GH8597 from pandas.util.testing import assertRaisesRegexp @@ -593,9 +611,10 @@ def test_unexpected_keyword(self): # GH8597 with assertRaisesRegexp(TypeError, 'unexpected keyword'): ts.fillna(0, in_place=True) + class TestSeries(tm.TestCase, Generic): _typ = Series - _comparator = lambda self, x, y: assert_series_equal(x,y) + _comparator = lambda self, x, y: assert_series_equal(x, y) def setUp(self): self.ts = tm.makeTimeSeries() # Was at top level in test_series @@ -605,9 +624,10 @@ def setUp(self): self.series.name = 'series' def test_rename_mi(self): - s = Series([11,21,31], - index=MultiIndex.from_tuples([("A",x) for x in ["a","B","c"]])) - result = s.rename(str.lower) + s = Series([11, 21, 31], + index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]])) + s.rename(str.lower) def test_get_numeric_data_preserve_dtype(self): @@ -629,9 +649,9 @@ def test_get_numeric_data_preserve_dtype(self): result = o._get_bool_data() self._compare(result, o) - o = Series(date_range('20130101',periods=3)) + o = Series(date_range('20130101', periods=3)) result = o._get_numeric_data() - expected = Series([],dtype='M8[ns]', index=pd.Index([], dtype=object)) + expected = Series([], dtype='M8[ns]', index=pd.Index([], dtype=object)) self._compare(result, expected) def test_nonzero_single_element(self): @@ -644,57 +664,59 @@ def test_nonzero_single_element(self): self.assertFalse(s.bool()) # single item nan to raise - for s in [ Series([np.nan]), Series([pd.NaT]), Series([True]), Series([False]) ]: - self.assertRaises(ValueError, lambda : bool(s)) + for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), + Series([False])]: + self.assertRaises(ValueError, lambda: bool(s)) - for s in [ Series([np.nan]), Series([pd.NaT])]: - self.assertRaises(ValueError, lambda : s.bool()) + for s in [Series([np.nan]), Series([pd.NaT])]: + self.assertRaises(ValueError, lambda: s.bool()) # multiple bool are still an error - for s in [Series([True,True]), Series([False, False])]: - self.assertRaises(ValueError, lambda : bool(s)) - self.assertRaises(ValueError, lambda : s.bool()) + for s in [Series([True, True]), Series([False, False])]: + self.assertRaises(ValueError, lambda: bool(s)) + self.assertRaises(ValueError, lambda: s.bool()) # single non-bool are an error - for s in [Series([1]), Series([0]), - Series(['a']), Series([0.0])]: - self.assertRaises(ValueError, lambda : bool(s)) - self.assertRaises(ValueError, lambda : s.bool()) + for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: + self.assertRaises(ValueError, lambda: bool(s)) + self.assertRaises(ValueError, lambda: s.bool()) def test_metadata_propagation_indiv(self): # check that the metadata matches up on the resulting ops - o = Series(range(3),range(3)) + o = Series(range(3), range(3)) o.name = 'foo' - o2 = Series(range(3),range(3)) + o2 = Series(range(3), range(3)) o2.name = 'bar' result = o.T - self.check_metadata(o,result) + self.check_metadata(o, result) # resample ts = Series(np.random.rand(1000), - index=date_range('20130101',periods=1000,freq='s'), + index=date_range('20130101', periods=1000, freq='s'), name='foo') result = ts.resample('1T') - self.check_metadata(ts,result) + self.check_metadata(ts, result) - result = ts.resample('1T',how='min') - self.check_metadata(ts,result) + result = ts.resample('1T', how='min') + self.check_metadata(ts, result) - result = ts.resample('1T',how=lambda x: x.sum()) - self.check_metadata(ts,result) + result = ts.resample('1T', how=lambda x: x.sum()) + self.check_metadata(ts, result) _metadata = Series._metadata _finalize = Series.__finalize__ - Series._metadata = ['name','filename'] + Series._metadata = ['name', 'filename'] o.filename = 'foo' o2.filename = 'bar' def finalize(self, other, method=None, **kwargs): for name in self._metadata: if method == 'concat' and name == 'filename': - value = '+'.join([ getattr(o,name) for o in other.objs if getattr(o,name,None) ]) + value = '+'.join([getattr( + o, name) for o in other.objs if getattr(o, name, None) + ]) object.__setattr__(self, name, value) else: object.__setattr__(self, name, getattr(other, name, None)) @@ -704,7 +726,7 @@ def finalize(self, other, method=None, **kwargs): Series.__finalize__ = finalize result = pd.concat([o, o2]) - self.assertEqual(result.filename,'foo+bar') + self.assertEqual(result.filename, 'foo+bar') self.assertIsNone(result.name) # reset @@ -742,7 +764,8 @@ def test_interp_regression(self): ser = Series(np.sort(np.random.uniform(size=100))) # interpolate at new_index - new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])) + new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, + 50.75])) interp_s = ser.reindex(new_index).interpolate(method='pchip') # does not blow up, GH5977 interp_s[49:51] @@ -772,8 +795,9 @@ def test_interpolate_index_values(self): expected = s.copy() bad = isnull(expected.values) good = ~bad - expected = Series( - np.interp(vals[bad], vals[good], s.values[good]), index=s.index[bad]) + expected = Series(np.interp(vals[bad], vals[good], + s.values[good]), + index=s.index[bad]) assert_series_equal(result[bad], expected) @@ -867,24 +891,22 @@ def test_interp_limit_forward(self): # Provide 'forward' (the default) explicitly here. expected = Series([1., 3., 5., 7., np.nan, 11.]) - result = s.interpolate( - method='linear', limit=2, limit_direction='forward') + result = s.interpolate(method='linear', limit=2, + limit_direction='forward') assert_series_equal(result, expected) - result = s.interpolate( - method='linear', limit=2, limit_direction='FORWARD') + result = s.interpolate(method='linear', limit=2, + limit_direction='FORWARD') assert_series_equal(result, expected) def test_interp_limit_bad_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - self.assertRaises(ValueError, s.interpolate, - method='linear', limit=2, + self.assertRaises(ValueError, s.interpolate, method='linear', limit=2, limit_direction='abc') # raises an error even if no limit is specified. - self.assertRaises(ValueError, s.interpolate, - method='linear', + self.assertRaises(ValueError, s.interpolate, method='linear', limit_direction='abc') def test_interp_limit_direction(self): @@ -892,26 +914,27 @@ def test_interp_limit_direction(self): s = Series([1, 3, np.nan, np.nan, np.nan, 11]) expected = Series([1., 3., np.nan, 7., 9., 11.]) - result = s.interpolate( - method='linear', limit=2, limit_direction='backward') + result = s.interpolate(method='linear', limit=2, + limit_direction='backward') assert_series_equal(result, expected) expected = Series([1., 3., 5., np.nan, 9., 11.]) - result = s.interpolate( - method='linear', limit=1, limit_direction='both') + result = s.interpolate(method='linear', limit=1, + limit_direction='both') assert_series_equal(result, expected) # Check that this works on a longer series of nans. - s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan]) + s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, + np.nan]) expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.]) - result = s.interpolate( - method='linear', limit=2, limit_direction='both') + result = s.interpolate(method='linear', limit=2, + limit_direction='both') assert_series_equal(result, expected) expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.]) - result = s.interpolate( - method='linear', limit=1, limit_direction='both') + result = s.interpolate(method='linear', limit=1, + limit_direction='both') assert_series_equal(result, expected) def test_interp_limit_to_ends(self): @@ -919,13 +942,13 @@ def test_interp_limit_to_ends(self): s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) expected = Series([5., 5., 5., 7., 9., np.nan]) - result = s.interpolate( - method='linear', limit=2, limit_direction='backward') + result = s.interpolate(method='linear', limit=2, + limit_direction='backward') assert_series_equal(result, expected) expected = Series([5., 5., 5., 7., 9., 9.]) - result = s.interpolate( - method='linear', limit=2, limit_direction='both') + result = s.interpolate(method='linear', limit=2, + limit_direction='both') assert_series_equal(result, expected) def test_interp_limit_before_ends(self): @@ -933,18 +956,18 @@ def test_interp_limit_before_ends(self): s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) expected = Series([np.nan, np.nan, 5., 7., 7., np.nan]) - result = s.interpolate( - method='linear', limit=1, limit_direction='forward') + result = s.interpolate(method='linear', limit=1, + limit_direction='forward') assert_series_equal(result, expected) expected = Series([np.nan, 5., 5., 7., np.nan, np.nan]) - result = s.interpolate( - method='linear', limit=1, limit_direction='backward') + result = s.interpolate(method='linear', limit=1, + limit_direction='backward') assert_series_equal(result, expected) expected = Series([np.nan, 5., 5., 7., 7., np.nan]) - result = s.interpolate( - method='linear', limit=1, limit_direction='both') + result = s.interpolate(method='linear', limit=1, + limit_direction='both') assert_series_equal(result, expected) def test_interp_all_good(self): @@ -981,7 +1004,8 @@ def test_interp_datetime64(self): tm._skip_if_no_scipy() df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3)) result = df.interpolate(method='nearest') - expected = Series([1., 1., 3.], index=date_range('1/1/2000', periods=3)) + expected = Series([1., 1., 3.], + index=date_range('1/1/2000', periods=3)) assert_series_equal(result, expected) def test_interp_limit_no_nans(self): @@ -992,8 +1016,8 @@ def test_interp_limit_no_nans(self): assert_series_equal(result, expected) def test_describe(self): - _ = self.series.describe() - _ = self.ts.describe() + self.series.describe() + self.ts.describe() def test_describe_objects(self): s = Series(['a', 'b', 'b', np.nan, np.nan, np.nan, 'c', 'd', 'a', 'a']) @@ -1035,12 +1059,13 @@ def test_describe_none(self): class TestDataFrame(tm.TestCase, Generic): _typ = DataFrame - _comparator = lambda self, x, y: assert_frame_equal(x,y) + _comparator = lambda self, x, y: assert_frame_equal(x, y) def test_rename_mi(self): - df = DataFrame([11,21,31], - index=MultiIndex.from_tuples([("A",x) for x in ["a","B","c"]])) - result = df.rename(str.lower) + df = DataFrame([ + 11, 21, 31 + ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]])) + df.rename(str.lower) def test_nonzero_single_element(self): @@ -1052,8 +1077,8 @@ def test_nonzero_single_element(self): self.assertFalse(df.bool()) df = DataFrame([[False, False]]) - self.assertRaises(ValueError, lambda : df.bool()) - self.assertRaises(ValueError, lambda : bool(df)) + self.assertRaises(ValueError, lambda: df.bool()) + self.assertRaises(ValueError, lambda: bool(df)) def test_get_numeric_data_preserve_dtype(self): @@ -1064,28 +1089,36 @@ def test_get_numeric_data_preserve_dtype(self): self._compare(result, expected) def test_interp_basic(self): - df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], 'D': list('abcd')}) - expected = DataFrame({'A': [1., 2., 3., 4.], 'B': [1., 4., 9., 9.], - 'C': [1, 2, 3, 5], 'D': list('abcd')}) + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) + expected = DataFrame({'A': [1., 2., 3., 4.], + 'B': [1., 4., 9., 9.], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) result = df.interpolate() assert_frame_equal(result, expected) result = df.set_index('C').interpolate() expected = df.set_index('C') - expected.loc[3,'A'] = 3 - expected.loc[5,'B'] = 9 + expected.loc[3, 'A'] = 3 + expected.loc[5, 'B'] = 9 assert_frame_equal(result, expected) def test_interp_bad_method(self): - df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], 'D': list('abcd')}) + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) with tm.assertRaises(ValueError): df.interpolate(method='not_a_method') def test_interp_combo(self): - df = DataFrame({'A': [1., 2., np.nan, 4.], 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], 'D': list('abcd')}) + df = DataFrame({'A': [1., 2., np.nan, 4.], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) result = df['A'].interpolate() expected = Series([1., 2., 3., 4.], name='A') @@ -1149,8 +1182,8 @@ def test_interp_alt_scipy(self): 'C': [1, 2, 3, 5, 8, 13, 21]}) result = df.interpolate(method='barycentric') expected = df.copy() - expected.ix[2,'A'] = 3 - expected.ix[5,'A'] = 6 + expected.ix[2, 'A'] = 3 + expected.ix[5, 'A'] = 6 assert_frame_equal(result, expected) result = df.interpolate(method='barycentric', downcast='infer') @@ -1163,8 +1196,8 @@ def test_interp_alt_scipy(self): _skip_if_no_pchip() result = df.interpolate(method='pchip') - expected.ix[2,'A'] = 3 - expected.ix[5,'A'] = 6.125 + expected.ix[2, 'A'] = 3 + expected.ix[5, 'A'] = 6.125 assert_frame_equal(result, expected) def test_interp_rowwise(self): @@ -1175,9 +1208,9 @@ def test_interp_rowwise(self): 4: [1, 2, 3, 4]}) result = df.interpolate(axis=1) expected = df.copy() - expected.loc[3,1] = 5 - expected.loc[0,2] = 3 - expected.loc[1,3] = 3 + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 expected[4] = expected[4].astype(np.float64) assert_frame_equal(result, expected) @@ -1208,8 +1241,10 @@ def test_interp_leading_nans(self): assert_frame_equal(result, expected) def test_interp_raise_on_only_mixed(self): - df = DataFrame({'A': [1, 2, np.nan, 4], 'B': ['a', 'b', 'c', 'd'], - 'C': [np.nan, 2, 5, 7], 'D': [np.nan, np.nan, 9, 9], + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': ['a', 'b', 'c', 'd'], + 'C': [np.nan, 2, 5, 7], + 'D': [np.nan, np.nan, 9, 9], 'E': [1, 2, 3, 4]}) with tm.assertRaises(TypeError): df.interpolate(axis=1) @@ -1227,7 +1262,8 @@ def test_interp_inplace(self): def test_interp_inplace_row(self): # GH 10395 - result = DataFrame({'a': [1.,2.,3.,4.], 'b': [np.nan, 2., 3., 4.], + result = DataFrame({'a': [1., 2., 3., 4.], + 'b': [np.nan, 2., 3., 4.], 'c': [3, 2, 2, 2]}) expected = result.interpolate(method='linear', axis=1, inplace=False) result.interpolate(method='linear', axis=1, inplace=True) @@ -1239,10 +1275,14 @@ def test_interp_ignore_all_good(self): 'B': [1, 2, 3, 4], 'C': [1., 2., np.nan, 4.], 'D': [1., 2., 3., 4.]}) - expected = DataFrame({'A': np.array([1, 2, 3, 4], dtype='float64'), - 'B': np.array([1, 2, 3, 4], dtype='int64'), - 'C': np.array([1., 2., 3, 4.], dtype='float64'), - 'D': np.array([1., 2., 3., 4.], dtype='float64')}) + expected = DataFrame({'A': np.array( + [1, 2, 3, 4], dtype='float64'), + 'B': np.array( + [1, 2, 3, 4], dtype='int64'), + 'C': np.array( + [1., 2., 3, 4.], dtype='float64'), + 'D': np.array( + [1., 2., 3., 4.], dtype='float64')}) result = df.interpolate(downcast=None) assert_frame_equal(result, expected) @@ -1252,9 +1292,9 @@ def test_interp_ignore_all_good(self): assert_frame_equal(result, df[['B', 'D']]) def test_describe(self): - desc = tm.makeDataFrame().describe() - desc = tm.makeMixedDataFrame().describe() - desc = tm.makeTimeDataFrame().describe() + tm.makeDataFrame().describe() + tm.makeMixedDataFrame().describe() + tm.makeTimeDataFrame().describe() def test_describe_percentiles_percent_or_raw(self): msg = 'percentiles should all be in the interval \\[0, 1\\]' @@ -1332,7 +1372,8 @@ def test_describe_objects(self): index=['count', 'unique', 'top', 'freq']) assert_frame_equal(result, expected) - df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D')}) + df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D') + }) df.loc[4] = pd.Timestamp('2010-01-04') result = df.describe() expected = DataFrame({"C1": [5, 4, pd.Timestamp('2010-01-04'), 2, @@ -1373,7 +1414,7 @@ def test_describe_typefiltering(self): 'ts': tm.makeTimeSeries()[:24].index}) descN = df.describe() - expected_cols = ['numC', 'numD',] + expected_cols = ['numC', 'numD', ] expected = DataFrame(dict((k, df[k].describe()) for k in expected_cols), columns=expected_cols) @@ -1384,7 +1425,7 @@ def test_describe_typefiltering(self): desc = df.describe(exclude=['object', 'datetime']) assert_frame_equal(desc, descN) desc = df.describe(include=['float']) - assert_frame_equal(desc, descN.drop('numC',1)) + assert_frame_equal(desc, descN.drop('numC', 1)) descC = df.describe(include=['O']) expected_cols = ['catA', 'catB'] @@ -1394,32 +1435,34 @@ def test_describe_typefiltering(self): assert_frame_equal(descC, expected) descD = df.describe(include=['datetime']) - assert_series_equal( descD.ts, df.ts.describe()) + assert_series_equal(descD.ts, df.ts.describe()) - desc = df.describe(include=['object','number', 'datetime']) - assert_frame_equal(desc.loc[:,["numC","numD"]].dropna(), descN) - assert_frame_equal(desc.loc[:,["catA","catB"]].dropna(), descC) - descDs = descD.sort_index() # the index order change for mixed-types - assert_frame_equal(desc.loc[:,"ts":].dropna().sort_index(), descDs) + desc = df.describe(include=['object', 'number', 'datetime']) + assert_frame_equal(desc.loc[:, ["numC", "numD"]].dropna(), descN) + assert_frame_equal(desc.loc[:, ["catA", "catB"]].dropna(), descC) + descDs = descD.sort_index() # the index order change for mixed-types + assert_frame_equal(desc.loc[:, "ts":].dropna().sort_index(), descDs) - desc = df.loc[:,'catA':'catB'].describe(include='all') + desc = df.loc[:, 'catA':'catB'].describe(include='all') assert_frame_equal(desc, descC) - desc = df.loc[:,'numC':'numD'].describe(include='all') + desc = df.loc[:, 'numC':'numD'].describe(include='all') assert_frame_equal(desc, descN) - desc = df.describe(percentiles = [], include='all') - cnt = Series(data=[4,4,6,6,6], index=['catA','catB','numC','numD','ts']) - assert_series_equal( desc.count(), cnt) + desc = df.describe(percentiles=[], include='all') + cnt = Series(data=[4, 4, 6, 6, 6], + index=['catA', 'catB', 'numC', 'numD', 'ts']) + assert_series_equal(desc.count(), cnt) self.assertTrue('count' in desc.index) self.assertTrue('unique' in desc.index) self.assertTrue('50%' in desc.index) self.assertTrue('first' in desc.index) - desc = df.drop("ts", 1).describe(percentiles = [], include='all') - assert_series_equal( desc.count(), cnt.drop("ts")) + desc = df.drop("ts", 1).describe(percentiles=[], include='all') + assert_series_equal(desc.count(), cnt.drop("ts")) self.assertTrue('first' not in desc.index) - desc = df.drop(["numC","numD"], 1).describe(percentiles = [], include='all') - assert_series_equal( desc.count(), cnt.drop(["numC","numD"])) + desc = df.drop(["numC", "numD"], 1).describe(percentiles=[], + include='all') + assert_series_equal(desc.count(), cnt.drop(["numC", "numD"])) self.assertTrue('50%' not in desc.index) def test_describe_typefiltering_category_bool(self): @@ -1446,8 +1489,9 @@ def test_describe_typefiltering_category_bool(self): assert_frame_equal(desc1, desc2) def test_describe_timedelta(self): - df = DataFrame({"td": pd.to_timedelta(np.arange(24)%20,"D")}) - self.assertTrue(df.describe().loc["mean"][0] == pd.to_timedelta("8d4h")) + df = DataFrame({"td": pd.to_timedelta(np.arange(24) % 20, "D")}) + self.assertTrue(df.describe().loc["mean"][0] == pd.to_timedelta( + "8d4h")) def test_describe_typefiltering_dupcol(self): df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, @@ -1462,33 +1506,37 @@ def test_describe_typefiltering_dupcol(self): def test_describe_typefiltering_groupby(self): df = DataFrame({'catA': ['foo', 'foo', 'bar'] * 8, - 'catB': ['a', 'b', 'c', 'd'] * 6, - 'numC': np.arange(24), - 'numD': np.arange(24.) + .5, - 'ts': tm.makeTimeSeries()[:24].index}) + 'catB': ['a', 'b', 'c', 'd'] * 6, + 'numC': np.arange(24), + 'numD': np.arange(24.) + .5, + 'ts': tm.makeTimeSeries()[:24].index}) G = df.groupby('catA') self.assertTrue(G.describe(include=['number']).shape == (16, 2)) - self.assertTrue(G.describe(include=['number', 'object']).shape == (22, 3)) + self.assertTrue(G.describe(include=['number', 'object']).shape == (22, + 3)) self.assertTrue(G.describe(include='all').shape == (26, 4)) def test_describe_multi_index_df_column_names(self): """ Test that column names persist after the describe operation.""" - df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + df = pd.DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) # GH 11517 # test for hierarchical index hierarchical_index_df = df.groupby(['A', 'B']).mean().T self.assertTrue(hierarchical_index_df.columns.names == ['A', 'B']) - self.assertTrue(hierarchical_index_df.describe().columns.names == ['A', 'B']) + self.assertTrue(hierarchical_index_df.describe().columns.names == + ['A', 'B']) # test for non-hierarchical index non_hierarchical_index_df = df.groupby(['A']).mean().T self.assertTrue(non_hierarchical_index_df.columns.names == ['A']) - self.assertTrue(non_hierarchical_index_df.describe().columns.names == ['A']) + self.assertTrue(non_hierarchical_index_df.describe().columns.names == + ['A']) def test_no_order(self): tm._skip_if_no_scipy() @@ -1506,7 +1554,9 @@ def test_spline(self): assert_series_equal(result, expected) def test_spline_extrapolate(self): - tm.skip_if_no_package('scipy', '0.15', 'setting ext on scipy.interpolate.UnivariateSpline') + tm.skip_if_no_package( + 'scipy', '0.15', + 'setting ext on scipy.interpolate.UnivariateSpline') s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) result3 = s.interpolate(method='spline', order=1, ext=3) expected3 = Series([1., 2., 3., 4., 5., 6., 6.]) @@ -1525,8 +1575,8 @@ def test_spline_smooth(self): def test_spline_interpolation(self): tm._skip_if_no_scipy() - s = Series(np.arange(10)**2) - s[np.random.randint(0,9,3)] = np.nan + s = Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan result1 = s.interpolate(method='spline', order=1) expected1 = s.interpolate(method='spline', order=1) assert_series_equal(result1, expected1) @@ -1535,8 +1585,8 @@ def test_spline_interpolation(self): def test_spline_error(self): tm._skip_if_no_scipy() - s = pd.Series(np.arange(10)**2) - s[np.random.randint(0,9,3)] = np.nan + s = pd.Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan with tm.assertRaises(ValueError): s.interpolate(method='spline') @@ -1546,20 +1596,19 @@ def test_spline_error(self): def test_metadata_propagation_indiv(self): # groupby - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) result = df.groupby('A').sum() - self.check_metadata(df,result) + self.check_metadata(df, result) # resample - df = DataFrame(np.random.randn(1000,2), - index=date_range('20130101',periods=1000,freq='s')) + df = DataFrame(np.random.randn(1000, 2), + index=date_range('20130101', periods=1000, freq='s')) result = df.resample('1T') - self.check_metadata(df,result) + self.check_metadata(df, result) # merging with override # GH 6923 @@ -1578,7 +1627,8 @@ def finalize(self, other, method=None, **kwargs): for name in self._metadata: if method == 'merge': left, right = other.left, other.right - value = getattr(left, name, '') + '|' + getattr(right, name, '') + value = getattr(left, name, '') + '|' + getattr(right, + name, '') object.__setattr__(self, name, value) else: object.__setattr__(self, name, getattr(other, name, '')) @@ -1587,7 +1637,7 @@ def finalize(self, other, method=None, **kwargs): DataFrame.__finalize__ = finalize result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner') - self.assertEqual(result.filename,'fname1.csv|fname2.csv') + self.assertEqual(result.filename, 'fname1.csv|fname2.csv') # concat # GH 6927 @@ -1598,7 +1648,9 @@ def finalize(self, other, method=None, **kwargs): def finalize(self, other, method=None, **kwargs): for name in self._metadata: if method == 'concat': - value = '+'.join([ getattr(o,name) for o in other.objs if getattr(o,name,None) ]) + value = '+'.join([getattr( + o, name) for o in other.objs if getattr(o, name, None) + ]) object.__setattr__(self, name, value) else: object.__setattr__(self, name, getattr(other, name, None)) @@ -1608,7 +1660,7 @@ def finalize(self, other, method=None, **kwargs): DataFrame.__finalize__ = finalize result = pd.concat([df1, df1]) - self.assertEqual(result.filename,'foo+foo') + self.assertEqual(result.filename, 'foo+foo') # reset DataFrame._metadata = _metadata @@ -1636,8 +1688,8 @@ def test_tz_convert_and_localize(self): for idx in [l0, l1]: - l0_expected = getattr(idx, fn)('US/Pacific') - l1_expected = getattr(idx, fn)('US/Pacific') + l0_expected = getattr(idx, fn)('US/Pacific') + l1_expected = getattr(idx, fn)('US/Pacific') df1 = DataFrame(np.ones(5), index=l0) df1 = getattr(df1, fn)('US/Pacific') @@ -1645,8 +1697,7 @@ def test_tz_convert_and_localize(self): # MultiIndex # GH7846 - df2 = DataFrame(np.ones(5), - MultiIndex.from_arrays([l0, l1])) + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) df3 = getattr(df2, fn)('US/Pacific', level=0) self.assertFalse(df3.index.levels[0].equals(l0)) @@ -1663,7 +1714,9 @@ def test_tz_convert_and_localize(self): df4 = DataFrame(np.ones(5), MultiIndex.from_arrays([int_idx, l0])) - df5 = getattr(df4, fn)('US/Pacific', level=1) + # TODO: untested + df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa + self.assertTrue(df3.index.levels[0].equals(l0)) self.assertFalse(df3.index.levels[0].equals(l0_expected)) self.assertTrue(df3.index.levels[1].equals(l1_expected)) @@ -1679,7 +1732,7 @@ def test_tz_convert_and_localize(self): # Not DatetimeIndex / PeriodIndex with tm.assertRaisesRegexp(TypeError, 'DatetimeIndex'): df = DataFrame(np.ones(5), - MultiIndex.from_arrays([int_idx, l0])) + MultiIndex.from_arrays([int_idx, l0])) df = getattr(df, fn)('US/Pacific', level=0) # Invalid level @@ -1690,7 +1743,7 @@ def test_tz_convert_and_localize(self): def test_set_attribute(self): # Test for consistent setattr behavior when an attribute and a column # have the same name (Issue #8994) - df = DataFrame({'x':[1, 2, 3]}) + df = DataFrame({'x': [1, 2, 3]}) df.y = 2 df['y'] = [2, 4, 6] @@ -1701,20 +1754,23 @@ def test_set_attribute(self): def test_pct_change(self): # GH 11150 - pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange(0, 40, 10)]).astype(np.float64) - pnl.iat[1,0] = np.nan - pnl.iat[1,1] = np.nan - pnl.iat[2,3] = 60 + pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( + 0, 40, 10)]).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 mask = pnl.isnull() for axis in range(2): - expected = pnl.ffill(axis=axis)/pnl.ffill(axis=axis).shift(axis=axis) - 1 + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( + axis=axis) - 1 expected[mask] = np.nan result = pnl.pct_change(axis=axis, fill_method='pad') self.assert_frame_equal(result, expected) + class TestPanel(tm.TestCase, Generic): _typ = Panel _comparator = lambda self, x, y: assert_panel_equal(x, y) @@ -1725,40 +1781,40 @@ class TestNDFrame(tm.TestCase): def test_squeeze(self): # noop - for s in [ tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries() ]: - tm.assert_series_equal(s.squeeze(),s) - for df in [ tm.makeTimeDataFrame() ]: - tm.assert_frame_equal(df.squeeze(),df) - for p in [ tm.makePanel() ]: - tm.assert_panel_equal(p.squeeze(),p) - for p4d in [ tm.makePanel4D() ]: - tm.assert_panel4d_equal(p4d.squeeze(),p4d) + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + tm.assert_series_equal(s.squeeze(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.squeeze(), df) + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.squeeze(), p) + for p4d in [tm.makePanel4D()]: + tm.assert_panel4d_equal(p4d.squeeze(), p4d) # squeezing df = tm.makeTimeDataFrame().reindex(columns=['A']) - tm.assert_series_equal(df.squeeze(),df['A']) + tm.assert_series_equal(df.squeeze(), df['A']) p = tm.makePanel().reindex(items=['ItemA']) - tm.assert_frame_equal(p.squeeze(),p['ItemA']) + tm.assert_frame_equal(p.squeeze(), p['ItemA']) - p = tm.makePanel().reindex(items=['ItemA'],minor_axis=['A']) - tm.assert_series_equal(p.squeeze(),p.ix['ItemA',:,'A']) + p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) + tm.assert_series_equal(p.squeeze(), p.ix['ItemA', :, 'A']) p4d = tm.makePanel4D().reindex(labels=['label1']) - tm.assert_panel_equal(p4d.squeeze(),p4d['label1']) + tm.assert_panel_equal(p4d.squeeze(), p4d['label1']) - p4d = tm.makePanel4D().reindex(labels=['label1'],items=['ItemA']) - tm.assert_frame_equal(p4d.squeeze(),p4d.ix['label1','ItemA']) + p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA']) + tm.assert_frame_equal(p4d.squeeze(), p4d.ix['label1', 'ItemA']) # don't fail with 0 length dimensions GH11229 & GH8999 - empty_series=pd.Series([], name='five') - empty_frame=pd.DataFrame([empty_series]) - empty_panel=pd.Panel({'six':empty_frame}) + empty_series = pd.Series([], name='five') + empty_frame = pd.DataFrame([empty_series]) + empty_panel = pd.Panel({'six': empty_frame}) [tm.assert_series_equal(empty_series, higher_dim.squeeze()) for higher_dim in [empty_series, empty_frame, empty_panel]] - def test_equals(self): s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) s2 = s1.copy() @@ -1782,8 +1838,10 @@ def test_equals(self): # Add object dtype column with nans index = np.random.random(10) - df1 = DataFrame(np.random.random(10,), index=index, columns=['floats']) - df1['text'] = 'the sky is so blue. we could use more chocolate.'.split() + df1 = DataFrame( + np.random.random(10, ), index=index, columns=['floats']) + df1['text'] = 'the sky is so blue. we could use more chocolate.'.split( + ) df1['start'] = date_range('2000-1-1', periods=10, freq='T') df1['end'] = date_range('2000-1-1', periods=10, freq='D') df1['diff'] = df1['end'] - df1['start'] @@ -1874,10 +1932,10 @@ def test_pipe_tuple_error(self): df = DataFrame({"A": [1, 2, 3]}) f = lambda x, y: y with tm.assertRaises(ValueError): - result = df.pipe((f, 'y'), x=1, y=0) + df.pipe((f, 'y'), x=1, y=0) with tm.assertRaises(ValueError): - result = df.A.pipe((f, 'y'), x=1, y=0) + df.A.pipe((f, 'y'), x=1, y=0) def test_pipe_panel(self): wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 0fc5916676dd3..add7245561d3f 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -6,7 +6,6 @@ import os import string import warnings -from distutils.version import LooseVersion from datetime import datetime, date @@ -21,7 +20,6 @@ from pandas.util.testing import ensure_clean from pandas.core.config import set_option - import numpy as np from numpy import random from numpy.random import rand, randn @@ -29,8 +27,6 @@ from numpy.testing import assert_allclose from numpy.testing.decorators import slow import pandas.tools.plotting as plotting - - """ These tests are for ``Dataframe.plot`` and ``Series.plot``. Other plot methods such as ``.hist``, ``.boxplot`` and other miscellaneous @@ -40,17 +36,15 @@ def _skip_if_no_scipy_gaussian_kde(): try: - import scipy - from scipy.stats import gaussian_kde + from scipy.stats import gaussian_kde # noqa except ImportError: raise nose.SkipTest("scipy version doesn't support gaussian_kde") def _ok_for_gaussian_kde(kind): - if kind in ['kde','density']: + if kind in ['kde', 'density']: try: - import scipy - from scipy.stats import gaussian_kde + from scipy.stats import gaussian_kde # noqa except ImportError: return False return True @@ -91,7 +85,6 @@ def setUp(self): else: self.polycollection_factor = 1 - def tearDown(self): tm.close() @@ -115,7 +108,8 @@ def _check_legend_labels(self, axes, labels=None, visible=True): labels : list-like expected legend labels visible : bool - expected legend visibility. labels are checked only when visible is True + expected legend visibility. labels are checked only when visible is + True """ if visible and (labels is None): @@ -161,7 +155,8 @@ def _check_visible(self, collections, visible=True): expected visibility """ from matplotlib.collections import Collection - if not isinstance(collections, Collection) and not com.is_list_like(collections): + if not isinstance(collections, + Collection) and not com.is_list_like(collections): collections = [collections] for patch in collections: @@ -276,14 +271,17 @@ def _check_ticks_props(self, axes, xlabelsize=None, xrot=None, for ax in axes: if xlabelsize or xrot: if isinstance(ax.xaxis.get_minor_formatter(), NullFormatter): - # If minor ticks has NullFormatter, rot / fontsize are not retained + # If minor ticks has NullFormatter, rot / fontsize are not + # retained labels = ax.get_xticklabels() else: - labels = ax.get_xticklabels() + ax.get_xticklabels(minor=True) + labels = ax.get_xticklabels() + ax.get_xticklabels( + minor=True) for label in labels: if xlabelsize is not None: - self.assertAlmostEqual(label.get_fontsize(), xlabelsize) + self.assertAlmostEqual(label.get_fontsize(), + xlabelsize) if xrot is not None: self.assertAlmostEqual(label.get_rotation(), xrot) @@ -291,11 +289,13 @@ def _check_ticks_props(self, axes, xlabelsize=None, xrot=None, if isinstance(ax.yaxis.get_minor_formatter(), NullFormatter): labels = ax.get_yticklabels() else: - labels = ax.get_yticklabels() + ax.get_yticklabels(minor=True) + labels = ax.get_yticklabels() + ax.get_yticklabels( + minor=True) for label in labels: if ylabelsize is not None: - self.assertAlmostEqual(label.get_fontsize(), ylabelsize) + self.assertAlmostEqual(label.get_fontsize(), + ylabelsize) if yrot is not None: self.assertAlmostEqual(label.get_rotation(), yrot) @@ -316,7 +316,8 @@ def _check_ax_scales(self, axes, xaxis='linear', yaxis='linear'): self.assertEqual(ax.xaxis.get_scale(), xaxis) self.assertEqual(ax.yaxis.get_scale(), yaxis) - def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=(8.0, 6.0)): + def _check_axes_shape(self, axes, axes_num=None, layout=None, + figsize=(8.0, 6.0)): """ Check expected number of axes is drawn in expected layout @@ -324,7 +325,8 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=(8.0, 6.0) ---------- axes : matplotlib Axes object, or its list-like axes_num : number - expected number of axes. Unnecessary axes should be set to invisible. + expected number of axes. Unnecessary axes should be set to + invisible. layout : tuple expected layout, (expected number of rows , columns) figsize : tuple @@ -342,8 +344,9 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=(8.0, 6.0) result = self._get_axes_layout(plotting._flatten(axes)) self.assertEqual(result, layout) - self.assert_numpy_array_equal(np.round(visible_axes[0].figure.get_size_inches()), - np.array(figsize)) + self.assert_numpy_array_equal( + np.round(visible_axes[0].figure.get_size_inches()), + np.array(figsize)) def _get_axes_layout(self, axes): x_set = set() @@ -457,33 +460,39 @@ def _check_grid_settings(self, obj, kinds, kws={}): import matplotlib as mpl def is_grid_on(): - xoff = all(not g.gridOn for g in self.plt.gca().xaxis.get_major_ticks()) - yoff = all(not g.gridOn for g in self.plt.gca().yaxis.get_major_ticks()) - return not(xoff and yoff) + xoff = all(not g.gridOn + for g in self.plt.gca().xaxis.get_major_ticks()) + yoff = all(not g.gridOn + for g in self.plt.gca().yaxis.get_major_ticks()) + return not (xoff and yoff) - spndx=1 + spndx = 1 for kind in kinds: if not _ok_for_gaussian_kde(kind): continue - self.plt.subplot(1,4*len(kinds),spndx); spndx+=1 - mpl.rc('axes',grid=False) + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc('axes', grid=False) obj.plot(kind=kind, **kws) self.assertFalse(is_grid_on()) - self.plt.subplot(1,4*len(kinds),spndx); spndx+=1 - mpl.rc('axes',grid=True) + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc('axes', grid=True) obj.plot(kind=kind, grid=False, **kws) self.assertFalse(is_grid_on()) if kind != 'pie': - self.plt.subplot(1,4*len(kinds),spndx); spndx+=1 - mpl.rc('axes',grid=True) + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc('axes', grid=True) obj.plot(kind=kind, **kws) self.assertTrue(is_grid_on()) - self.plt.subplot(1,4*len(kinds),spndx); spndx+=1 - mpl.rc('axes',grid=False) + self.plt.subplot(1, 4 * len(kinds), spndx) + spndx += 1 + mpl.rc('axes', grid=False) obj.plot(kind=kind, grid=True, **kws) self.assertTrue(is_grid_on()) @@ -651,8 +660,10 @@ def test_line_area_nan_series(self): ax = _check_plot_works(d.plot) masked = ax.lines[0].get_ydata() # remove nan for comparison purpose - self.assert_numpy_array_equal(np.delete(masked.data, 2), np.array([1, 2, 3])) - self.assert_numpy_array_equal(masked.mask, np.array([False, False, True, False])) + self.assert_numpy_array_equal( + np.delete(masked.data, 2), np.array([1, 2, 3])) + self.assert_numpy_array_equal( + masked.mask, np.array([False, False, True, False])) expected = np.array([1, 2, 0, 3]) ax = _check_plot_works(d.plot, stacked=True) @@ -694,12 +705,14 @@ def test_bar_log(self): expected = np.hstack((1.0e-04, expected, 1.0e+01)) ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='bar') - tm.assert_numpy_array_equal(ax.get_ylim(), (0.001, 0.10000000000000001)) + tm.assert_numpy_array_equal(ax.get_ylim(), + (0.001, 0.10000000000000001)) tm.assert_numpy_array_equal(ax.yaxis.get_ticklocs(), expected) tm.close() ax = Series([0.1, 0.01, 0.001]).plot(log=True, kind='barh') - tm.assert_numpy_array_equal(ax.get_xlim(), (0.001, 0.10000000000000001)) + tm.assert_numpy_array_equal(ax.get_xlim(), + (0.001, 0.10000000000000001)) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), expected) @slow @@ -728,7 +741,8 @@ def test_irregular_datetime(self): @slow def test_pie_series(self): - # if sum of values is less than 1.0, pie handle them as rate and draw semicircle. + # if sum of values is less than 1.0, pie handle them as rate and draw + # semicircle. series = Series(np.random.randint(1, 5), index=['a', 'b', 'c', 'd', 'e'], name='YLABEL') ax = _check_plot_works(series.plot.pie) @@ -749,14 +763,16 @@ def test_pie_series(self): # with labels and colors labels = ['A', 'B', 'C', 'D', 'E'] color_args = ['r', 'g', 'b', 'c', 'm'] - ax = _check_plot_works(series.plot.pie, labels=labels, colors=color_args) + ax = _check_plot_works(series.plot.pie, labels=labels, + colors=color_args) self._check_text_labels(ax.texts, labels) self._check_colors(ax.patches, facecolors=color_args) # with autopct and fontsize ax = _check_plot_works(series.plot.pie, colors=color_args, autopct='%.2f', fontsize=7) - pcts = ['{0:.2f}'.format(s * 100) for s in series.values / float(series.sum())] + pcts = ['{0:.2f}'.format(s * 100) + for s in series.values / float(series.sum())] iters = [iter(series.index), iter(pcts)] expected_texts = list(next(it) for it in itertools.cycle(iters)) self._check_text_labels(ax.texts, expected_texts) @@ -769,8 +785,8 @@ def test_pie_series(self): series.plot.pie() # includes nan - series = Series([1, 2, np.nan, 4], - index=['a', 'b', 'c', 'd'], name='YLABEL') + series = Series([1, 2, np.nan, 4], index=['a', 'b', 'c', 'd'], + name='YLABEL') ax = _check_plot_works(series.plot.pie) self._check_text_labels(ax.texts, ['a', 'b', '', 'd']) @@ -791,12 +807,13 @@ def test_hist_df_kwargs(self): def test_hist_df_with_nonnumerics(self): # GH 9853 with tm.RNGContext(1): - df = DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) + df = DataFrame( + np.random.randn(10, 4), columns=['A', 'B', 'C', 'D']) df['E'] = ['x', 'y'] * 5 ax = df.plot.hist(bins=5) self.assertEqual(len(ax.patches), 20) - ax = df.plot.hist() # bins=10 + ax = df.plot.hist() # bins=10 self.assertEqual(len(ax.patches), 40) @slow @@ -804,8 +821,10 @@ def test_hist_legacy(self): _check_plot_works(self.ts.hist) _check_plot_works(self.ts.hist, grid=False) _check_plot_works(self.ts.hist, figsize=(8, 10)) - _check_plot_works(self.ts.hist, filterwarnings='ignore', by=self.ts.index.month) - _check_plot_works(self.ts.hist, filterwarnings='ignore', by=self.ts.index.month, bins=5) + _check_plot_works(self.ts.hist, filterwarnings='ignore', + by=self.ts.index.month) + _check_plot_works(self.ts.hist, filterwarnings='ignore', + by=self.ts.index.month, bins=5) fig, ax = self.plt.subplots(1, 1) _check_plot_works(self.ts.hist, ax=ax) @@ -868,7 +887,8 @@ def test_hist_layout_with_by(self): self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7)) - self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) + self._check_axes_shape(axes, axes_num=4, layout=(4, 2), + figsize=(12, 7)) @slow def test_hist_no_overlap(self): @@ -903,7 +923,8 @@ def test_hist_secondary_legend(self): df['b'].plot.hist(ax=ax, legend=True, secondary_y=True) # both legends are draw on left ax # left axis must be invisible, right axis must be visible - self._check_legend_labels(ax.left_ax, labels=['a (right)', 'b (right)']) + self._check_legend_labels(ax.left_ax, + labels=['a (right)', 'b (right)']) self.assertFalse(ax.left_ax.get_yaxis().get_visible()) self.assertTrue(ax.get_yaxis().get_visible()) tm.close() @@ -1010,9 +1031,12 @@ def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() from numpy import linspace - _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100,100,20)) - _check_plot_works(self.ts.plot.density, bw_method=.5, ind=linspace(-100,100,20)) - ax = self.ts.plot.kde(logy=True, bw_method=.5, ind=linspace(-100,100,20)) + _check_plot_works(self.ts.plot.kde, bw_method=.5, + ind=linspace(-100, 100, 20)) + _check_plot_works(self.ts.plot.density, bw_method=.5, + ind=linspace(-100, 100, 20)) + ax = self.ts.plot.kde(logy=True, bw_method=.5, + ind=linspace(-100, 100, 20)) self._check_ax_scales(ax, yaxis='log') self._check_text_labels(ax.yaxis.get_label(), 'Density') @@ -1022,7 +1046,7 @@ def test_kde_missing_vals(self): _skip_if_no_scipy_gaussian_kde() s = Series(np.random.uniform(size=50)) s[0] = np.nan - ax = _check_plot_works(s.plot.kde) + _check_plot_works(s.plot.kde) @slow def test_hist_kwargs(self): @@ -1116,7 +1140,7 @@ def test_errorbar_plot(self): s = Series(np.arange(10), name='x') s_err = np.random.randn(10) - d_err = DataFrame(randn(10, 2), index=s.index, columns=['x', 'y']) + d_err = DataFrame(randn(10, 2), index=s.index, columns=['x', 'y']) # test line and bar plots kinds = ['line', 'bar'] for kind in kinds: @@ -1138,7 +1162,7 @@ def test_errorbar_plot(self): ix = date_range('1/1/2000', '1/1/2001', freq='M') ts = Series(np.arange(12), index=ix, name='x') ts_err = Series(np.random.randn(12), index=ix) - td_err = DataFrame(randn(12, 2), index=ix, columns=['x', 'y']) + td_err = DataFrame(randn(12, 2), index=ix, columns=['x', 'y']) ax = _check_plot_works(ts.plot, yerr=ts_err) self._check_has_errorbars(ax, xerr=0, yerr=1) @@ -1149,7 +1173,7 @@ def test_errorbar_plot(self): with tm.assertRaises(ValueError): s.plot(yerr=np.arange(11)) - s_err = ['zzz']*10 + s_err = ['zzz'] * 10 # in mpl 1.5+ this is a TypeError with tm.assertRaises((ValueError, TypeError)): s.plot(yerr=s_err) @@ -1161,8 +1185,9 @@ def test_table(self): @slow def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 - self._check_grid_settings(Series([1,2,3]), - plotting._series_kinds + plotting._common_kinds) + self._check_grid_settings(Series([1, 2, 3]), + plotting._series_kinds + + plotting._common_kinds) @slow def test_standard_colors(self): @@ -1241,13 +1266,14 @@ def test_time_series_plot_color_with_empty_kwargs(self): def test_xticklabels(self): # GH11529 s = Series(np.arange(10), index=['P%02d' % i for i in range(10)]) - ax = s.plot(xticks=[0,3,5,9]) - exp = ['P%02d' % i for i in [0,3,5,9]] + ax = s.plot(xticks=[0, 3, 5, 9]) + exp = ['P%02d' % i for i in [0, 3, 5, 9]] self._check_text_labels(ax.get_xticklabels(), exp) @tm.mplskip class TestDataFramePlots(TestPlotBase): + def setUp(self): TestPlotBase.setUp(self) import matplotlib as mpl @@ -1255,8 +1281,9 @@ def setUp(self): self.tdf = tm.makeTimeDataFrame() self.hexbin_df = DataFrame({"A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20)}) + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform( + size=20)}) from pandas import read_csv path = os.path.join(curpath(), 'data', 'iris.csv') @@ -1266,7 +1293,8 @@ def setUp(self): def test_plot(self): df = self.tdf _check_plot_works(df.plot, filterwarnings='ignore', grid=False) - axes = _check_plot_works(df.plot, filterwarnings='ignore', subplots=True) + axes = _check_plot_works(df.plot, filterwarnings='ignore', + subplots=True) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) axes = _check_plot_works(df.plot, filterwarnings='ignore', @@ -1290,16 +1318,19 @@ def test_plot(self): _check_plot_works(df.plot, xticks=[1, 5, 10]) _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100)) - _check_plot_works(df.plot, filterwarnings='ignore', subplots=True, title='blah') - # We have to redo it here because _check_plot_works does two plots, once without an ax - # kwarg and once with an ax kwarg and the new sharex behaviour does not remove the - # visibility of the latter axis (as ax is present). - # see: https://github.com/pydata/pandas/issues/9737 + _check_plot_works(df.plot, filterwarnings='ignore', + subplots=True, title='blah') + + # We have to redo it here because _check_plot_works does two plots, + # once without an ax kwarg and once with an ax kwarg and the new sharex + # behaviour does not remove the visibility of the latter axis (as ax is + # present). see: https://github.com/pydata/pandas/issues/9737 + axes = df.plot(subplots=True, title='blah') self._check_axes_shape(axes, axes_num=3, layout=(3, 1)) - #axes[0].figure.savefig("test.png") + # axes[0].figure.savefig("test.png") for ax in axes[:2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.xaxis) # xaxis must be visible for grid self._check_visible(ax.get_xticklabels(), visible=False) self._check_visible(ax.get_xticklabels(minor=True), visible=False) self._check_visible([ax.xaxis.get_label()], visible=False) @@ -1326,8 +1357,8 @@ def test_plot(self): (u('\u03b4'), 6), (u('\u03b4'), 7)], names=['i0', 'i1']) columns = MultiIndex.from_tuples([('bar', u('\u0394')), - ('bar', u('\u0395'))], names=['c0', - 'c1']) + ('bar', u('\u0395'))], names=['c0', + 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) @@ -1339,8 +1370,7 @@ def test_plot(self): axes = _check_plot_works(df.plot.bar, subplots=True) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) - axes = _check_plot_works(df.plot.bar, subplots=True, - layout=(-1, 1)) + axes = _check_plot_works(df.plot.bar, subplots=True, layout=(-1, 1)) self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) # When ax is supplied and required number of axes is 1, # passed ax should be used: @@ -1357,7 +1387,7 @@ def test_color_and_style_arguments(self): df = DataFrame({'x': [1, 2], 'y': [3, 4]}) # passing both 'color' and 'style' arguments should be allowed # if there is no color symbol in the style strings: - ax = df.plot(color = ['red', 'black'], style = ['-', '--']) + ax = df.plot(color=['red', 'black'], style=['-', '--']) # check that the linestyles are correctly set: linestyle = [line.get_linestyle() for line in ax.lines] self.assertEqual(linestyle, ['-', '--']) @@ -1367,7 +1397,7 @@ def test_color_and_style_arguments(self): # passing both 'color' and 'style' arguments should not be allowed # if there is a color symbol in the style strings: with tm.assertRaises(ValueError): - df.plot(color = ['red', 'black'], style = ['k-', 'r--']) + df.plot(color=['red', 'black'], style=['k-', 'r--']) def test_nonnumeric_exclude(self): df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}) @@ -1392,34 +1422,31 @@ def test_donot_overwrite_index_name(self): def test_plot_xy(self): # columns.inferred_type == 'string' df = self.tdf - self._check_data(df.plot(x=0, y=1), - df.set_index('A')['B'].plot()) + self._check_data(df.plot(x=0, y=1), df.set_index('A')['B'].plot()) self._check_data(df.plot(x=0), df.set_index('A').plot()) self._check_data(df.plot(y=0), df.B.plot()) - self._check_data(df.plot(x='A', y='B'), - df.set_index('A').B.plot()) + self._check_data(df.plot(x='A', y='B'), df.set_index('A').B.plot()) self._check_data(df.plot(x='A'), df.set_index('A').plot()) self._check_data(df.plot(y='B'), df.B.plot()) # columns.inferred_type == 'integer' df.columns = lrange(1, len(df.columns) + 1) - self._check_data(df.plot(x=1, y=2), - df.set_index(1)[2].plot()) + self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) self._check_data(df.plot(x=1), df.set_index(1).plot()) self._check_data(df.plot(y=1), df[1].plot()) # figsize and title ax = df.plot(x=1, y=2, title='Test', figsize=(16, 8)) self._check_text_labels(ax.title, 'Test') - self._check_axes_shape(ax, axes_num=1, layout=(1, 1), figsize=(16., 8.)) + self._check_axes_shape(ax, axes_num=1, layout=(1, 1), + figsize=(16., 8.)) # columns.inferred_type == 'mixed' # TODO add MultiIndex test @slow def test_logscales(self): - df = DataFrame({'a': np.arange(100)}, - index=np.arange(100)) + df = DataFrame({'a': np.arange(100)}, index=np.arange(100)) ax = df.plot(logy=True) self._check_ax_scales(ax, yaxis='log') @@ -1477,8 +1504,8 @@ def test_period_compat(self): tm.close() def test_unsorted_index(self): - df = DataFrame({'y': np.arange(100)}, - index=np.arange(99, -1, -1), dtype=np.int64) + df = DataFrame({'y': np.arange(100)}, index=np.arange(99, -1, -1), + dtype=np.int64) ax = df.plot() l = ax.get_lines()[0] rs = l.get_xydata() @@ -1504,12 +1531,14 @@ def test_subplots(self): self.assertEqual(axes.shape, (3, )) for ax, column in zip(axes, df.columns): - self._check_legend_labels(ax, labels=[com.pprint_thing(column)]) + self._check_legend_labels(ax, + labels=[com.pprint_thing(column)]) for ax in axes[:-2]: - self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.xaxis) # xaxis must be visible for grid self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible( + ax.get_xticklabels(minor=True), visible=False) self._check_visible(ax.xaxis.get_label(), visible=False) self._check_visible(ax.get_yticklabels()) @@ -1542,9 +1571,10 @@ def test_subplots_timeseries(self): for ax in axes[:-2]: # GH 7801 - self._check_visible(ax.xaxis) # xaxis must be visible for grid + self._check_visible(ax.xaxis) # xaxis must be visible for grid self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible( + ax.get_xticklabels(minor=True), visible=False) self._check_visible(ax.xaxis.get_label(), visible=False) self._check_visible(ax.get_yticklabels()) @@ -1555,14 +1585,16 @@ def test_subplots_timeseries(self): self._check_visible(axes[-1].get_yticklabels()) self._check_ticks_props(axes, xrot=0) - axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, fontsize=7) + axes = df.plot(kind=kind, subplots=True, sharex=False, rot=45, + fontsize=7) for ax in axes: self._check_visible(ax.xaxis) self._check_visible(ax.get_xticklabels()) self._check_visible(ax.get_xticklabels(minor=True)) self._check_visible(ax.xaxis.get_label()) self._check_visible(ax.get_yticklabels()) - self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + self._check_ticks_props(ax, xlabelsize=7, xrot=45, + ylabelsize=7) @slow def test_subplots_layout(self): @@ -1632,12 +1664,14 @@ def test_subplots_multiple_axes(self): df = DataFrame(np.random.rand(10, 3), index=list(string.ascii_letters[:10])) - returned = df.plot(subplots=True, ax=axes[0], sharex=False, sharey=False) + returned = df.plot(subplots=True, ax=axes[0], sharex=False, + sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assertEqual(returned.shape, (3, )) self.assertIs(returned[0].figure, fig) # draw on second row - returned = df.plot(subplots=True, ax=axes[1], sharex=False, sharey=False) + returned = df.plot(subplots=True, ax=axes[1], sharex=False, + sharey=False) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) self.assertEqual(returned.shape, (3, )) self.assertIs(returned[0].figure, fig) @@ -1687,23 +1721,25 @@ def test_subplots_ts_share_axes(self): # GH 3964 fig, axes = self.plt.subplots(3, 3, sharex=True, sharey=True) self.plt.subplots_adjust(left=0.05, right=0.95, hspace=0.3, wspace=0.3) - df = DataFrame(np.random.randn(10, 9), index=date_range(start='2014-07-01', freq='M', periods=10)) + df = DataFrame( + np.random.randn(10, 9), + index=date_range(start='2014-07-01', freq='M', periods=10)) for i, ax in enumerate(axes.ravel()): df[i].plot(ax=ax, fontsize=5) - #Rows other than bottom should not be visible + # Rows other than bottom should not be visible for ax in axes[0:-1].ravel(): self._check_visible(ax.get_xticklabels(), visible=False) - #Bottom row should be visible + # Bottom row should be visible for ax in axes[-1].ravel(): self._check_visible(ax.get_xticklabels(), visible=True) - #First column should be visible + # First column should be visible for ax in axes[[0, 1, 2], [0]].ravel(): self._check_visible(ax.get_yticklabels(), visible=True) - #Other columns should not be visible + # Other columns should not be visible for ax in axes[[0, 1, 2], [1]].ravel(): self._check_visible(ax.get_yticklabels(), visible=False) for ax in axes[[0, 1, 2], [2]].ravel(): @@ -1746,8 +1782,8 @@ def test_subplots_dup_columns(self): def test_negative_log(self): df = - DataFrame(rand(6, 4), - index=list(string.ascii_letters[:6]), - columns=['x', 'y', 'z', 'four']) + index=list(string.ascii_letters[:6]), + columns=['x', 'y', 'z', 'four']) with tm.assertRaises(ValueError): df.plot.area(logy=True) @@ -1757,20 +1793,22 @@ def test_negative_log(self): def _compare_stacked_y_cood(self, normal_lines, stacked_lines): base = np.zeros(len(normal_lines[0].get_data()[1])) for nl, sl in zip(normal_lines, stacked_lines): - base += nl.get_data()[1] # get y coodinates + base += nl.get_data()[1] # get y coodinates sy = sl.get_data()[1] self.assert_numpy_array_equal(base, sy) def test_line_area_stacked(self): with tm.RNGContext(42): - df = DataFrame(rand(6, 4), - columns=['w', 'x', 'y', 'z']) - neg_df = - df + df = DataFrame(rand(6, 4), columns=['w', 'x', 'y', 'z']) + neg_df = -df # each column has either positive or negative value - sep_df = DataFrame({'w': rand(6), 'x': rand(6), - 'y': - rand(6), 'z': - rand(6)}) + sep_df = DataFrame({'w': rand(6), + 'x': rand(6), + 'y': -rand(6), + 'z': -rand(6)}) # each column has positive-negative mixed value - mixed_df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), + mixed_df = DataFrame(randn(6, 4), + index=list(string.ascii_letters[:6]), columns=['w', 'x', 'y', 'z']) for kind in ['line', 'area']: @@ -1797,28 +1835,35 @@ def test_line_area_nan_df(self): values1 = [1, 2, np.nan, 3] values2 = [3, np.nan, 2, 1] df = DataFrame({'a': values1, 'b': values2}) - tdf = DataFrame({'a': values1, 'b': values2}, index=tm.makeDateIndex(k=4)) + tdf = DataFrame({'a': values1, + 'b': values2}, index=tm.makeDateIndex(k=4)) for d in [df, tdf]: ax = _check_plot_works(d.plot) masked1 = ax.lines[0].get_ydata() masked2 = ax.lines[1].get_ydata() # remove nan for comparison purpose - self.assert_numpy_array_equal(np.delete(masked1.data, 2), np.array([1, 2, 3])) - self.assert_numpy_array_equal(np.delete(masked2.data, 1), np.array([3, 2, 1])) - self.assert_numpy_array_equal(masked1.mask, np.array([False, False, True, False])) - self.assert_numpy_array_equal(masked2.mask, np.array([False, True, False, False])) + self.assert_numpy_array_equal( + np.delete(masked1.data, 2), np.array([1, 2, 3])) + self.assert_numpy_array_equal( + np.delete(masked2.data, 1), np.array([3, 2, 1])) + self.assert_numpy_array_equal( + masked1.mask, np.array([False, False, True, False])) + self.assert_numpy_array_equal( + masked2.mask, np.array([False, True, False, False])) expected1 = np.array([1, 2, 0, 3]) expected2 = np.array([3, 0, 2, 1]) ax = _check_plot_works(d.plot, stacked=True) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) + self.assert_numpy_array_equal(ax.lines[1].get_ydata(), + expected1 + expected2) ax = _check_plot_works(d.plot.area) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) - self.assert_numpy_array_equal(ax.lines[1].get_ydata(), expected1 + expected2) + self.assert_numpy_array_equal(ax.lines[1].get_ydata(), + expected1 + expected2) ax = _check_plot_works(d.plot.area, stacked=False) self.assert_numpy_array_equal(ax.lines[0].get_ydata(), expected1) @@ -1849,10 +1894,9 @@ def test_line_lim(self): self.assertEqual(xmax, lines[0].get_data()[0][-1]) def test_area_lim(self): - df = DataFrame(rand(6, 4), - columns=['x', 'y', 'z', 'four']) + df = DataFrame(rand(6, 4), columns=['x', 'y', 'z', 'four']) - neg_df = - df + neg_df = -df for stacked in [True, False]: ax = _check_plot_works(df.plot.area, stacked=stacked) xmin, xmax = ax.get_xlim() @@ -1964,12 +2008,18 @@ def test_bar_barwidth(self): @slow def test_bar_barwidth_position(self): df = DataFrame(randn(5, 5)) - self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, position=0.2) - self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, position=0.2) - self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, position=0.2) - self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, position=0.2) - self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, position=0.2) - self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, position=0.2) + self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, + position=0.2) + self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, + position=0.2) + self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, + position=0.2) + self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, + position=0.2) + self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, + position=0.2) + self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, + position=0.2) @slow def test_bar_bottom_left(self): @@ -2002,7 +2052,8 @@ def test_bar_bottom_left(self): @slow def test_bar_nan(self): - df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20], + df = DataFrame({'A': [10, np.nan, 20], + 'B': [5, 10, 20], 'C': [1, 2, 3]}) ax = df.plot.bar() expected = [10, 0, 20, 5, 10, 20, 1, 2, 3] @@ -2081,10 +2132,8 @@ def test_plot_scatter_with_c(self): # identical to the values we supplied, normally we'd be on shaky ground # comparing floats for equality but here we expect them to be # identical. - self.assertTrue( - np.array_equal( - ax.collections[0].get_facecolor(), - rgba_array)) + self.assertTrue(np.array_equal(ax.collections[0].get_facecolor(), + rgba_array)) # we don't test the colors of the faces in this next plot because they # are dependent on the spring colormap, which may change its colors # later. @@ -2134,12 +2183,11 @@ def test_plot_bar(self): self._check_ticks_props(ax, yrot=55, ylabelsize=11, xlabelsize=11) def _check_bar_alignment(self, df, kind='bar', stacked=False, - subplots=False, align='center', - width=0.5, position=0.5): + subplots=False, align='center', width=0.5, + position=0.5): axes = df.plot(kind=kind, stacked=stacked, subplots=subplots, - align=align, width=width, position=position, - grid=True) + align=align, width=width, position=position, grid=True) axes = self._flatten_visible(axes) @@ -2153,7 +2201,8 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, axis = ax.yaxis ax_min, ax_max = ax.get_ylim() min_edge = min([p.get_y() for p in ax.patches]) - max_edge = max([p.get_y() + p.get_height() for p in ax.patches]) + max_edge = max([p.get_y() + p.get_height() for p in ax.patches + ]) else: raise ValueError @@ -2173,7 +2222,8 @@ def _check_bar_alignment(self, df, kind='bar', stacked=False, center = p.get_y() + p.get_height() * position edge = p.get_y() elif kind == 'barh' and stacked is False: - center = p.get_y() + p.get_height() * len(df.columns) * position + center = p.get_y() + p.get_height() * len( + df.columns) * position edge = p.get_y() else: raise ValueError @@ -2232,25 +2282,25 @@ def test_bar_edge(self): df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) self._check_bar_alignment(df, kind='bar', stacked=True, align='edge') - self._check_bar_alignment(df, kind='bar', stacked=True, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='bar', stacked=True, width=0.9, + align='edge') self._check_bar_alignment(df, kind='barh', stacked=True, align='edge') - self._check_bar_alignment(df, kind='barh', stacked=True, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=True, width=0.9, + align='edge') self._check_bar_alignment(df, kind='bar', stacked=False, align='edge') - self._check_bar_alignment(df, kind='bar', stacked=False, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='bar', stacked=False, width=0.9, + align='edge') self._check_bar_alignment(df, kind='barh', stacked=False, align='edge') - self._check_bar_alignment(df, kind='barh', stacked=False, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', stacked=False, width=0.9, + align='edge') self._check_bar_alignment(df, kind='bar', subplots=True, align='edge') - self._check_bar_alignment(df, kind='bar', subplots=True, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='bar', subplots=True, width=0.9, + align='edge') self._check_bar_alignment(df, kind='barh', subplots=True, align='edge') - self._check_bar_alignment(df, kind='barh', subplots=True, - width=0.9, align='edge') + self._check_bar_alignment(df, kind='barh', subplots=True, width=0.9, + align='edge') @slow def test_bar_log_no_subplots(self): @@ -2272,8 +2322,8 @@ def test_bar_log_subplots(self): if not self.mpl_le_1_2_1: expected = np.hstack((.1, expected, 1e4)) - ax = DataFrame([Series([200, 300]), - Series([300, 500])]).plot.bar(log=True, subplots=True) + ax = DataFrame([Series([200, 300]), Series([300, 500])]).plot.bar( + log=True, subplots=True) tm.assert_numpy_array_equal(ax[0].yaxis.get_ticklocs(), expected) tm.assert_numpy_array_equal(ax[1].yaxis.get_ticklocs(), expected) @@ -2288,14 +2338,12 @@ def test_boxplot(self): ax = _check_plot_works(df.plot.box) self._check_text_labels(ax.get_xticklabels(), labels) tm.assert_numpy_array_equal(ax.xaxis.get_ticklocs(), - np.arange(1, len(numeric_cols) + 1)) - self.assertEqual(len(ax.lines), - self.bp_n_objects * len(numeric_cols)) + np.arange(1, len(numeric_cols) + 1)) + self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) # different warning on py3 if not PY3: - axes = _check_plot_works(df.plot.box, - subplots=True, logy=True) + axes = _check_plot_works(df.plot.box, subplots=True, logy=True) self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) self._check_ax_scales(axes, yaxis='log') @@ -2329,8 +2377,8 @@ def test_boxplot_vertical(self): self._check_text_labels(ax.get_yticklabels(), labels) self.assertEqual(len(ax.lines), self.bp_n_objects * len(numeric_cols)) - axes = _check_plot_works(df.plot.box, filterwarnings='ignore', subplots=True, - vert=False, logx=True) + axes = _check_plot_works(df.plot.box, filterwarnings='ignore', + subplots=True, vert=False, logx=True) self._check_axes_shape(axes, axes_num=3, layout=(1, 3)) self._check_ax_scales(axes, xaxis='log') for ax, label in zip(axes, labels): @@ -2367,14 +2415,15 @@ def test_boxplot_subplots_return_type(self): # normal style: return_type=None result = df.plot.box(subplots=True) self.assertIsInstance(result, np.ndarray) - self._check_box_return_type(result, None, - expected_keys=['height', 'weight', 'category']) + self._check_box_return_type(result, None, expected_keys=[ + 'height', 'weight', 'category']) for t in ['dict', 'axes', 'both']: returned = df.plot.box(return_type=t, subplots=True) - self._check_box_return_type(returned, t, - expected_keys=['height', 'weight', 'category'], - check_ax_title=False) + self._check_box_return_type( + returned, t, + expected_keys=['height', 'weight', 'category'], + check_ax_title=False) @slow def test_kde_df(self): @@ -2389,7 +2438,8 @@ def test_kde_df(self): ax = df.plot(kind='kde', rot=20, fontsize=5) self._check_ticks_props(ax, xrot=20, xlabelsize=5, ylabelsize=5) - axes = _check_plot_works(df.plot, filterwarnings='ignore', kind='kde', subplots=True) + axes = _check_plot_works(df.plot, filterwarnings='ignore', kind='kde', + subplots=True) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) axes = df.plot(kind='kde', logy=True, subplots=True) @@ -2401,7 +2451,7 @@ def test_kde_missing_vals(self): _skip_if_no_scipy_gaussian_kde() df = DataFrame(np.random.uniform(size=(100, 4))) df.loc[0, 0] = np.nan - ax = _check_plot_works(df.plot, kind='kde') + _check_plot_works(df.plot, kind='kde') @slow def test_hist_df(self): @@ -2416,7 +2466,8 @@ def test_hist_df(self): expected = [com.pprint_thing(c) for c in df.columns] self._check_legend_labels(ax, labels=expected) - axes = _check_plot_works(df.plot.hist, filterwarnings='ignore', subplots=True, logy=True) + axes = _check_plot_works(df.plot.hist, filterwarnings='ignore', + subplots=True, logy=True) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) self._check_ax_scales(axes, yaxis='log') @@ -2464,7 +2515,7 @@ def test_hist_df_coord(self): np.array([8, 8, 8, 8, 8])), 'C': np.repeat(np.array([1, 2, 3, 4, 5]), np.array([6, 7, 8, 9, 10]))}, - columns=['A', 'B', 'C']) + columns=['A', 'B', 'C']) nan_df = DataFrame({'A': np.repeat(np.array([np.nan, 1, 2, 3, 4, 5]), np.array([3, 10, 9, 8, 7, 6])), @@ -2476,55 +2527,74 @@ def test_hist_df_coord(self): for df in [normal_df, nan_df]: ax = df.plot.hist(bins=5) - self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(ax.patches[5:10], expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[5:10], + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(ax.patches[10:], expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[10:], + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([6, 7, 8, 9, 10])) ax = df.plot.hist(bins=5, stacked=True) - self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[:5], + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(ax.patches[5:10], expected_y=np.array([10, 9, 8, 7, 6]), + self._check_box_coord(ax.patches[5:10], + expected_y=np.array([10, 9, 8, 7, 6]), expected_h=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(ax.patches[10:], expected_y=np.array([18, 17, 16, 15, 14]), + self._check_box_coord(ax.patches[10:], + expected_y=np.array([18, 17, 16, 15, 14]), expected_h=np.array([6, 7, 8, 9, 10])) axes = df.plot.hist(bins=5, stacked=True, subplots=True) - self._check_box_coord(axes[0].patches, expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(axes[0].patches, + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(axes[1].patches, expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(axes[1].patches, + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(axes[2].patches, expected_y=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(axes[2].patches, + expected_y=np.array([0, 0, 0, 0, 0]), expected_h=np.array([6, 7, 8, 9, 10])) if self.mpl_ge_1_3_1: # horizontal ax = df.plot.hist(bins=5, orientation='horizontal') - self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(ax.patches[5:10], expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[5:10], + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(ax.patches[10:], expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[10:], + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([6, 7, 8, 9, 10])) - ax = df.plot.hist(bins=5, stacked=True, orientation='horizontal') - self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]), + ax = df.plot.hist(bins=5, stacked=True, + orientation='horizontal') + self._check_box_coord(ax.patches[:5], + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(ax.patches[5:10], expected_x=np.array([10, 9, 8, 7, 6]), + self._check_box_coord(ax.patches[5:10], + expected_x=np.array([10, 9, 8, 7, 6]), expected_w=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(ax.patches[10:], expected_x=np.array([18, 17, 16, 15, 14]), - expected_w=np.array([6, 7, 8, 9, 10])) - - axes = df.plot.hist(bins=5, stacked=True, - subplots=True, orientation='horizontal') - self._check_box_coord(axes[0].patches, expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(ax.patches[10:], expected_x=np.array( + [18, 17, 16, 15, 14]), + expected_w=np.array([6, 7, 8, 9, 10])) + + axes = df.plot.hist(bins=5, stacked=True, subplots=True, + orientation='horizontal') + self._check_box_coord(axes[0].patches, + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([10, 9, 8, 7, 6])) - self._check_box_coord(axes[1].patches, expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(axes[1].patches, + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([8, 8, 8, 8, 8])) - self._check_box_coord(axes[2].patches, expected_x=np.array([0, 0, 0, 0, 0]), + self._check_box_coord(axes[2].patches, + expected_x=np.array([0, 0, 0, 0, 0]), expected_w=np.array([6, 7, 8, 9, 10])) @slow @@ -2554,7 +2624,8 @@ def test_df_legend_labels(self): self._check_legend_labels(ax, labels=df.columns.union(df3.columns)) ax = df4.plot(kind=kind, legend='reverse', ax=ax) - expected = list(df.columns.union(df3.columns)) + list(reversed(df4.columns)) + expected = list(df.columns.union(df3.columns)) + list(reversed( + df4.columns)) self._check_legend_labels(ax, labels=expected) # Secondary Y @@ -2563,7 +2634,8 @@ def test_df_legend_labels(self): ax = df2.plot(legend=False, ax=ax) self._check_legend_labels(ax, labels=['a', 'b (right)', 'c']) ax = df3.plot(kind='bar', legend=True, secondary_y='h', ax=ax) - self._check_legend_labels(ax, labels=['a', 'b (right)', 'c', 'g', 'h (right)', 'i']) + self._check_legend_labels( + ax, labels=['a', 'b (right)', 'c', 'g', 'h (right)', 'i']) # Time Series ind = date_range('1/1/2014', periods=3) @@ -2575,13 +2647,13 @@ def test_df_legend_labels(self): ax = df2.plot(legend=False, ax=ax) self._check_legend_labels(ax, labels=['a', 'b (right)', 'c']) ax = df3.plot(legend=True, ax=ax) - self._check_legend_labels(ax, labels=['a', 'b (right)', 'c', 'g', 'h', 'i']) + self._check_legend_labels( + ax, labels=['a', 'b (right)', 'c', 'g', 'h', 'i']) # scatter ax = df.plot.scatter(x='a', y='b', label='data1') self._check_legend_labels(ax, labels=['data1']) - ax = df2.plot.scatter(x='d', y='e', legend=False, - label='data2', ax=ax) + ax = df2.plot.scatter(x='d', y='e', legend=False, label='data2', ax=ax) self._check_legend_labels(ax, labels=['data1']) ax = df3.plot.scatter(x='g', y='h', label='data3', ax=ax) self._check_legend_labels(ax, labels=['data1', 'data3']) @@ -2596,9 +2668,8 @@ def test_df_legend_labels(self): self._check_legend_labels(ax, labels=['LABEL_b']) self._check_text_labels(ax.xaxis.get_label(), 'a') ax = df5.plot(y='c', label='LABEL_c', ax=ax) - self._check_legend_labels(ax, labels=['LABEL_b','LABEL_c']) - self.assertTrue(df5.columns.tolist() == ['b','c']) - + self._check_legend_labels(ax, labels=['LABEL_b', 'LABEL_c']) + self.assertTrue(df5.columns.tolist() == ['b', 'c']) def test_legend_name(self): multi = DataFrame(randn(4, 4), @@ -2642,10 +2713,10 @@ def test_style_by_column(self): fig = plt.gcf() df = DataFrame(randn(100, 3)) - for markers in [{0: '^', 1: '+', 2: 'o'}, - {0: '^', 1: '+'}, - ['^', '+', 'o'], - ['^', '+']]: + for markers in [{0: '^', + 1: '+', + 2: 'o'}, {0: '^', + 1: '+'}, ['^', '+', 'o'], ['^', '+']]: fig.clf() fig.add_subplot(111) ax = df.plot(style=markers) @@ -2659,8 +2730,7 @@ def test_line_label_none(self): self.assertEqual(ax.get_legend(), None) ax = s.plot(legend=True) - self.assertEqual(ax.get_legend().get_texts()[0].get_text(), - 'None') + self.assertEqual(ax.get_legend().get_texts()[0].get_text(), 'None') @slow def test_line_colors(self): @@ -2855,19 +2925,19 @@ def test_hist_colors(self): tm.close() custom_colors = 'rgcby' - ax = df.plot.hist( color=custom_colors) + ax = df.plot.hist(color=custom_colors) self._check_colors(ax.patches[::10], facecolors=custom_colors) tm.close() from matplotlib import cm # Test str -> colormap functionality - ax = df.plot.hist( colormap='jet') + ax = df.plot.hist(colormap='jet') rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) self._check_colors(ax.patches[::10], facecolors=rgba_colors) tm.close() # Test colormap functionality - ax = df.plot.hist( colormap=cm.jet) + ax = df.plot.hist(colormap=cm.jet) rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) self._check_colors(ax.patches[::10], facecolors=rgba_colors) tm.close() @@ -2944,7 +3014,8 @@ def test_kde_colors_and_styles_subplots(self): # make color a list if plotting one column frame # handles cases like df.plot(color='DodgerBlue') - axes = df.ix[:, [0]].plot(kind='kde', color='DodgerBlue', subplots=True) + axes = df.ix[:, [0]].plot(kind='kde', color='DodgerBlue', + subplots=True) self._check_colors(axes[0].lines, linecolors=['DodgerBlue']) # single character style @@ -2962,19 +3033,25 @@ def test_kde_colors_and_styles_subplots(self): @slow def test_boxplot_colors(self): - - def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', fliers_c='b'): - self._check_colors(bp['boxes'], linecolors=[box_c] * len(bp['boxes'])) - self._check_colors(bp['whiskers'], linecolors=[whiskers_c] * len(bp['whiskers'])) - self._check_colors(bp['medians'], linecolors=[medians_c] * len(bp['medians'])) - self._check_colors(bp['fliers'], linecolors=[fliers_c] * len(bp['fliers'])) - self._check_colors(bp['caps'], linecolors=[caps_c] * len(bp['caps'])) + def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', + fliers_c='b'): + self._check_colors(bp['boxes'], + linecolors=[box_c] * len(bp['boxes'])) + self._check_colors(bp['whiskers'], + linecolors=[whiskers_c] * len(bp['whiskers'])) + self._check_colors(bp['medians'], + linecolors=[medians_c] * len(bp['medians'])) + self._check_colors(bp['fliers'], + linecolors=[fliers_c] * len(bp['fliers'])) + self._check_colors(bp['caps'], + linecolors=[caps_c] * len(bp['caps'])) default_colors = self._maybe_unpack_cycler(self.plt.rcParams) df = DataFrame(randn(5, 5)) bp = df.plot.box(return_type='dict') - _check_colors(bp, default_colors[0], default_colors[0], default_colors[2]) + _check_colors(bp, default_colors[0], default_colors[0], + default_colors[2]) tm.close() dict_colors = dict(boxes='#572923', whiskers='#982042', @@ -3009,7 +3086,8 @@ def _check_colors(bp, box_c, whiskers_c, medians_c, caps_c='k', fliers_c='b'): # tuple is also applied to all artists except fliers bp = df.plot.box(color=(0, 1, 0), sym='#123456', return_type='dict') - _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), (0, 1, 0), '#123456') + _check_colors(bp, (0, 1, 0), (0, 1, 0), (0, 1, 0), + (0, 1, 0), '#123456') with tm.assertRaises(ValueError): # Color contains invalid key results in ValueError @@ -3096,7 +3174,8 @@ def test_hexbin_basic(self): # GH 6951 axes = df.plot.hexbin(x='A', y='B', subplots=True) - # hexbin should have 2 axes in the figure, 1 for plotting and another is colorbar + # hexbin should have 2 axes in the figure, 1 for plotting and another + # is colorbar self.assertEqual(len(axes[0].figure.axes), 2) # return value is single axes self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @@ -3138,8 +3217,7 @@ def test_allow_cmap(self): self.assertEqual(ax.collections[0].cmap.name, 'YlGn') with tm.assertRaises(TypeError): - df.plot.hexbin(x='A', y='B', cmap='YlGn', - colormap='BuGn') + df.plot.hexbin(x='A', y='B', cmap='YlGn', colormap='BuGn') @slow def test_pie_df(self): @@ -3154,7 +3232,8 @@ def test_pie_df(self): ax = _check_plot_works(df.plot.pie, y=2) self._check_text_labels(ax.texts, df.index) - axes = _check_plot_works(df.plot.pie, filterwarnings='ignore', subplots=True) + axes = _check_plot_works(df.plot.pie, filterwarnings='ignore', + subplots=True) self.assertEqual(len(axes), len(df.columns)) for ax in axes: self._check_text_labels(ax.texts, df.index) @@ -3163,8 +3242,9 @@ def test_pie_df(self): labels = ['A', 'B', 'C', 'D', 'E'] color_args = ['r', 'g', 'b', 'c', 'm'] - axes = _check_plot_works(df.plot.pie, filterwarnings='ignore', subplots=True, - labels=labels, colors=color_args) + axes = _check_plot_works(df.plot.pie, filterwarnings='ignore', + subplots=True, labels=labels, + colors=color_args) self.assertEqual(len(axes), len(df.columns)) for ax in axes: @@ -3189,13 +3269,13 @@ def test_pie_df_nan(self): # see https://github.com/pydata/pandas/issues/8390 self.assertEqual([x.get_text() for x in ax.get_legend().get_texts()], - base_expected[:i] + base_expected[i+1:]) + base_expected[:i] + base_expected[i + 1:]) @slow def test_errorbar_plot(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} df = DataFrame(d) - d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4} + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} df_err = DataFrame(d_err) # check line plots @@ -3212,23 +3292,27 @@ def test_errorbar_plot(self): self._check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, + kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], + kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) self._check_has_errorbars(ax, xerr=2, yerr=2) - axes = _check_plot_works(df.plot, filterwarnings='ignore', yerr=df_err, - xerr=df_err, subplots=True, kind=kind) + axes = _check_plot_works(df.plot, filterwarnings='ignore', + yerr=df_err, xerr=df_err, subplots=True, + kind=kind) self._check_has_errorbars(axes, xerr=1, yerr=1) - ax = _check_plot_works((df+1).plot, yerr=df_err, xerr=df_err, kind='bar', log=True) + ax = _check_plot_works((df + 1).plot, yerr=df_err, + xerr=df_err, kind='bar', log=True) self._check_has_errorbars(ax, xerr=2, yerr=2) # yerr is raw error values - ax = _check_plot_works(df['y'].plot, yerr=np.ones(12)*0.4) + ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12))*0.4) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) self._check_has_errorbars(ax, xerr=0, yerr=2) # yerr is iterator @@ -3239,7 +3323,7 @@ def test_errorbar_plot(self): # yerr is column name for yerr in ['yerr', u('誤差')]: s_df = df.copy() - s_df[yerr] = np.ones(12)*0.2 + s_df[yerr] = np.ones(12) * 0.2 ax = _check_plot_works(s_df.plot, yerr=yerr) self._check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) @@ -3248,7 +3332,7 @@ def test_errorbar_plot(self): with tm.assertRaises(ValueError): df.plot(yerr=np.random.randn(11)) - df_err = DataFrame({'x': ['zzz']*12, 'y': ['zzz']*12}) + df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) with tm.assertRaises((ValueError, TypeError)): df.plot(yerr=df_err) @@ -3279,7 +3363,7 @@ def test_errorbar_with_partial_columns(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} df = DataFrame(d) - d_err = {'x': np.ones(12)*0.2, 'z': np.ones(12)*0.4} + d_err = {'x': np.ones(12) * 0.2, 'z': np.ones(12) * 0.4} df_err = DataFrame(d_err) for err in [d_err, df_err]: ax = _check_plot_works(df.plot, yerr=err) @@ -3289,7 +3373,7 @@ def test_errorbar_with_partial_columns(self): def test_errorbar_timeseries(self): d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - d_err = {'x': np.ones(12)*0.2, 'y': np.ones(12)*0.4} + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} # check time-series plots ix = date_range('1/1/2000', '1/1/2001', freq='M') @@ -3302,14 +3386,15 @@ def test_errorbar_timeseries(self): self._check_has_errorbars(ax, xerr=0, yerr=2) ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], kind=kind) + ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], + kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=1) ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=1) ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) self._check_has_errorbars(ax, xerr=0, yerr=2) - axes = _check_plot_works(tdf.plot, filterwarnings='ignore', kind=kind, - yerr=tdf_err, subplots=True) + axes = _check_plot_works(tdf.plot, filterwarnings='ignore', + kind=kind, yerr=tdf_err, subplots=True) self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): @@ -3320,13 +3405,13 @@ def test_errorbar_asymmetrical(self): data = np.random.randn(5, 3) df = DataFrame(data) - ax = df.plot(yerr=err, xerr=err/2) + ax = df.plot(yerr=err, xerr=err / 2) - self.assertEqual(ax.lines[7].get_ydata()[0], data[0,1]-err[1,0,0]) - self.assertEqual(ax.lines[8].get_ydata()[0], data[0,1]+err[1,1,0]) + self.assertEqual(ax.lines[7].get_ydata()[0], data[0, 1] - err[1, 0, 0]) + self.assertEqual(ax.lines[8].get_ydata()[0], data[0, 1] + err[1, 1, 0]) - self.assertEqual(ax.lines[5].get_xdata()[0], -err[1,0,0]/2) - self.assertEqual(ax.lines[6].get_xdata()[0], err[1,1,0]/2) + self.assertEqual(ax.lines[5].get_xdata()[0], -err[1, 0, 0] / 2) + self.assertEqual(ax.lines[6].get_xdata()[0], err[1, 1, 0] / 2) with tm.assertRaises(ValueError): df.plot(yerr=err.T) @@ -3345,7 +3430,8 @@ def test_table(self): self.assertTrue(len(ax.tables) == 1) def test_errorbar_scatter(self): - df = DataFrame(np.random.randn(5, 2), index=range(5), columns=['x', 'y']) + df = DataFrame( + np.random.randn(5, 2), index=range(5), columns=['x', 'y']) df_err = DataFrame(np.random.randn(5, 2) / 5, index=range(5), columns=['x', 'y']) @@ -3356,16 +3442,18 @@ def test_errorbar_scatter(self): ax = _check_plot_works(df.plot.scatter, x='x', y='y', yerr=df_err) self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot.scatter, x='x', y='y', - xerr=df_err, yerr=df_err) + ax = _check_plot_works(df.plot.scatter, x='x', y='y', xerr=df_err, + yerr=df_err) self._check_has_errorbars(ax, xerr=1, yerr=1) def _check_errorbar_color(containers, expected, has_err='has_xerr'): - errs = [c.lines[1][0] for c in ax.containers if getattr(c, has_err, False)] + errs = [c.lines[1][0] + for c in ax.containers if getattr(c, has_err, False)] self._check_colors(errs, linecolors=[expected] * len(errs)) # GH 8081 - df = DataFrame(np.random.randn(10, 5), columns=['a', 'b', 'c', 'd', 'e']) + df = DataFrame( + np.random.randn(10, 5), columns=['a', 'b', 'c', 'd', 'e']) ax = df.plot.scatter(x='a', y='b', xerr='d', yerr='e', c='red') self._check_has_errorbars(ax, xerr=1, yerr=1) _check_errorbar_color(ax.containers, 'red', has_err='has_xerr') @@ -3377,9 +3465,9 @@ def _check_errorbar_color(containers, expected, has_err='has_xerr'): @slow def test_sharex_and_ax(self): - # https://github.com/pydata/pandas/issues/9737 - # using gridspec, the axis in fig.get_axis() are sorted differently than pandas expected - # them, so make sure that only the right ones are removed + # https://github.com/pydata/pandas/issues/9737 using gridspec, the axis + # in fig.get_axis() are sorted differently than pandas expected them, + # so make sure that only the right ones are removed import matplotlib.pyplot as plt plt.close('all') gs, axes = _generate_4_axes_via_gridspec() @@ -3395,10 +3483,12 @@ def _check(axes): self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[0], axes[2]]: self._check_visible(ax.get_xticklabels(), visible=False) - self._check_visible(ax.get_xticklabels(minor=True), visible=False) + self._check_visible( + ax.get_xticklabels(minor=True), visible=False) for ax in [axes[1], axes[3]]: self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) + self._check_visible( + ax.get_xticklabels(minor=True), visible=True) for ax in axes: df.plot(x="a", y="b", title="title", ax=ax, sharex=True) @@ -3427,9 +3517,9 @@ def _check(axes): @slow def test_sharey_and_ax(self): - # https://github.com/pydata/pandas/issues/9737 - # using gridspec, the axis in fig.get_axis() are sorted differently than pandas expected - # them, so make sure that only the right ones are removed + # https://github.com/pydata/pandas/issues/9737 using gridspec, the axis + # in fig.get_axis() are sorted differently than pandas expected them, + # so make sure that only the right ones are removed import matplotlib.pyplot as plt gs, axes = _generate_4_axes_via_gridspec() @@ -3443,7 +3533,8 @@ def _check(axes): for ax in axes: self.assertEqual(len(ax.lines), 1) self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) + self._check_visible( + ax.get_xticklabels(minor=True), visible=True) for ax in [axes[0], axes[1]]: self._check_visible(ax.get_yticklabels(), visible=True) for ax in [axes[2], axes[3]]: @@ -3586,7 +3677,8 @@ def _get_horizontal_grid(): for ax in [ax1, ax2]: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) + self._check_visible( + ax.get_xticklabels(minor=True), visible=True) tm.close() # subplots=True @@ -3597,14 +3689,15 @@ def _get_horizontal_grid(): for ax in axes: self._check_visible(ax.get_yticklabels(), visible=True) self._check_visible(ax.get_xticklabels(), visible=True) - self._check_visible(ax.get_xticklabels(minor=True), visible=True) + self._check_visible( + ax.get_xticklabels(minor=True), visible=True) tm.close() # vertical / subplots / sharex=True / sharey=True ax1, ax2 = _get_vertical_grid() with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=[ax1, ax2], - sharex=True, sharey=True) + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, + sharey=True) self.assertEqual(len(axes[0].lines), 1) self.assertEqual(len(axes[1].lines), 1) for ax in [ax1, ax2]: @@ -3620,8 +3713,8 @@ def _get_horizontal_grid(): # horizontal / subplots / sharex=True / sharey=True ax1, ax2 = _get_horizontal_grid() with tm.assert_produces_warning(UserWarning): - axes = df.plot(subplots=True, ax=[ax1, ax2], - sharex=True, sharey=True) + axes = df.plot(subplots=True, ax=[ax1, ax2], sharex=True, + sharey=True) self.assertEqual(len(axes[0].lines), 1) self.assertEqual(len(axes[1].lines), 1) self._check_visible(axes[0].get_yticklabels(), visible=True) @@ -3635,7 +3728,7 @@ def _get_horizontal_grid(): # boxed def _get_boxed_grid(): - gs = gridspec.GridSpec(3,3) + gs = gridspec.GridSpec(3, 3) fig = plt.figure() ax1 = fig.add_subplot(gs[:2, :2]) ax2 = fig.add_subplot(gs[:2, 2]) @@ -3645,7 +3738,7 @@ def _get_boxed_grid(): axes = _get_boxed_grid() df = DataFrame(np.random.randn(10, 4), - index=ts.index, columns=list('ABCD')) + index=ts.index, columns=list('ABCD')) axes = df.plot(subplots=True, ax=axes) for ax in axes: self.assertEqual(len(ax.lines), 1) @@ -3661,14 +3754,14 @@ def _get_boxed_grid(): axes = df.plot(subplots=True, ax=axes, sharex=True, sharey=True) for ax in axes: self.assertEqual(len(ax.lines), 1) - for ax in [axes[0], axes[2]]: # left column + for ax in [axes[0], axes[2]]: # left column self._check_visible(ax.get_yticklabels(), visible=True) - for ax in [axes[1], axes[3]]: # right column + for ax in [axes[1], axes[3]]: # right column self._check_visible(ax.get_yticklabels(), visible=False) - for ax in [axes[0], axes[1]]: # top row + for ax in [axes[0], axes[1]]: # top row self._check_visible(ax.get_xticklabels(), visible=False) self._check_visible(ax.get_xticklabels(minor=True), visible=False) - for ax in [axes[2], axes[3]]: # bottom row + for ax in [axes[2], axes[3]]: # bottom row self._check_visible(ax.get_xticklabels(), visible=True) self._check_visible(ax.get_xticklabels(minor=True), visible=True) tm.close() @@ -3676,8 +3769,9 @@ def _get_boxed_grid(): @slow def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 - self._check_grid_settings(DataFrame({'a':[1,2,3],'b':[2,3,4]}), - plotting._dataframe_kinds, kws={'x':'a','y':'b'}) + self._check_grid_settings( + DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), + plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): set_option('display.mpl_style', 'default') @@ -3705,7 +3799,7 @@ def test_plain_axes(self): # a new ax is created for the colorbar -> also multiples axes (GH11520) df = DataFrame({'a': randn(8), 'b': randn(8)}) fig = self.plt.figure() - ax = fig.add_axes((0,0,1,1)) + ax = fig.add_axes((0, 0, 1, 1)) df.plot(kind='scatter', ax=ax, x='a', y='b', c='a', cmap='hsv') # other examples @@ -3726,19 +3820,22 @@ def test_passed_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] colormap = mpl.colors.ListedColormap(color_tuples) - barplot = pd.DataFrame([[1,2,3]]).plot(kind="bar", cmap=colormap) - self.assertEqual(color_tuples, [c.get_facecolor() for c in barplot.patches]) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar", cmap=colormap) + self.assertEqual(color_tuples, [c.get_facecolor() + for c in barplot.patches]) def test_rcParams_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - try: # mpl 1.5 - with mpl.rc_context(rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1,2,3]]).plot(kind="bar") - except (AttributeError, KeyError): # mpl 1.4 + try: # mpl 1.5 + with mpl.rc_context( + rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + except (AttributeError, KeyError): # mpl 1.4 with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): - barplot = pd.DataFrame([[1,2,3]]).plot(kind="bar") - self.assertEqual(color_tuples, [c.get_facecolor() for c in barplot.patches]) + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + self.assertEqual(color_tuples, [c.get_facecolor() + for c in barplot.patches]) @tm.mplskip @@ -3755,15 +3852,15 @@ def test_series_groupby_plotting_nominally_works(self): tm.close() height.groupby(gender).hist() tm.close() - #Regression test for GH8733 + # Regression test for GH8733 height.groupby(gender).plot(alpha=0.5) tm.close() def test_plotting_with_float_index_works(self): # GH 7025 - df = DataFrame({'def': [1,1,1,2,2,2,3,3,3], + df = DataFrame({'def': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'val': np.random.randn(9)}, - index=[1.0,2.0,3.0,1.0,2.0,3.0,1.0,2.0,3.0]) + index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]) df.groupby('def')['val'].plot() tm.close() @@ -3773,7 +3870,9 @@ def test_plotting_with_float_index_works(self): def test_hist_single_row(self): # GH10214 bins = np.arange(80, 100 + 2, 1) - df = DataFrame({"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]}) + df = DataFrame({"Name": ["AAA", "BBB"], + "ByCol": [1, 2], + "Mark": [85, 89]}) df["Mark"].hist(by=df["ByCol"], bins=bins) df = DataFrame({"Name": ["AAA"], "ByCol": [1], "Mark": [85]}) df["Mark"].hist(by=df["ByCol"], bins=bins) @@ -3812,9 +3911,9 @@ def assert_is_valid_plot_return_object(objs): ''.format(el.__class__.__name__)) else: assert isinstance(objs, (plt.Artist, tuple, dict)), \ - ('objs is neither an ndarray of Artist instances nor a ' - 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' - ''.format(objs.__class__.__name__)) + ('objs is neither an ndarray of Artist instances nor a ' + 'single Artist instance, tuple, or dict, "objs" is a {0!r} ' + ''.format(objs.__class__.__name__)) def _check_plot_works(f, filterwarnings='always', **kwargs): @@ -3830,7 +3929,7 @@ def _check_plot_works(f, filterwarnings='always', **kwargs): plt.clf() - ax = kwargs.get('ax', fig.add_subplot(211)) + ax = kwargs.get('ax', fig.add_subplot(211)) # noqa ret = f(**kwargs) assert_is_valid_plot_return_object(ret) @@ -3850,16 +3949,17 @@ def _check_plot_works(f, filterwarnings='always', **kwargs): return ret + def _generate_4_axes_via_gridspec(): import matplotlib.pyplot as plt import matplotlib as mpl - import matplotlib.gridspec + import matplotlib.gridspec # noqa gs = mpl.gridspec.GridSpec(2, 2) - ax_tl = plt.subplot(gs[0,0]) - ax_ll = plt.subplot(gs[1,0]) - ax_tr = plt.subplot(gs[0,1]) - ax_lr = plt.subplot(gs[1,1]) + ax_tl = plt.subplot(gs[0, 0]) + ax_ll = plt.subplot(gs[1, 0]) + ax_tr = plt.subplot(gs[0, 1]) + ax_lr = plt.subplot(gs[1, 1]) return gs, [ax_tl, ax_ll, ax_tr, ax_lr] diff --git a/pandas/tests/test_graphics_others.py b/pandas/tests/test_graphics_others.py index 0fb1864f998b2..7301edcd52c3c 100644 --- a/pandas/tests/test_graphics_others.py +++ b/pandas/tests/test_graphics_others.py @@ -8,24 +8,14 @@ import warnings from distutils.version import LooseVersion -from datetime import datetime, date - -from pandas import (Series, DataFrame, MultiIndex, PeriodIndex, date_range, - bdate_range) -from pandas.compat import (range, lrange, StringIO, lmap, lzip, u, zip, - iteritems, OrderedDict, PY3) -from pandas.util.decorators import cache_readonly -import pandas.core.common as com +from pandas import Series, DataFrame, MultiIndex +from pandas.compat import range, lmap, lzip import pandas.util.testing as tm -from pandas.util.testing import ensure_clean -from pandas.core.config import set_option - import numpy as np from numpy import random -from numpy.random import rand, randn +from numpy.random import randn -from numpy.testing import assert_array_equal, assert_allclose from numpy.testing.decorators import slow import pandas.tools.plotting as plotting @@ -115,20 +105,25 @@ def test_hist_layout_with_by(self): axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1)) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1)) + axes = _check_plot_works( + df.height.hist, by=df.category, layout=(2, -1)) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) - axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1)) + axes = _check_plot_works( + df.height.hist, by=df.category, layout=(3, -1)) self._check_axes_shape(axes, axes_num=4, layout=(3, 2)) - axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4)) + axes = _check_plot_works( + df.height.hist, by=df.category, layout=(-1, 4)) self._check_axes_shape(axes, axes_num=4, layout=(1, 4)) - axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2)) + axes = _check_plot_works( + df.height.hist, by=df.classroom, layout=(2, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7)) - self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) + self._check_axes_shape( + axes, axes_num=4, layout=(4, 2), figsize=(12, 7)) @slow def test_hist_no_overlap(self): @@ -146,7 +141,7 @@ def test_hist_no_overlap(self): @slow def test_hist_by_no_extra_plots(self): df = self.hist_df - axes = df.height.hist(by=df.gender) + axes = df.height.hist(by=df.gender) # noqa self.assertEqual(len(self.plt.get_fignums()), 1) @slow @@ -188,9 +183,10 @@ def setUp(self): mpl.rcdefaults() self.tdf = tm.makeTimeDataFrame() - self.hexbin_df = DataFrame({"A": np.random.uniform(size=20), - "B": np.random.uniform(size=20), - "C": np.arange(20) + np.random.uniform(size=20)}) + self.hexbin_df = DataFrame({ + "A": np.random.uniform(size=20), + "B": np.random.uniform(size=20), + "C": np.arange(20) + np.random.uniform(size=20)}) from pandas import read_csv path = os.path.join(curpath(), 'data', 'iris.csv') @@ -205,7 +201,8 @@ def test_boxplot_legacy(self): df['indic2'] = ['foo', 'bar', 'foo'] * 2 _check_plot_works(df.boxplot, return_type='dict') - _check_plot_works(df.boxplot, column=['one', 'two'], return_type='dict') + _check_plot_works(df.boxplot, column=[ + 'one', 'two'], return_type='dict') _check_plot_works(df.boxplot, column=['one', 'two'], by='indic') _check_plot_works(df.boxplot, column='one', by=['indic', 'indic2']) _check_plot_works(df.boxplot, by='indic') @@ -231,10 +228,12 @@ def test_boxplot_legacy(self): # Multiple columns with an ax argument should use same figure fig, ax = self.plt.subplots() - axes = df.boxplot(column=['Col1', 'Col2'], by='X', ax=ax, return_type='axes') + axes = df.boxplot(column=['Col1', 'Col2'], + by='X', ax=ax, return_type='axes') self.assertIs(axes['Col1'].get_figure(), fig) - # When by is None, check that all relevant lines are present in the dict + # When by is None, check that all relevant lines are present in the + # dict fig, ax = self.plt.subplots() d = df.boxplot(ax=ax, return_type='dict') lines = list(itertools.chain.from_iterable(d.values())) @@ -243,7 +242,7 @@ def test_boxplot_legacy(self): @slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pydata/pandas/pull/7096 - import matplotlib as mpl + import matplotlib as mpl # noqa df = DataFrame(randn(6, 4), index=list(string.ascii_letters[:6]), @@ -426,21 +425,23 @@ def test_scatter_matrix_axis(self): with tm.RNGContext(42): df = DataFrame(randn(100, 3)) - axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, - range_padding=.1) + axes = _check_plot_works(scatter_matrix, filterwarnings='always', + frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() # GH 5662 expected = ['-2', '-1', '0', '1', '2'] self._check_text_labels(axes0_labels, expected) - self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + self._check_ticks_props( + axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) df[0] = ((df[0] - 2) / 3) - axes = _check_plot_works(scatter_matrix, filterwarnings='always', frame=df, - range_padding=.1) + axes = _check_plot_works(scatter_matrix, filterwarnings='always', + frame=df, range_padding=.1) axes0_labels = axes[0][0].yaxis.get_majorticklabels() expected = ['-1.2', '-1.0', '-0.8', '-0.6', '-0.4', '-0.2', '0.0'] self._check_text_labels(axes0_labels, expected) - self._check_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + self._check_ticks_props( + axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) @slow def test_andrews_curves(self): @@ -452,16 +453,22 @@ def test_andrews_curves(self): _check_plot_works(andrews_curves, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=rgba) - self._check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', color=rgba) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) cnames = ['dodgerblue', 'aquamarine', 'seagreen'] - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=cnames) - self._check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', color=cnames) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', colormap=cm.jet) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', colormap=cm.jet) cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique())) - self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) length = 10 df = DataFrame({"A": random.rand(length), @@ -472,16 +479,22 @@ def test_andrews_curves(self): _check_plot_works(andrews_curves, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=rgba) - self._check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', color=rgba) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) cnames = ['dodgerblue', 'aquamarine', 'seagreen'] - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', color=cnames) - self._check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', color=cnames) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) - ax = _check_plot_works(andrews_curves, frame=df, class_column='Name', colormap=cm.jet) + ax = _check_plot_works(andrews_curves, frame=df, + class_column='Name', colormap=cm.jet) cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique())) - self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) colors = ['b', 'g', 'r'] df = DataFrame({"A": [1, 2, 3], @@ -502,23 +515,31 @@ def test_parallel_coordinates(self): df = self.iris - ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name') + ax = _check_plot_works(parallel_coordinates, + frame=df, class_column='Name') nlines = len(ax.get_lines()) nxticks = len(ax.xaxis.get_ticklabels()) rgba = ('#556270', '#4ECDC4', '#C7F464') - ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name', color=rgba) - self._check_colors(ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) + ax = _check_plot_works(parallel_coordinates, + frame=df, class_column='Name', color=rgba) + self._check_colors( + ax.get_lines()[:10], linecolors=rgba, mapping=df['Name'][:10]) cnames = ['dodgerblue', 'aquamarine', 'seagreen'] - ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name', color=cnames) - self._check_colors(ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) + ax = _check_plot_works(parallel_coordinates, + frame=df, class_column='Name', color=cnames) + self._check_colors( + ax.get_lines()[:10], linecolors=cnames, mapping=df['Name'][:10]) - ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name', colormap=cm.jet) + ax = _check_plot_works(parallel_coordinates, + frame=df, class_column='Name', colormap=cm.jet) cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique())) - self._check_colors(ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) + self._check_colors( + ax.get_lines()[:10], linecolors=cmaps, mapping=df['Name'][:10]) - ax = _check_plot_works(parallel_coordinates, frame=df, class_column='Name', axvlines=False) + ax = _check_plot_works(parallel_coordinates, + frame=df, class_column='Name', axvlines=False) assert len(ax.get_lines()) == (nlines - nxticks) colors = ['b', 'g', 'r'] @@ -544,17 +565,20 @@ def test_radviz(self): _check_plot_works(radviz, frame=df, class_column='Name') rgba = ('#556270', '#4ECDC4', '#C7F464') - ax = _check_plot_works(radviz, frame=df, class_column='Name', color=rgba) + ax = _check_plot_works( + radviz, frame=df, class_column='Name', color=rgba) # skip Circle drawn as ticks patches = [p for p in ax.patches[:20] if p.get_label() != ''] - self._check_colors(patches[:10], facecolors=rgba, mapping=df['Name'][:10]) + self._check_colors( + patches[:10], facecolors=rgba, mapping=df['Name'][:10]) cnames = ['dodgerblue', 'aquamarine', 'seagreen'] _check_plot_works(radviz, frame=df, class_column='Name', color=cnames) patches = [p for p in ax.patches[:20] if p.get_label() != ''] self._check_colors(patches, facecolors=cnames, mapping=df['Name'][:10]) - _check_plot_works(radviz, frame=df, class_column='Name', colormap=cm.jet) + _check_plot_works(radviz, frame=df, + class_column='Name', colormap=cm.jet) cmaps = lmap(cm.jet, np.linspace(0, 1, df['Name'].nunique())) patches = [p for p in ax.patches[:20] if p.get_label() != ''] self._check_colors(patches, facecolors=cmaps, mapping=df['Name'][:10]) @@ -656,7 +680,8 @@ def test_grouped_hist_legacy(self): xrot, yrot = 30, 40 axes = plotting.grouped_hist(df.A, by=df.C, normed=True, cumulative=True, bins=4, - xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot) + xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -700,13 +725,15 @@ def test_grouped_box_return_type(self): # old style: return_type=None result = df.boxplot(by='gender') self.assertIsInstance(result, np.ndarray) - self._check_box_return_type(result, None, - expected_keys=['height', 'weight', 'category']) + self._check_box_return_type( + result, None, + expected_keys=['height', 'weight', 'category']) # now for groupby with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = df.groupby('gender').boxplot() - self._check_box_return_type(result, 'dict', expected_keys=['Male', 'Female']) + self._check_box_return_type( + result, 'dict', expected_keys=['Male', 'Female']) columns2 = 'X B C D A G Y N Q O'.split() df2 = DataFrame(random.randn(50, 10), columns=columns2) @@ -715,11 +742,13 @@ def test_grouped_box_return_type(self): for t in ['dict', 'axes', 'both']: returned = df.groupby('classroom').boxplot(return_type=t) - self._check_box_return_type(returned, t, expected_keys=['A', 'B', 'C']) + self._check_box_return_type( + returned, t, expected_keys=['A', 'B', 'C']) returned = df.boxplot(by='classroom', return_type=t) - self._check_box_return_type(returned, t, - expected_keys=['height', 'weight', 'category']) + self._check_box_return_type( + returned, t, + expected_keys=['height', 'weight', 'category']) returned = df2.groupby('category').boxplot(return_type=t) self._check_box_return_type(returned, t, expected_keys=categories2) @@ -733,7 +762,8 @@ def test_grouped_box_layout(self): self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], by=df.gender, layout=(1, 1)) - self.assertRaises(ValueError, df.boxplot, column=['height', 'weight', 'category'], + self.assertRaises(ValueError, df.boxplot, + column=['height', 'weight', 'category'], layout=(2, 1), return_type='dict') self.assertRaises(ValueError, df.boxplot, column=['weight', 'height'], by=df.gender, layout=(-1, -1)) @@ -742,7 +772,8 @@ def test_grouped_box_layout(self): return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2)) - box = _check_plot_works(df.groupby('category').boxplot, column='height', + box = _check_plot_works(df.groupby('category').boxplot, + column='height', return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) @@ -766,10 +797,12 @@ def test_grouped_box_layout(self): column=['height', 'weight', 'category'], return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) - box = _check_plot_works(df.groupby('category').boxplot, column='height', + box = _check_plot_works(df.groupby('category').boxplot, + column='height', layout=(3, 2), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) - box = _check_plot_works(df.groupby('category').boxplot, column='height', + box = _check_plot_works(df.groupby('category').boxplot, + column='height', layout=(3, -1), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) @@ -786,7 +819,7 @@ def test_grouped_box_layout(self): return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4)) - box = df.groupby('classroom').boxplot( + box = df.groupby('classroom').boxplot( # noqa column=['height', 'weight', 'category'], layout=(1, -1), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) @@ -803,8 +836,10 @@ def test_grouped_box_multiple_axes(self): # which has earlier alphabetical order with tm.assert_produces_warning(UserWarning): fig, axes = self.plt.subplots(2, 2) - df.groupby('category').boxplot(column='height', return_type='axes', ax=axes) - self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) + df.groupby('category').boxplot( + column='height', return_type='axes', ax=axes) + self._check_axes_shape(self.plt.gcf().axes, + axes_num=4, layout=(2, 2)) fig, axes = self.plt.subplots(2, 3) with warnings.catch_warnings(): @@ -856,12 +891,15 @@ def test_grouped_hist_layout(self): axes = df.hist(column='height', by=df.category, layout=(-1, 1)) self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) - axes = df.hist(column='height', by=df.category, layout=(4, 2), figsize=(12, 8)) - self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8)) + axes = df.hist(column='height', by=df.category, + layout=(4, 2), figsize=(12, 8)) + self._check_axes_shape( + axes, axes_num=4, layout=(4, 2), figsize=(12, 8)) tm.close() # GH 6769 - axes = _check_plot_works(df.hist, column='height', by='classroom', layout=(2, 2)) + axes = _check_plot_works( + df.hist, column='height', by='classroom', layout=(2, 2)) self._check_axes_shape(axes, axes_num=3, layout=(2, 2)) # without column diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 5eb8606f4c30c..7e40885fdacb5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -7,20 +7,18 @@ from datetime import datetime from numpy import nan -from pandas import date_range,bdate_range, Timestamp -from pandas.core.index import Index, MultiIndex, Int64Index, CategoricalIndex +from pandas import date_range, bdate_range, Timestamp +from pandas.core.index import Index, MultiIndex, CategoricalIndex from pandas.core.api import Categorical, DataFrame -from pandas.core.groupby import (SpecificationError, DataError, - _nargsort, _lexsort_indexer) +from pandas.core.groupby import (SpecificationError, DataError, _nargsort, + _lexsort_indexer) from pandas.core.series import Series from pandas.core.config import option_context from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal, assert_index_equal, assertRaisesRegexp) -from pandas.compat import( - range, long, lrange, StringIO, lmap, lzip, map, - zip, builtins, OrderedDict, product as cart_product -) +from pandas.compat import (range, long, lrange, StringIO, lmap, lzip, map, zip, + builtins, OrderedDict, product as cart_product) from pandas import compat from pandas.core.panel import Panel from pandas.tools.merge import concat @@ -36,24 +34,6 @@ from numpy.testing import assert_equal -def commonSetUp(self): - self.dateRange = bdate_range('1/1/2005', periods=250) - self.stringIndex = Index([rands(8).upper() for x in range(250)]) - - self.groupId = Series([x[0] for x in self.stringIndex], - index=self.stringIndex) - self.groupDict = dict((k, v) for k, v in compat.iteritems(self.groupId)) - - self.columnIndex = Index(['A', 'B', 'C', 'D', 'E']) - - randMat = np.random.randn(250, 5) - self.stringMatrix = DataFrame(randMat, columns=self.columnIndex, - index=self.stringIndex) - - self.timeMatrix = DataFrame(randMat, columns=self.columnIndex, - index=self.dateRange) - - class TestGroupBy(tm.TestCase): _multiprocess_can_split_ = True @@ -66,44 +46,39 @@ def setUp(self): self.frame = DataFrame(self.seriesd) self.tsframe = DataFrame(self.tsd) - self.df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8)}) - - self.df_mixed_floats = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.array(np.random.randn(8), - dtype='float32')}) - - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + self.df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + + self.df_mixed_floats = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.array( + np.random.randn(8), dtype='float32')}) + + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) self.mframe = DataFrame(np.random.randn(10, 3), index=index, columns=['A', 'B', 'C']) - self.three_group = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) + self.three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) def test_basic(self): - def checkit(dtype): data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) @@ -134,14 +109,9 @@ def checkit(dtype): # complex agg agged = grouped.aggregate([np.mean, np.std]) - agged = grouped.aggregate({'one': np.mean, - 'two': np.std}) - - group_constants = { - 0: 10, - 1: 20, - 2: 30 - } + agged = grouped.aggregate({'one': np.mean, 'two': np.std}) + + group_constants = {0: 10, 1: 20, 2: 30} agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) self.assertEqual(agged[1], 21) @@ -166,8 +136,8 @@ def test_first_last_nth(self): # tests for first / last / nth grouped = self.df.groupby('A') first = grouped.first() - expected = self.df.ix[[1, 0], ['B','C','D']] - expected.index = Index(['bar', 'foo'],name='A') + expected = self.df.ix[[1, 0], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') expected = expected.sort_index() assert_frame_equal(first, expected) @@ -175,16 +145,16 @@ def test_first_last_nth(self): assert_frame_equal(nth, expected) last = grouped.last() - expected = self.df.ix[[5, 7], ['B','C','D']] - expected.index = Index(['bar', 'foo'],name='A') + expected = self.df.ix[[5, 7], ['B', 'C', 'D']] + expected.index = Index(['bar', 'foo'], name='A') assert_frame_equal(last, expected) nth = grouped.nth(-1) assert_frame_equal(nth, expected) nth = grouped.nth(1) - expected = self.df.ix[[2, 3],['B','C','D']].copy() - expected.index = Index(['foo', 'bar'],name='A') + expected = self.df.ix[[2, 3], ['B', 'C', 'D']].copy() + expected.index = Index(['foo', 'bar'], name='A') expected = expected.sort_index() assert_frame_equal(nth, expected) @@ -196,17 +166,18 @@ def test_first_last_nth(self): self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan self.assertTrue(com.isnull(grouped['B'].first()['foo'])) self.assertTrue(com.isnull(grouped['B'].last()['foo'])) - self.assertTrue(com.isnull(grouped['B'].nth(0)[0])) # not sure what this is testing + self.assertTrue(com.isnull(grouped['B'].nth(0)[0]) + ) # not sure what this is testing # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) g = df.groupby('A') result = g.first() - expected = df.iloc[[1,2]].set_index('A') + expected = df.iloc[[1, 2]].set_index('A') assert_frame_equal(result, expected) - expected = df.iloc[[1,2]].set_index('A') - result = g.nth(0,dropna='any') + expected = df.iloc[[1, 2]].set_index('A') + result = g.nth(0, dropna='any') assert_frame_equal(result, expected) def test_first_last_nth_dtypes(self): @@ -230,7 +201,7 @@ def test_first_last_nth_dtypes(self): assert_frame_equal(last, expected) nth = grouped.nth(1) - expected = df.ix[[3, 2],['B', 'C', 'D', 'E', 'F']] + expected = df.ix[[3, 2], ['B', 'C', 'D', 'E', 'F']] expected.index = Index(['bar', 'foo'], name='A') expected = expected.sort_index() assert_frame_equal(nth, expected) @@ -249,13 +220,14 @@ def test_nth(self): assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) - assert_frame_equal(g.nth(2), df.loc[[],['B']]) + assert_frame_equal(g.nth(2), df.loc[[], ['B']]) assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) - assert_frame_equal(g.nth(-3), df.loc[[],['B']]) + assert_frame_equal(g.nth(-3), df.loc[[], ['B']]) assert_series_equal(g.B.nth(0), df.B.iloc[[0, 2]]) assert_series_equal(g.B.nth(1), df.B.iloc[[1]]) - assert_frame_equal(g[['B']].nth(0), df.ix[[0, 2], ['A', 'B']].set_index('A')) + assert_frame_equal(g[['B']].nth(0), + df.ix[[0, 2], ['A', 'B']].set_index('A')) exp = df.set_index('A') assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) @@ -267,22 +239,39 @@ def test_nth(self): # out of bounds, regression from 0.13.1 # GH 6621 - df = DataFrame({'color': {0: 'green', 1: 'green', 2: 'red', 3: 'red', 4: 'red'}, - 'food': {0: 'ham', 1: 'eggs', 2: 'eggs', 3: 'ham', 4: 'pork'}, - 'two': {0: 1.5456590000000001, 1: -0.070345000000000005, 2: -2.4004539999999999, 3: 0.46206000000000003, 4: 0.52350799999999997}, - 'one': {0: 0.56573799999999996, 1: -0.9742360000000001, 2: 1.033801, 3: -0.78543499999999999, 4: 0.70422799999999997}}).set_index(['color', 'food']) + df = DataFrame({'color': {0: 'green', + 1: 'green', + 2: 'red', + 3: 'red', + 4: 'red'}, + 'food': {0: 'ham', + 1: 'eggs', + 2: 'eggs', + 3: 'ham', + 4: 'pork'}, + 'two': {0: 1.5456590000000001, + 1: -0.070345000000000005, + 2: -2.4004539999999999, + 3: 0.46206000000000003, + 4: 0.52350799999999997}, + 'one': {0: 0.56573799999999996, + 1: -0.9742360000000001, + 2: 1.033801, + 3: -0.78543499999999999, + 4: 0.70422799999999997}}).set_index(['color', + 'food']) result = df.groupby(level=0).nth(2) expected = df.iloc[[-1]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) result = df.groupby(level=0).nth(3) expected = df.loc[[]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # GH 7559 # from the vbench - df = DataFrame(np.random.randint(1, 10, (100, 2)),dtype='int64') + df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') s = df[1] g = df[0] expected = s.groupby(g).first() @@ -292,15 +281,15 @@ def test_nth(self): self.assertEqual(expected.name, 1) # validate first - v = s[g==1].iloc[0] - self.assertEqual(expected.iloc[0],v) - self.assertEqual(expected2.iloc[0],v) + v = s[g == 1].iloc[0] + self.assertEqual(expected.iloc[0], v) + self.assertEqual(expected2.iloc[0], v) # this is NOT the same as .first (as sorted is default!) # as it keeps the order in the series (and not the group order) # related GH 7287 - expected = s.groupby(g,sort=False).first() - expected.index = pd.Index(range(1,10), name=0) + expected = s.groupby(g, sort=False).first() + expected.index = pd.Index(range(1, 10), name=0) result = s.groupby(g).nth(0, dropna='all') assert_series_equal(result, expected) @@ -309,7 +298,7 @@ def test_nth(self): g = df.groupby('A') result = g.B.nth(0, dropna=True) expected = g.B.first() - assert_series_equal(result,expected) + assert_series_equal(result, expected) # test multiple nth values df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], @@ -319,19 +308,25 @@ def test_nth(self): assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) - assert_frame_equal(g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) + assert_frame_equal( + g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) - assert_frame_equal(g.nth([3, 4]), df.loc[[],['B']]) + assert_frame_equal(g.nth([3, 4]), df.loc[[], ['B']]) - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', freq='B') + business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', + freq='B') df = DataFrame(1, index=business_dates, columns=['a', 'b']) # get the first, fourth and last two business days for each month - result = df.groupby((df.index.year, df.index.month)).nth([0, 3, -2, -1]) - expected_dates = pd.to_datetime(['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', - '2014/5/1', '2014/5/6', '2014/5/29', '2014/5/30', - '2014/6/2', '2014/6/5', '2014/6/27', '2014/6/30']) + result = df.groupby((df.index.year, df.index.month)).nth([0, 3, -2, -1 + ]) + expected_dates = pd.to_datetime( + ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', + '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', + '2014/6/27', '2014/6/30']) expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) assert_frame_equal(result, expected) @@ -343,34 +338,32 @@ def test_nth_multi_index(self): expected = grouped.first() assert_frame_equal(result, expected) - def test_nth_multi_index_as_expected(self): # PR 9090, related to issue 8979 # test nth on MultiIndex - three_group = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny']}) + three_group = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny']}) grouped = three_group.groupby(['A', 'B']) result = grouped.nth(0) - expected = DataFrame({'C': ['dull', 'dull', 'dull', 'dull']}, - index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], ['one', 'two', 'one', 'two']], - names=['A', 'B'])) + expected = DataFrame( + {'C': ['dull', 'dull', 'dull', 'dull']}, + index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], + ['one', 'two', 'one', 'two']], + names=['A', 'B'])) assert_frame_equal(result, expected) - def test_grouper_index_types(self): # related GH5375 # groupby misbehaving when using a Floatlike index - df = DataFrame(np.arange(10).reshape(5,2),columns=list('AB')) - for index in [ tm.makeFloatIndex, tm.makeStringIndex, - tm.makeUnicodeIndex, tm.makeIntIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + df = DataFrame(np.arange(10).reshape(5, 2), columns=list('AB')) + for index in [tm.makeFloatIndex, tm.makeStringIndex, + tm.makeUnicodeIndex, tm.makeIntIndex, tm.makeDateIndex, + tm.makePeriodIndex]: df.index = index(len(df)) df.groupby(list('abcde')).apply(lambda x: x) @@ -385,28 +378,29 @@ def test_grouper_multilevel_freq(self): from datetime import date, timedelta d0 = date.today() - timedelta(days=14) dates = date_range(d0, date.today()) - date_index = pd.MultiIndex.from_product([dates, dates], names=['foo', 'bar']) + date_index = pd.MultiIndex.from_product( + [dates, dates], names=['foo', 'bar']) df = pd.DataFrame(np.random.randint(0, 100, 225), index=date_index) # Check string level - expected = df.reset_index().groupby([pd.Grouper(key='foo', freq='W'), - pd.Grouper(key='bar', freq='W')]).sum() + expected = df.reset_index().groupby([pd.Grouper( + key='foo', freq='W'), pd.Grouper(key='bar', freq='W')]).sum() # reset index changes columns dtype to object expected.columns = pd.Index([0], dtype='int64') - result = df.groupby([pd.Grouper(level='foo', freq='W'), - pd.Grouper(level='bar', freq='W')]).sum() + result = df.groupby([pd.Grouper(level='foo', freq='W'), pd.Grouper( + level='bar', freq='W')]).sum() assert_frame_equal(result, expected) # Check integer level - result = df.groupby([pd.Grouper(level=0, freq='W'), - pd.Grouper(level=1, freq='W')]).sum() + result = df.groupby([pd.Grouper(level=0, freq='W'), pd.Grouper( + level=1, freq='W')]).sum() assert_frame_equal(result, expected) def test_grouper_creation_bug(self): # GH 8795 - df = DataFrame({'A':[0,0,1,1,2,2], 'B':[1,2,3,4,5,6]}) + df = DataFrame({'A': [0, 0, 1, 1, 2, 2], 'B': [1, 2, 3, 4, 5, 6]}) g = df.groupby('A') expected = g.sum() @@ -417,18 +411,19 @@ def test_grouper_creation_bug(self): result = g.apply(lambda x: x.sum()) assert_frame_equal(result, expected) - g = df.groupby(pd.Grouper(key='A',axis=0)) + g = df.groupby(pd.Grouper(key='A', axis=0)) result = g.sum() assert_frame_equal(result, expected) # GH8866 - s = Series(np.arange(8,dtype='int64'), - index=pd.MultiIndex.from_product([list('ab'), - range(2), - date_range('20130101',periods=2)], - names=['one','two','three'])) - result = s.groupby(pd.Grouper(level='three',freq='M')).sum() - expected = Series([28],index=Index([Timestamp('2013-01-31')],freq='M',name='three')) + s = Series(np.arange(8, dtype='int64'), + index=pd.MultiIndex.from_product( + [list('ab'), range(2), + date_range('20130101', periods=2)], + names=['one', 'two', 'three'])) + result = s.groupby(pd.Grouper(level='three', freq='M')).sum() + expected = Series([28], index=Index( + [Timestamp('2013-01-31')], freq='M', name='three')) assert_series_equal(result, expected) # just specifying a level breaks @@ -441,14 +436,16 @@ def test_grouper_getting_correct_binner(self): # GH 10063 # using a non-time-based grouper and a time-based grouper # and specifying levels - df = DataFrame({'A' : 1 }, - index=pd.MultiIndex.from_product([list('ab'), - date_range('20130101',periods=80)], - names=['one','two'])) - result = df.groupby([pd.Grouper(level='one'),pd.Grouper(level='two',freq='M')]).sum() - expected = DataFrame({'A' : [31,28,21,31,28,21]}, - index=MultiIndex.from_product([list('ab'),date_range('20130101',freq='M',periods=3)], - names=['one','two'])) + df = DataFrame({'A': 1}, index=pd.MultiIndex.from_product( + [list('ab'), date_range('20130101', periods=80)], names=['one', + 'two'])) + result = df.groupby([pd.Grouper(level='one'), pd.Grouper( + level='two', freq='M')]).sum() + expected = DataFrame({'A': [31, 28, 21, 31, 28, 21]}, + index=MultiIndex.from_product( + [list('ab'), + date_range('20130101', freq='M', periods=3)], + names=['one', 'two'])) assert_frame_equal(result, expected) def test_grouper_iter(self): @@ -467,8 +464,8 @@ def test_groupby_grouper(self): def test_groupby_duplicated_column_errormsg(self): # GH7511 - df = DataFrame(columns=['A','B','A','C'], \ - data=[range(4), range(2,6), range(0, 8, 2)]) + df = DataFrame(columns=['A', 'B', 'A', 'C'], + data=[range(4), range(2, 6), range(0, 8, 2)]) self.assertRaises(ValueError, df.groupby, 'A') self.assertRaises(ValueError, df.groupby, ['A', 'B']) @@ -500,9 +497,9 @@ def test_groupby_dict_mapping(self): def test_groupby_bounds_check(self): # groupby_X is code-generated, so if one variant # does, the rest probably do to - a = np.array([1,2],dtype='object') - b = np.array([1,2,3],dtype='object') - self.assertRaises(AssertionError, pd.algos.groupby_object,a, b) + a = np.array([1, 2], dtype='object') + b = np.array([1, 2, 3], dtype='object') + self.assertRaises(AssertionError, pd.algos.groupby_object, a, b) def test_groupby_grouper_f_sanity_checked(self): dates = date_range('01-Jan-2013', periods=12, freq='MS') @@ -517,7 +514,7 @@ def test_groupby_grouper_f_sanity_checked(self): # when the elements are Timestamp. # the result is Index[0:6], very confusing. - self.assertRaises(AssertionError, ts.groupby,lambda key: key[0:6]) + self.assertRaises(AssertionError, ts.groupby, lambda key: key[0:6]) def test_groupby_nonobject_dtype(self): key = self.mframe.index.labels[0] @@ -536,84 +533,101 @@ def max_value(group): applied = df.groupby('A').apply(max_value) result = applied.get_dtype_counts().sort_values() - expected = Series({ 'object' : 2, 'float64' : 2, 'int64' : 1 }).sort_values() - assert_series_equal(result,expected) + expected = Series({'object': 2, + 'float64': 2, + 'int64': 1}).sort_values() + assert_series_equal(result, expected) def test_groupby_return_type(self): # GH2893, return a reduced type - df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, - {"val1":2, "val2": 27}, {"val1":2, "val2": 12}]) + df1 = DataFrame([{"val1": 1, + "val2": 20}, {"val1": 1, + "val2": 19}, {"val1": 2, + "val2": 27}, {"val1": 2, + "val2": 12} + ]) def func(dataf): - return dataf["val2"] - dataf["val2"].mean() + return dataf["val2"] - dataf["val2"].mean() result = df1.groupby("val1", squeeze=True).apply(func) - tm.assertIsInstance(result,Series) + tm.assertIsInstance(result, Series) + + df2 = DataFrame([{"val1": 1, + "val2": 20}, {"val1": 1, + "val2": 19}, {"val1": 1, + "val2": 27}, {"val1": 1, + "val2": 12} + ]) - df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, - {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) def func(dataf): - return dataf["val2"] - dataf["val2"].mean() + return dataf["val2"] - dataf["val2"].mean() result = df2.groupby("val1", squeeze=True).apply(func) - tm.assertIsInstance(result,Series) + tm.assertIsInstance(result, Series) # GH3596, return a consistent type (regression in 0.11 from 0.10.1) - df = DataFrame([[1,1],[1,1]],columns=['X','Y']) - result = df.groupby('X',squeeze=False).count() - tm.assertIsInstance(result,DataFrame) + df = DataFrame([[1, 1], [1, 1]], columns=['X', 'Y']) + result = df.groupby('X', squeeze=False).count() + tm.assertIsInstance(result, DataFrame) # GH5592 # inconcistent return type - df = DataFrame(dict(A = [ 'Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', 'Pony', 'Pony' ], - B = Series(np.arange(7),dtype='int64'), - C = date_range('20130101',periods=7))) + df = DataFrame(dict(A=['Tiger', 'Tiger', 'Tiger', 'Lamb', 'Lamb', + 'Pony', 'Pony'], B=Series( + np.arange(7), dtype='int64'), C=date_range( + '20130101', periods=7))) def f(grp): return grp.iloc[0] + expected = df.groupby('A').first()[['B']] result = df.groupby('A').apply(f)[['B']] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def f(grp): if grp.name == 'Tiger': return None return grp.iloc[0] + result = df.groupby('A').apply(f)[['B']] e = expected.copy() e.loc['Tiger'] = np.nan - assert_frame_equal(result,e) + assert_frame_equal(result, e) def f(grp): if grp.name == 'Pony': return None return grp.iloc[0] + result = df.groupby('A').apply(f)[['B']] e = expected.copy() e.loc['Pony'] = np.nan - assert_frame_equal(result,e) + assert_frame_equal(result, e) # 5592 revisited, with datetimes def f(grp): if grp.name == 'Pony': return None return grp.iloc[0] + result = df.groupby('A').apply(f)[['C']] e = df.groupby('A').first()[['C']] e.loc['Pony'] = pd.NaT - assert_frame_equal(result,e) + assert_frame_equal(result, e) # scalar outputs def f(grp): if grp.name == 'Pony': return None return grp.iloc[0].loc['C'] + result = df.groupby('A').apply(f) e = df.groupby('A').first()['C'].copy() e.loc['Pony'] = np.nan e.name = None - assert_series_equal(result,e) + assert_series_equal(result, e) def test_agg_api(self): @@ -621,19 +635,19 @@ def test_agg_api(self): # http://stackoverflow.com/questions/21706030/pandas-groupby-agg-function-column-dtype-error # different api for agg when passed custom function with mixed frame - df = DataFrame({'data1':np.random.randn(5), - 'data2':np.random.randn(5), - 'key1':['a','a','b','b','a'], - 'key2':['one','two','one','two','one']}) + df = DataFrame({'data1': np.random.randn(5), + 'data2': np.random.randn(5), + 'key1': ['a', 'a', 'b', 'b', 'a'], + 'key2': ['one', 'two', 'one', 'two', 'one']}) grouped = df.groupby('key1') def peak_to_peak(arr): return arr.max() - arr.min() expected = grouped.agg([peak_to_peak]) - expected.columns=['data1','data2'] + expected.columns = ['data1', 'data2'] result = grouped.agg(peak_to_peak) - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) @@ -642,16 +656,14 @@ def test_agg_regression1(self): assert_frame_equal(result, expected) def test_agg_datetimes_mixed(self): - data = [[1, '2012-01-01', 1.0], - [2, '2012-01-02', 2.0], - [3, None, 3.0]] + data = [[1, '2012-01-01', 1.0], [2, '2012-01-02', 2.0], [3, None, 3.0]] df1 = DataFrame({'key': [x[0] for x in data], 'date': [x[1] for x in data], 'value': [x[2] for x in data]}) - data = [[row[0], datetime.strptime(row[1], '%Y-%m-%d').date() - if row[1] else None, row[2]] for row in data] + data = [[row[0], datetime.strptime(row[1], '%Y-%m-%d').date() if row[1] + else None, row[2]] for row in data] df2 = DataFrame({'key': [x[0] for x in data], 'date': [x[1] for x in data], @@ -663,7 +675,7 @@ def test_agg_datetimes_mixed(self): df2['weights'] = df1['value'] / df1['value'].sum() gb2 = df2.groupby('date').aggregate(np.sum) - assert(len(gb1) == len(gb2)) + assert (len(gb1) == len(gb2)) def test_agg_period_index(self): from pandas import period_range, PeriodIndex @@ -676,7 +688,7 @@ def test_agg_period_index(self): index = period_range(start='1999-01', periods=5, freq='M') s1 = Series(np.random.rand(len(index)), index=index) s2 = Series(np.random.rand(len(index)), index=index) - series = [('s1', s1), ('s2',s2)] + series = [('s1', s1), ('s2', s2)] df = DataFrame.from_items(series) grouped = df.groupby(df.index.month) list(grouped) @@ -687,7 +699,9 @@ def test_agg_must_agg(self): self.assertRaises(Exception, grouped.agg, lambda x: x.index[:2]) def test_agg_ser_multi_key(self): - ser = self.df.C + # TODO(wesm): unused + ser = self.df.C # noqa + f = lambda x: x.sum() results = self.df.C.groupby([self.df.A, self.df.B]).aggregate(f) expected = self.df.groupby(['A', 'B']).sum()['C'] @@ -703,48 +717,49 @@ def test_get_group(self): # GH 5267 # be datelike friendly - df = DataFrame({'DATE' : pd.to_datetime(['10-Oct-2013', '10-Oct-2013', '10-Oct-2013', - '11-Oct-2013', '11-Oct-2013', '11-Oct-2013']), - 'label' : ['foo','foo','bar','foo','foo','bar'], - 'VAL' : [1,2,3,4,5,6]}) + df = DataFrame({'DATE': pd.to_datetime( + ['10-Oct-2013', '10-Oct-2013', '10-Oct-2013', '11-Oct-2013', + '11-Oct-2013', '11-Oct-2013']), + 'label': ['foo', 'foo', 'bar', 'foo', 'foo', 'bar'], + 'VAL': [1, 2, 3, 4, 5, 6]}) g = df.groupby('DATE') key = list(g.groups)[0] result1 = g.get_group(key) result2 = g.get_group(Timestamp(key).to_datetime()) result3 = g.get_group(str(Timestamp(key))) - assert_frame_equal(result1,result2) - assert_frame_equal(result1,result3) + assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) - g = df.groupby(['DATE','label']) + g = df.groupby(['DATE', 'label']) key = list(g.groups)[0] result1 = g.get_group(key) - result2 = g.get_group((Timestamp(key[0]).to_datetime(),key[1])) - result3 = g.get_group((str(Timestamp(key[0])),key[1])) - assert_frame_equal(result1,result2) - assert_frame_equal(result1,result3) + result2 = g.get_group((Timestamp(key[0]).to_datetime(), key[1])) + result3 = g.get_group((str(Timestamp(key[0])), key[1])) + assert_frame_equal(result1, result2) + assert_frame_equal(result1, result3) # must pass a same-length tuple with multiple keys - self.assertRaises(ValueError, lambda : g.get_group('foo')) - self.assertRaises(ValueError, lambda : g.get_group(('foo'))) - self.assertRaises(ValueError, lambda : g.get_group(('foo','bar','baz'))) + self.assertRaises(ValueError, lambda: g.get_group('foo')) + self.assertRaises(ValueError, lambda: g.get_group(('foo'))) + self.assertRaises(ValueError, + lambda: g.get_group(('foo', 'bar', 'baz'))) def test_get_group_grouped_by_tuple(self): # GH 8121 - df = DataFrame([[(1,), (1, 2), (1,), (1, 2)]], - index=['ids']).T + df = DataFrame([[(1, ), (1, 2), (1, ), (1, 2)]], index=['ids']).T gr = df.groupby('ids') - expected = DataFrame({'ids': [(1,), (1,)]}, index=[0, 2]) - result = gr.get_group((1,)) + expected = DataFrame({'ids': [(1, ), (1, )]}, index=[0, 2]) + result = gr.get_group((1, )) assert_frame_equal(result, expected) dt = pd.to_datetime(['2010-01-01', '2010-01-02', '2010-01-01', - '2010-01-02']) - df = DataFrame({'ids': [(x,) for x in dt]}) + '2010-01-02']) + df = DataFrame({'ids': [(x, ) for x in dt]}) gr = df.groupby('ids') - result = gr.get_group(('2010-01-01',)) - expected = DataFrame({'ids': [(dt[0],), (dt[0],)]}, index=[0, 2]) + result = gr.get_group(('2010-01-01', )) + expected = DataFrame({'ids': [(dt[0], ), (dt[0], )]}, index=[0, 2]) assert_frame_equal(result, expected) def test_agg_apply_corner(self): @@ -753,7 +768,8 @@ def test_agg_apply_corner(self): self.assertEqual(self.ts.dtype, np.float64) # groupby float64 values results in Float64Index - exp = Series([], dtype=np.float64, index=pd.Index([], dtype=np.float64)) + exp = Series([], dtype=np.float64, index=pd.Index( + [], dtype=np.float64)) assert_series_equal(grouped.sum(), exp) assert_series_equal(grouped.agg(np.sum), exp) assert_series_equal(grouped.apply(np.sum), exp, check_index_type=False) @@ -761,7 +777,8 @@ def test_agg_apply_corner(self): # DataFrame grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan) exp_df = DataFrame(columns=self.tsframe.columns, dtype=float, - index=pd.Index([], dtype=np.float64)) + index=pd.Index( + [], dtype=np.float64)) assert_frame_equal(grouped.sum(), exp_df, check_names=False) assert_frame_equal(grouped.agg(np.sum), exp_df, check_names=False) assert_frame_equal(grouped.apply(np.sum), DataFrame({}, dtype=float)) @@ -787,8 +804,8 @@ def test_agg_grouping_is_list_tuple(self): def test_grouping_error_on_multidim_input(self): from pandas.core.groupby import Grouping - self.assertRaises(ValueError, \ - Grouping, self.df.index, self.df[['A','A']]) + self.assertRaises(ValueError, + Grouping, self.df.index, self.df[['A', 'A']]) def test_agg_python_multiindex(self): grouped = self.mframe.groupby(['A', 'B']) @@ -799,12 +816,12 @@ def test_agg_python_multiindex(self): def test_apply_describe_bug(self): grouped = self.mframe.groupby(level='first') - result = grouped.describe() # it works! + grouped.describe() # it works! def test_apply_issues(self): # GH 5788 - s="""2011.05.16,00:00,1.40893 + s = """2011.05.16,00:00,1.40893 2011.05.16,01:00,1.40760 2011.05.16,02:00,1.40750 2011.05.16,03:00,1.40649 @@ -817,27 +834,34 @@ def test_apply_issues(self): 2011.05.18,04:00,1.40750 2011.05.18,05:00,1.40649""" - df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value'], parse_dates=[['date', 'time']]) + df = pd.read_csv( + StringIO(s), header=None, names=['date', 'time', 'value'], + parse_dates=[['date', 'time']]) df = df.set_index('date_time') expected = df.groupby(df.index.date).idxmax() result = df.groupby(df.index.date).apply(lambda x: x.idxmax()) - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # GH 5789 # don't auto coerce dates - df = pd.read_csv(StringIO(s), header=None, names=['date', 'time', 'value']) - exp_idx = pd.Index(['2011.05.16','2011.05.17','2011.05.18'], dtype=object, name='date') - expected = Series(['00:00','02:00','02:00'], index=exp_idx) - result = df.groupby('date').apply(lambda x: x['time'][x['value'].idxmax()]) + df = pd.read_csv( + StringIO(s), header=None, names=['date', 'time', 'value']) + exp_idx = pd.Index( + ['2011.05.16', '2011.05.17', '2011.05.18' + ], dtype=object, name='date') + expected = Series(['00:00', '02:00', '02:00'], index=exp_idx) + result = df.groupby('date').apply( + lambda x: x['time'][x['value'].idxmax()]) assert_series_equal(result, expected) def test_time_field_bug(self): - # Test a fix for the following error related to GH issue 11324 - # When non-key fields in a group-by dataframe contained time-based fields that - # were not returned by the apply function, an exception would be raised. + # Test a fix for the following error related to GH issue 11324 When + # non-key fields in a group-by dataframe contained time-based fields + # that were not returned by the apply function, an exception would be + # raised. - df = pd.DataFrame({'a': 1,'b': [datetime.now() for nn in range(10)]}) + df = pd.DataFrame({'a': 1, 'b': [datetime.now() for nn in range(10)]}) def func_with_no_date(batch): return pd.Series({'c': 2}) @@ -850,7 +874,9 @@ def func_with_date(batch): dfg_no_conversion_expected.index.name = 'a' dfg_conversion = df.groupby(by=['a']).apply(func_with_date) - dfg_conversion_expected = pd.DataFrame({'b': datetime(2015, 1, 1), 'c': 2}, index=[1]) + dfg_conversion_expected = pd.DataFrame( + {'b': datetime(2015, 1, 1), + 'c': 2}, index=[1]) dfg_conversion_expected.index.name = 'a' self.assert_frame_equal(dfg_no_conversion, dfg_no_conversion_expected) @@ -858,18 +884,16 @@ def func_with_date(batch): def test_len(self): df = tm.makeTimeDataFrame() - grouped = df.groupby([lambda x: x.year, - lambda x: x.month, + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) self.assertEqual(len(grouped), len(df)) - grouped = df.groupby([lambda x: x.year, - lambda x: x.month]) + grouped = df.groupby([lambda x: x.year, lambda x: x.month]) expected = len(set([(x.year, x.month) for x in df.index])) self.assertEqual(len(grouped), expected) # issue 11016 - df = pd.DataFrame(dict(a=[np.nan]*3, b=[1,2,3])) + df = pd.DataFrame(dict(a=[np.nan] * 3, b=[1, 2, 3])) self.assertEqual(len(df.groupby(('a'))), 0) self.assertEqual(len(df.groupby(('b'))), 3) self.assertEqual(len(df.groupby(('a', 'b'))), 3) @@ -890,7 +914,6 @@ def test_groups(self): self.assertTrue((self.df.ix[v]['B'] == k[1]).all()) def test_aggregate_str_func(self): - def _check_results(grouped): # single series result = grouped['A'].agg('std') @@ -903,14 +926,11 @@ def _check_results(grouped): assert_frame_equal(result, expected) # group frame by function dict - result = grouped.agg(OrderedDict([['A', 'var'], - ['B', 'std'], - ['C', 'mean'], - ['D', 'sem']])) - expected = DataFrame(OrderedDict([['A', grouped['A'].var()], - ['B', grouped['B'].std()], - ['C', grouped['C'].mean()], - ['D', grouped['D'].sem()]])) + result = grouped.agg(OrderedDict([['A', 'var'], ['B', 'std'], + ['C', 'mean'], ['D', 'sem']])) + expected = DataFrame(OrderedDict([['A', grouped['A'].var( + )], ['B', grouped['B'].std()], ['C', grouped['C'].mean()], + ['D', grouped['D'].sem()]])) assert_frame_equal(result, expected) by_weekday = self.tsframe.groupby(lambda x: x.weekday()) @@ -940,11 +960,14 @@ def test_aggregate_item_by_item(self): # GH5782 # odd comparisons can result here, so cast to make easy - assert_almost_equal(result.xs('foo'), np.array([foo] * K).astype('float64')) - assert_almost_equal(result.xs('bar'), np.array([bar] * K).astype('float64')) + assert_almost_equal( + result.xs('foo'), np.array([foo] * K).astype('float64')) + assert_almost_equal( + result.xs('bar'), np.array([bar] * K).astype('float64')) def aggfun(ser): return ser.size + result = DataFrame().groupby(self.df.A).agg(aggfun) tm.assertIsInstance(result, DataFrame) self.assertEqual(len(result), 0) @@ -959,15 +982,14 @@ def raiseException(df): com.pprint_thing(df.to_string()) raise TypeError - self.assertRaises(TypeError, df.groupby(0).agg, - raiseException) + self.assertRaises(TypeError, df.groupby(0).agg, raiseException) def test_basic_regression(self): # regression T = [1.0 * x for x in lrange(1, 10) * 10][:1095] result = Series(T, lrange(0, len(T))) - groupings = np.random.random((1100,)) + groupings = np.random.random((1100, )) groupings = Series(groupings, lrange(0, len(groupings))) * 10. grouped = result.groupby(groupings) @@ -988,10 +1010,14 @@ def test_transform(self): # GH 8046 # make sure that we preserve the input order - df = DataFrame(np.arange(6,dtype='int64').reshape(3,2), columns=["a","b"], index=[0,2,1]) - key = [0,0,1] - expected = df.sort_index().groupby(key).transform(lambda x: x-x.mean()).groupby(key).mean() - result = df.groupby(key).transform(lambda x: x-x.mean()).groupby(key).mean() + df = DataFrame( + np.arange(6, dtype='int64').reshape( + 3, 2), columns=["a", "b"], index=[0, 2, 1]) + key = [0, 0, 1] + expected = df.sort_index().groupby(key).transform( + lambda x: x - x.mean()).groupby(key).mean() + result = df.groupby(key).transform(lambda x: x - x.mean()).groupby( + key).mean() assert_frame_equal(result, expected) def demean(arr): @@ -1008,28 +1034,29 @@ def demean(arr): # GH 8430 df = tm.makeTimeDataFrame() g = df.groupby(pd.TimeGrouper('M')) - g.transform(lambda x: x-1) + g.transform(lambda x: x - 1) # GH 9700 - df = DataFrame({'a' : range(5, 10), 'b' : range(5)}) + df = DataFrame({'a': range(5, 10), 'b': range(5)}) result = df.groupby('a').transform(max) - expected = DataFrame({'b' : range(5)}) + expected = DataFrame({'b': range(5)}) tm.assert_frame_equal(result, expected) def test_transform_fast(self): - df = DataFrame( { 'id' : np.arange( 100000 ) / 3, - 'val': np.random.randn( 100000) } ) + df = DataFrame({'id': np.arange(100000) / 3, + 'val': np.random.randn(100000)}) - grp=df.groupby('id')['val'] + grp = df.groupby('id')['val'] - values = np.repeat(grp.mean().values, com._ensure_platform_int(grp.count().values)) - expected = pd.Series(values,index=df.index) + values = np.repeat(grp.mean().values, + com._ensure_platform_int(grp.count().values)) + expected = pd.Series(values, index=df.index) result = grp.transform(np.mean) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = grp.transform('mean') - assert_series_equal(result,expected) + assert_series_equal(result, expected) def test_transform_broadcast(self): grouped = self.ts.groupby(lambda x: x.month) @@ -1071,16 +1098,17 @@ def test_transform_dtype(self): def test_transform_bug(self): # GH 5712 # transforming on a datetime column - df = DataFrame(dict(A = Timestamp('20130101'), B = np.arange(5))) - result = df.groupby('A')['B'].transform(lambda x: x.rank(ascending=False)) - expected = Series(np.arange(5,0,step=-1),name='B') - assert_series_equal(result,expected) + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) + result = df.groupby('A')['B'].transform( + lambda x: x.rank(ascending=False)) + expected = Series(np.arange(5, 0, step=-1), name='B') + assert_series_equal(result, expected) def test_transform_multiple(self): grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month]) - transformed = grouped.transform(lambda x: x * 2) - broadcasted = grouped.transform(np.mean) + grouped.transform(lambda x: x * 2) + grouped.transform(np.mean) def test_dispatch_transform(self): df = self.tsframe[::5].reindex(self.tsframe.index) @@ -1125,10 +1153,12 @@ def test_transform_function_aliases(self): def test_transform_length(self): # GH 9697 - df = pd.DataFrame({'col1':[1,1,2,2], 'col2':[1,2,3,np.nan]}) - expected = pd.Series([3.0]*4) + df = pd.DataFrame({'col1': [1, 1, 2, 2], 'col2': [1, 2, 3, np.nan]}) + expected = pd.Series([3.0] * 4) + def nsum(x): return np.nansum(x) + results = [df.groupby('col1').transform(sum)['col2'], df.groupby('col1')['col2'].transform(sum), df.groupby('col1').transform(nsum)['col2'], @@ -1139,19 +1169,19 @@ def nsum(x): def test_with_na(self): index = Index(np.arange(10)) - for dtype in ['float64','float32','int64','int32','int16','int8']: + for dtype in ['float64', 'float32', 'int64', 'int32', 'int16', 'int8']: values = Series(np.ones(10), index, dtype=dtype) labels = Series([nan, 'foo', 'bar', 'bar', nan, nan, 'bar', 'bar', nan, 'foo'], index=index) - # this SHOULD be an int grouped = values.groupby(labels) agged = grouped.agg(len) expected = Series([4, 2], index=['bar', 'foo']) assert_series_equal(agged, expected, check_dtype=False) - #self.assertTrue(issubclass(agged.dtype.type, np.integer)) + + # self.assertTrue(issubclass(agged.dtype.type, np.integer)) # explicity return a float from my function def f(x): @@ -1168,85 +1198,74 @@ def test_groupby_transform_with_int(self): # GH 3740, make sure that we might upcast on item-by-item transform # floats - df = DataFrame(dict(A = [1,1,1,2,2,2], B = Series(1,dtype='float64'), C = Series([1,2,3,1,2,3],dtype='float64'), D = 'foo')) - result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std()) - expected = DataFrame(dict(B = np.nan, C = Series([-1,0,1,-1,0,1],dtype='float64'))) - assert_frame_equal(result,expected) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=Series(1, dtype='float64'), + C=Series( + [1, 2, 3, 1, 2, 3], dtype='float64'), D='foo')) + result = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=Series( + [-1, 0, 1, -1, 0, 1], dtype='float64'))) + assert_frame_equal(result, expected) # int case - df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = [1,2,3,1,2,3], D = 'foo')) - result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std()) - expected = DataFrame(dict(B = np.nan, C = [-1,0,1,-1,0,1])) - assert_frame_equal(result,expected) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, + C=[1, 2, 3, 1, 2, 3], D='foo')) + result = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std()) + expected = DataFrame(dict(B=np.nan, C=[-1, 0, 1, -1, 0, 1])) + assert_frame_equal(result, expected) # int that needs float conversion - s = Series([2,3,4,10,5,-1]) - df = DataFrame(dict(A = [1,1,1,2,2,2], B = 1, C = s, D = 'foo')) - result = df.groupby('A').transform(lambda x: (x-x.mean())/x.std()) + s = Series([2, 3, 4, 10, 5, -1]) + df = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=1, C=s, D='foo')) + result = df.groupby('A').transform(lambda x: (x - x.mean()) / x.std()) s1 = s.iloc[0:3] - s1 = (s1-s1.mean())/s1.std() + s1 = (s1 - s1.mean()) / s1.std() s2 = s.iloc[3:6] - s2 = (s2-s2.mean())/s2.std() - expected = DataFrame(dict(B = np.nan, C = concat([s1,s2]))) - assert_frame_equal(result,expected) + s2 = (s2 - s2.mean()) / s2.std() + expected = DataFrame(dict(B=np.nan, C=concat([s1, s2]))) + assert_frame_equal(result, expected) # int downcasting - result = df.groupby('A').transform(lambda x: x*2/2) - expected = DataFrame(dict(B = 1, C = [2,3,4,10,5,-1])) - assert_frame_equal(result,expected) + result = df.groupby('A').transform(lambda x: x * 2 / 2) + expected = DataFrame(dict(B=1, C=[2, 3, 4, 10, 5, -1])) + assert_frame_equal(result, expected) def test_indices_concatenation_order(self): # GH 2808 def f1(x): - y = x[(x.b % 2) == 1]**2 + y = x[(x.b % 2) == 1] ** 2 if y.empty: - multiindex = MultiIndex( - levels = [[]]*2, - labels = [[]]*2, - names = ['b', 'c'] - ) - res = DataFrame(None, - columns=['a'], - index=multiindex) + multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + names=['b', 'c']) + res = DataFrame(None, columns=['a'], index=multiindex) return res else: - y = y.set_index(['b','c']) + y = y.set_index(['b', 'c']) return y def f2(x): - y = x[(x.b % 2) == 1]**2 + y = x[(x.b % 2) == 1] ** 2 if y.empty: return DataFrame() else: - y = y.set_index(['b','c']) + y = y.set_index(['b', 'c']) return y def f3(x): - y = x[(x.b % 2) == 1]**2 + y = x[(x.b % 2) == 1] ** 2 if y.empty: - multiindex = MultiIndex( - levels = [[]]*2, - labels = [[]]*2, - names = ['foo', 'bar'] - ) - res = DataFrame(None, - columns=['a','b'], - index=multiindex) + multiindex = MultiIndex(levels=[[]] * 2, labels=[[]] * 2, + names=['foo', 'bar']) + res = DataFrame(None, columns=['a', 'b'], index=multiindex) return res else: return y - df = DataFrame({'a':[1,2,2,2], - 'b':lrange(4), - 'c':lrange(5,9)}) - - df2 = DataFrame({'a':[3,2,2,2], - 'b':lrange(4), - 'c':lrange(5,9)}) + df = DataFrame({'a': [1, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) + df2 = DataFrame({'a': [3, 2, 2, 2], 'b': lrange(4), 'c': lrange(5, 9)}) # correct result result1 = df.groupby('a').apply(f1) @@ -1307,21 +1326,19 @@ def test_series_agg_multikey(self): assert_series_equal(result, expected) def test_series_agg_multi_pure_python(self): - data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) + data = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) def bad(x): - assert(len(x.base) > 0) + assert (len(x.base) > 0) return 'foo' result = data.groupby(['A', 'B']).agg(bad) @@ -1334,8 +1351,7 @@ def test_series_index_name(self): self.assertEqual(result.index.name, 'A') def test_frame_describe_multikey(self): - grouped = self.tsframe.groupby([lambda x: x.year, - lambda x: x.month]) + grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() for col in self.tsframe: @@ -1391,17 +1407,15 @@ def test_frame_groupby(self): def test_grouping_is_iterable(self): # this code path isn't used anywhere else # not sure it's useful - grouped = self.tsframe.groupby([lambda x: x.weekday(), - lambda x: x.year]) + grouped = self.tsframe.groupby([lambda x: x.weekday(), lambda x: x.year + ]) # test it works for g in grouped.grouper.groupings[0]: pass def test_frame_groupby_columns(self): - mapping = { - 'A': 0, 'B': 0, 'C': 1, 'D': 1 - } + mapping = {'A': 0, 'B': 0, 'C': 1, 'D': 1} grouped = self.tsframe.groupby(mapping, axis=1) # aggregate @@ -1448,41 +1462,41 @@ def test_aggregate_api_consistency(self): # make sure that the aggregates via dict # are consistent - def compare(result, expected): - # if we ar passin dicts then ordering is not guaranteed for output columns + # if we ar passin dicts then ordering is not guaranteed for output + # columns assert_frame_equal(result.reindex_like(expected), expected) - - df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B' : ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C' : np.random.randn(8), - 'D' : np.random.randn(8)}) + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) grouped = df.groupby(['A', 'B']) - result = grouped[['D','C']].agg({'r':np.sum, 'r2':np.mean}) - expected = pd.concat([grouped[['D','C']].sum(), - grouped[['D','C']].mean()], - keys=['r','r2'], + result = grouped[['D', 'C']].agg({'r': np.sum, 'r2': np.mean}) + expected = pd.concat([grouped[['D', 'C']].sum(), + grouped[['D', 'C']].mean()], + keys=['r', 'r2'], axis=1).stack(level=1) compare(result, expected) - result = grouped[['D','C']].agg({'r': { 'C' : np.sum }, 'r2' : { 'D' : np.mean }}) + result = grouped[['D', 'C']].agg({'r': {'C': np.sum}, + 'r2': {'D': np.mean}}) expected = pd.concat([grouped[['C']].sum(), grouped[['D']].mean()], axis=1) - expected.columns = MultiIndex.from_tuples([('r','C'),('r2','D')]) + expected.columns = MultiIndex.from_tuples([('r', 'C'), ('r2', 'D')]) compare(result, expected) - result = grouped[['D','C']].agg([np.sum, np.mean]) + result = grouped[['D', 'C']].agg([np.sum, np.mean]) expected = pd.concat([grouped['D'].sum(), grouped['D'].mean(), grouped['C'].sum(), grouped['C'].mean()], axis=1) - expected.columns = MultiIndex.from_product([['D','C'],['sum','mean']]) + expected.columns = MultiIndex.from_product([['D', 'C'], ['sum', 'mean'] + ]) compare(result, expected) def test_multi_iter(self): @@ -1493,10 +1507,8 @@ def test_multi_iter(self): grouped = s.groupby([k1, k2]) iterated = list(grouped) - expected = [('a', '1', s[[0, 2]]), - ('a', '2', s[[1]]), - ('b', '1', s[[4]]), - ('b', '2', s[[3, 5]])] + expected = [('a', '1', s[[0, 2]]), ('a', '2', s[[1]]), + ('b', '1', s[[4]]), ('b', '2', s[[3, 5]])] for i, ((one, two), three) in enumerate(iterated): e1, e2, e3 = expected[i] self.assertEqual(e1, one) @@ -1547,7 +1559,8 @@ def test_multi_iter_panel(self): axis=1) for (month, wd), group in grouped: - exp_axis = [x for x in wp.major_axis + exp_axis = [x + for x in wp.major_axis if x.month == month and x.weekday() == wd] expected = wp.reindex(major=exp_axis) assert_panel_equal(group, expected) @@ -1559,8 +1572,7 @@ def test_multi_func(self): grouped = self.df.groupby([col1.get, col2.get]) agged = grouped.mean() expected = self.df.groupby(['A', 'B']).mean() - assert_frame_equal(agged.ix[:, ['C', 'D']], - expected.ix[:, ['C', 'D']], + assert_frame_equal(agged.ix[:, ['C', 'D']], expected.ix[:, ['C', 'D']], check_names=False) # TODO groupby get drops names # some "groups" with no data @@ -1582,18 +1594,16 @@ def test_multi_key_multiple_functions(self): assert_frame_equal(agged, expected) def test_frame_multi_key_function_list(self): - data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', - 'bar', 'bar', 'bar', 'bar', - 'foo', 'foo', 'foo'], - 'B': ['one', 'one', 'one', 'two', - 'one', 'one', 'one', 'two', - 'two', 'two', 'one'], - 'C': ['dull', 'dull', 'shiny', 'dull', - 'dull', 'shiny', 'shiny', 'dull', - 'shiny', 'shiny', 'shiny'], - 'D': np.random.randn(11), - 'E': np.random.randn(11), - 'F': np.random.randn(11)}) + data = DataFrame( + {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', + 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', + 'two', 'two', 'one'], + 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', + 'dull', 'shiny', 'shiny', 'shiny'], + 'D': np.random.randn(11), + 'E': np.random.randn(11), + 'F': np.random.randn(11)}) grouped = data.groupby(['A', 'B']) funcs = [np.mean, np.std] @@ -1601,8 +1611,8 @@ def test_frame_multi_key_function_list(self): expected = concat([grouped['D'].agg(funcs), grouped['E'].agg(funcs), grouped['F'].agg(funcs)], keys=['D', 'E', 'F'], axis=1) - assert(isinstance(agged.index, MultiIndex)) - assert(isinstance(expected.index, MultiIndex)) + assert (isinstance(agged.index, MultiIndex)) + assert (isinstance(expected.index, MultiIndex)) assert_frame_equal(agged, expected) def test_groupby_multiple_columns(self): @@ -1617,7 +1627,8 @@ def _check_op(op): for n1, gp1 in data.groupby('A'): for n2, gp2 in gp1.groupby('B'): expected[n1][n2] = op(gp2.ix[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) for k, v in compat.iteritems(expected)) + expected = dict((k, DataFrame(v)) + for k, v in compat.iteritems(expected)) expected = Panel.fromDict(expected).swapaxes(0, 1) expected.major_axis.name, expected.minor_axis.name = 'A', 'B' @@ -1683,7 +1694,7 @@ def test_groupby_as_index_agg(self): ts = Series(np.random.randint(5, 10, 50), name='jim') gr = df.groupby(ts) - _ = gr.nth(0) # invokes _set_selection_from_grouper internally + gr.nth(0) # invokes set_selection_from_grouper internally assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum)) for attr in ['mean', 'max', 'count', 'idxmax', 'cumsum', 'all']: @@ -1711,11 +1722,13 @@ def check_nunique(df, keys): days = date_range('2015-08-23', periods=10) - for n, m in product(10**np.arange(2, 6), (10, 100, 1000)): + for n, m in product(10 ** np.arange(2, 6), (10, 100, 1000)): frame = DataFrame({ - 'jim':np.random.choice(list(ascii_lowercase), n), - 'joe':np.random.choice(days, n), - 'julie':np.random.randint(0, m, n)}) + 'jim': np.random.choice( + list(ascii_lowercase), n), + 'joe': np.random.choice(days, n), + 'julie': np.random.randint(0, m, n) + }) check_nunique(frame, ['jim']) check_nunique(frame, ['jim', 'joe']) @@ -1743,8 +1756,7 @@ def check_value_counts(df, keys, bins): in product((False, True), repeat=5): kwargs = dict(normalize=normalize, sort=sort, - ascending=ascending, dropna=dropna, - bins=bins) + ascending=ascending, dropna=dropna, bins=bins) gr = df.groupby(keys, sort=isort) left = gr['3rd'].value_counts(**kwargs) @@ -1754,7 +1766,7 @@ def check_value_counts(df, keys, bins): right.index.names = right.index.names[:-1] + ['3rd'] # have to sort on index because of unstable sort on values - left, right = map(rebuild_index, (left, right)) # xref GH9212 + left, right = map(rebuild_index, (left, right)) # xref GH9212 assert_series_equal(left.sort_index(), right.sort_index()) def loop(df): @@ -1767,9 +1779,11 @@ def loop(df): for n, m in product((100, 10000), (5, 20)): frame = DataFrame({ - '1st':np.random.choice(list('abcd'), n), - '2nd':np.random.choice(days, n), - '3rd':np.random.randint(1, m + 1, n)}) + '1st': np.random.choice( + list('abcd'), n), + '2nd': np.random.choice(days, n), + '3rd': np.random.randint(1, m + 1, n) + }) loop(frame) @@ -1785,10 +1799,10 @@ def test_mulitindex_passthru(self): # GH 7997 # regression from 0.14.1 - df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]]) - df.columns = pd.MultiIndex.from_tuples([(0,1),(1,1),(2,1)]) + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df.columns = pd.MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) - result = df.groupby(axis=1, level=[0,1]).first() + result = df.groupby(axis=1, level=[0, 1]).first() assert_frame_equal(result, df) def test_multifunc_select_col_integer_cols(self): @@ -1796,7 +1810,7 @@ def test_multifunc_select_col_integer_cols(self): df.columns = np.arange(len(df.columns)) # it works! - result = df.groupby(1, as_index=False)[2].agg({'Q': np.mean}) + df.groupby(1, as_index=False)[2].agg({'Q': np.mean}) def test_as_index_series_return_frame(self): grouped = self.df.groupby('A', as_index=False) @@ -1823,8 +1837,7 @@ def test_as_index_series_return_frame(self): assert_frame_equal(result2, expected2) # corner case - self.assertRaises(Exception, grouped['C'].__getitem__, - 'D') + self.assertRaises(Exception, grouped['C'].__getitem__, 'D') def test_groupby_as_index_cython(self): data = self.df @@ -1858,16 +1871,16 @@ def test_groupby_as_index_series_scalar(self): assert_frame_equal(result, expected) def test_groupby_as_index_corner(self): - self.assertRaises(TypeError, self.ts.groupby, - lambda x: x.weekday(), as_index=False) + self.assertRaises(TypeError, self.ts.groupby, lambda x: x.weekday(), + as_index=False) - self.assertRaises(ValueError, self.df.groupby, - lambda x: x.lower(), as_index=False, axis=1) + self.assertRaises(ValueError, self.df.groupby, lambda x: x.lower(), + as_index=False, axis=1) def test_groupby_as_index_apply(self): # GH #4648 and #3417 df = DataFrame({'item_id': ['b', 'b', 'a', 'c', 'a', 'b'], - 'user_id': [1,2,1,1,3,1], + 'user_id': [1, 2, 1, 1, 3, 1], 'time': range(6)}) g_as = df.groupby('user_id', as_index=True) @@ -1884,7 +1897,8 @@ def test_groupby_as_index_apply(self): # apply doesn't maintain the original ordering # changed in GH5610 as the as_index=False returns a MI here - exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), (2, 4)]) + exp_not_as_apply = MultiIndex.from_tuples([(0, 0), (0, 2), (1, 1), ( + 2, 4)]) tp = [(1, 0), (1, 2), (2, 1), (3, 4)] exp_as_apply = MultiIndex.from_tuples(tp, names=['user_id', None]) @@ -1905,8 +1919,8 @@ def test_groupby_head_tail(self): assert_frame_equal(df.loc[[0, 2]], g_not_as.head(1)) assert_frame_equal(df.loc[[1, 2]], g_not_as.tail(1)) - empty_not_as = DataFrame(columns=df.columns, index=pd.Index([], - dtype=df.index.dtype)) + empty_not_as = DataFrame(columns=df.columns, index=pd.Index( + [], dtype=df.index.dtype)) empty_not_as['A'] = empty_not_as['A'].astype(df.A.dtype) empty_not_as['B'] = empty_not_as['B'].astype(df.B.dtype) assert_frame_equal(empty_not_as, g_not_as.head(0)) @@ -1914,7 +1928,7 @@ def test_groupby_head_tail(self): assert_frame_equal(empty_not_as, g_not_as.head(-1)) assert_frame_equal(empty_not_as, g_not_as.tail(-1)) - assert_frame_equal(df, g_not_as.head(7)) # contains all + assert_frame_equal(df, g_not_as.head(7)) # contains all assert_frame_equal(df, g_not_as.tail(7)) # as_index=True, (used to be different) @@ -1931,24 +1945,23 @@ def test_groupby_head_tail(self): assert_frame_equal(empty_as, g_as.head(-1)) assert_frame_equal(empty_as, g_as.tail(-1)) - assert_frame_equal(df_as, g_as.head(7)) # contains all + assert_frame_equal(df_as, g_as.head(7)) # contains all assert_frame_equal(df_as, g_as.tail(7)) # test with selection - assert_frame_equal(g_as[[]].head(1), df_as.loc[[0,2], []]) - assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0,2], ['A']]) - assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0,2], ['B']]) - assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0,2]]) + assert_frame_equal(g_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) - assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0,2], []]) - assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0,2], ['A']]) - assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0,2], ['B']]) - assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0,2]]) + assert_frame_equal(g_not_as[[]].head(1), df_as.loc[[0, 2], []]) + assert_frame_equal(g_not_as[['A']].head(1), df_as.loc[[0, 2], ['A']]) + assert_frame_equal(g_not_as[['B']].head(1), df_as.loc[[0, 2], ['B']]) + assert_frame_equal(g_not_as[['A', 'B']].head(1), df_as.loc[[0, 2]]) def test_groupby_multiple_key(self): df = tm.makeTimeDataFrame() - grouped = df.groupby([lambda x: x.year, - lambda x: x.month, + grouped = df.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]) agged = grouped.sum() assert_almost_equal(df.values, agged.values) @@ -2062,20 +2075,21 @@ def test_nonsense_func(self): df = DataFrame([0]) self.assertRaises(Exception, df.groupby, lambda x: x + 'foo') - def test_builtins_apply(self): # GH8155 + def test_builtins_apply(self): # GH8155 df = pd.DataFrame(np.random.randint(1, 50, (1000, 2)), columns=['jim', 'joe']) df['jolie'] = np.random.randn(1000) for keys in ['jim', ['jim', 'joe']]: # single key & multi-key - if keys == 'jim': continue + if keys == 'jim': + continue for f in [max, min, sum]: fname = f.__name__ result = df.groupby(keys).apply(f) - _shape = result.shape + result.shape ngroups = len(df.drop_duplicates(subset=keys)) assert result.shape == (ngroups, 3), 'invalid frame shape: '\ - '{} (expected ({}, 3))'.format(result.shape, ngroups) + '{} (expected ({}, 3))'.format(result.shape, ngroups) assert_frame_equal(result, # numpy's equivalent function df.groupby(keys).apply(getattr(np, fname))) @@ -2093,11 +2107,11 @@ def test_cythonized_aggers(self): 'B': ['A', 'B'] * 6, 'C': np.random.randn(12)} df = DataFrame(data) - df.loc[2:10:2,'C'] = nan + df.loc[2:10:2, 'C'] = nan def _testit(name): - op = lambda x: getattr(x,name)() + op = lambda x: getattr(x, name)() # single column grouped = df.drop(['B'], axis=1).groupby('A') @@ -2135,7 +2149,9 @@ def _testit(name): def test_max_min_non_numeric(self): # #2700 - aa = DataFrame({'nn':[11,11,22,22],'ii':[1,2,3,4],'ss':4*['mama']}) + aa = DataFrame({'nn': [11, 11, 22, 22], + 'ii': [1, 2, 3, 4], + 'ss': 4 * ['mama']}) result = aa.groupby('nn').max() self.assertTrue('ss' in result) @@ -2171,7 +2187,9 @@ def test_cython_agg_nothing_to_agg_with_dates(self): def test_groupby_timedelta_cython_count(self): df = DataFrame({'g': list('ab' * 2), 'delt': np.arange(4).astype('timedelta64[ns]')}) - expected = Series([2, 2], index=pd.Index(['a', 'b'], name='g'), name='delt') + expected = Series([ + 2, 2 + ], index=pd.Index(['a', 'b'], name='g'), name='delt') result = df.groupby('g').delt.count() tm.assert_series_equal(expected, result) @@ -2179,10 +2197,10 @@ def test_cython_agg_frame_columns(self): # #2113 df = DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) - result = df.groupby(level=0, axis='columns').mean() - result = df.groupby(level=0, axis='columns').mean() - result = df.groupby(level=0, axis='columns').mean() - _ = df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() + df.groupby(level=0, axis='columns').mean() def test_wrap_aggregated_output_multindex(self): df = self.mframe.T @@ -2197,6 +2215,7 @@ def aggfun(ser): raise TypeError else: return ser.sum() + agged2 = df.groupby(keys).aggregate(aggfun) self.assertEqual(len(agged2.columns) + 1, len(df.columns)) @@ -2237,19 +2256,17 @@ def test_groupby_level(self): # raise exception for non-MultiIndex self.assertRaises(ValueError, self.df.groupby, level=1) - - - def test_groupby_level_index_names(self): - ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) - df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : lrange(6),}).set_index('exp') + # GH4014 this used to raise ValueError since 'exp'>1 (in py2) + df = DataFrame({'exp': ['A'] * 3 + ['B'] * 3, + 'var1': lrange(6), }).set_index('exp') df.groupby(level='exp') self.assertRaises(ValueError, df.groupby, level='foo') def test_groupby_level_with_nas(self): index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, 0, 0, 0, 0], - [0, 1, 2, 3, 0, 1, 2, 3]]) + labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, + 2, 3]]) # factorizing doesn't confuse things s = Series(np.arange(8.), index=index) @@ -2258,8 +2275,8 @@ def test_groupby_level_with_nas(self): assert_series_equal(result, expected) index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, -1, 0, 0, 0], - [0, 1, 2, 3, 0, 1, 2, 3]]) + labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, + 1, 2, 3]]) # factorizing doesn't confuse things s = Series(np.arange(8.), index=index) @@ -2279,22 +2296,28 @@ def test_groupby_level_apply(self): self.assertEqual(result.index.name, 'first') def test_groupby_args(self): - #PR8618 and issue 8015 + # PR8618 and issue 8015 frame = self.mframe + def j(): - frame.groupby() - self.assertRaisesRegexp(TypeError, "You have to supply one of 'by' and 'level'", j) + frame.groupby() + + self.assertRaisesRegexp(TypeError, + "You have to supply one of 'by' and 'level'", + j) def k(): frame.groupby(by=None, level=None) - self.assertRaisesRegexp(TypeError, "You have to supply one of 'by' and 'level'", k) + + self.assertRaisesRegexp(TypeError, + "You have to supply one of 'by' and 'level'", + k) def test_groupby_level_mapper(self): frame = self.mframe deleveled = frame.reset_index() - mapper0 = {'foo': 0, 'bar': 0, - 'baz': 1, 'qux': 1} + mapper0 = {'foo': 0, 'bar': 0, 'baz': 1, 'qux': 1} mapper1 = {'one': 0, 'two': 0, 'three': 1} result0 = frame.groupby(mapper0, level=0).sum() @@ -2312,7 +2335,7 @@ def test_groupby_level_mapper(self): def test_groupby_level_0_nonmulti(self): # #1313 a = Series([1, 2, 3, 10, 4, 5, 20, 6], Index([1, 2, 3, 1, - 4, 5, 2, 6], name='foo')) + 4, 5, 2, 6], name='foo')) result = a.groupby(level=0).sum() self.assertEqual(result.index.name, a.index.name) @@ -2386,8 +2409,7 @@ def test_apply_transform(self): assert_series_equal(result, expected) def test_apply_multikey_corner(self): - grouped = self.tsframe.groupby([lambda x: x.year, - lambda x: x.month]) + grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) def f(group): return group.sort_values('A')[-5:] @@ -2401,11 +2423,12 @@ def test_mutate_groups(self): # GH3380 mydf = DataFrame({ - 'cat1' : ['a'] * 8 + ['b'] * 6, - 'cat2' : ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2, - 'cat3' : lmap(lambda x: 'g%s' % x, lrange(1,15)), - 'val' : np.random.randint(100, size=14), - }) + 'cat1': ['a'] * 8 + ['b'] * 6, + 'cat2': ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + + ['d'] * 2 + ['e'] * 2, + 'cat3': lmap(lambda x: 'g%s' % x, lrange(1, 15)), + 'val': np.random.randint(100, size=14), + }) def f_copy(x): x = x.copy() @@ -2416,17 +2439,16 @@ def f_no_copy(x): x['rank'] = x.val.rank(method='min') return x.groupby('cat2')['rank'].min() - grpby_copy = mydf.groupby('cat1').apply(f_copy) + grpby_copy = mydf.groupby('cat1').apply(f_copy) grpby_no_copy = mydf.groupby('cat1').apply(f_no_copy) - assert_series_equal(grpby_copy,grpby_no_copy) + assert_series_equal(grpby_copy, grpby_no_copy) def test_no_mutate_but_looks_like(self): # GH 8467 # first show's mutation indicator # second does not, but should yield the same results - df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'value': range(9)}) + df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], 'value': range(9)}) result1 = df.groupby('key', group_keys=True).apply(lambda x: x[:].key) result2 = df.groupby('key', group_keys=True).apply(lambda x: x.key) @@ -2451,7 +2473,7 @@ def test_apply_no_name_column_conflict(self): # it works! #2605 grouped = df.groupby(['name', 'name2']) - grouped.apply(lambda x: x.sort_values('value',inplace=True)) + grouped.apply(lambda x: x.sort_values('value', inplace=True)) def test_groupby_series_indexed_differently(self): s1 = Series([5.0, -9.0, 4.0, 100., -5., 55., 6.7], @@ -2465,15 +2487,13 @@ def test_groupby_series_indexed_differently(self): assert_series_equal(agged, exp) def test_groupby_with_hier_columns(self): - tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', - 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', - 'one', 'two', 'one', 'two']])) + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', + 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', + 'one', 'two']])) index = MultiIndex.from_tuples(tuples) - columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), - ('B', 'cat'), ('A', 'dog')]) - df = DataFrame(np.random.randn(8, 4), index=index, - columns=columns) + columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), ( + 'B', 'cat'), ('A', 'dog')]) + df = DataFrame(np.random.randn(8, 4), index=index, columns=columns) result = df.groupby(level=0).mean() self.assertTrue(result.columns.equals(columns)) @@ -2502,6 +2522,7 @@ def test_pass_args_kwargs(self): def f(x, q=None, axis=0): return percentile(x, q, axis=axis) + g = lambda x: percentile(x, 80, axis=0) # Series @@ -2537,14 +2558,6 @@ def f(x, q=None, axis=0): assert_frame_equal(agg_result, expected) assert_frame_equal(apply_result, expected) - # def test_cython_na_bug(self): - # values = np.random.randn(10) - # shape = (5, 5) - # label_list = [np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2], dtype=np.int32), - # np.array([1, 2, 3, 4, 0, 1, 2, 3, 3, 4], dtype=np.int32)] - - # lib.group_aggregate(values, label_list, shape) - def test_size(self): grouped = self.df.groupby(['A', 'B']) result = grouped.size() @@ -2578,15 +2591,18 @@ def test_count(self): dr = date_range('2015-08-30', periods=n // 10, freq='T') df = DataFrame({ - '1st':np.random.choice(list(ascii_lowercase), n), - '2nd':np.random.randint(0, 5, n), - '3rd':np.random.randn(n).round(3), - '4th':np.random.randint(-10, 10, n), - '5th':np.random.choice(dr, n), - '6th':np.random.randn(n).round(3), - '7th':np.random.randn(n).round(3), - '8th':np.random.choice(dr, n) - np.random.choice(dr, 1), - '9th':np.random.choice(list(ascii_lowercase), n)}) + '1st': np.random.choice( + list(ascii_lowercase), n), + '2nd': np.random.randint(0, 5, n), + '3rd': np.random.randn(n).round(3), + '4th': np.random.randint(-10, 10, n), + '5th': np.random.choice(dr, n), + '6th': np.random.randn(n).round(3), + '7th': np.random.randn(n).round(3), + '8th': np.random.choice(dr, n) - np.random.choice(dr, 1), + '9th': np.random.choice( + list(ascii_lowercase), n) + }) for col in df.columns.drop(['1st', '2nd', '4th']): df.loc[np.random.choice(n, n // 10), col] = np.nan @@ -2606,8 +2622,9 @@ def test_count(self): count_as = df.groupby('A').count() count_not_as = df.groupby('A', as_index=False).count() - expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], index=[1,3]) - expected.index.name='A' + expected = DataFrame([[1, 2], [0, 0]], columns=['B', 'C'], + index=[1, 3]) + expected.index.name = 'A' assert_frame_equal(count_not_as, expected.reset_index()) assert_frame_equal(count_as, expected) @@ -2615,24 +2632,27 @@ def test_count(self): assert_series_equal(count_B, expected['B']) def test_count_object(self): - df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, - 'c': [2] * 3 + [3] * 3}) + df = pd.DataFrame({'a': ['a'] * 3 + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) result = df.groupby('c').a.count() - expected = pd.Series([3, 3], index=pd.Index([2, 3], name='c'), name='a') + expected = pd.Series([ + 3, 3 + ], index=pd.Index([2, 3], name='c'), name='a') tm.assert_series_equal(result, expected) df = pd.DataFrame({'a': ['a', np.nan, np.nan] + ['b'] * 3, 'c': [2] * 3 + [3] * 3}) result = df.groupby('c').a.count() - expected = pd.Series([1, 3], index=pd.Index([2, 3], name='c'), name='a') + expected = pd.Series([ + 1, 3 + ], index=pd.Index([2, 3], name='c'), name='a') tm.assert_series_equal(result, expected) def test_count_cross_type(self): # GH8169 - vals = np.hstack((np.random.randint(0,5,(100,2)), - np.random.randint(0,2,(100,2)))) + vals = np.hstack((np.random.randint(0, 5, (100, 2)), np.random.randint( + 0, 2, (100, 2)))) df = pd.DataFrame(vals, columns=['a', 'b', 'c', 'd']) - df[df==2] = np.nan + df[df == 2] = np.nan expected = df.groupby(['c', 'd']).count() for t in ['float32', 'object']: @@ -2646,62 +2666,76 @@ def test_non_cython_api(self): # GH5610 # non-cython calls should not include the grouper - df = DataFrame([[1, 2, 'foo'], [1, nan, 'bar',], [3, nan, 'baz']], columns=['A', 'B','C']) + df = DataFrame( + [[1, 2, 'foo'], [1, + nan, + 'bar', ], [3, nan, 'baz'] + ], columns=['A', 'B', 'C']) g = df.groupby('A') - gni = df.groupby('A',as_index=False) + gni = df.groupby('A', as_index=False) # mad - expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3]) + expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3]) expected.index.name = 'A' result = g.mad() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) - expected = DataFrame([[0.,0.],[0,nan]],columns=['A','B'],index=[0,1]) + expected = DataFrame([[0., 0.], [0, nan]], columns=['A', 'B'], + index=[0, 1]) result = gni.mad() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # describe - expected = DataFrame(dict(B = concat([df.loc[[0,1],'B'].describe(),df.loc[[2],'B'].describe()],keys=[1,3]))) - expected.index.names = ['A',None] + expected = DataFrame(dict(B=concat( + [df.loc[[0, 1], 'B'].describe(), df.loc[[2], 'B'].describe() + ], keys=[1, 3]))) + expected.index.names = ['A', None] result = g.describe() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) - expected = concat([df.loc[[0,1],['A','B']].describe(),df.loc[[2],['A','B']].describe()],keys=[0,1]) + expected = concat( + [df.loc[[0, 1], ['A', 'B']].describe(), + df.loc[[2], ['A', 'B']].describe()], keys=[0, 1]) result = gni.describe() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # any - expected = DataFrame([[True, True],[False, True]],columns=['B','C'],index=[1,3]) + expected = DataFrame([[True, True], [False, True]], columns=['B', 'C'], + index=[1, 3]) expected.index.name = 'A' result = g.any() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # idxmax - expected = DataFrame([[0],[nan]],columns=['B'],index=[1,3]) + expected = DataFrame([[0], [nan]], columns=['B'], index=[1, 3]) expected.index.name = 'A' result = g.idxmax() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_cython_api2(self): # this takes the fast apply path # cumsum (GH5614) - df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=['A', 'B', 'C']) - expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C']) + df = DataFrame( + [[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9] + ], columns=['A', 'B', 'C']) + expected = DataFrame( + [[2, np.nan], [np.nan, 9], [4, 9]], columns=['B', 'C']) result = df.groupby('A').cumsum() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # GH 5755 - cumsum is a transformer and should ignore as_index result = df.groupby('A', as_index=False).cumsum() - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_grouping_ndarray(self): grouped = self.df.groupby(self.df['A'].values) result = grouped.sum() expected = self.df.groupby('A').sum() - assert_frame_equal(result, expected, check_names=False) # Note: no names when grouping by value + assert_frame_equal(result, expected, check_names=False + ) # Note: no names when grouping by value def test_agg_consistency(self): # agg with ([]) and () not consistent @@ -2714,9 +2748,10 @@ def P1(a): return np.nan import datetime as dt - df = DataFrame({'col1':[1,2,3,4], - 'col2':[10,25,26,31], - 'date':[dt.date(2013,2,10),dt.date(2013,2,10),dt.date(2013,2,11),dt.date(2013,2,11)]}) + df = DataFrame({'col1': [1, 2, 3, 4], + 'col2': [10, 25, 26, 31], + 'date': [dt.date(2013, 2, 10), dt.date(2013, 2, 10), + dt.date(2013, 2, 11), dt.date(2013, 2, 11)]}) g = df.groupby('date') @@ -2728,7 +2763,8 @@ def P1(a): def test_apply_typecast_fail(self): df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], - 'c': np.tile(['a', 'b', 'c'], 2), + 'c': np.tile( + ['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}) def f(group): @@ -2744,8 +2780,8 @@ def f(group): assert_frame_equal(result, expected) def test_apply_multiindex_fail(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], - [1, 2, 3, 1, 2, 3]]) + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] + ]) df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}, index=index) @@ -2771,7 +2807,9 @@ def test_apply_without_copy(self): # GH 5545 # returning a non-copy in an applied function fails - data = DataFrame({'id_field' : [100, 100, 200, 300], 'category' : ['a','b','c','c'], 'value' : [1,2,3,4]}) + data = DataFrame({'id_field': [100, 100, 200, 300], + 'category': ['a', 'b', 'c', 'c'], + 'value': [1, 2, 3, 4]}) def filt1(x): if x.shape[0] == 1: @@ -2787,15 +2825,17 @@ def filt2(x): expected = data.groupby('id_field').apply(filt1) result = data.groupby('id_field').apply(filt2) - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_apply_use_categorical_name(self): from pandas import qcut cats = qcut(self.df.C, 4) def get_stats(group): - return {'min': group.min(), 'max': group.max(), - 'count': group.count(), 'mean': group.mean()} + return {'min': group.min(), + 'max': group.max(), + 'count': group.count(), + 'mean': group.mean()} result = self.df.groupby(cats).D.apply(get_stats) self.assertEqual(result.index.names[0], 'C') @@ -2805,7 +2845,8 @@ def test_apply_categorical_data(self): for ordered in [True, False]: dense = Categorical(list('abc'), ordered=ordered) # 'b' is in the categories but not in the list - missing = Categorical(list('aaa'), categories=['a', 'b'], ordered=ordered) + missing = Categorical( + list('aaa'), categories=['a', 'b'], ordered=ordered) values = np.arange(len(dense)) df = DataFrame({'missing': missing, 'dense': dense, @@ -2848,8 +2889,8 @@ def f(g): self.assertTrue('value3' in result) def test_transform_mixed_type(self): - index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], - [1, 2, 3, 1, 2, 3]]) + index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1], [1, 2, 3, 1, 2, 3] + ]) df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2), 'v': np.arange(1., 7.)}, index=index) @@ -2864,7 +2905,7 @@ def f(group): self.assertEqual(result['d'].dtype, np.float64) # this is by definition a mutating operation! - with option_context('mode.chained_assignment',None): + with option_context('mode.chained_assignment', None): for key, group in grouped: res = f(group) assert_frame_equal(res, result.ix[key]) @@ -2877,6 +2918,7 @@ def test_groupby_wrong_multi_labels(self): 2,foo2,bar2,baz1,spam2,40 3,foo1,bar1,baz2,spam1,50 4,foo3,bar1,baz2,spam1,60""" + data = read_csv(StringIO(data), index_col=0) grouped = data.groupby(['foo', 'bar', 'baz', 'spam']) @@ -2904,17 +2946,13 @@ def test_seriesgroupby_name_attr(self): self.assertEqual(result.count().name, 'C') self.assertEqual(result.mean().name, 'C') - testFunc = lambda x: np.sum(x)*2 + testFunc = lambda x: np.sum(x) * 2 self.assertEqual(result.agg(testFunc).name, 'C') def test_groupby_name_propagation(self): # GH 6124 def summarize(df, name=None): - return Series({ - 'count': 1, - 'mean': 2, - 'omissions': 3, - }, name=name) + return Series({'count': 1, 'mean': 2, 'omissions': 3, }, name=name) def summarize_random_name(df): # Provide a different name for each Series. In this case, groupby @@ -2974,7 +3012,7 @@ def convert_fast(x): def convert_force_pure(x): # base will be length 0 - assert(len(x.base) > 0) + assert (len(x.base) > 0) return Decimal(str(x.mean())) grouped = s.groupby(labels) @@ -2999,6 +3037,7 @@ def test_fast_apply(self): 'key2': labels2, 'value1': np.random.randn(N), 'value2': ['foo', 'bar', 'baz', 'qux'] * (N // 4)}) + def f(g): return 1 @@ -3014,50 +3053,53 @@ def f(g): def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 - df = DataFrame({'foo1' : ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2' : np.random.randn(6)}) + df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], + 'foo2': np.random.randn(6)}) result = df.apply(lambda x: x, axis=1) assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) - # GH 3610 incorrect dtype conversion with as_index=False - df = DataFrame({"c1" : [1,2,6,6,8]}) - df["c2"] = df.c1/2.0 + df = DataFrame({"c1": [1, 2, 6, 6, 8]}) + df["c2"] = df.c1 / 2.0 result1 = df.groupby("c2").mean().reset_index().c2 result2 = df.groupby("c2", as_index=False).mean().c2 - assert_series_equal(result1,result2) + assert_series_equal(result1, result2) def test_groupby_aggregation_mixed_dtype(self): # GH 6212 expected = DataFrame({ - 'v1': [5,5,7,np.nan,3,3,4,1], - 'v2': [55,55,77,np.nan,33,33,44,11]}, - index=MultiIndex.from_tuples([(1,95),(1,99),(2,95),(2,99),('big','damp'), - ('blue','dry'),('red','red'),('red','wet')], - names=['by1','by2'])) + 'v1': [5, 5, 7, np.nan, 3, 3, 4, 1], + 'v2': [55, 55, 77, np.nan, 33, 33, 44, 11]}, + index=MultiIndex.from_tuples([(1, 95), (1, 99), (2, 95), (2, 99), + ('big', 'damp'), + ('blue', 'dry'), + ('red', 'red'), ('red', 'wet')], + names=['by1', 'by2'])) df = DataFrame({ - 'v1': [1,3,5,7,8,3,5,np.nan,4,5,7,9], - 'v2': [11,33,55,77,88,33,55,np.nan,44,55,77,99], - 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, - np.nan] - }) + 'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, + 12], + 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, + np.nan, np.nan] + }) - g = df.groupby(['by1','by2']) - result = g[['v1','v2']].mean() - assert_frame_equal(result,expected) + g = df.groupby(['by1', 'by2']) + result = g[['v1', 'v2']].mean() + assert_frame_equal(result, expected) def test_groupby_dtype_inference_empty(self): # GH 6733 - df = DataFrame({'x': [], 'range': np.arange(0,dtype='int64')}) + df = DataFrame({'x': [], 'range': np.arange(0, dtype='int64')}) self.assertEqual(df['x'].dtype, np.float64) result = df.groupby('x').first() exp_index = Index([], name='x', dtype=np.float64) - expected = DataFrame({'range' : Series([], index=exp_index, dtype='int64')}) - assert_frame_equal(result,expected, by_blocks=True) + expected = DataFrame({'range': Series( + [], index=exp_index, dtype='int64')}) + assert_frame_equal(result, expected, by_blocks=True) def test_groupby_list_infer_array_like(self): result = self.df.groupby(list(self.df['A'])).mean() @@ -3067,7 +3109,8 @@ def test_groupby_list_infer_array_like(self): self.assertRaises(Exception, self.df.groupby, list(self.df['A'][:-1])) # pathological case of ambiguity - df = DataFrame({'foo': [0, 1], 'bar': [3, 4], + df = DataFrame({'foo': [0, 1], + 'bar': [3, 4], 'val': np.random.randn(2)}) result = df.groupby(['foo', 'bar']).mean() @@ -3076,10 +3119,11 @@ def test_groupby_list_infer_array_like(self): def test_groupby_keys_same_size_as_index(self): # GH 11185 freq = 's' - index = pd.date_range(start=np.datetime64( - '2015-09-29T11:34:44-0700'), periods=2, freq=freq) + index = pd.date_range(start=np.datetime64('2015-09-29T11:34:44-0700'), + periods=2, freq=freq) df = pd.DataFrame([['A', 10], ['B', 15]], columns=[ - 'metric', 'values'], index=index) + 'metric', 'values' + ], index=index) result = df.groupby([pd.Grouper(level=0, freq=freq), 'metric']).mean() expected = df.set_index([df.index, 'metric']) @@ -3094,11 +3138,12 @@ def test_groupby_one_row(self): def test_groupby_nat_exclude(self): # GH 6992 - df = pd.DataFrame({'values': np.random.randn(8), - 'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp('2013-02-01'), - np.nan, pd.Timestamp('2013-02-01'), np.nan, pd.Timestamp('2013-01-01')], - 'str': [np.nan, 'a', np.nan, 'a', - np.nan, 'a', np.nan, 'b']}) + df = pd.DataFrame( + {'values': np.random.randn(8), + 'dt': [np.nan, pd.Timestamp('2013-01-01'), np.nan, pd.Timestamp( + '2013-02-01'), np.nan, pd.Timestamp('2013-02-01'), np.nan, + pd.Timestamp('2013-01-01')], + 'str': [np.nan, 'a', np.nan, 'a', np.nan, 'a', np.nan, 'b']}) grouped = df.groupby('dt') expected = [[1, 7], [3, 5]] @@ -3117,8 +3162,10 @@ def test_groupby_nat_exclude(self): for k in grouped.indices: self.assert_numpy_array_equal(grouped.indices[k], expected[k]) - tm.assert_frame_equal(grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]]) - tm.assert_frame_equal(grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]]) + tm.assert_frame_equal( + grouped.get_group(Timestamp('2013-01-01')), df.iloc[[1, 7]]) + tm.assert_frame_equal( + grouped.get_group(Timestamp('2013-02-01')), df.iloc[[3, 5]]) self.assertRaises(KeyError, grouped.get_group, pd.NaT) @@ -3176,7 +3223,8 @@ def test_panel_groupby(self): grouped = self.panel.groupby(lambda x: x.month, axis='major') agged = grouped.mean() - self.assert_numpy_array_equal(agged.major_axis, sorted(list(set(self.panel.major_axis.month)))) + self.assert_numpy_array_equal(agged.major_axis, sorted(list(set( + self.panel.major_axis.month)))) grouped = self.panel.groupby({'A': 0, 'B': 0, 'C': 1, 'D': 1}, axis='minor') @@ -3211,11 +3259,13 @@ def test_groupby_2d_malformed(self): self.assert_numpy_array_equal(tmp.values, res_values) def test_int32_overflow(self): - B = np.concatenate((np.arange(10000), np.arange(10000), - np.arange(5000))) + B = np.concatenate((np.arange(10000), np.arange(10000), np.arange(5000) + )) A = np.arange(25000) - df = DataFrame({'A': A, 'B': B, - 'C': A, 'D': B, + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, 'E': np.random.randn(25000)}) left = df.groupby(['A', 'B', 'C', 'D']).sum() @@ -3225,13 +3275,16 @@ def test_int32_overflow(self): def test_int64_overflow(self): from pandas.core.groupby import _int64_overflow_possible - B = np.concatenate((np.arange(1000), np.arange(1000), - np.arange(500))) + B = np.concatenate((np.arange(1000), np.arange(1000), np.arange(500))) A = np.arange(2500) - df = DataFrame({'A': A, 'B': B, - 'C': A, 'D': B, - 'E': A, 'F': B, - 'G': A, 'H': B, + df = DataFrame({'A': A, + 'B': B, + 'C': A, + 'D': B, + 'E': A, + 'F': B, + 'G': A, + 'H': B, 'values': np.random.randn(2500)}) lg = df.groupby(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) @@ -3246,8 +3299,8 @@ def test_int64_overflow(self): exp_index, _ = right.index.sortlevel(0) self.assertTrue(right.index.equals(exp_index)) - tups = list(map(tuple, df[['A', 'B', 'C', 'D', - 'E', 'F', 'G', 'H']].values)) + tups = list(map(tuple, df[['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' + ]].values)) tups = com._asarray_tuplesafe(tups) expected = df.groupby(tups).sum()['values'] @@ -3258,12 +3311,14 @@ def test_int64_overflow(self): # GH9096 values = range(55109) - data = pd.DataFrame.from_dict({'a': values, 'b': values, - 'c': values, 'd': values}) + data = pd.DataFrame.from_dict({'a': values, + 'b': values, + 'c': values, + 'd': values}) grouped = data.groupby(['a', 'b', 'c', 'd']) self.assertEqual(len(grouped), len(values)) - arr = np.random.randint(- 1 << 12, 1 << 12, (1 << 15, 5)) + arr = np.random.randint(-1 << 12, 1 << 12, (1 << 15, 5)) i = np.random.choice(len(arr), len(arr) * 4) arr = np.vstack((arr, arr[i])) # add sume duplicate rows @@ -3304,8 +3359,7 @@ def test_groupby_sort_multi(self): tups = lmap(tuple, df[['a', 'b', 'c']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['a', 'b', 'c'], sort=True).sum() - self.assert_numpy_array_equal(result.index.values, - tups[[1, 2, 0]]) + self.assert_numpy_array_equal(result.index.values, tups[[1, 2, 0]]) tups = lmap(tuple, df[['c', 'a', 'b']].values) tups = com._asarray_tuplesafe(tups) @@ -3315,8 +3369,7 @@ def test_groupby_sort_multi(self): tups = lmap(tuple, df[['b', 'c', 'a']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['b', 'c', 'a'], sort=True).sum() - self.assert_numpy_array_equal(result.index.values, - tups[[2, 1, 0]]) + self.assert_numpy_array_equal(result.index.values, tups[[2, 1, 0]]) df = DataFrame({'a': [0, 1, 2, 0, 1, 2], 'b': [0, 0, 0, 1, 1, 1], @@ -3424,6 +3477,7 @@ def func(ser): raise TypeError else: return ser.sum() + result = grouped.aggregate(func) exp_grouped = self.three_group.ix[:, self.three_group.columns != 'C'] expected = exp_grouped.groupby(['A', 'B']).aggregate(func) @@ -3453,13 +3507,12 @@ def g(group): assert_series_equal(result, expected) def test_getitem_list_of_columns(self): - df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar', - 'foo', 'bar', 'foo', 'foo'], - 'B': ['one', 'one', 'two', 'three', - 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8), - 'E': np.random.randn(8)}) + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8), + 'E': np.random.randn(8)}) result = df.groupby('A')[['C', 'D']].mean() result2 = df.groupby('A')['C', 'D'].mean() @@ -3527,9 +3580,9 @@ def foo(x): def bar(x): return np.std(x, ddof=1) - d = OrderedDict([['C', np.mean], - ['D', OrderedDict([['foo', np.mean], - ['bar', np.std]])]]) + + d = OrderedDict([['C', np.mean], ['D', OrderedDict( + [['foo', np.mean], ['bar', np.std]])]]) result = grouped.aggregate(d) d = OrderedDict([['C', [np.mean]], ['D', [foo, bar]]]) @@ -3541,21 +3594,18 @@ def test_multi_function_flexible_mix(self): # GH #1268 grouped = self.df.groupby('A') - d = OrderedDict([['C', OrderedDict([['foo', 'mean'], - [ - 'bar', 'std']])], - ['D', 'sum']]) + d = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', 'sum']]) result = grouped.aggregate(d) - d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], - [ - 'bar', 'std']])], - ['D', ['sum']]]) + d2 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', ['sum']]]) result2 = grouped.aggregate(d2) - d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], - [ - 'bar', 'std']])], - ['D', {'sum': 'sum'}]]) + d3 = OrderedDict([['C', OrderedDict([['foo', 'mean'], [ + 'bar', 'std' + ]])], ['D', {'sum': 'sum'}]]) expected = grouped.aggregate(d3) assert_frame_equal(result, expected) @@ -3563,15 +3613,14 @@ def test_multi_function_flexible_mix(self): def test_agg_callables(self): # GH 7929 - df = DataFrame({'foo' : [1,2], 'bar' :[3,4]}).astype(np.int64) + df = DataFrame({'foo': [1, 2], 'bar': [3, 4]}).astype(np.int64) class fn_class(object): + def __call__(self, x): return sum(x) - equiv_callables = [sum, np.sum, - lambda x: sum(x), - lambda x: x.sum(), + equiv_callables = [sum, np.sum, lambda x: sum(x), lambda x: x.sum(), partial(sum), fn_class()] expected = df.groupby("foo").agg(sum) @@ -3612,8 +3661,8 @@ def test_no_dummy_key_names(self): result = self.df.groupby(self.df['A'].values).sum() self.assertIsNone(result.index.name) - result = self.df.groupby([self.df['A'].values, - self.df['B'].values]).sum() + result = self.df.groupby([self.df['A'].values, self.df['B'].values + ]).sum() self.assertEqual(result.index.names, (None, None)) def test_groupby_sort_categorical(self): @@ -3626,7 +3675,8 @@ def test_groupby_sort_categorical(self): ['(0, 2.5]', 1, 60], ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar']) df['range'] = Categorical(df['range'], ordered=True) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') + index = CategoricalIndex( + ['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'], index=index) @@ -3636,17 +3686,19 @@ def test_groupby_sort_categorical(self): assert_frame_equal(result_sort, df.groupby(col, sort=False).first()) df['range'] = Categorical(df['range'], ordered=False) - index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') + index = CategoricalIndex( + ['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range') result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar'], index=index) - index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]', '(0, 2.5]'], + index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', + '(5, 7.5]', '(0, 2.5]'], name='range') result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], index=index, columns=['foo', 'bar']) col = 'range' - #### this is an unordered categorical, but we allow this #### + # this is an unordered categorical, but we allow this #### assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) @@ -3667,7 +3719,8 @@ def test_groupby_sort_categorical_datetimelike(self): df['dt'] = Categorical(df['dt'], ordered=True) index = [datetime(2011, 1, 1), datetime(2011, 2, 1), datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) result_sort.index = CategoricalIndex(index, name='dt', ordered=True) index = [datetime(2011, 7, 1), datetime(2011, 2, 1), @@ -3686,30 +3739,31 @@ def test_groupby_sort_categorical_datetimelike(self): df['dt'] = Categorical(df['dt'], ordered=False) index = [datetime(2011, 1, 1), datetime(2011, 2, 1), datetime(2011, 5, 1), datetime(2011, 7, 1)] - result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) + result_sort = DataFrame( + [[1, 60], [5, 30], [6, 40], [10, 10]], columns=['foo', 'bar']) result_sort.index = CategoricalIndex(index, name='dt') index = [datetime(2011, 7, 1), datetime(2011, 2, 1), datetime(2011, 5, 1), datetime(2011, 1, 1)] result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]], columns=['foo', 'bar']) - result_nosort.index = CategoricalIndex(index, categories=index, name='dt') + result_nosort.index = CategoricalIndex(index, categories=index, + name='dt') col = 'dt' assert_frame_equal(result_sort, df.groupby(col, sort=True).first()) assert_frame_equal(result_nosort, df.groupby(col, sort=False).first()) - def test_groupby_sort_multiindex_series(self): - # series multiindex groupby sort argument was not being passed through _compress_group_index + # series multiindex groupby sort argument was not being passed through + # _compress_group_index # GH 9444 index = MultiIndex(levels=[[1, 2], [1, 2]], labels=[[0, 0, 0, 0, 1, 1], [1, 1, 0, 0, 0, 0]], names=['a', 'b']) mseries = Series([0, 1, 2, 3, 4, 5], index=index) index = MultiIndex(levels=[[1, 2], [1, 2]], - labels=[[0, 0, 1], [1, 0, 0]], - names=['a', 'b']) + labels=[[0, 0, 1], [1, 0, 0]], names=['a', 'b']) mseries_result = Series([0, 2, 4], index=index) result = mseries.groupby(level=['a', 'b'], sort=False).first() @@ -3739,15 +3793,18 @@ def test_groupby_categorical(self): idx = cats.codes.argsort() ord_labels = np.asarray(cats).take(idx) ord_data = data.take(idx) - expected = ord_data.groupby(Categorical(ord_labels), sort=False).describe() + expected = ord_data.groupby( + Categorical(ord_labels), sort=False).describe() expected.index.names = [None, None] assert_frame_equal(desc_result, expected) # GH 10460 - expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + expc = Categorical.from_codes( + np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) self.assert_index_equal(desc_result.index.get_level_values(0), exp) - exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) + exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] + * 4) self.assert_index_equal(desc_result.index.get_level_values(1), exp) def test_groupby_datetime_categorical(self): @@ -3762,7 +3819,8 @@ def test_groupby_datetime_categorical(self): expected = data.groupby(np.asarray(cats)).mean() expected = expected.reindex(levels) - expected.index = CategoricalIndex(expected.index, categories=expected.index, + expected.index = CategoricalIndex(expected.index, + categories=expected.index, ordered=True) assert_frame_equal(result, expected) @@ -3777,34 +3835,43 @@ def test_groupby_datetime_categorical(self): expected.index.names = [None, None] assert_frame_equal(desc_result, expected) tm.assert_index_equal(desc_result.index, expected.index) - tm.assert_index_equal(desc_result.index.get_level_values(0), expected.index.get_level_values(0)) + tm.assert_index_equal( + desc_result.index.get_level_values(0), + expected.index.get_level_values(0)) # GH 10460 - expc = Categorical.from_codes(np.arange(4).repeat(8), levels, ordered=True) + expc = Categorical.from_codes( + np.arange(4).repeat(8), levels, ordered=True) exp = CategoricalIndex(expc) self.assert_index_equal(desc_result.index.get_level_values(0), exp) - exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] * 4) + exp = Index(['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max'] + * 4) self.assert_index_equal(desc_result.index.get_level_values(1), exp) - def test_groupby_categorical_index(self): levels = ['foo', 'bar', 'baz', 'qux'] codes = np.random.randint(0, 4, size=20) cats = Categorical.from_codes(codes, levels, ordered=True) - df = DataFrame(np.repeat(np.arange(20),4).reshape(-1,4), columns=list('abcd')) + df = DataFrame( + np.repeat( + np.arange(20), 4).reshape(-1, 4), columns=list('abcd')) df['cats'] = cats # with a cat index result = df.set_index('cats').groupby(level=0).sum() expected = df[list('abcd')].groupby(cats.codes).sum() - expected.index = CategoricalIndex(Categorical.from_codes([0,1,2,3], levels, ordered=True),name='cats') + expected.index = CategoricalIndex( + Categorical.from_codes( + [0, 1, 2, 3], levels, ordered=True), name='cats') assert_frame_equal(result, expected) # with a cat column, should produce a cat index result = df.groupby('cats').sum() expected = df[list('abcd')].groupby(cats.codes).sum() - expected.index = CategoricalIndex(Categorical.from_codes([0,1,2,3], levels, ordered=True),name='cats') + expected.index = CategoricalIndex( + Categorical.from_codes( + [0, 1, 2, 3], levels, ordered=True), name='cats') assert_frame_equal(result, expected) def test_groupby_groups_datetimeindex(self): @@ -3822,21 +3889,25 @@ def test_groupby_groups_datetimeindex(self): def test_groupby_groups_datetimeindex_tz(self): # GH 3950 - dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00', - '2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00'] + dates = ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'] df = DataFrame({'label': ['a', 'a', 'a', 'b', 'b', 'b'], 'datetime': dates, - 'value1': np.arange(6,dtype='int64'), + 'value1': np.arange(6, dtype='int64'), 'value2': [1, 2] * 3}) - df['datetime'] = df['datetime'].apply(lambda d: Timestamp(d, tz='US/Pacific')) - - exp_idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 09:00:00'], - tz='US/Pacific', name='datetime') + df['datetime'] = df['datetime'].apply( + lambda d: Timestamp(d, tz='US/Pacific')) + + exp_idx1 = pd.DatetimeIndex( + ['2011-07-19 07:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 09:00:00'], + tz='US/Pacific', name='datetime') exp_idx2 = Index(['a', 'b'] * 3, name='label') exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2]) - expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], 'value2': [1, 2, 2, 1, 1, 2]}, + expected = DataFrame({'value1': [0, 3, 1, 4, 2, 5], + 'value2': [1, 2, 2, 1, 1, 2]}, index=exp_idx, columns=['value1', 'value2']) result = df.groupby(['datetime', 'label']).sum() @@ -3844,12 +3915,13 @@ def test_groupby_groups_datetimeindex_tz(self): # by level didx = pd.DatetimeIndex(dates, tz='Asia/Tokyo') - df = DataFrame({'value1': np.arange(6,dtype='int64'), + df = DataFrame({'value1': np.arange(6, dtype='int64'), 'value2': [1, 2, 3, 1, 2, 3]}, index=didx) - exp_idx = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], tz='Asia/Tokyo') + exp_idx = pd.DatetimeIndex( + ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], tz='Asia/Tokyo') expected = DataFrame({'value1': [3, 5, 7], 'value2': [2, 4, 6]}, index=exp_idx, columns=['value1', 'value2']) @@ -3860,23 +3932,30 @@ def test_groupby_multi_timezone(self): # combining multiple / different timezones yields UTC - data="""0,2000-01-28 16:47:00,America/Chicago + data = """0,2000-01-28 16:47:00,America/Chicago 1,2000-01-29 16:48:00,America/Chicago 2,2000-01-30 16:49:00,America/Los_Angeles 3,2000-01-31 16:50:00,America/Chicago 4,2000-01-01 16:50:00,America/New_York""" - df = pd.read_csv(StringIO(data),header=None, names=['value','date','tz']) - result = df.groupby('tz').date.apply(lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) + df = pd.read_csv( + StringIO(data), header=None, names=['value', 'date', 'tz']) + result = df.groupby('tz').date.apply( + lambda x: pd.to_datetime(x).dt.tz_localize(x.name)) - expected = pd.to_datetime(Series(['2000-01-28 22:47:00', '2000-01-29 22:48:00', '2000-01-31 00:49:00', '2000-01-31 22:50:00', '2000-01-01 21:50:00'])) + expected = pd.to_datetime(Series( + ['2000-01-28 22:47:00', '2000-01-29 22:48:00', + '2000-01-31 00:49:00', '2000-01-31 22:50:00', + '2000-01-01 21:50:00'])) assert_series_equal(result, expected) tz = 'America/Chicago' - result = pd.to_datetime(df.groupby('tz').date.get_group(tz)).dt.tz_localize(tz) - expected = pd.to_datetime(Series(['2000-01-28 16:47:00', '2000-01-29 16:48:00','2000-01-31 16:50:00'], - index=[0,1,3], - name='date')).dt.tz_localize(tz) + result = pd.to_datetime(df.groupby('tz').date.get_group( + tz)).dt.tz_localize(tz) + expected = pd.to_datetime(Series( + ['2000-01-28 16:47:00', '2000-01-29 16:48:00', + '2000-01-31 16:50:00'], index=[0, 1, 3 + ], name='date')).dt.tz_localize(tz) assert_series_equal(result, expected) def test_groupby_reindex_inside_function(self): @@ -3891,6 +3970,7 @@ def agg_before(hour, func, fix=False): """ Run an aggregate func on the subset of data. """ + def _func(data): d = data.select(lambda x: x.hour < 11).dropna() if fix: @@ -3898,6 +3978,7 @@ def _func(data): if len(d) == 0: return None return func(d) + return _func def afunc(data): @@ -3959,7 +4040,8 @@ def test_groupby_categorical_no_compress(self): result = data.groupby(cats).mean() exp = data.groupby(codes).mean() - exp.index = CategoricalIndex(exp.index,categories=cats.categories,ordered=cats.ordered) + exp.index = CategoricalIndex(exp.index, categories=cats.categories, + ordered=cats.ordered) assert_series_equal(result, exp) codes = np.array([0, 0, 0, 1, 1, 1, 3, 3, 3]) @@ -3967,35 +4049,48 @@ def test_groupby_categorical_no_compress(self): result = data.groupby(cats).mean() exp = data.groupby(codes).mean().reindex(cats.categories) - exp.index = CategoricalIndex(exp.index,categories=cats.categories,ordered=cats.ordered) + exp.index = CategoricalIndex(exp.index, categories=cats.categories, + ordered=cats.ordered) assert_series_equal(result, exp) cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"], - categories=["a","b","c","d"], ordered=True) - data = DataFrame({"a":[1,1,1,2,2,2,3,4,5], "b":cats}) + categories=["a", "b", "c", "d"], ordered=True) + data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats}) result = data.groupby("b").mean() result = result["a"].values - exp = np.array([1,2,4,np.nan]) + exp = np.array([1, 2, 4, np.nan]) self.assert_numpy_array_equal(result, exp) def test_groupby_non_arithmetic_agg_types(self): # GH9311, GH6620 - df = pd.DataFrame([{'a': 1, 'b': 1}, - {'a': 1, 'b': 2}, - {'a': 2, 'b': 3}, - {'a': 2, 'b': 4}]) - - dtypes = ['int8', 'int16', 'int32', 'int64', - 'float32', 'float64'] - - grp_exp = {'first': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'last': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'min': {'df': [{'a': 1, 'b': 1}, {'a': 2, 'b': 3}]}, - 'max': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}]}, - 'nth': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 4}], + df = pd.DataFrame([{'a': 1, + 'b': 1}, {'a': 1, + 'b': 2}, {'a': 2, + 'b': 3}, {'a': 2, + 'b': 4}]) + + dtypes = ['int8', 'int16', 'int32', 'int64', 'float32', 'float64'] + + grp_exp = {'first': {'df': [{'a': 1, + 'b': 1}, {'a': 2, + 'b': 3}]}, + 'last': {'df': [{'a': 1, + 'b': 2}, {'a': 2, + 'b': 4}]}, + 'min': {'df': [{'a': 1, + 'b': 1}, {'a': 2, + 'b': 3}]}, + 'max': {'df': [{'a': 1, + 'b': 2}, {'a': 2, + 'b': 4}]}, + 'nth': {'df': [{'a': 1, + 'b': 2}, {'a': 2, + 'b': 4}], 'args': [1]}, - 'count': {'df': [{'a': 1, 'b': 2}, {'a': 2, 'b': 2}], + 'count': {'df': [{'a': 1, + 'b': 2}, {'a': 2, + 'b': 2}], 'out_type': 'int64'}} for dtype in dtypes: @@ -4026,20 +4121,17 @@ def test_groupby_non_arithmetic_agg_intlike_precision(self): c = 24650000000000000 inputs = ((Timestamp('2011-01-15 12:50:28.502376'), - Timestamp('2011-01-20 12:50:28.593448')), - (1 + c, 2 + c)) + Timestamp('2011-01-20 12:50:28.593448')), (1 + c, 2 + c)) for i in inputs: - df = pd.DataFrame([{'a': 1, - 'b': i[0]}, - {'a': 1, - 'b': i[1]}]) + df = pd.DataFrame([{'a': 1, 'b': i[0]}, {'a': 1, 'b': i[1]}]) grp_exp = {'first': {'expected': i[0]}, 'last': {'expected': i[1]}, 'min': {'expected': i[0]}, 'max': {'expected': i[1]}, - 'nth': {'expected': i[1], 'args': [1]}, + 'nth': {'expected': i[1], + 'args': [1]}, 'count': {'expected': 2}} for method, data in compat.iteritems(grp_exp): @@ -4067,39 +4159,41 @@ def test_groupby_first_datetime64(self): def test_groupby_max_datetime64(self): # GH 5869 # datetimelike dtype conversion from int - df = DataFrame(dict(A = Timestamp('20130101'), B = np.arange(5))) + df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5))) expected = df.groupby('A')['A'].apply(lambda x: x.max()) result = df.groupby('A')['A'].max() - assert_series_equal(result,expected) + assert_series_equal(result, expected) def test_groupby_datetime64_32_bit(self): # GH 6410 / numpy 4328 # 32-bit under 1.9-dev indexing issue - df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')]*2}) + df = DataFrame({"A": range(2), "B": [pd.Timestamp('2000-01-1')] * 2}) result = df.groupby("A")["B"].transform(min) - expected = Series([pd.Timestamp('2000-01-1')]*2) - assert_series_equal(result,expected) + expected = Series([pd.Timestamp('2000-01-1')] * 2) + assert_series_equal(result, expected) def test_groupby_categorical_unequal_len(self): - #GH3011 + # GH3011 series = Series([np.nan, np.nan, 1, 1, 2, 2, 3, 3, 4, 4]) - # The raises only happens with categorical, not with series of types category - bins = pd.cut(series.dropna().values, 4) + # The raises only happens with categorical, not with series of types + # category + bins = pd.cut(series.dropna().values, 4) # len(bins) != len(series) here - self.assertRaises(ValueError,lambda : series.groupby(bins).mean()) + self.assertRaises(ValueError, lambda: series.groupby(bins).mean()) def test_groupby_multiindex_missing_pair(self): # GH9049 - df = DataFrame({'group1': ['a','a','a','b'], - 'group2': ['c','c','d','c'], - 'value': [1,1,1,5]}) + df = DataFrame({'group1': ['a', 'a', 'a', 'b'], + 'group2': ['c', 'c', 'd', 'c'], + 'value': [1, 1, 1, 5]}) df = df.set_index(['group1', 'group2']) - df_grouped = df.groupby(level=['group1','group2'], sort=True) + df_grouped = df.groupby(level=['group1', 'group2'], sort=True) res = df_grouped.agg('sum') - idx = MultiIndex.from_tuples([('a','c'), ('a','d'), ('b','c')], names=['group1', 'group2']) + idx = MultiIndex.from_tuples( + [('a', 'c'), ('a', 'd'), ('b', 'c')], names=['group1', 'group2']) exp = DataFrame([[2], [1], [5]], index=idx, columns=['value']) tm.assert_frame_equal(res, exp) @@ -4107,7 +4201,8 @@ def test_groupby_multiindex_missing_pair(self): def test_groupby_levels_and_columns(self): # GH9344, GH9049 idx_names = ['x', 'y'] - idx = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) + idx = pd.MultiIndex.from_tuples( + [(1, 1), (1, 2), (3, 4), (5, 6)], names=idx_names) df = pd.DataFrame(np.arange(12).reshape(-1, 3), index=idx) by_levels = df.groupby(level=idx_names).mean() @@ -4122,16 +4217,17 @@ def test_groupby_levels_and_columns(self): def test_gb_apply_list_of_unequal_len_arrays(self): # GH1738 - df = DataFrame({'group1': ['a','a','a','b','b','b','a','a','a','b','b','b'], - 'group2': ['c','c','d','d','d','e','c','c','d','d','d','e'], - 'weight': [1.1,2,3,4,5,6,2,4,6,8,1,2], - 'value': [7.1,8,9,10,11,12,8,7,6,5,4,3] - }) + df = DataFrame({'group1': ['a', 'a', 'a', 'b', 'b', 'b', 'a', 'a', 'a', + 'b', 'b', 'b'], + 'group2': ['c', 'c', 'd', 'd', 'd', 'e', 'c', 'c', 'd', + 'd', 'd', 'e'], + 'weight': [1.1, 2, 3, 4, 5, 6, 2, 4, 6, 8, 1, 2], + 'value': [7.1, 8, 9, 10, 11, 12, 8, 7, 6, 5, 4, 3]}) df = df.set_index(['group1', 'group2']) - df_grouped = df.groupby(level=['group1','group2'], sort=True) + df_grouped = df.groupby(level=['group1', 'group2'], sort=True) def noddy(value, weight): - out = np.array( value * weight ).repeat(3) + out = np.array(value * weight).repeat(3) return out # the kernel function returns arrays of unequal length @@ -4140,7 +4236,7 @@ def noddy(value, weight): # and so tries a vstack # don't die - no_toes = df_grouped.apply(lambda x: noddy(x.value, x.weight )) + df_grouped.apply(lambda x: noddy(x.value, x.weight)) def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) @@ -4156,21 +4252,22 @@ def test_groupby_with_timezone_selection(self): np.random.seed(42) df = pd.DataFrame({ 'factor': np.random.randint(0, 3, size=60), - 'time': pd.date_range('01/01/2000 00:00', periods=60, freq='s', tz='UTC') + 'time': pd.date_range('01/01/2000 00:00', periods=60, + freq='s', tz='UTC') }) df1 = df.groupby('factor').max()['time'] df2 = df.groupby('factor')['time'].max() tm.assert_series_equal(df1, df2) def test_timezone_info(self): - #GH 11682 + # GH 11682 # Timezone info lost when broadcasting scalar datetime to DataFrame tm._skip_if_no_pytz() import pytz df = pd.DataFrame({'a': [1], 'b': [datetime.now(pytz.utc)]}) tm.assert_equal(df['b'][0].tzinfo, pytz.utc) - df = pd.DataFrame({'a': [1,2,3]}) + df = pd.DataFrame({'a': [1, 2, 3]}) df['b'] = datetime.now(pytz.utc) tm.assert_equal(df['b'][0].tzinfo, pytz.utc) @@ -4181,15 +4278,16 @@ def test_groupby_with_timegrouper(self): import datetime as DT df_original = DataFrame({ 'Buyer': 'Carl Carl Carl Carl Joe Carl'.split(), - 'Quantity': [18,3,5,1,9,3], - 'Date' : [ - DT.datetime(2013,9,1,13,0), - DT.datetime(2013,9,1,13,5), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,3,10,0), - DT.datetime(2013,12,2,12,0), - DT.datetime(2013,9,2,14,0), - ]}) + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [ + DT.datetime(2013, 9, 1, 13, 0), + DT.datetime(2013, 9, 1, 13, 5), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 3, 10, 0), + DT.datetime(2013, 12, 2, 12, 0), + DT.datetime(2013, 9, 2, 14, 0), + ] + }) # GH 6908 change target column's order df_reordered = df_original.sort_values(by='Quantity') @@ -4197,12 +4295,15 @@ def test_groupby_with_timegrouper(self): for df in [df_original, df_reordered]: df = df.set_index(['Date']) - expected = DataFrame({ 'Quantity' : np.nan }, - index=date_range('20130901 13:00:00','20131205 13:00:00', - freq='5D',name='Date',closed='left')) - expected.iloc[[0,6,18],0] = np.array([24.,6.,9.],dtype='float64') + expected = DataFrame( + {'Quantity': np.nan}, + index=date_range('20130901 13:00:00', + '20131205 13:00:00', freq='5D', + name='Date', closed='left')) + expected.iloc[[0, 6, 18], 0] = np.array( + [24., 6., 9.], dtype='float64') - result1 = df.resample('5D',how=sum) + result1 = df.resample('5D', how=sum) assert_frame_equal(result1, expected) df_sorted = df.sort_index() @@ -4218,17 +4319,18 @@ def test_groupby_with_timegrouper_methods(self): import datetime as DT df_original = pd.DataFrame({ - 'Branch' : 'A A A A A B'.split(), + 'Branch': 'A A A A A B'.split(), 'Buyer': 'Carl Mark Carl Joe Joe Carl'.split(), - 'Quantity': [1,3,5,8,9,3], - 'Date' : [ - DT.datetime(2013,1,1,13,0), - DT.datetime(2013,1,1,13,5), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,2,10,0), - DT.datetime(2013,12,2,12,0), - DT.datetime(2013,12,2,14,0), - ]}) + 'Quantity': [1, 3, 5, 8, 9, 3], + 'Date': [ + DT.datetime(2013, 1, 1, 13, 0), + DT.datetime(2013, 1, 1, 13, 5), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 2, 10, 0), + DT.datetime(2013, 12, 2, 12, 0), + DT.datetime(2013, 12, 2, 14, 0), + ] + }) df_sorted = df_original.sort_values(by='Quantity', ascending=False) @@ -4236,9 +4338,9 @@ def test_groupby_with_timegrouper_methods(self): df = df.set_index('Date', drop=False) g = df.groupby(pd.TimeGrouper('6M')) self.assertTrue(g.group_keys) - self.assertTrue(isinstance(g.grouper,pd.core.groupby.BinGrouper)) + self.assertTrue(isinstance(g.grouper, pd.core.groupby.BinGrouper)) groups = g.groups - self.assertTrue(isinstance(groups,dict)) + self.assertTrue(isinstance(groups, dict)) self.assertTrue(len(groups) == 3) def test_timegrouper_with_reg_groups(self): @@ -4249,160 +4351,184 @@ def test_timegrouper_with_reg_groups(self): import datetime as DT df_original = DataFrame({ - 'Branch' : 'A A A A A A A B'.split(), + 'Branch': 'A A A A A A A B'.split(), 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1,3,5,1,8,1,9,3], - 'Date' : [ - DT.datetime(2013,1,1,13,0), - DT.datetime(2013,1,1,13,5), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,2,10,0), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,2,10,0), - DT.datetime(2013,12,2,12,0), - DT.datetime(2013,12,2,14,0), - ]}).set_index('Date') + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + DT.datetime(2013, 1, 1, 13, 0), + DT.datetime(2013, 1, 1, 13, 5), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 2, 10, 0), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 2, 10, 0), + DT.datetime(2013, 12, 2, 12, 0), + DT.datetime(2013, 12, 2, 14, 0), + ] + }).set_index('Date') df_sorted = df_original.sort_values(by='Quantity', ascending=False) for df in [df_original, df_sorted]: expected = DataFrame({ 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10,18,3], - 'Date' : [ - DT.datetime(2013,12,31,0,0), - DT.datetime(2013,12,31,0,0), - DT.datetime(2013,12,31,0,0), - ]}).set_index(['Date','Buyer']) - - result = df.groupby([pd.Grouper(freq='A'),'Buyer']).sum() - assert_frame_equal(result,expected) + 'Quantity': [10, 18, 3], + 'Date': [ + DT.datetime(2013, 12, 31, 0, 0), + DT.datetime(2013, 12, 31, 0, 0), + DT.datetime(2013, 12, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='A'), 'Buyer']).sum() + assert_frame_equal(result, expected) expected = DataFrame({ 'Buyer': 'Carl Mark Carl Joe'.split(), - 'Quantity': [1,3,9,18], - 'Date' : [ - DT.datetime(2013,1,1,0,0), - DT.datetime(2013,1,1,0,0), - DT.datetime(2013,7,1,0,0), - DT.datetime(2013,7,1,0,0), - ]}).set_index(['Date','Buyer']) - result = df.groupby([pd.Grouper(freq='6MS'),'Buyer']).sum() - assert_frame_equal(result,expected) + 'Quantity': [1, 3, 9, 18], + 'Date': [ + DT.datetime(2013, 1, 1, 0, 0), + DT.datetime(2013, 1, 1, 0, 0), + DT.datetime(2013, 7, 1, 0, 0), + DT.datetime(2013, 7, 1, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + result = df.groupby([pd.Grouper(freq='6MS'), 'Buyer']).sum() + assert_frame_equal(result, expected) df_original = DataFrame({ - 'Branch' : 'A A A A A A A B'.split(), + 'Branch': 'A A A A A A A B'.split(), 'Buyer': 'Carl Mark Carl Carl Joe Joe Joe Carl'.split(), - 'Quantity': [1,3,5,1,8,1,9,3], - 'Date' : [ - DT.datetime(2013,10,1,13,0), - DT.datetime(2013,10,1,13,5), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,2,10,0), - DT.datetime(2013,10,1,20,0), - DT.datetime(2013,10,2,10,0), - DT.datetime(2013,10,2,12,0), - DT.datetime(2013,10,2,14,0), - ]}).set_index('Date') + 'Quantity': [1, 3, 5, 1, 8, 1, 9, 3], + 'Date': [ + DT.datetime(2013, 10, 1, 13, 0), + DT.datetime(2013, 10, 1, 13, 5), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 2, 10, 0), + DT.datetime(2013, 10, 1, 20, 0), + DT.datetime(2013, 10, 2, 10, 0), + DT.datetime(2013, 10, 2, 12, 0), + DT.datetime(2013, 10, 2, 14, 0), + ] + }).set_index('Date') df_sorted = df_original.sort_values(by='Quantity', ascending=False) for df in [df_original, df_sorted]: expected = DataFrame({ 'Buyer': 'Carl Joe Mark Carl Joe'.split(), - 'Quantity': [6,8,3,4,10], - 'Date' : [ - DT.datetime(2013,10,1,0,0), - DT.datetime(2013,10,1,0,0), - DT.datetime(2013,10,1,0,0), - DT.datetime(2013,10,2,0,0), - DT.datetime(2013,10,2,0,0), - ]}).set_index(['Date','Buyer']) - - result = df.groupby([pd.Grouper(freq='1D'),'Buyer']).sum() - assert_frame_equal(result,expected) - - result = df.groupby([pd.Grouper(freq='1M'),'Buyer']).sum() + 'Quantity': [6, 8, 3, 4, 10], + 'Date': [ + DT.datetime(2013, 10, 1, 0, 0), + DT.datetime(2013, 10, 1, 0, 0), + DT.datetime(2013, 10, 1, 0, 0), + DT.datetime(2013, 10, 2, 0, 0), + DT.datetime(2013, 10, 2, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + + result = df.groupby([pd.Grouper(freq='1D'), 'Buyer']).sum() + assert_frame_equal(result, expected) + + result = df.groupby([pd.Grouper(freq='1M'), 'Buyer']).sum() expected = DataFrame({ 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10,18,3], - 'Date' : [ - DT.datetime(2013,10,31,0,0), - DT.datetime(2013,10,31,0,0), - DT.datetime(2013,10,31,0,0), - ]}).set_index(['Date','Buyer']) - assert_frame_equal(result,expected) + 'Quantity': [10, 18, 3], + 'Date': [ + DT.datetime(2013, 10, 31, 0, 0), + DT.datetime(2013, 10, 31, 0, 0), + DT.datetime(2013, 10, 31, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) # passing the name df = df.reset_index() - result = df.groupby([pd.Grouper(freq='1M',key='Date'),'Buyer']).sum() - assert_frame_equal(result,expected) + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) - self.assertRaises(KeyError, lambda : df.groupby([pd.Grouper(freq='1M',key='foo'),'Buyer']).sum()) + with self.assertRaises(KeyError): + df.groupby([pd.Grouper(freq='1M', key='foo'), 'Buyer']).sum() # passing the level df = df.set_index('Date') - result = df.groupby([pd.Grouper(freq='1M',level='Date'),'Buyer']).sum() - assert_frame_equal(result,expected) - result = df.groupby([pd.Grouper(freq='1M',level=0),'Buyer']).sum() - assert_frame_equal(result,expected) + result = df.groupby([pd.Grouper(freq='1M', level='Date'), 'Buyer' + ]).sum() + assert_frame_equal(result, expected) + result = df.groupby([pd.Grouper(freq='1M', level=0), 'Buyer']).sum( + ) + assert_frame_equal(result, expected) - self.assertRaises(ValueError, lambda : df.groupby([pd.Grouper(freq='1M',level='foo'),'Buyer']).sum()) + with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', level='foo'), + 'Buyer']).sum() # multi names df = df.copy() df['Date'] = df.index + pd.offsets.MonthEnd(2) - result = df.groupby([pd.Grouper(freq='1M',key='Date'),'Buyer']).sum() + result = df.groupby([pd.Grouper(freq='1M', key='Date'), 'Buyer' + ]).sum() expected = DataFrame({ 'Buyer': 'Carl Joe Mark'.split(), - 'Quantity': [10,18,3], - 'Date' : [ - DT.datetime(2013,11,30,0,0), - DT.datetime(2013,11,30,0,0), - DT.datetime(2013,11,30,0,0), - ]}).set_index(['Date','Buyer']) - assert_frame_equal(result,expected) + 'Quantity': [10, 18, 3], + 'Date': [ + DT.datetime(2013, 11, 30, 0, 0), + DT.datetime(2013, 11, 30, 0, 0), + DT.datetime(2013, 11, 30, 0, 0), + ] + }).set_index(['Date', 'Buyer']) + assert_frame_equal(result, expected) # error as we have both a level and a name! - self.assertRaises(ValueError, lambda : df.groupby([pd.Grouper(freq='1M',key='Date',level='Date'),'Buyer']).sum()) - + with self.assertRaises(ValueError): + df.groupby([pd.Grouper(freq='1M', key='Date', + level='Date'), 'Buyer']).sum() # single groupers - expected = DataFrame({ 'Quantity' : [31], - 'Date' : [DT.datetime(2013,10,31,0,0)] }).set_index('Date') + expected = DataFrame({'Quantity': [31], + 'Date': [DT.datetime(2013, 10, 31, 0, 0) + ]}).set_index('Date') result = df.groupby(pd.Grouper(freq='1M')).sum() assert_frame_equal(result, expected) result = df.groupby([pd.Grouper(freq='1M')]).sum() assert_frame_equal(result, expected) - expected = DataFrame({ 'Quantity' : [31], - 'Date' : [DT.datetime(2013,11,30,0,0)] }).set_index('Date') - result = df.groupby(pd.Grouper(freq='1M',key='Date')).sum() + expected = DataFrame({'Quantity': [31], + 'Date': [DT.datetime(2013, 11, 30, 0, 0) + ]}).set_index('Date') + result = df.groupby(pd.Grouper(freq='1M', key='Date')).sum() assert_frame_equal(result, expected) - result = df.groupby([pd.Grouper(freq='1M',key='Date')]).sum() + result = df.groupby([pd.Grouper(freq='1M', key='Date')]).sum() assert_frame_equal(result, expected) # GH 6764 multiple grouping with/without sort df = DataFrame({ - 'date' : pd.to_datetime([ - '20121002','20121007','20130130','20130202','20130305','20121002', - '20121207','20130130','20130202','20130305','20130202','20130305']), - 'user_id' : [1,1,1,1,1,3,3,3,5,5,5,5], - 'whole_cost' : [1790,364,280,259,201,623,90,312,359,301,359,801], - 'cost1' : [12,15,10,24,39,1,0,90,45,34,1,12] }).set_index('date') + 'date': pd.to_datetime([ + '20121002', '20121007', '20130130', '20130202', '20130305', + '20121002', '20121207', '20130130', '20130202', '20130305', + '20130202', '20130305' + ]), + 'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], + 'whole_cost': [1790, 364, 280, 259, 201, 623, 90, 312, 359, 301, + 359, 801], + 'cost1': [12, 15, 10, 24, 39, 1, 0, 90, 45, 34, 1, 12] + }).set_index('date') for freq in ['D', 'M', 'A', 'Q-APR']: - expected = df.groupby('user_id')['whole_cost'].resample( - freq, how='sum').dropna().reorder_levels( - ['date','user_id']).sortlevel().astype('int64') + expected = df.groupby('user_id')[ + 'whole_cost'].resample( + freq, how='sum').dropna().reorder_levels( + ['date', 'user_id']).sortlevel().astype('int64') expected.name = 'whole_cost' - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), 'user_id'])['whole_cost'].sum() + result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), + 'user_id'])['whole_cost'].sum() assert_series_equal(result1, expected) - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])['whole_cost'].sum() + result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ + 'whole_cost'].sum() assert_series_equal(result2, expected) def test_timegrouper_get_group(self): @@ -4410,10 +4536,14 @@ def test_timegrouper_get_group(self): df_original = DataFrame({ 'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(), - 'Quantity': [18,3,5,1,9,3], - 'Date' : [datetime(2013,9,1,13,0), datetime(2013,9,1,13,5), - datetime(2013,10,1,20,0), datetime(2013,10,3,10,0), - datetime(2013,12,2,12,0), datetime(2013,9,2,14,0),]}) + 'Quantity': [18, 3, 5, 1, 9, 3], + 'Date': [datetime(2013, 9, 1, 13, 0), + datetime(2013, 9, 1, 13, 5), + datetime(2013, 10, 1, 20, 0), + datetime(2013, 10, 3, 10, 0), + datetime(2013, 12, 2, 12, 0), + datetime(2013, 9, 2, 14, 0), ] + }) df_reordered = df_original.sort_values(by='Quantity') # single grouping @@ -4431,7 +4561,8 @@ def test_timegrouper_get_group(self): # multiple grouping expected_list = [df_original.iloc[[1]], df_original.iloc[[3]], df_original.iloc[[4]]] - g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), ('Joe', '2013-12-31')] + g_list = [('Joe', '2013-09-30'), ('Carl', '2013-10-31'), + ('Joe', '2013-12-31')] for df in [df_original, df_reordered]: grouped = df.groupby(['Buyer', pd.Grouper(freq='M', key='Date')]) @@ -4468,13 +4599,15 @@ def test_cumcount_empty(self): ge = DataFrame().groupby(level=0) se = Series().groupby(level=0) - e = Series(dtype='int64') # edge case, as this is usually considered float + e = Series(dtype='int64' + ) # edge case, as this is usually considered float assert_series_equal(e, ge.cumcount()) assert_series_equal(e, se.cumcount()) def test_cumcount_dupe_index(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], index=[0] * 5) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) g = df.groupby('A') sg = g.A @@ -4485,7 +4618,8 @@ def test_cumcount_dupe_index(self): def test_cumcount_mi(self): mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]]) - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], index=mi) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=mi) g = df.groupby('A') sg = g.A @@ -4495,7 +4629,8 @@ def test_cumcount_mi(self): assert_series_equal(expected, sg.cumcount()) def test_cumcount_groupby_not_col(self): - df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], index=[0] * 5) + df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'], + index=[0] * 5) g = df.groupby([0, 0, 0, 1, 0]) sg = g.A @@ -4535,10 +4670,10 @@ def test_filter_single_column_df(self): # Test dropna=False. assert_frame_equal( grouped.filter(lambda x: x.mean() < 10, dropna=False), - expected_odd.reindex(df.index)) + expected_odd.reindex(df.index)) assert_frame_equal( grouped.filter(lambda x: x.mean() > 10, dropna=False), - expected_even.reindex(df.index)) + expected_even.reindex(df.index)) def test_filter_multi_column_df(self): df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': [1, 1, 1, 1]}) @@ -4546,14 +4681,14 @@ def test_filter_multi_column_df(self): grouped = df.groupby(grouper) expected = pd.DataFrame({'A': [12, 12], 'B': [1, 1]}, index=[1, 2]) assert_frame_equal( - grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), expected) + grouped.filter(lambda x: x['A'].sum() - x['B'].sum() > 10), + expected) def test_filter_mixed_df(self): df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) grouper = df['A'].apply(lambda x: x % 2) grouped = df.groupby(grouper) - expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, - index=[1, 2]) + expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2]) assert_frame_equal( grouped.filter(lambda x: x['A'].sum() > 10), expected) @@ -4561,8 +4696,7 @@ def test_filter_out_all_groups(self): s = pd.Series([1, 3, 20, 5, 22, 24, 7]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) - assert_series_equal( - grouped.filter(lambda x: x.mean() > 1000), s[[]]) + assert_series_equal(grouped.filter(lambda x: x.mean() > 1000), s[[]]) df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) grouper = df['A'].apply(lambda x: x % 2) grouped = df.groupby(grouper) @@ -4587,7 +4721,8 @@ def raise_if_sum_is_zero(x): raise ValueError else: return x.sum() > 0 - s = pd.Series([-1,0,1,2]) + + s = pd.Series([-1, 0, 1, 2]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) self.assertRaises(TypeError, @@ -4596,13 +4731,17 @@ def raise_if_sum_is_zero(x): def test_filter_with_axis_in_groupby(self): # issue 11041 index = pd.MultiIndex.from_product([range(10), [0, 1]]) - data = pd.DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype='int64') - result = data.groupby(level=0, axis=1).filter(lambda x: x.iloc[0, 0] > 10) - expected = data.iloc[:,12:20] + data = pd.DataFrame( + np.arange(100).reshape(-1, 20), columns=index, dtype='int64') + result = data.groupby(level=0, + axis=1).filter(lambda x: x.iloc[0, 0] > 10) + expected = data.iloc[:, 12:20] assert_frame_equal(result, expected) def test_filter_bad_shapes(self): - df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)}) + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) s = df['B'] g_df = df.groupby('B') g_s = s.groupby(s) @@ -4620,7 +4759,9 @@ def test_filter_bad_shapes(self): self.assertRaises(TypeError, lambda: g_s.filter(f)) def test_filter_nan_is_false(self): - df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)}) + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) s = df['B'] g_df = df.groupby(df['B']) g_s = s.groupby(s) @@ -4632,7 +4773,7 @@ def test_filter_nan_is_false(self): def test_filter_against_workaround(self): np.random.seed(0) # Series of ints - s = Series(np.random.randint(0,100,1000)) + s = Series(np.random.randint(0, 100, 1000)) grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 @@ -4641,7 +4782,7 @@ def test_filter_against_workaround(self): assert_series_equal(new_way.sort_values(), old_way.sort_values()) # Series of floats - s = 100*Series(np.random.random(1000)) + s = 100 * Series(np.random.random(1000)) grouper = s.apply(lambda x: np.round(x, -1)) grouped = s.groupby(grouper) f = lambda x: x.mean() > 10 @@ -4655,38 +4796,42 @@ def test_filter_against_workaround(self): N = 1000 random_letters = letters.take(np.random.randint(0, 26, N)) df = DataFrame({'ints': Series(np.random.randint(0, 100, N)), - 'floats': N/10*Series(np.random.random(N)), + 'floats': N / 10 * Series(np.random.random(N)), 'letters': Series(random_letters)}) # Group by ints; filter on floats. grouped = df.groupby('ints') - old_way = df[grouped.floats.\ - transform(lambda x: x.mean() > N/20).astype('bool')] - new_way = grouped.filter(lambda x: x['floats'].mean() > N/20) + old_way = df[grouped.floats. + transform(lambda x: x.mean() > N / 20).astype('bool')] + new_way = grouped.filter(lambda x: x['floats'].mean() > N / 20) assert_frame_equal(new_way, old_way) # Group by floats (rounded); filter on strings. grouper = df.floats.apply(lambda x: np.round(x, -1)) grouped = df.groupby(grouper) - old_way = df[grouped.letters.\ - transform(lambda x: len(x) < N/10).astype('bool')] - new_way = grouped.filter( - lambda x: len(x.letters) < N/10) + old_way = df[grouped.letters. + transform(lambda x: len(x) < N / 10).astype('bool')] + new_way = grouped.filter(lambda x: len(x.letters) < N / 10) assert_frame_equal(new_way, old_way) # Group by strings; filter on ints. grouped = df.groupby('letters') - old_way = df[grouped.ints.\ - transform(lambda x: x.mean() > N/20).astype('bool')] - new_way = grouped.filter(lambda x: x['ints'].mean() > N/20) + old_way = df[grouped.ints. + transform(lambda x: x.mean() > N / 20).astype('bool')] + new_way = grouped.filter(lambda x: x['ints'].mean() > N / 20) assert_frame_equal(new_way, old_way) def test_filter_using_len(self): # BUG GH4447 - df = DataFrame({'A': np.arange(8), 'B': list('aabbbbcc'), 'C': np.arange(8)}) + df = DataFrame({'A': np.arange(8), + 'B': list('aabbbbcc'), + 'C': np.arange(8)}) grouped = df.groupby('B') actual = grouped.filter(lambda x: len(x) > 2) - expected = DataFrame({'A': np.arange(2, 6), 'B': list('bbbb'), 'C': np.arange(2, 6)}, index=np.arange(2, 6)) + expected = DataFrame( + {'A': np.arange(2, 6), + 'B': list('bbbb'), + 'C': np.arange(2, 6)}, index=np.arange(2, 6)) assert_frame_equal(actual, expected) actual = grouped.filter(lambda x: len(x) > 4) @@ -4697,7 +4842,7 @@ def test_filter_using_len(self): s = df['B'] grouped = s.groupby(s) actual = grouped.filter(lambda x: len(x) > 2) - expected = Series(4*['b'], index=np.arange(2, 6), name='B') + expected = Series(4 * ['b'], index=np.arange(2, 6), name='B') assert_series_equal(actual, expected) actual = grouped.filter(lambda x: len(x) > 4) @@ -4706,8 +4851,8 @@ def test_filter_using_len(self): def test_filter_maintains_ordering(self): # Simple case: index is sequential. #4621 - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}) s = df['pid'] grouped = df.groupby('tag') actual = grouped.filter(lambda x: len(x) > 1) @@ -4748,9 +4893,9 @@ def test_filter_maintains_ordering(self): def test_filter_multiple_timestamp(self): # GH 10114 - df = DataFrame({'A' : np.arange(5,dtype='int64'), - 'B' : ['foo','bar','foo','bar','bar'], - 'C' : Timestamp('20130101') }) + df = DataFrame({'A': np.arange(5, dtype='int64'), + 'B': ['foo', 'bar', 'foo', 'bar', 'bar'], + 'C': Timestamp('20130101')}) grouped = df.groupby(['B', 'C']) @@ -4765,18 +4910,18 @@ def test_filter_multiple_timestamp(self): assert_frame_equal(df, result) result = grouped.transform('sum') - expected = DataFrame({'A' : [2, 8, 2, 8, 8]}) + expected = DataFrame({'A': [2, 8, 2, 8, 8]}) assert_frame_equal(result, expected) result = grouped.transform(len) - expected = DataFrame({'A' : [2, 3, 2, 3, 3]}) + expected = DataFrame({'A': [2, 3, 2, 3, 3]}) assert_frame_equal(result, expected) def test_filter_and_transform_with_non_unique_int_index(self): # GH4620 index = [1, 1, 1, 2, 1, 1, 0, 1] - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) grouped_df = df.groupby('tag') ser = df['pid'] grouped_ser = ser.groupby(df['tag']) @@ -4799,7 +4944,7 @@ def test_filter_and_transform_with_non_unique_int_index(self): actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') # ^ made manually because this can get confusing! assert_series_equal(actual, expected) @@ -4815,8 +4960,8 @@ def test_filter_and_transform_with_non_unique_int_index(self): def test_filter_and_transform_with_multiple_non_unique_int_index(self): # GH4620 index = [1, 1, 1, 2, 0, 0, 0, 1] - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) grouped_df = df.groupby('tag') ser = df['pid'] grouped_ser = ser.groupby(df['tag']) @@ -4839,7 +4984,7 @@ def test_filter_and_transform_with_multiple_non_unique_int_index(self): actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') # ^ made manually because this can get confusing! assert_series_equal(actual, expected) @@ -4855,48 +5000,8 @@ def test_filter_and_transform_with_multiple_non_unique_int_index(self): def test_filter_and_transform_with_non_unique_float_index(self): # GH4620 index = np.array([1, 1, 1, 2, 1, 1, 0, 1], dtype=float) - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) - grouped_df = df.groupby('tag') - ser = df['pid'] - grouped_ser = ser.groupby(df['tag']) - expected_indexes = [1, 2, 4, 7] - - # Filter DataFrame - actual = grouped_df.filter(lambda x: len(x) > 1) - expected = df.iloc[expected_indexes] - assert_frame_equal(actual, expected) - - actual = grouped_df.filter(lambda x: len(x) > 1, dropna=False) - expected = df.copy() - expected.iloc[[0, 3, 5, 6]] = np.nan - assert_frame_equal(actual, expected) - - # Filter Series - actual = grouped_ser.filter(lambda x: len(x) > 1) - expected = ser.take(expected_indexes) - assert_series_equal(actual, expected) - - actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) - NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') - # ^ made manually because this can get confusing! - assert_series_equal(actual, expected) - - # Transform Series - actual = grouped_ser.transform(len) - expected = Series([1, 2, 2, 1, 2, 1, 1, 2], index) - assert_series_equal(actual, expected) - - # Transform (a column from) DataFrameGroupBy - actual = grouped_df.pid.transform(len) - assert_series_equal(actual, expected) - - def test_filter_and_transform_with_non_unique_float_index(self): - # GH4620 - index = np.array([1, 1, 1, 2, 0, 0, 0, 1], dtype=float) - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) grouped_df = df.groupby('tag') ser = df['pid'] grouped_ser = ser.groupby(df['tag']) @@ -4919,7 +5024,7 @@ def test_filter_and_transform_with_non_unique_float_index(self): actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') # ^ made manually because this can get confusing! assert_series_equal(actual, expected) @@ -4938,8 +5043,8 @@ def test_filter_and_transform_with_non_unique_timestamp_index(self): t1 = Timestamp('2013-10-30 00:05:00') t2 = Timestamp('2013-11-30 00:05:00') index = [t1, t1, t1, t2, t1, t1, t0, t1] - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) grouped_df = df.groupby('tag') ser = df['pid'] grouped_ser = ser.groupby(df['tag']) @@ -4962,7 +5067,7 @@ def test_filter_and_transform_with_non_unique_timestamp_index(self): actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') # ^ made manually because this can get confusing! assert_series_equal(actual, expected) @@ -4978,8 +5083,8 @@ def test_filter_and_transform_with_non_unique_timestamp_index(self): def test_filter_and_transform_with_non_unique_string_index(self): # GH4620 index = list('bbbcbbab') - df = DataFrame({'pid' : [1,1,1,2,2,3,3,3], - 'tag' : [23,45,62,24,45,34,25,62]}, index=index) + df = DataFrame({'pid': [1, 1, 1, 2, 2, 3, 3, 3], + 'tag': [23, 45, 62, 24, 45, 34, 25, 62]}, index=index) grouped_df = df.groupby('tag') ser = df['pid'] grouped_ser = ser.groupby(df['tag']) @@ -5002,7 +5107,7 @@ def test_filter_and_transform_with_non_unique_string_index(self): actual = grouped_ser.filter(lambda x: len(x) > 1, dropna=False) NA = np.nan - expected = Series([NA,1,1,NA,2,NA,NA,3], index, name='pid') + expected = Series([NA, 1, 1, NA, 2, NA, NA, 3], index, name='pid') # ^ made manually because this can get confusing! assert_series_equal(actual, expected) @@ -5023,27 +5128,27 @@ def test_filter_has_access_to_grouped_cols(self): assert_frame_equal(filt, df.iloc[[0, 1]]) def test_filter_enforces_scalarness(self): - df = pd.DataFrame([ + df = pd.DataFrame([ ['best', 'a', 'x'], ['worst', 'b', 'y'], ['best', 'c', 'x'], - ['best','d', 'y'], - ['worst','d', 'y'], - ['worst','d', 'y'], - ['best','d', 'z'], + ['best', 'd', 'y'], + ['worst', 'd', 'y'], + ['worst', 'd', 'y'], + ['best', 'd', 'z'], ], columns=['a', 'b', 'c']) with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): df.groupby('c').filter(lambda g: g['a'] == 'best') def test_filter_non_bool_raises(self): - df = pd.DataFrame([ + df = pd.DataFrame([ ['best', 'a', 1], ['worst', 'b', 1], ['best', 'c', 1], - ['best','d', 1], - ['worst','d', 1], - ['worst','d', 1], - ['best','d', 1], + ['best', 'd', 1], + ['worst', 'd', 1], + ['worst', 'd', 1], + ['best', 'd', 1], ], columns=['a', 'b', 'c']) with tm.assertRaisesRegexp(TypeError, 'filter function returned a.*'): df.groupby('a').filter(lambda g: g.c.mean()) @@ -5053,11 +5158,14 @@ def test_fill_constistency(self): # GH9221 # pass thru keyword arguments to the generated wrapper # are set if the passed kw is None (only) - df = DataFrame(index=pd.MultiIndex.from_product([['value1','value2'], - date_range('2014-01-01','2014-01-06')]), - columns=Index(['1','2'], name='id')) - df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan, np.nan, 22, np.nan] - df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan, np.nan, 44, np.nan] + df = DataFrame(index=pd.MultiIndex.from_product( + [['value1', 'value2'], date_range('2014-01-01', '2014-01-06')]), + columns=Index( + ['1', '2'], name='id')) + df['1'] = [np.nan, 1, np.nan, np.nan, 11, np.nan, np.nan, 2, np.nan, + np.nan, 22, np.nan] + df['2'] = [np.nan, 3, np.nan, np.nan, 33, np.nan, np.nan, 4, np.nan, + np.nan, 44, np.nan] expected = df.groupby(level=0, axis=0).fillna(method='ffill') result = df.T.groupby(level=0, axis=1).fillna(method='ffill').T @@ -5103,17 +5211,22 @@ def test_groupby_selection_with_methods(self): # methods which are called as .foo() methods = ['count', 'corr', - 'cummax', 'cummin', 'cumprod', - 'describe', 'rank', + 'cummax', + 'cummin', + 'cumprod', + 'describe', + 'rank', 'quantile', - 'diff', 'shift', - 'all', 'any', - 'idxmin', 'idxmax', - 'ffill', 'bfill', + 'diff', + 'shift', + 'all', + 'any', + 'idxmin', + 'idxmax', + 'ffill', + 'bfill', 'pct_change', - 'tshift', - #'ohlc' - ] + 'tshift'] for m in methods: res = getattr(g, m)() @@ -5143,52 +5256,89 @@ def test_groupby_whitelist(self): s = df.floats df_whitelist = frozenset([ - 'last', 'first', - 'mean', 'sum', 'min', 'max', - 'head', 'tail', - 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumsum', + 'cumprod', + 'cummin', + 'cummax', + 'cumcount', 'resample', 'describe', - 'rank', 'quantile', + 'rank', + 'quantile', 'fillna', 'mad', - 'any', 'all', + 'any', + 'all', 'take', - 'idxmax', 'idxmin', - 'shift', 'tshift', - 'ffill', 'bfill', - 'pct_change', 'skew', - 'plot', 'boxplot', 'hist', - 'median', 'dtypes', - 'corrwith', 'corr', 'cov', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'boxplot', + 'hist', + 'median', + 'dtypes', + 'corrwith', + 'corr', + 'cov', 'diff', ]) s_whitelist = frozenset([ - 'last', 'first', - 'mean', 'sum', 'min', 'max', - 'head', 'tail', - 'cumsum', 'cumprod', 'cummin', 'cummax', 'cumcount', + 'last', + 'first', + 'mean', + 'sum', + 'min', + 'max', + 'head', + 'tail', + 'cumsum', + 'cumprod', + 'cummin', + 'cummax', + 'cumcount', 'resample', 'describe', - 'rank', 'quantile', + 'rank', + 'quantile', 'fillna', 'mad', - 'any', 'all', + 'any', + 'all', 'take', - 'idxmax', 'idxmin', - 'shift', 'tshift', - 'ffill', 'bfill', - 'pct_change', 'skew', - 'plot', 'hist', - 'median', 'dtype', - 'corr', 'cov', + 'idxmax', + 'idxmin', + 'shift', + 'tshift', + 'ffill', + 'bfill', + 'pct_change', + 'skew', + 'plot', + 'hist', + 'median', + 'dtype', + 'corr', + 'cov', 'diff', 'unique', # 'nlargest', 'nsmallest', ]) - for obj, whitelist in zip((df, s), - (df_whitelist, s_whitelist)): + for obj, whitelist in zip((df, s), (df_whitelist, s_whitelist)): gb = obj.groupby(df.letters) self.assertEqual(whitelist, gb._apply_whitelist) for m in whitelist: @@ -5212,38 +5362,39 @@ def test_groupby_whitelist_deprecations(self): with tm.assert_produces_warning(FutureWarning): df.groupby('letters').floats.irow(0) - def test_regression_whitelist_methods(self) : + def test_regression_whitelist_methods(self): # GH6944 # explicity test the whitelest methods - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) raw_frame = DataFrame(np.random.randn(10, 3), index=index, - columns=Index(['A', 'B', 'C'], name='exp')) + columns=Index(['A', 'B', 'C'], name='exp')) raw_frame.ix[1, [1, 2]] = np.nan raw_frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), lrange(2), - [True,False]) : + [True, False]): - if axis == 0 : + if axis == 0: frame = raw_frame - else : + else: frame = raw_frame.T - if op in self.AGG_FUNCTIONS_WITH_SKIPNA : - grouped = frame.groupby(level=level,axis=axis) - result = getattr(grouped,op)(skipna=skipna) - expected = getattr(frame,op)(level=level,axis=axis,skipna=skipna) + if op in self.AGG_FUNCTIONS_WITH_SKIPNA: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)(skipna=skipna) + expected = getattr(frame, op)(level=level, axis=axis, + skipna=skipna) assert_frame_equal(result, expected) - else : - grouped = frame.groupby(level=level,axis=axis) - result = getattr(grouped,op)() - expected = getattr(frame,op)(level=level,axis=axis) + else: + grouped = frame.groupby(level=level, axis=axis) + result = getattr(grouped, op)() + expected = getattr(frame, op)(level=level, axis=axis) assert_frame_equal(result, expected) def test_groupby_blacklist(self): @@ -5281,22 +5432,20 @@ def test_groupby_blacklist(self): def test_tab_completion(self): grp = self.mframe.groupby(level='second') results = set([v for v in dir(grp) if not v.startswith('_')]) - expected = set(['A','B','C', - 'agg','aggregate','apply','boxplot','filter','first','get_group', - 'groups','hist','indices','last','max','mean','median', - 'min','name','ngroups','nth','ohlc','plot', 'prod', - 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', 'head', - 'irow', - 'describe', 'cummax', 'quantile', 'rank', 'cumprod', 'tail', - 'resample', 'cummin', 'fillna', 'cumsum', 'cumcount', - 'all', 'shift', 'skew', 'bfill', 'ffill', - 'take', 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', - 'cov', 'dtypes', 'diff', 'idxmax', 'idxmin' - ]) + expected = set( + ['A', 'B', 'C', 'agg', 'aggregate', 'apply', 'boxplot', 'filter', + 'first', 'get_group', 'groups', 'hist', 'indices', 'last', 'max', + 'mean', 'median', 'min', 'name', 'ngroups', 'nth', 'ohlc', 'plot', + 'prod', 'size', 'std', 'sum', 'transform', 'var', 'sem', 'count', + 'head', 'irow', 'describe', 'cummax', 'quantile', 'rank', + 'cumprod', 'tail', 'resample', 'cummin', 'fillna', 'cumsum', + 'cumcount', 'all', 'shift', 'skew', 'bfill', 'ffill', 'take', + 'tshift', 'pct_change', 'any', 'mad', 'corr', 'corrwith', 'cov', + 'dtypes', 'diff', 'idxmax', 'idxmin']) self.assertEqual(results, expected) def test_lexsort_indexer(self): - keys = [[nan]*5 + list(range(100)) + [nan]*5] + keys = [[nan] * 5 + list(range(100)) + [nan] * 5] # orders=True, na_position='last' result = _lexsort_indexer(keys, orders=True, na_position='last') expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) @@ -5309,17 +5458,19 @@ def test_lexsort_indexer(self): # orders=False, na_position='last' result = _lexsort_indexer(keys, orders=False, na_position='last') - expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, + 110)) assert_equal(result, expected) # orders=False, na_position='first' result = _lexsort_indexer(keys, orders=False, na_position='first') - expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, + -1)) assert_equal(result, expected) def test_nargsort(self): # np.argsort(items) places NaNs last - items = [nan]*5 + list(range(100)) + [nan]*5 + items = [nan] * 5 + list(range(100)) + [nan] * 5 # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') @@ -5327,78 +5478,86 @@ def test_nargsort(self): # GH 2785; due to a regression in NumPy1.6.2 np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) np.argsort(items2, kind='mergesort') - except TypeError as err: + except TypeError: raise nose.SkipTest('requested sort not available for type') - # mergesort is the most difficult to get right because we want it to be stable. + # mergesort is the most difficult to get right because we want it to be + # stable. - # According to numpy/core/tests/test_multiarray, """The number - # of sorted items must be greater than ~50 to check the actual algorithm + # According to numpy/core/tests/test_multiarray, """The number of + # sorted items must be greater than ~50 to check the actual algorithm # because quick and merge sort fall over to insertion sort for small # arrays.""" - # mergesort, ascending=True, na_position='last' - result = _nargsort( - items, kind='mergesort', ascending=True, na_position='last') + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='last') expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) assert_equal(result, expected) # mergesort, ascending=True, na_position='first' - result = _nargsort( - items, kind='mergesort', ascending=True, na_position='first') + result = _nargsort(items, kind='mergesort', ascending=True, + na_position='first') expected = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) assert_equal(result, expected) # mergesort, ascending=False, na_position='last' - result = _nargsort( - items, kind='mergesort', ascending=False, na_position='last') - expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='last') + expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, + 110)) assert_equal(result, expected) # mergesort, ascending=False, na_position='first' - result = _nargsort( - items, kind='mergesort', ascending=False, na_position='first') - expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + result = _nargsort(items, kind='mergesort', ascending=False, + na_position='first') + expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, + -1)) assert_equal(result, expected) # mergesort, ascending=True, na_position='last' - result = _nargsort( - items2, kind='mergesort', ascending=True, na_position='last') + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='last') expected = list(range(5, 105)) + list(range(5)) + list(range(105, 110)) assert_equal(result, expected) # mergesort, ascending=True, na_position='first' - result = _nargsort( - items2, kind='mergesort', ascending=True, na_position='first') + result = _nargsort(items2, kind='mergesort', ascending=True, + na_position='first') expected = list(range(5)) + list(range(105, 110)) + list(range(5, 105)) assert_equal(result, expected) # mergesort, ascending=False, na_position='last' - result = _nargsort( - items2, kind='mergesort', ascending=False, na_position='last') - expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, 110)) + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='last') + expected = list(range(104, 4, -1)) + list(range(5)) + list(range(105, + 110)) assert_equal(result, expected) # mergesort, ascending=False, na_position='first' - result = _nargsort( - items2, kind='mergesort', ascending=False, na_position='first') - expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) + result = _nargsort(items2, kind='mergesort', ascending=False, + na_position='first') + expected = list(range(5)) + list(range(105, 110)) + list(range(104, 4, + -1)) assert_equal(result, expected) def test_datetime_count(self): - df = DataFrame({'a': [1,2,3] * 2, + df = DataFrame({'a': [1, 2, 3] * 2, 'dates': pd.date_range('now', periods=6, freq='T')}) result = df.groupby('a').dates.count() - expected = Series([2, 2, 2], index=Index([1, 2, 3], name='a'), - name='dates') + expected = Series([ + 2, 2, 2 + ], index=Index([1, 2, 3], name='a'), name='dates') tm.assert_series_equal(result, expected) def test_lower_int_prec_count(self): - df = DataFrame({'a': np.array([0, 1, 2, 100], np.int8), - 'b': np.array([1, 2, 3, 6], np.uint32), - 'c': np.array([4, 5, 6, 8], np.int16), - 'grp': list('ab' * 2)}) + df = DataFrame({'a': np.array( + [0, 1, 2, 100], np.int8), + 'b': np.array( + [1, 2, 3, 6], np.uint32), + 'c': np.array( + [4, 5, 6, 8], np.int16), + 'grp': list('ab' * 2)}) result = df.groupby('grp').count() expected = DataFrame({'a': [2, 2], 'b': [2, 2], @@ -5411,6 +5570,7 @@ class RaisingObjectException(Exception): pass class RaisingObject(object): + def __init__(self, msg='I will raise inside Cython'): super(RaisingObject, self).__init__() self.msg = msg @@ -5422,8 +5582,8 @@ def __eq__(self, other): df = DataFrame({'a': [RaisingObject() for _ in range(4)], 'grp': list('ab' * 2)}) result = df.groupby('grp').count() - expected = DataFrame({'a': [2, 2]}, index=pd.Index(list('ab'), - name='grp')) + expected = DataFrame({'a': [2, 2]}, index=pd.Index( + list('ab'), name='grp')) tm.assert_frame_equal(result, expected) def test__cython_agg_general(self): @@ -5435,8 +5595,7 @@ def test__cython_agg_general(self): ('min', np.min), ('max', np.max), ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), - ] + ('last', lambda x: x.iloc[-1]), ] df = DataFrame(np.random.randn(1000)) labels = np.random.randint(0, 50, size=1000).astype(float) @@ -5446,33 +5605,30 @@ def test__cython_agg_general(self): try: tm.assert_frame_equal(result, expected) except BaseException as exc: - exc.args += ('operation: %s' % op,) + exc.args += ('operation: %s' % op, ) raise def test_cython_group_transform_algos(self): - #GH 4095 - dtypes = [np.int8, np.int16, np.int32, np.int64, - np.uint8, np.uint32, np.uint64, - np.float32, np.float64] + # GH 4095 + dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32, + np.uint64, np.float32, np.float64] ops = [(pd.algos.group_cumprod_float64, np.cumproduct, [np.float64]), (pd.algos.group_cumsum, np.cumsum, dtypes)] for pd_op, np_op, dtypes in ops: for dtype in dtypes: - data = np.array([[1],[2],[3],[4]], dtype=dtype) + data = np.array([[1], [2], [3], [4]], dtype=dtype) ans = np.zeros_like(data) accum = np.array([[0]], dtype=dtype) - labels = np.array([0,0,0,0], dtype=np.int64) + labels = np.array([0, 0, 0, 0], dtype=np.int64) pd_op(ans, data, labels, accum) - self.assert_numpy_array_equal(np_op(data), ans[:,0]) - - + self.assert_numpy_array_equal(np_op(data), ans[:, 0]) # with nans - labels = np.array([0,0,0,0,0], dtype=np.int64) + labels = np.array([0, 0, 0, 0, 0], dtype=np.int64) - data = np.array([[1],[2],[3],[np.nan],[4]], dtype='float64') + data = np.array([[1], [2], [3], [np.nan], [4]], dtype='float64') accum = np.array([[0.0]]) actual = np.zeros_like(data) actual.fill(np.nan) @@ -5493,45 +5649,46 @@ def test_cython_group_transform_algos(self): actual = np.zeros_like(data, dtype='int64') actual.fill(np.nan) pd.algos.group_cumsum(actual, data.view('int64'), labels, accum) - expected = np.array( - [np.timedelta64(1, 'ns'), np.timedelta64(2, 'ns'), - np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), - np.timedelta64(5, 'ns')]) + expected = np.array([np.timedelta64(1, 'ns'), np.timedelta64( + 2, 'ns'), np.timedelta64(3, 'ns'), np.timedelta64(4, 'ns'), + np.timedelta64(5, 'ns')]) self.assert_numpy_array_equal(actual[:, 0].view('m8[ns]'), expected) - - def test_cython_transform(self): # GH 4095 - ops = [(('cumprod', ()), lambda x: x.cumprod()), - (('cumsum', ()), lambda x: x.cumsum()), - (('shift', (-1,)), lambda x: x.shift(-1)), - (('shift', (1,)), lambda x: x.shift())] + ops = [(('cumprod', + ()), lambda x: x.cumprod()), (('cumsum', ()), + lambda x: x.cumsum()), + (('shift', (-1, )), + lambda x: x.shift(-1)), (('shift', + (1, )), lambda x: x.shift())] s = Series(np.random.randn(1000)) s_missing = s.copy() s_missing.iloc[2:10] = np.nan labels = np.random.randint(0, 50, size=1000).astype(float) - #series + # series for (op, args), targop in ops: for data in [s, s_missing]: # print(data.head()) expected = data.groupby(labels).transform(targop) tm.assert_series_equal(expected, - data.groupby(labels).transform(op, *args)) - tm.assert_series_equal(expected, - getattr(data.groupby(labels), op)(*args)) + data.groupby(labels).transform(op, + *args)) + tm.assert_series_equal(expected, getattr( + data.groupby(labels), op)(*args)) strings = list('qwertyuiopasdfghjklz') strings_missing = strings[:] strings_missing[5] = np.nan df = DataFrame({'float': s, 'float_missing': s_missing, - 'int': [1,1,1,1,2] * 200, + 'int': [1, 1, 1, 1, 2] * 200, 'datetime': pd.date_range('1990-1-1', periods=1000), - 'timedelta': pd.timedelta_range(1, freq='s', periods=1000), + 'timedelta': pd.timedelta_range(1, freq='s', + periods=1000), 'string': strings * 50, 'string_missing': strings_missing * 50}) df['cat'] = df['string'].astype('category') @@ -5539,12 +5696,12 @@ def test_cython_transform(self): df2 = df.copy() df2.index = pd.MultiIndex.from_product([range(100), range(10)]) - #DataFrame - Single and MultiIndex, - #group by values, index level, columns + # DataFrame - Single and MultiIndex, + # group by values, index level, columns for df in [df, df2]: - for gb_target in [dict(by=labels), dict(level=0), - dict(by='string')]: # dict(by='string_missing')]: - # dict(by=['int','string'])]: + for gb_target in [dict(by=labels), dict(level=0), dict(by='string') + ]: # dict(by='string_missing')]: + # dict(by=['int','string'])]: gb = df.groupby(**gb_target) # whitelisted methods set the selection before applying @@ -5558,19 +5715,20 @@ def test_cython_transform(self): # numeric apply fastpath promotes dtype so have # to apply seperately and concat i = gb[['int']].apply(targop) - f = gb[['float','float_missing']].apply(targop) - expected = pd.concat([f,i], axis=1) + f = gb[['float', 'float_missing']].apply(targop) + expected = pd.concat([f, i], axis=1) else: expected = gb.apply(targop) expected = expected.sort_index(axis=1) tm.assert_frame_equal(expected, - gb.transform(op, *args).sort_index(axis=1)) - tm.assert_frame_equal(expected, - getattr(gb, op)(*args)) + gb.transform(op, *args).sort_index( + axis=1)) + tm.assert_frame_equal(expected, getattr(gb, op)(*args)) # individual columns for c in df: - if c not in ['float', 'int', 'float_missing'] and op != 'shift': + if c not in ['float', 'int', 'float_missing' + ] and op != 'shift': self.assertRaises(DataError, gb[c].transform, op) self.assertRaises(DataError, getattr(gb[c], op)) else: @@ -5580,6 +5738,7 @@ def test_cython_transform(self): gb[c].transform(op, *args)) tm.assert_series_equal(expected, getattr(gb[c], op)(*args)) + def test_groupby_cumprod(self): # GH 4095 df = pd.DataFrame({'key': ['b'] * 10, 'value': 2}) @@ -5598,7 +5757,6 @@ def test_groupby_cumprod(self): expected.name = 'value' tm.assert_series_equal(actual, expected) - def test_ops_general(self): ops = [('mean', np.mean), ('median', np.median), @@ -5610,8 +5768,7 @@ def test_ops_general(self): ('max', np.max), ('first', lambda x: x.iloc[0]), ('last', lambda x: x.iloc[-1]), - ('count', np.size), - ] + ('count', np.size), ] try: from scipy.stats import sem except ImportError: @@ -5627,7 +5784,7 @@ def test_ops_general(self): try: tm.assert_frame_equal(result, expected) except BaseException as exc: - exc.args += ('operation: %s' % op,) + exc.args += ('operation: %s' % op, ) raise def test_max_nan_bug(self): @@ -5635,6 +5792,7 @@ def test_max_nan_bug(self): 2013-04-23,2013-04-23 00:00:00,,log080001.log 2013-05-06,2013-05-06 00:00:00,,log.log 2013-05-07,2013-05-07 00:00:00,OE,xlsx""" + df = pd.read_csv(StringIO(raw), parse_dates=[0]) gb = df.groupby('Date') r = gb[['File']].max() @@ -5647,17 +5805,16 @@ def test_nlargest(self): b = Series(list('a' * 5 + 'b' * 5)) gb = a.groupby(b) r = gb.nlargest(3) - e = Series([7, 5, 3, 10, 9, 6], - index=MultiIndex.from_arrays([list('aaabbb'), - [3, 2, 1, 9, 5, 8]])) + e = Series([ + 7, 5, 3, 10, 9, 6 + ], index=MultiIndex.from_arrays([list('aaabbb'), [3, 2, 1, 9, 5, 8]])) tm.assert_series_equal(r, e) - a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) gb = a.groupby(b) - e = Series([3, 2, 1, 3, 3, 2], - index=MultiIndex.from_arrays([list('aaabbb'), - [2, 3, 1, 6, 5, 7]])) + e = Series([ + 3, 2, 1, 3, 3, 2 + ], index=MultiIndex.from_arrays([list('aaabbb'), [2, 3, 1, 6, 5, 7]])) assert_series_equal(gb.nlargest(3, keep='last'), e) with tm.assert_produces_warning(FutureWarning): assert_series_equal(gb.nlargest(3, take_last=True), e) @@ -5667,16 +5824,16 @@ def test_nsmallest(self): b = Series(list('a' * 5 + 'b' * 5)) gb = a.groupby(b) r = gb.nsmallest(3) - e = Series([1, 2, 3, 0, 4, 6], - index=MultiIndex.from_arrays([list('aaabbb'), - [0, 4, 1, 6, 7, 8]])) + e = Series([ + 1, 2, 3, 0, 4, 6 + ], index=MultiIndex.from_arrays([list('aaabbb'), [0, 4, 1, 6, 7, 8]])) tm.assert_series_equal(r, e) a = Series([1, 1, 3, 2, 0, 3, 3, 2, 1, 0]) gb = a.groupby(b) - e = Series([0, 1, 1, 0, 1, 2], - index=MultiIndex.from_arrays([list('aaabbb'), - [4, 1, 0, 9, 8, 7]])) + e = Series([ + 0, 1, 1, 0, 1, 2 + ], index=MultiIndex.from_arrays([list('aaabbb'), [4, 1, 0, 9, 8, 7]])) assert_series_equal(gb.nsmallest(3, keep='last'), e) with tm.assert_produces_warning(FutureWarning): assert_series_equal(gb.nsmallest(3, take_last=True), e) @@ -5698,23 +5855,27 @@ def test_transform_doesnt_clobber_ints(self): def test_groupby_categorical_two_columns(self): # https://github.com/pydata/pandas/issues/8138 - d = {'cat': pd.Categorical(["a","b","a","b"], categories=["a", "b", "c"], ordered=True), - 'ints': [1, 1, 2, 2],'val': [10, 20, 30, 40]} + d = {'cat': + pd.Categorical(["a", "b", "a", "b"], categories=["a", "b", "c"], + ordered=True), + 'ints': [1, 1, 2, 2], + 'val': [10, 20, 30, 40]} test = pd.DataFrame(d) # Grouping on a single column groups_single_key = test.groupby("cat") res = groups_single_key.agg('mean') - exp = DataFrame({"ints":[1.5,1.5,np.nan], "val":[20,30,np.nan]}, + exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]}, index=pd.CategoricalIndex(["a", "b", "c"], name="cat")) tm.assert_frame_equal(res, exp) # Grouping on two columns - groups_double_key = test.groupby(["cat","ints"]) + groups_double_key = test.groupby(["cat", "ints"]) res = groups_double_key.agg('mean') - exp = DataFrame({"val":[10,30,20,40,np.nan,np.nan], - "cat": ["a","a","b","b","c","c"], - "ints": [1,2,1,2,1,2]}).set_index(["cat","ints"]) + exp = DataFrame({"val": [10, 30, 20, 40, np.nan, np.nan], + "cat": ["a", "a", "b", "b", "c", "c"], + "ints": [1, 2, 1, 2, 1, 2]}).set_index(["cat", "ints" + ]) tm.assert_frame_equal(res, exp) # GH 10132 @@ -5728,23 +5889,28 @@ def test_groupby_categorical_two_columns(self): test = pd.DataFrame(d) values = pd.cut(test['C1'], [1, 2, 3, 6]) values.name = "cat" - groups_double_key = test.groupby([values,'C2']) + groups_double_key = test.groupby([values, 'C2']) res = groups_double_key.agg('mean') nan = np.nan - idx = MultiIndex.from_product([["(1, 2]", "(2, 3]", "(3, 6]"],[1,2,3,4]], + idx = MultiIndex.from_product([["(1, 2]", "(2, 3]", "(3, 6]"], + [1, 2, 3, 4]], names=["cat", "C2"]) - exp = DataFrame({"C1":[nan,nan,nan,nan, 3, 3,nan,nan, nan,nan, 4, 5], - "C3":[nan,nan,nan,nan, 10,100,nan,nan, nan,nan,200,34]}, index=idx) + exp = DataFrame({"C1": [nan, nan, nan, nan, 3, 3, + nan, nan, nan, nan, 4, 5], + "C3": [nan, nan, nan, nan, 10, 100, + nan, nan, nan, nan, 200, 34]}, index=idx) tm.assert_frame_equal(res, exp) def test_groupby_apply_all_none(self): # Tests to make sure no errors if apply function returns all None # values. Issue 9684. - test_df = DataFrame({'groups': [0,0,1,1], 'random_vars': [8,7,4,5]}) + test_df = DataFrame({'groups': [0, 0, 1, 1], + 'random_vars': [8, 7, 4, 5]}) def test_func(x): pass + result = test_df.groupby('groups').apply(test_func) expected = DataFrame() tm.assert_frame_equal(result, expected) @@ -5754,35 +5920,38 @@ def test_first_last_max_min_on_time_data(self): # Verify that NaT is not in the result of max, min, first and last on # Dataframe with datetime or timedelta values. from datetime import timedelta as td - df_test=DataFrame({'dt':[nan,'2015-07-24 10:10','2015-07-25 11:11','2015-07-23 12:12',nan], - 'td':[nan,td(days=1),td(days=2),td(days=3),nan]}) - df_test.dt=pd.to_datetime(df_test.dt) - df_test['group']='A' - df_ref=df_test[df_test.dt.notnull()] - - grouped_test=df_test.groupby('group') - grouped_ref=df_ref.groupby('group') - - assert_frame_equal(grouped_ref.max(),grouped_test.max()) - assert_frame_equal(grouped_ref.min(),grouped_test.min()) - assert_frame_equal(grouped_ref.first(),grouped_test.first()) - assert_frame_equal(grouped_ref.last(),grouped_test.last()) + df_test = DataFrame( + {'dt': [nan, '2015-07-24 10:10', '2015-07-25 11:11', + '2015-07-23 12:12', nan], + 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) + df_test.dt = pd.to_datetime(df_test.dt) + df_test['group'] = 'A' + df_ref = df_test[df_test.dt.notnull()] + + grouped_test = df_test.groupby('group') + grouped_ref = df_ref.groupby('group') + + assert_frame_equal(grouped_ref.max(), grouped_test.max()) + assert_frame_equal(grouped_ref.min(), grouped_test.min()) + assert_frame_equal(grouped_ref.first(), grouped_test.first()) + assert_frame_equal(grouped_ref.last(), grouped_test.last()) def test_groupby_preserves_sort(self): # Test to ensure that groupby always preserves sort order of original # object. Issue #8588 and #9651 - df = DataFrame({'int_groups':[3,1,0,1,0,3,3,3], - 'string_groups':['z','a','z','a','a','g','g','g'], - 'ints':[8,7,4,5,2,9,1,1], - 'floats':[2.3,5.3,6.2,-2.4,2.2,1.1,1.1,5], - 'strings':['z','d','a','e','word','word2','42','47']}) + df = DataFrame( + {'int_groups': [3, 1, 0, 1, 0, 3, 3, 3], + 'string_groups': ['z', 'a', 'z', 'a', 'a', 'g', 'g', 'g'], + 'ints': [8, 7, 4, 5, 2, 9, 1, 1], + 'floats': [2.3, 5.3, 6.2, -2.4, 2.2, 1.1, 1.1, 5], + 'strings': ['z', 'd', 'a', 'e', 'word', 'word2', '42', '47']}) # Try sorting on different types and with different group types - for sort_column in ['ints', 'floats', 'strings', ['ints','floats'], - ['ints','strings']]: + for sort_column in ['ints', 'floats', 'strings', ['ints', 'floats'], + ['ints', 'strings']]: for group_column in ['int_groups', 'string_groups', - ['int_groups','string_groups']]: + ['int_groups', 'string_groups']]: df = df.sort_values(by=sort_column) @@ -5819,7 +5988,7 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = com._asarray_tuplesafe(tups) expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): - assert(result[k] == v) + assert (result[k] == v) def test_decons(): @@ -5830,20 +5999,19 @@ def testit(label_list, shape): label_list2 = decons_group_index(group_index, shape) for a, b in zip(label_list, label_list2): - assert(np.array_equal(a, b)) + assert (np.array_equal(a, b)) shape = (4, 5, 6) - label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), - np.tile([0, 2, 4, 3, 0, 1, 2, 3], 100), - np.tile([5, 1, 0, 2, 3, 0, 5, 4], 100)] + label_list = [np.tile([0, 1, 2, 3, 0, 1, 2, 3], 100), np.tile( + [0, 2, 4, 3, 0, 1, 2, 3], 100), np.tile( + [5, 1, 0, 2, 3, 0, 5, 4], 100)] testit(label_list, shape) shape = (10000, 10000) - label_list = [np.tile(np.arange(10000), 5), - np.tile(np.arange(10000), 5)] + label_list = [np.tile(np.arange(10000), 5), np.tile(np.arange(10000), 5)] testit(label_list, shape) if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', - '-s'], exit=False) + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' + ], exit=False) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 4dcc390787908..2c909d653df85 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- # pylint: disable=E1101,E1103,W0232 +# TODO(wesm): fix long line flake8 issues +# flake8: noqa + from datetime import datetime, timedelta, time from pandas import compat -from pandas.compat import (long, is_platform_windows, range, - lrange, lzip, u, zip, PY3) +from pandas.compat import (long, is_platform_windows, range, lrange, lzip, u, + zip, PY3) from itertools import combinations import operator import re @@ -14,15 +17,14 @@ import numpy as np -from pandas import (period_range, date_range, Categorical, Series, - DataFrame, Index, Float64Index, Int64Index, RangeIndex, - MultiIndex, CategoricalIndex, DatetimeIndex, - TimedeltaIndex, PeriodIndex) +from pandas import (period_range, date_range, Categorical, Series, DataFrame, + Index, Float64Index, Int64Index, RangeIndex, MultiIndex, + CategoricalIndex, DatetimeIndex, TimedeltaIndex, + PeriodIndex) from pandas.core.index import InvalidIndexError from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, assert_copy) - import pandas.util.testing as tm import pandas.core.config as cf @@ -32,6 +34,9 @@ from pandas.lib import Timestamp from itertools import product +if PY3: + unicode = lambda x: x + class Base(object): """ base class for index sub-class tests """ @@ -43,7 +48,7 @@ def setup_indices(self): for name, ind in self.indices.items(): setattr(self, name, ind) - def verify_pickle(self,index): + def verify_pickle(self, index): unpickled = self.round_trip_pickle(index) self.assertTrue(index.equals(unpickled)) @@ -64,8 +69,8 @@ def test_shift(self): def test_create_index_existing_name(self): - # GH11193, when an existing index is passed, and a new name is not specified, the new index should inherit the - # previous object name + # GH11193, when an existing index is passed, and a new name is not + # specified, the new index should inherit the previous object name expected = self.create_index() if not isinstance(expected, MultiIndex): expected.name = 'foo' @@ -78,57 +83,54 @@ def test_create_index_existing_name(self): else: expected.names = ['foo', 'bar'] result = pd.Index(expected) - tm.assert_index_equal(result, Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], dtype='object'), - names=['foo', 'bar'])) + tm.assert_index_equal( + result, Index(Index([('foo', 'one'), ('foo', 'two'), + ('bar', 'one'), ('baz', 'two'), + ('qux', 'one'), ('qux', 'two')], + dtype='object'), + names=['foo', 'bar'])) result = pd.Index(expected, names=['A', 'B']) - tm.assert_index_equal(result, Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), - ('qux', 'one'), ('qux', 'two')], dtype='object'), - names=['A', 'B'])) + tm.assert_index_equal( + result, + Index(Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')], + dtype='object'), names=['A', 'B'])) def test_numeric_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, - "cannot perform __mul__", + tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", lambda: idx * 1) - tm.assertRaisesRegexp(TypeError, - "cannot perform __mul__", + tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ else "cannot perform __div__" - tm.assertRaisesRegexp(TypeError, - div_err, - lambda: idx / 1) - tm.assertRaisesRegexp(TypeError, - div_err, - lambda: 1 / idx) - tm.assertRaisesRegexp(TypeError, - "cannot perform __floordiv__", + tm.assertRaisesRegexp(TypeError, div_err, lambda: idx / 1) + tm.assertRaisesRegexp(TypeError, div_err, lambda: 1 / idx) + tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", lambda: idx // 1) - tm.assertRaisesRegexp(TypeError, - "cannot perform __floordiv__", + tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", lambda: 1 // idx) def test_logical_compat(self): idx = self.create_index() - tm.assertRaisesRegexp(TypeError, - 'cannot perform all', + tm.assertRaisesRegexp(TypeError, 'cannot perform all', lambda: idx.all()) - tm.assertRaisesRegexp(TypeError, - 'cannot perform any', + tm.assertRaisesRegexp(TypeError, 'cannot perform any', lambda: idx.any()) def test_boolean_context_compat(self): # boolean context compat idx = self.create_index() + def f(): if idx: pass - tm.assertRaisesRegexp(ValueError,'The truth value of a',f) + + tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) def test_reindex_base(self): idx = self.create_index() @@ -157,7 +159,7 @@ def test_ndarray_compat_properties(self): def test_repr_roundtrip(self): idx = self.create_index() - tm.assert_index_equal(eval(repr(idx)),idx) + tm.assert_index_equal(eval(repr(idx)), idx) def test_str(self): @@ -209,7 +211,7 @@ def test_set_name_methods(self): self.assertIsNone(res) self.assertEqual(ind.name, new_name) self.assertEqual(ind.names, [new_name]) - #with assertRaisesRegexp(TypeError, "list-like"): + # with assertRaisesRegexp(TypeError, "list-like"): # # should still fail even if it would be the right length # ind.set_names("a") with assertRaisesRegexp(ValueError, "Level must be None"): @@ -223,8 +225,7 @@ def test_set_name_methods(self): def test_hash_error(self): for ind in self.indices.values(): - with tm.assertRaisesRegexp(TypeError, - "unhashable type: %r" % + with tm.assertRaisesRegexp(TypeError, "unhashable type: %r" % type(ind).__name__): hash(ind) @@ -252,7 +253,7 @@ def test_duplicates(self): continue if isinstance(ind, MultiIndex): continue - idx = self._holder([ind[0]]*5) + idx = self._holder([ind[0]] * 5) self.assertFalse(idx.is_unique) self.assertTrue(idx.has_duplicates) @@ -261,7 +262,7 @@ def test_duplicates(self): idx.name = 'foo' result = idx.drop_duplicates() self.assertEqual(result.name, 'foo') - self.assert_index_equal(result, Index([ind[0]],name='foo')) + self.assert_index_equal(result, Index([ind[0]], name='foo')) def test_sort(self): for ind in self.indices.values(): @@ -286,7 +287,7 @@ def test_view(self): def test_compat(self): for ind in self.indices.values(): - self.assertEqual(ind.tolist(),list(ind)) + self.assertEqual(ind.tolist(), list(ind)) def test_argsort(self): for k, ind in self.indices.items(): @@ -310,14 +311,15 @@ def test_take(self): for k, ind in self.indices.items(): # separate - if k in ['boolIndex','tuples','empty']: + if k in ['boolIndex', 'tuples', 'empty']: continue result = ind.take(indexer) expected = ind[indexer] self.assertTrue(result.equals(expected)) - if not isinstance(ind, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): + if not isinstance(ind, + (DatetimeIndex, PeriodIndex, TimedeltaIndex)): # GH 10791 with tm.assertRaises(AttributeError): ind.freq @@ -326,7 +328,8 @@ def test_setops_errorcases(self): for name, idx in compat.iteritems(self.indices): # # non-iterable input cases = [0.5, 'xxx'] - methods = [idx.intersection, idx.union, idx.difference, idx.sym_diff] + methods = [idx.intersection, idx.union, idx.difference, + idx.sym_diff] for method in methods: for case in cases: @@ -346,7 +349,8 @@ def test_intersection_base(self): self.assertTrue(tm.equalContents(intersect, second)) # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] + cases = [klass(second.values) + for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" @@ -372,7 +376,8 @@ def test_union_base(self): self.assertTrue(tm.equalContents(union, everything)) # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] + cases = [klass(second.values) + for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" @@ -402,7 +407,8 @@ def test_difference_base(self): self.assertTrue(tm.equalContents(result, answer)) # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] + cases = [klass(second.values) + for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" @@ -434,7 +440,8 @@ def test_symmetric_diff(self): self.assertTrue(tm.equalContents(result, answer)) # GH 10149 - cases = [klass(second.values) for klass in [np.array, Series, list]] + cases = [klass(second.values) + for klass in [np.array, Series, list]] for case in cases: if isinstance(idx, PeriodIndex): msg = "can only call with other PeriodIndex-ed objects" @@ -459,9 +466,8 @@ def test_insert_base(self): if not len(idx): continue - #test 0th element - self.assertTrue(idx[0:4].equals( - result.insert(0, idx[0]))) + # test 0th element + self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) def test_delete_base(self): @@ -557,10 +563,10 @@ def test_numpy_ufuncs(self): for name, idx in compat.iteritems(self.indices): for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, - np.log1p, np.sqrt, np.sin, np.cos, - np.tan, np.arcsin, np.arccos, np.arctan, - np.sinh, np.cosh, np.tanh, np.arcsinh, np.arccosh, - np.arctanh, np.deg2rad, np.rad2deg]: + np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, + np.arccos, np.arctan, np.sinh, np.cosh, np.tanh, + np.arcsinh, np.arccosh, np.arctanh, np.deg2rad, + np.rad2deg]: if isinstance(idx, pd.tseries.base.DatetimeIndexOpsMixin): # raise TypeError or ValueError (PeriodIndex) # PeriodIndex behavior should be changed in future version @@ -679,21 +685,19 @@ class TestIndex(Base, tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.indices = dict( - unicodeIndex=tm.makeUnicodeIndex(100), - strIndex=tm.makeStringIndex(100), - dateIndex=tm.makeDateIndex(100), - periodIndex=tm.makePeriodIndex(100), - tdIndex=tm.makeTimedeltaIndex(100), - intIndex=tm.makeIntIndex(100), - rangeIndex=tm.makeIntIndex(100), - floatIndex=tm.makeFloatIndex(100), - boolIndex=Index([True, False]), - catIndex=tm.makeCategoricalIndex(100), - empty=Index([]), - tuples=MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], - [1, 2, 3])) - ) + self.indices = dict(unicodeIndex=tm.makeUnicodeIndex(100), + strIndex=tm.makeStringIndex(100), + dateIndex=tm.makeDateIndex(100), + periodIndex=tm.makePeriodIndex(100), + tdIndex=tm.makeTimedeltaIndex(100), + intIndex=tm.makeIntIndex(100), + rangeIndex=tm.makeIntIndex(100), + floatIndex=tm.makeFloatIndex(100), + boolIndex=Index([True, False]), + catIndex=tm.makeCategoricalIndex(100), + empty=Index([]), + tuples=MultiIndex.from_tuples(lzip( + ['foo', 'bar', 'baz'], [1, 2, 3]))) self.setup_indices() def create_index(self): @@ -742,15 +746,19 @@ def test_construction_list_mixed_tuples(self): # 10697 # if we are constructing from a mixed list of tuples, make sure that we # are independent of the sorting order - idx1 = Index([('A',1),'B']) - self.assertIsInstance(idx1, Index) and self.assertNotInstance(idx1, MultiIndex) - idx2 = Index(['B',('A',1)]) - self.assertIsInstance(idx2, Index) and self.assertNotInstance(idx2, MultiIndex) + idx1 = Index([('A', 1), 'B']) + self.assertIsInstance(idx1, Index) and self.assertNotInstance( + idx1, MultiIndex) + idx2 = Index(['B', ('A', 1)]) + self.assertIsInstance(idx2, Index) and self.assertNotInstance( + idx2, MultiIndex) def test_constructor_from_series(self): - expected = DatetimeIndex([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]) - s = Series([Timestamp('20110101'),Timestamp('20120101'),Timestamp('20130101')]) + expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'), + Timestamp('20130101')]) + s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp( + '20130101')]) result = Index(s) self.assertTrue(result.equals(expected)) result = DatetimeIndex(s) @@ -758,37 +766,44 @@ def test_constructor_from_series(self): # GH 6273 # create from a series, passing a freq - s = Series(pd.to_datetime(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'])) + s = Series(pd.to_datetime(['1-1-1990', '2-1-1990', '3-1-1990', + '4-1-1990', '5-1-1990'])) result = DatetimeIndex(s, freq='MS') - expected = DatetimeIndex(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'],freq='MS') + expected = DatetimeIndex( + ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' + ], freq='MS') self.assertTrue(result.equals(expected)) - df = pd.DataFrame(np.random.rand(5,3)) - df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'] + df = pd.DataFrame(np.random.rand(5, 3)) + df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', + '5-1-1990'] result = DatetimeIndex(df['date'], freq='MS') self.assertTrue(result.equals(expected)) self.assertEqual(df['date'].dtype, object) - exp = pd.Series(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'], name='date') + exp = pd.Series( + ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990' + ], name='date') self.assert_series_equal(df['date'], exp) # GH 6274 # infer freq of same result = pd.infer_freq(df['date']) - self.assertEqual(result,'MS') + self.assertEqual(result, 'MS') def test_constructor_ndarray_like(self): # GH 5460#issuecomment-44474502 # it should be possible to convert any object that satisfies the numpy # ndarray interface directly into an Index class ArrayLike(object): + def __init__(self, array): self.array = array + def __array__(self, dtype=None): return self.array - for array in [np.arange(5), - np.array(['a', 'b', 'c']), + for array in [np.arange(5), np.array(['a', 'b', 'c']), date_range('2000-01-01', periods=3).values]: expected = pd.Index(array) result = pd.Index(ArrayLike(array)) @@ -815,65 +830,82 @@ def test_constructor_simple_new(self): def test_constructor_dtypes(self): - for idx in [Index(np.array([1, 2, 3], dtype=int)), - Index(np.array([1, 2, 3], dtype=int), dtype=int), - Index(np.array([1., 2., 3.], dtype=float), dtype=int), - Index([1, 2, 3], dtype=int), - Index([1., 2., 3.], dtype=int)]: + for idx in [Index(np.array([1, 2, 3], dtype=int)), Index( + np.array( + [1, 2, 3], dtype=int), dtype=int), Index( + np.array( + [1., 2., 3.], dtype=float), dtype=int), Index( + [1, 2, 3], dtype=int), Index( + [1., 2., 3.], dtype=int)]: self.assertIsInstance(idx, Int64Index) - for idx in [Index(np.array([1., 2., 3.], dtype=float)), - Index(np.array([1, 2, 3], dtype=int), dtype=float), - Index(np.array([1., 2., 3.], dtype=float), dtype=float), - Index([1, 2, 3], dtype=float), - Index([1., 2., 3.], dtype=float)]: + for idx in [Index(np.array([1., 2., 3.], dtype=float)), Index( + np.array( + [1, 2, 3], dtype=int), dtype=float), Index( + np.array( + [1., 2., 3.], dtype=float), dtype=float), Index( + [1, 2, 3], dtype=float), Index( + [1., 2., 3.], dtype=float)]: self.assertIsInstance(idx, Float64Index) - for idx in [Index(np.array([True, False, True], dtype=bool)), - Index([True, False, True]), - Index(np.array([True, False, True], dtype=bool), dtype=bool), - Index([True, False, True], dtype=bool)]: + for idx in [Index(np.array( + [True, False, True], dtype=bool)), Index([True, False, True]), + Index( + np.array( + [True, False, True], dtype=bool), dtype=bool), + Index( + [True, False, True], dtype=bool)]: self.assertIsInstance(idx, Index) self.assertEqual(idx.dtype, object) - for idx in [Index(np.array([1, 2, 3], dtype=int), dtype='category'), - Index([1, 2, 3], dtype='category'), - Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')]), dtype='category'), - Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], dtype='category')]: + for idx in [Index( + np.array([1, 2, 3], dtype=int), dtype='category'), Index( + [1, 2, 3], dtype='category'), Index( + np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')]), dtype='category'), Index( + [datetime(2011, 1, 1), datetime(2011, 1, 2) + ], dtype='category')]: self.assertIsInstance(idx, CategoricalIndex) - for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')])), + for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')])), Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])]: self.assertIsInstance(idx, DatetimeIndex) - for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')]), dtype=object), - Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], dtype=object)]: + for idx in [Index( + np.array([np.datetime64('2011-01-01'), np.datetime64( + '2011-01-02')]), dtype=object), Index( + [datetime(2011, 1, 1), datetime(2011, 1, 2) + ], dtype=object)]: self.assertNotIsInstance(idx, DatetimeIndex) self.assertIsInstance(idx, Index) self.assertEqual(idx.dtype, object) - for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')])), - Index([timedelta(1), timedelta(1)])]: + for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64( + 1, 'D')])), Index([timedelta(1), timedelta(1)])]: self.assertIsInstance(idx, TimedeltaIndex) - for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), dtype=object), - Index([timedelta(1), timedelta(1)], dtype=object)]: + for idx in [Index( + np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), + dtype=object), Index( + [timedelta(1), timedelta(1)], dtype=object)]: self.assertNotIsInstance(idx, TimedeltaIndex) self.assertIsInstance(idx, Index) self.assertEqual(idx.dtype, object) def test_view_with_args(self): - restricted = ['unicodeIndex','strIndex','catIndex','boolIndex','empty'] + restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex', + 'empty'] for i in restricted: ind = self.indices[i] # with arguments - self.assertRaises(TypeError, lambda : ind.view('i8')) + self.assertRaises(TypeError, lambda: ind.view('i8')) # these are ok - for i in list(set(self.indices.keys())-set(restricted)): + for i in list(set(self.indices.keys()) - set(restricted)): ind = self.indices[i] # with arguments @@ -883,8 +915,8 @@ def test_legacy_pickle_identity(self): # GH 8431 pth = tm.get_data_path() - s1 = pd.read_pickle(os.path.join(pth,'s1-0.12.0.pickle')) - s2 = pd.read_pickle(os.path.join(pth,'s2-0.12.0.pickle')) + s1 = pd.read_pickle(os.path.join(pth, 's1-0.12.0.pickle')) + s2 = pd.read_pickle(os.path.join(pth, 's2-0.12.0.pickle')) self.assertFalse(s1.index.identical(s2.index)) self.assertFalse(s1.index.equals(s2.index)) @@ -918,22 +950,20 @@ def test_insert(self): # validate neg/pos inserts result = Index(['b', 'c', 'd']) - #test 0th element - self.assertTrue(Index(['a', 'b', 'c', 'd']).equals( - result.insert(0, 'a'))) + # test 0th element + self.assertTrue(Index(['a', 'b', 'c', 'd']).equals(result.insert(0, + 'a'))) - #test Nth element that follows Python list behavior - self.assertTrue(Index(['b', 'c', 'e', 'd']).equals( - result.insert(-1, 'e'))) + # test Nth element that follows Python list behavior + self.assertTrue(Index(['b', 'c', 'e', 'd']).equals(result.insert(-1, + 'e'))) - #test loc +/- neq (0, -1) - self.assertTrue(result.insert(1, 'z').equals( - result.insert(-2, 'z'))) + # test loc +/- neq (0, -1) + self.assertTrue(result.insert(1, 'z').equals(result.insert(-2, 'z'))) - #test empty + # test empty null_index = Index([]) - self.assertTrue(Index(['a']).equals( - null_index.insert(0, 'a'))) + self.assertTrue(Index(['a']).equals(null_index.insert(0, 'a'))) def test_delete(self): idx = Index(['a', 'b', 'c', 'd'], name='idx') @@ -1021,9 +1051,13 @@ def test_nanosecond_index_access(self): first_value = x.asof(x.index[0]) # this does not yet work, as parsing strings is done via dateutil - #self.assertEqual(first_value, x['2013-01-01 00:00:00.000000050+0000']) + # self.assertEqual(first_value, + # x['2013-01-01 00:00:00.000000050+0000']) - self.assertEqual(first_value, x[Timestamp(np.datetime64('2013-01-01 00:00:00.000000050+0000', 'ns'))]) + self.assertEqual( + first_value, + x[Timestamp(np.datetime64('2013-01-01 00:00:00.000000050+0000', + 'ns'))]) def test_comparators(self): index = self.dateIndex @@ -1127,8 +1161,8 @@ def test_intersection(self): self.assertEqual(result3.name, expected3.name) # non-monotonic non-unique - idx1 = Index(['A','B','A','C']) - idx2 = Index(['B','D']) + idx1 = Index(['A', 'B', 'A', 'C']) + idx2 = Index(['B', 'D']) expected = Index(['B'], dtype='object') result = idx1.intersection(idx2) self.assertTrue(result.equals(expected)) @@ -1345,23 +1379,22 @@ def test_format(self): index = Index([datetime.now()]) - - # windows has different precision on datetime.datetime.now (it doesn't include us - # since the default for Timestamp shows these but Index formating does not - # we are skipping + # windows has different precision on datetime.datetime.now (it doesn't + # include us since the default for Timestamp shows these but Index + # formating does not we are skipping if not is_platform_windows(): formatted = index.format() expected = [str(index[0])] self.assertEqual(formatted, expected) # 2845 - index = Index([1, 2.0+3.0j, np.nan]) + index = Index([1, 2.0 + 3.0j, np.nan]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEqual(formatted, expected) # is this really allowed? - index = Index([1, 2.0+3.0j, None]) + index = Index([1, 2.0 + 3.0j, None]) formatted = index.format() expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEqual(formatted, expected) @@ -1453,15 +1486,19 @@ def test_get_indexer_nearest(self): actual = idx.get_indexer([0, 5, 9], method=method, tolerance=0) tm.assert_numpy_array_equal(actual, [0, 5, 9]) - for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, 9]]): + for method, expected in zip(all_methods, [[0, 1, 8], [1, 2, 9], [0, 2, + 9]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) tm.assert_numpy_array_equal(actual, expected) - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, tolerance=1) + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, + tolerance=1) tm.assert_numpy_array_equal(actual, expected) - for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], [0, 2, -1]]): - actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, tolerance=0.2) + for method, expected in zip(all_methods, [[0, -1, -1], [-1, 2, -1], + [0, 2, -1]]): + actual = idx.get_indexer([0.2, 1.8, 8.5], method=method, + tolerance=0.2) tm.assert_numpy_array_equal(actual, expected) with tm.assertRaisesRegexp(ValueError, 'limit argument'): @@ -1475,7 +1512,8 @@ def test_get_indexer_nearest_decreasing(self): actual = idx.get_indexer([0, 5, 9], method=method) tm.assert_numpy_array_equal(actual, [9, 4, 0]) - for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, 0]]): + for method, expected in zip(all_methods, [[8, 7, 0], [9, 8, 1], [9, 7, + 0]]): actual = idx.get_indexer([0.2, 1.8, 8.5], method=method) tm.assert_numpy_array_equal(actual, expected) @@ -1666,8 +1704,9 @@ def test_tuple_union_bug(self): aidx1 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], dtype=[('num', int), ('let', 'a1')]) - aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B'), (1, 'C'), (2, - 'C')], dtype=[('num', int), ('let', 'a1')]) + aidx2 = np.array([(1, 'A'), (2, 'A'), (1, 'B'), + (2, 'B'), (1, 'C'), (2, 'C')], + dtype=[('num', int), ('let', 'a1')]) idx1 = pandas.Index(aidx1) idx2 = pandas.Index(aidx2) @@ -1694,8 +1733,7 @@ def test_get_set_value(self): values = np.random.randn(100) date = self.dateIndex[67] - assert_almost_equal(self.dateIndex.get_value(values, date), - values[67]) + assert_almost_equal(self.dateIndex.get_value(values, date), values[67]) self.dateIndex.set_value(values, date, 10) self.assertEqual(values[67], 10) @@ -1748,7 +1786,7 @@ def check_idx(idx): idx.name = 'foobar' tm.assert_numpy_array_equal(expected, - idx.isin(values, level='foobar')) + idx.isin(values, level='foobar')) self.assertRaises(KeyError, idx.isin, values, level='xyzzy') self.assertRaises(KeyError, idx.isin, values, level=np.nan) @@ -1763,7 +1801,8 @@ def test_boolean_cmp(self): idx = Index(values) res = (idx == values) - tm.assert_numpy_array_equal(res,np.array([True,True,True,True],dtype=bool)) + tm.assert_numpy_array_equal(res, np.array( + [True, True, True, True], dtype=bool)) def test_get_level_values(self): result = self.strIndex.get_level_values(0) @@ -1790,15 +1829,16 @@ def test_str_attribute(self): idx = Index([' jack', 'jill ', ' jesse ', 'frank']) for method in methods: expected = Index([getattr(str, method)(x) for x in idx.values]) - tm.assert_index_equal(getattr(Index.str, method)(idx.str), expected) + tm.assert_index_equal( + getattr(Index.str, method)(idx.str), expected) # create a few instances that are not able to use .str accessor - indices = [Index(range(5)), - tm.makeDateIndex(10), + indices = [Index(range(5)), tm.makeDateIndex(10), MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), PeriodIndex(start='2000', end='2010', freq='A')] for idx in indices: - with self.assertRaisesRegexp(AttributeError, 'only use .str accessor'): + with self.assertRaisesRegexp(AttributeError, + 'only use .str accessor'): idx.str.repeat(2) idx = Index(['a b c', 'd e', 'f']) @@ -1806,8 +1846,7 @@ def test_str_attribute(self): tm.assert_index_equal(idx.str.split(), expected) tm.assert_index_equal(idx.str.split(expand=False), expected) - expected = MultiIndex.from_tuples([('a', 'b', 'c'), - ('d', 'e', np.nan), + expected = MultiIndex.from_tuples([('a', 'b', 'c'), ('d', 'e', np.nan), ('f', np.nan, np.nan)]) tm.assert_index_equal(idx.str.split(expand=True), expected) @@ -1831,10 +1870,9 @@ def test_tab_completion(self): def test_indexing_doesnt_change_class(self): idx = Index([1, 2, 3, 'a', 'b', 'c']) - self.assertTrue(idx[1:3].identical( - pd.Index([2, 3], dtype=np.object_))) - self.assertTrue(idx[[0,1]].identical( - pd.Index([1, 2], dtype=np.object_))) + self.assertTrue(idx[1:3].identical(pd.Index([2, 3], dtype=np.object_))) + self.assertTrue(idx[[0, 1]].identical(pd.Index( + [1, 2], dtype=np.object_))) def test_outer_join_sort(self): left_idx = Index(np.random.permutation(15)) @@ -1888,6 +1926,7 @@ def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): def test_reindex_preserves_type_if_target_is_empty_list_or_array(self): # GH7774 idx = pd.Index(list('abc')) + def get_reindex_type(target): return idx.reindex(target)[0].dtype.type @@ -1899,6 +1938,7 @@ def get_reindex_type(target): def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self): # GH7774 idx = pd.Index(list('abc')) + def get_reindex_type(target): return idx.reindex(target)[0].dtype.type @@ -1906,15 +1946,14 @@ def get_reindex_type(target): self.assertEqual(get_reindex_type(pd.Float64Index([])), np.float64) self.assertEqual(get_reindex_type(pd.DatetimeIndex([])), np.datetime64) - reindexed = idx.reindex(pd.MultiIndex([pd.Int64Index([]), - pd.Float64Index([])], - [[], []]))[0] + reindexed = idx.reindex(pd.MultiIndex( + [pd.Int64Index([]), pd.Float64Index([])], [[], []]))[0] self.assertEqual(reindexed.levels[0].dtype.type, np.int64) self.assertEqual(reindexed.levels[1].dtype.type, np.float64) def test_groupby(self): idx = Index(range(5)) - groups = idx.groupby(np.array([1,1,2,2,2])) + groups = idx.groupby(np.array([1, 1, 2, 2, 2])) exp = {1: [0, 1], 2: [2, 3, 4]} tm.assert_dict_equal(groups, exp) @@ -1923,7 +1962,8 @@ def test_equals_op_multiindex(self): # test comparisons of multiindex from pandas.compat import StringIO df = pd.read_csv(StringIO('a,b,c\n1,2,3\n4,5,6'), index_col=[0, 1]) - tm.assert_numpy_array_equal(df.index == df.index, np.array([True, True])) + tm.assert_numpy_array_equal(df.index == df.index, + np.array([True, True])) mi1 = MultiIndex.from_tuples([(1, 2), (4, 5)]) tm.assert_numpy_array_equal(df.index == mi1, np.array([True, True])) @@ -1936,10 +1976,11 @@ def test_equals_op_multiindex(self): index_a = Index(['foo', 'bar', 'baz']) with tm.assertRaisesRegexp(ValueError, "Lengths must match"): df.index == index_a - tm.assert_numpy_array_equal(index_a == mi3, np.array([False, False, False])) + tm.assert_numpy_array_equal(index_a == mi3, + np.array([False, False, False])) def test_conversion_preserves_name(self): - #GH 10875 + # GH 10875 i = pd.Index(['01:02:03', '01:02:04'], name='label') self.assertEqual(i.name, pd.to_datetime(i).name) self.assertEqual(i.name, pd.to_timedelta(i).name) @@ -1960,31 +2001,39 @@ def test_string_index_repr(self): # multiple lines idx = pd.Index(['a', 'bb', 'ccc'] * 10) if PY3: - expected = u"""Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', + expected = u"""\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object')""" + self.assertEqual(repr(idx), expected) else: - expected = u"""Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + expected = u"""\ +Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object')""" + self.assertEqual(unicode(idx), expected) # truncated idx = pd.Index(['a', 'bb', 'ccc'] * 100) if PY3: - expected = u"""Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', + expected = u"""\ +Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', ... 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], dtype='object', length=300)""" + self.assertEqual(repr(idx), expected) else: - expected = u"""Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', + expected = u"""\ +Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', ... u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], dtype='object', length=300)""" + self.assertEqual(unicode(idx), expected) # short @@ -1993,7 +2042,8 @@ def test_string_index_repr(self): expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')""" self.assertEqual(repr(idx), expected) else: - expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')""" + expected = u"""\ +Index([u'あ', u'いい', u'ううう'], dtype='object')""" self.assertEqual(unicode(idx), expected) # multiple lines @@ -2003,12 +2053,14 @@ def test_string_index_repr(self): 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], dtype='object')""" + self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" + self.assertEqual(unicode(idx), expected) # truncated @@ -2018,12 +2070,14 @@ def test_string_index_repr(self): ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], dtype='object', length=300)""" + self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', ... u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object', length=300)""" + self.assertEqual(unicode(idx), expected) # Emable Unicode option ----------------------------------------- @@ -2046,6 +2100,7 @@ def test_string_index_repr(self): 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], dtype='object')""" + self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', @@ -2053,6 +2108,7 @@ def test_string_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object')""" + self.assertEqual(unicode(idx), expected) # truncated @@ -2064,6 +2120,7 @@ def test_string_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], dtype='object', length=300)""" + self.assertEqual(repr(idx), expected) else: expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', @@ -2072,6 +2129,7 @@ def test_string_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], dtype='object', length=300)""" + self.assertEqual(unicode(idx), expected) @@ -2079,13 +2137,14 @@ class TestCategoricalIndex(Base, tm.TestCase): _holder = CategoricalIndex def setUp(self): - self.indices = dict(catIndex = tm.makeCategoricalIndex(100)) + self.indices = dict(catIndex=tm.makeCategoricalIndex(100)) self.setup_indices() def create_index(self, categories=None, ordered=False): if categories is None: categories = list('cab') - return CategoricalIndex(list('aabbca'), categories=categories, ordered=ordered) + return CategoricalIndex( + list('aabbca'), categories=categories, ordered=ordered) def test_construction(self): @@ -2093,49 +2152,55 @@ def test_construction(self): categories = ci.categories result = Index(ci) - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) self.assertFalse(result.ordered) result = Index(ci.values) - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) self.assertFalse(result.ordered) # empty result = CategoricalIndex(categories=categories) self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes, np.array([],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array([], dtype='int8')) self.assertFalse(result.ordered) # passing categories - result = CategoricalIndex(list('aabbca'),categories=categories) + result = CategoricalIndex(list('aabbca'), categories=categories) self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) c = pd.Categorical(list('aabbca')) result = CategoricalIndex(c) self.assertTrue(result.categories.equals(Index(list('abc')))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) self.assertFalse(result.ordered) - result = CategoricalIndex(c,categories=categories) + result = CategoricalIndex(c, categories=categories) self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) self.assertFalse(result.ordered) - ci = CategoricalIndex(c,categories=list('abcd')) + ci = CategoricalIndex(c, categories=list('abcd')) result = CategoricalIndex(ci) self.assertTrue(result.categories.equals(Index(categories))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,2,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, 2, 0], dtype='int8')) self.assertFalse(result.ordered) result = CategoricalIndex(ci, categories=list('ab')) self.assertTrue(result.categories.equals(Index(list('ab')))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,-1,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, -1, 0], dtype='int8')) self.assertFalse(result.ordered) result = CategoricalIndex(ci, categories=list('ab'), ordered=True) self.assertTrue(result.categories.equals(Index(list('ab')))) - tm.assert_numpy_array_equal(result.codes,np.array([0,0,1,1,-1,0],dtype='int8')) + tm.assert_numpy_array_equal(result.codes, np.array( + [0, 0, 1, 1, -1, 0], dtype='int8')) self.assertTrue(result.ordered) # turn me to an Index @@ -2149,19 +2214,21 @@ def test_construction_with_dtype(self): ci = self.create_index(categories=list('abc')) result = Index(np.array(ci), dtype='category') - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) result = Index(np.array(ci).tolist(), dtype='category') - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) # these are generally only equal when the categories are reordered ci = self.create_index() - result = Index(np.array(ci), dtype='category').reorder_categories(ci.categories) - tm.assert_index_equal(result,ci,exact=True) + result = Index( + np.array(ci), dtype='category').reorder_categories(ci.categories) + tm.assert_index_equal(result, ci, exact=True) # make sure indexes are handled - expected = CategoricalIndex([0,1,2], categories=[0,1,2], ordered=True) + expected = CategoricalIndex([0, 1, 2], categories=[0, 1, 2], + ordered=True) idx = Index(range(3)) result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) @@ -2172,30 +2239,34 @@ def test_disallow_set_ops(self): # set ops (+/-) raise TypeError idx = pd.Index(pd.Categorical(['a', 'b'])) - self.assertRaises(TypeError, lambda : idx - idx) - self.assertRaises(TypeError, lambda : idx + idx) - self.assertRaises(TypeError, lambda : idx - ['a','b']) - self.assertRaises(TypeError, lambda : idx + ['a','b']) - self.assertRaises(TypeError, lambda : ['a','b'] - idx) - self.assertRaises(TypeError, lambda : ['a','b'] + idx) + self.assertRaises(TypeError, lambda: idx - idx) + self.assertRaises(TypeError, lambda: idx + idx) + self.assertRaises(TypeError, lambda: idx - ['a', 'b']) + self.assertRaises(TypeError, lambda: idx + ['a', 'b']) + self.assertRaises(TypeError, lambda: ['a', 'b'] - idx) + self.assertRaises(TypeError, lambda: ['a', 'b'] + idx) def test_method_delegation(self): ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) result = ci.set_categories(list('cab')) - tm.assert_index_equal(result, CategoricalIndex(list('aabbca'), categories=list('cab'))) + tm.assert_index_equal(result, CategoricalIndex( + list('aabbca'), categories=list('cab'))) ci = CategoricalIndex(list('aabbca'), categories=list('cab')) result = ci.rename_categories(list('efg')) - tm.assert_index_equal(result, CategoricalIndex(list('ffggef'), categories=list('efg'))) + tm.assert_index_equal(result, CategoricalIndex( + list('ffggef'), categories=list('efg'))) ci = CategoricalIndex(list('aabbca'), categories=list('cab')) result = ci.add_categories(['d']) - tm.assert_index_equal(result, CategoricalIndex(list('aabbca'), categories=list('cabd'))) + tm.assert_index_equal(result, CategoricalIndex( + list('aabbca'), categories=list('cabd'))) ci = CategoricalIndex(list('aabbca'), categories=list('cab')) result = ci.remove_categories(['c']) - tm.assert_index_equal(result, CategoricalIndex(list('aabb') + [np.nan] + ['a'], categories=list('ab'))) + tm.assert_index_equal(result, CategoricalIndex( + list('aabb') + [np.nan] + ['a'], categories=list('ab'))) ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) result = ci.as_unordered() @@ -2203,10 +2274,12 @@ def test_method_delegation(self): ci = CategoricalIndex(list('aabbca'), categories=list('cabdef')) result = ci.as_ordered() - tm.assert_index_equal(result, CategoricalIndex(list('aabbca'), categories=list('cabdef'), ordered=True)) + tm.assert_index_equal(result, CategoricalIndex( + list('aabbca'), categories=list('cabdef'), ordered=True)) # invalid - self.assertRaises(ValueError, lambda : ci.set_categories(list('cab'), inplace=True)) + self.assertRaises(ValueError, lambda: ci.set_categories( + list('cab'), inplace=True)) def test_contains(self): @@ -2222,22 +2295,24 @@ def test_contains(self): self.assertFalse(1 in ci) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - ci = CategoricalIndex(list('aabbca'), categories=list('cabdef') + [np.nan]) + ci = CategoricalIndex( + list('aabbca'), categories=list('cabdef') + [np.nan]) self.assertFalse(np.nan in ci) - ci = CategoricalIndex(list('aabbca') + [np.nan], categories=list('cabdef')) + ci = CategoricalIndex( + list('aabbca') + [np.nan], categories=list('cabdef')) self.assertTrue(np.nan in ci) def test_min_max(self): ci = self.create_index(ordered=False) - self.assertRaises(TypeError, lambda : ci.min()) - self.assertRaises(TypeError, lambda : ci.max()) + self.assertRaises(TypeError, lambda: ci.min()) + self.assertRaises(TypeError, lambda: ci.max()) ci = self.create_index(ordered=True) - self.assertEqual(ci.min(),'c') - self.assertEqual(ci.max(),'b') + self.assertEqual(ci.min(), 'c') + self.assertEqual(ci.max(), 'b') def test_append(self): @@ -2246,50 +2321,54 @@ def test_append(self): # append cats with the same categories result = ci[:3].append(ci[3:]) - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) foos = [ci[:1], ci[1:3], ci[3:]] result = foos[0].append(foos[1:]) - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) # empty result = ci.append([]) - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) # appending with different categories or reoreded is not ok - self.assertRaises(TypeError, lambda : ci.append(ci.values.set_categories(list('abcd')))) - self.assertRaises(TypeError, lambda : ci.append(ci.values.reorder_categories(list('abc')))) + self.assertRaises( + TypeError, + lambda: ci.append(ci.values.set_categories(list('abcd')))) + self.assertRaises( + TypeError, + lambda: ci.append(ci.values.reorder_categories(list('abc')))) # with objects - result = ci.append(['c','a']) + result = ci.append(['c', 'a']) expected = CategoricalIndex(list('aabbcaca'), categories=categories) - tm.assert_index_equal(result,expected,exact=True) + tm.assert_index_equal(result, expected, exact=True) # invalid objects - self.assertRaises(TypeError, lambda : ci.append(['a','d'])) + self.assertRaises(TypeError, lambda: ci.append(['a', 'd'])) def test_insert(self): ci = self.create_index() categories = ci.categories - #test 0th element + # test 0th element result = ci.insert(0, 'a') - expected = CategoricalIndex(list('aaabbca'),categories=categories) - tm.assert_index_equal(result,expected,exact=True) + expected = CategoricalIndex(list('aaabbca'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) - #test Nth element that follows Python list behavior + # test Nth element that follows Python list behavior result = ci.insert(-1, 'a') - expected = CategoricalIndex(list('aabbcaa'),categories=categories) - tm.assert_index_equal(result,expected,exact=True) + expected = CategoricalIndex(list('aabbcaa'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) - #test empty + # test empty result = CategoricalIndex(categories=categories).insert(0, 'a') - expected = CategoricalIndex(['a'],categories=categories) - tm.assert_index_equal(result,expected,exact=True) + expected = CategoricalIndex(['a'], categories=categories) + tm.assert_index_equal(result, expected, exact=True) # invalid - self.assertRaises(TypeError, lambda : ci.insert(0,'d')) + self.assertRaises(TypeError, lambda: ci.insert(0, 'd')) def test_delete(self): @@ -2297,12 +2376,12 @@ def test_delete(self): categories = ci.categories result = ci.delete(0) - expected = CategoricalIndex(list('abbca'),categories=categories) - tm.assert_index_equal(result,expected,exact=True) + expected = CategoricalIndex(list('abbca'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) result = ci.delete(-1) - expected = CategoricalIndex(list('aabbc'),categories=categories) - tm.assert_index_equal(result,expected,exact=True) + expected = CategoricalIndex(list('aabbc'), categories=categories) + tm.assert_index_equal(result, expected, exact=True) with tm.assertRaises((IndexError, ValueError)): # either depeidnig on numpy version @@ -2312,7 +2391,7 @@ def test_astype(self): ci = self.create_index() result = ci.astype('category') - tm.assert_index_equal(result,ci,exact=True) + tm.assert_index_equal(result, ci, exact=True) result = ci.astype(object) self.assertTrue(result.equals(Index(np.array(ci)))) @@ -2326,7 +2405,7 @@ def test_reindex_base(self): # determined by cat ordering idx = self.create_index() - expected = np.array([4,0,1,5,2,3]) + expected = np.array([4, 0, 1, 5, 2, 3]) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) @@ -2339,28 +2418,37 @@ def test_reindexing(self): ci = self.create_index() oidx = Index(np.array(ci)) - for n in [1,2,5,len(ci)]: - finder = oidx[np.random.randint(0,len(ci),size=n)] + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] expected = oidx.get_indexer_non_unique(finder)[0] actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) def test_reindex_dtype(self): - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c']) + res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(['a', 'c' + ]) tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex(Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']), exact=True) + res, indexer = CategoricalIndex(['a', 'b', 'c', 'a']).reindex( + Categorical(['a', 'c'])) + tm.assert_index_equal(res, CategoricalIndex( + ['a', 'a', 'c'], categories=['a', 'c']), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c']) - tm.assert_index_equal(res, Index(['a', 'a', 'c'], dtype='object'), exact=True) + res, indexer = CategoricalIndex( + ['a', 'b', 'c', 'a' + ], categories=['a', 'b', 'c', 'd']).reindex(['a', 'c']) + tm.assert_index_equal(res, Index( + ['a', 'a', 'c'], dtype='object'), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) - res, indexer = CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c'])) - tm.assert_index_equal(res, CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']), exact=True) + res, indexer = CategoricalIndex( + ['a', 'b', 'c', 'a'], + categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c'])) + tm.assert_index_equal(res, CategoricalIndex( + ['a', 'a', 'c'], categories=['a', 'c']), exact=True) tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2])) def test_duplicates(self): @@ -2374,16 +2462,19 @@ def test_duplicates(self): def test_get_indexer(self): - idx1 = CategoricalIndex(list('aabcde'),categories=list('edabc')) + idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc')) idx2 = CategoricalIndex(list('abf')) for indexer in [idx2, list('abf'), Index(list('abf'))]: r1 = idx1.get_indexer(idx2) assert_almost_equal(r1, [0, 1, 2, -1]) - self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='pad')) - self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='backfill')) - self.assertRaises(NotImplementedError, lambda : idx2.get_indexer(idx1, method='nearest')) + self.assertRaises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='pad')) + self.assertRaises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='backfill')) + self.assertRaises(NotImplementedError, + lambda: idx2.get_indexer(idx1, method='nearest')) def test_repr_roundtrip(self): @@ -2407,19 +2498,29 @@ def test_repr_roundtrip(self): def test_isin(self): - ci = CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b']) - tm.assert_numpy_array_equal(ci.isin(['c']),np.array([False,False,False,True,False,False])) - tm.assert_numpy_array_equal(ci.isin(['c','a','b']),np.array([True]*5 + [False])) - tm.assert_numpy_array_equal(ci.isin(['c','a','b',np.nan]),np.array([True]*6)) + ci = CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']) + tm.assert_numpy_array_equal( + ci.isin(['c']), + np.array([False, False, False, True, False, False])) + tm.assert_numpy_array_equal( + ci.isin(['c', 'a', 'b']), np.array([True] * 5 + [False])) + tm.assert_numpy_array_equal( + ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) # mismatched categorical -> coerced to ndarray so doesn't matter - tm.assert_numpy_array_equal(ci.isin(ci.set_categories(list('abcdefghi'))),np.array([True]*6)) - tm.assert_numpy_array_equal(ci.isin(ci.set_categories(list('defghi'))),np.array([False]*5 + [True])) + tm.assert_numpy_array_equal( + ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * + 6)) + tm.assert_numpy_array_equal( + ci.isin(ci.set_categories(list('defghi'))), + np.array([False] * 5 + [True])) def test_identical(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) - ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) + ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], + ordered=True) self.assertTrue(ci1.identical(ci1)) self.assertTrue(ci1.identical(ci1.copy())) self.assertFalse(ci1.identical(ci2)) @@ -2427,7 +2528,8 @@ def test_identical(self): def test_equals(self): ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) - ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], ordered=True) + ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'], + ordered=True) self.assertTrue(ci1.equals(ci1)) self.assertFalse(ci1.equals(ci2)) @@ -2442,24 +2544,34 @@ def test_equals(self): self.assertTrue((ci1 >= ci1).all()) self.assertFalse((ci1 == 1).all()) - self.assertTrue((ci1 == Index(['a','b'])).all()) + self.assertTrue((ci1 == Index(['a', 'b'])).all()) self.assertTrue((ci1 == ci1.values).all()) # invalid comparisons with tm.assertRaisesRegexp(ValueError, "Lengths must match"): - ci1 == Index(['a','b','c']) - self.assertRaises(TypeError, lambda : ci1 == ci2) - self.assertRaises(TypeError, lambda : ci1 == Categorical(ci1.values, ordered=False)) - self.assertRaises(TypeError, lambda : ci1 == Categorical(ci1.values, categories=list('abc'))) + ci1 == Index(['a', 'b', 'c']) + self.assertRaises(TypeError, lambda: ci1 == ci2) + self.assertRaises( + TypeError, lambda: ci1 == Categorical(ci1.values, ordered=False)) + self.assertRaises( + TypeError, + lambda: ci1 == Categorical(ci1.values, categories=list('abc'))) # tests # make sure that we are testing for category inclusion properly - self.assertTrue(CategoricalIndex(list('aabca'),categories=['c','a','b']).equals(list('aabca'))) + self.assertTrue(CategoricalIndex( + list('aabca'), categories=['c', 'a', 'b']).equals(list('aabca'))) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.assertTrue(CategoricalIndex(list('aabca'),categories=['c','a','b',np.nan]).equals(list('aabca'))) + self.assertTrue(CategoricalIndex( + list('aabca'), categories=['c', 'a', 'b', np.nan]).equals(list( + 'aabca'))) - self.assertFalse(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b']).equals(list('aabca'))) - self.assertTrue(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b']).equals(list('aabca') + [np.nan])) + self.assertFalse(CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( + 'aabca'))) + self.assertTrue(CategoricalIndex( + list('aabca') + [np.nan], categories=['c', 'a', 'b']).equals(list( + 'aabca') + [np.nan])) def test_string_categorical_index_repr(self): # short @@ -2478,6 +2590,7 @@ def test_string_categorical_index_repr(self): 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', @@ -2485,6 +2598,7 @@ def test_string_categorical_index_repr(self): u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) # truncated @@ -2494,6 +2608,7 @@ def test_string_categorical_index_repr(self): ... 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', @@ -2502,6 +2617,7 @@ def test_string_categorical_index_repr(self): u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)""" + self.assertEqual(unicode(idx), expected) # larger categories @@ -2510,6 +2626,7 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'm', 'o'], categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j', @@ -2534,6 +2651,7 @@ def test_string_categorical_index_repr(self): 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', @@ -2541,6 +2659,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) # truncated @@ -2550,6 +2669,7 @@ def test_string_categorical_index_repr(self): ... 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', @@ -2558,6 +2678,7 @@ def test_string_categorical_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + self.assertEqual(unicode(idx), expected) # larger categories @@ -2566,11 +2687,13 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) # Emable Unicode option ----------------------------------------- @@ -2593,6 +2716,7 @@ def test_string_categorical_index_repr(self): 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -2601,6 +2725,7 @@ def test_string_categorical_index_repr(self): u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) # truncated @@ -2612,6 +2737,7 @@ def test_string_categorical_index_repr(self): 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', @@ -2620,6 +2746,7 @@ def test_string_categorical_index_repr(self): u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)""" + self.assertEqual(unicode(idx), expected) # larger categories @@ -2628,11 +2755,13 @@ def test_string_categorical_index_repr(self): expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し', 'す', 'せ', 'そ'], categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" + self.assertEqual(repr(idx), expected) else: expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'], categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')""" + self.assertEqual(unicode(idx), expected) def test_fillna_categorical(self): @@ -2692,14 +2821,13 @@ def test_numeric_compat(self): tm.assert_index_equal(result, didx) result = idx * Series(np.arange(5, dtype='float64') + 0.1) - expected = Float64Index(np.arange(5, dtype='float64') * ( - np.arange(5, dtype='float64') + 0.1)) + expected = Float64Index(np.arange(5, dtype='float64') * + (np.arange(5, dtype='float64') + 0.1)) tm.assert_index_equal(result, expected) # invalid - self.assertRaises(TypeError, lambda: idx * date_range('20130101', - periods=5) - ) + self.assertRaises(TypeError, + lambda: idx * date_range('20130101', periods=5)) self.assertRaises(ValueError, lambda: idx * idx[0:3]) self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) @@ -2707,31 +2835,31 @@ def test_explicit_conversions(self): # GH 8608 # add/sub are overriden explicity for Float/Int Index - idx = self._holder(np.arange(5,dtype='int64')) + idx = self._holder(np.arange(5, dtype='int64')) # float conversions - arr = np.arange(5,dtype='int64')*3.2 + arr = np.arange(5, dtype='int64') * 3.2 expected = Float64Index(arr) fidx = idx * 3.2 - tm.assert_index_equal(fidx,expected) + tm.assert_index_equal(fidx, expected) fidx = 3.2 * idx - tm.assert_index_equal(fidx,expected) + tm.assert_index_equal(fidx, expected) # interops with numpy arrays expected = Float64Index(arr) - a = np.zeros(5,dtype='float64') + a = np.zeros(5, dtype='float64') result = fidx - a - tm.assert_index_equal(result,expected) + tm.assert_index_equal(result, expected) expected = Float64Index(-arr) - a = np.zeros(5,dtype='float64') + a = np.zeros(5, dtype='float64') result = a - fidx - tm.assert_index_equal(result,expected) + tm.assert_index_equal(result, expected) def test_ufunc_compat(self): - idx = self._holder(np.arange(5,dtype='int64')) + idx = self._holder(np.arange(5, dtype='int64')) result = np.sin(idx) - expected = Float64Index(np.sin(np.arange(5,dtype='int64'))) + expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) tm.assert_index_equal(result, expected) def test_index_groupby(self): @@ -2753,9 +2881,8 @@ def test_index_groupby(self): datetime(2011, 11, 1)], tz='UTC').values - ex_keys = pd.tslib.datetime_to_datetime64( - np.array([Timestamp('2011-11-01'), - Timestamp('2011-12-01')])) + ex_keys = pd.tslib.datetime_to_datetime64(np.array([Timestamp( + '2011-11-01'), Timestamp('2011-12-01')])) expected = {ex_keys[0][0]: [idx[0], idx[5]], ex_keys[0][1]: [idx[1], idx[4]]} self.assertEqual(idx.groupby(to_groupby), expected) @@ -2772,8 +2899,8 @@ class TestFloat64Index(Numeric, tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.indices = dict(mixed = Float64Index([1.5, 2, 3, 4, 5]), - float = Float64Index(np.arange(5) * 2.5)) + self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), + float=Float64Index(np.arange(5) * 2.5)) self.setup_indices() def create_index(self): @@ -2797,22 +2924,23 @@ def check_coerce(self, a, b, is_float_index=True): def test_constructor(self): # explicit construction - index = Float64Index([1,2,3,4,5]) + index = Float64Index([1, 2, 3, 4, 5]) self.assertIsInstance(index, Float64Index) - self.assertTrue((index.values == np.array([1,2,3,4,5],dtype='float64')).all()) - index = Float64Index(np.array([1,2,3,4,5])) + self.assertTrue((index.values == np.array( + [1, 2, 3, 4, 5], dtype='float64')).all()) + index = Float64Index(np.array([1, 2, 3, 4, 5])) self.assertIsInstance(index, Float64Index) - index = Float64Index([1.,2,3,4,5]) + index = Float64Index([1., 2, 3, 4, 5]) self.assertIsInstance(index, Float64Index) - index = Float64Index(np.array([1.,2,3,4,5])) + index = Float64Index(np.array([1., 2, 3, 4, 5])) self.assertIsInstance(index, Float64Index) self.assertEqual(index.dtype, float) - index = Float64Index(np.array([1.,2,3,4,5]),dtype=np.float32) + index = Float64Index(np.array([1., 2, 3, 4, 5]), dtype=np.float32) self.assertIsInstance(index, Float64Index) self.assertEqual(index.dtype, np.float64) - index = Float64Index(np.array([1,2,3,4,5]),dtype=np.float32) + index = Float64Index(np.array([1, 2, 3, 4, 5]), dtype=np.float32) self.assertIsInstance(index, Float64Index) self.assertEqual(index.dtype, np.float64) @@ -2828,22 +2956,24 @@ def test_constructor_invalid(self): # invalid self.assertRaises(TypeError, Float64Index, 0.) - self.assertRaises(TypeError, Float64Index, ['a','b',0.]) + self.assertRaises(TypeError, Float64Index, ['a', 'b', 0.]) self.assertRaises(TypeError, Float64Index, [Timestamp('20130101')]) def test_constructor_coerce(self): - self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5])) - self.check_coerce(self.float,Index(np.arange(5) * 2.5)) - self.check_coerce(self.float,Index(np.array(np.arange(5) * 2.5, dtype=object))) + self.check_coerce(self.mixed, Index([1.5, 2, 3, 4, 5])) + self.check_coerce(self.float, Index(np.arange(5) * 2.5)) + self.check_coerce(self.float, Index(np.array( + np.arange(5) * 2.5, dtype=object))) def test_constructor_explicit(self): # these don't auto convert - self.check_coerce(self.float,Index((np.arange(5) * 2.5), dtype=object), - is_float_index=False) - self.check_coerce(self.mixed,Index([1.5, 2, 3, 4, 5],dtype=object), + self.check_coerce(self.float, + Index((np.arange(5) * 2.5), dtype=object), is_float_index=False) + self.check_coerce(self.mixed, Index( + [1.5, 2, 3, 4, 5], dtype=object), is_float_index=False) def test_astype(self): @@ -2861,18 +2991,18 @@ def test_astype(self): def test_equals(self): - i = Float64Index([1.0,2.0]) + i = Float64Index([1.0, 2.0]) self.assertTrue(i.equals(i)) self.assertTrue(i.identical(i)) - i2 = Float64Index([1.0,2.0]) + i2 = Float64Index([1.0, 2.0]) self.assertTrue(i.equals(i2)) - i = Float64Index([1.0,np.nan]) + i = Float64Index([1.0, np.nan]) self.assertTrue(i.equals(i)) self.assertTrue(i.identical(i)) - i2 = Float64Index([1.0,np.nan]) + i2 = Float64Index([1.0, np.nan]) self.assertTrue(i.equals(i2)) def test_get_indexer(self): @@ -2881,8 +3011,10 @@ def test_get_indexer(self): target = [-0.1, 0.5, 1.1] tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) def test_get_loc(self): idx = Float64Index([0.0, 1.0, 2.0]) @@ -2897,8 +3029,8 @@ def test_get_loc(self): self.assertRaises(KeyError, idx.get_loc, 'foo') self.assertRaises(KeyError, idx.get_loc, 1.5) - self.assertRaises(KeyError, idx.get_loc, 1.5, - method='pad', tolerance=0.1) + self.assertRaises(KeyError, idx.get_loc, 1.5, method='pad', + tolerance=0.1) with tm.assertRaisesRegexp(ValueError, 'must be numeric'): idx.get_loc(1.4, method='nearest', tolerance='foo') @@ -2941,13 +3073,11 @@ def test_nan_multiple_containment(self): tm.assert_numpy_array_equal(i.isin([1.0]), np.array([True, False])) tm.assert_numpy_array_equal(i.isin([2.0, np.pi]), np.array([False, False])) - tm.assert_numpy_array_equal(i.isin([np.nan]), - np.array([False, True])) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(i.isin([1.0, np.nan]), np.array([True, True])) i = Float64Index([1.0, 2.0]) - tm.assert_numpy_array_equal(i.isin([np.nan]), - np.array([False, False])) + tm.assert_numpy_array_equal(i.isin([np.nan]), np.array([False, False])) def test_astype_from_object(self): index = Index([1.0, np.nan, 0.2], dtype='object') @@ -2977,7 +3107,7 @@ class TestInt64Index(Numeric, tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.indices = dict(index = Int64Index(np.arange(0, 20, 2))) + self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) self.setup_indices() def create_index(self): @@ -2986,6 +3116,7 @@ def create_index(self): def test_too_many_names(self): def testit(): self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) def test_constructor(self): @@ -3077,8 +3208,7 @@ def test_is_monotonic_na(self): pd.to_datetime(['NaT']), pd.to_datetime(['NaT', '2000-01-01']), pd.to_datetime(['2000-01-01', 'NaT', '2000-01-02']), - pd.to_timedelta(['1 day', 'NaT']), - ] + pd.to_timedelta(['1 day', 'NaT']), ] for index in examples: self.assertFalse(index.is_monotonic_increasing) self.assertFalse(index.is_monotonic_decreasing) @@ -3106,12 +3236,11 @@ def test_identical(self): self.assertTrue(same_values.identical(i)) self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object - ).identical(i)) + self.assertTrue(Index(same_values, name='foo', dtype=object).identical( + i)) - self.assertFalse( - self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) + self.assertFalse(self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) def test_get_indexer(self): target = Int64Index(np.arange(10)) @@ -3247,8 +3376,7 @@ def test_join_right(self): res, lidx, ridx = self.index.join(other, how='right', return_indexers=True) eres = other - elidx = np.array([-1, 6, -1, -1, 1, -1], - dtype=np.int64) + elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) tm.assertIsInstance(other, Int64Index) self.assertTrue(res.equals(eres)) @@ -3259,8 +3387,7 @@ def test_join_right(self): res, lidx, ridx = self.index.join(other_mono, how='right', return_indexers=True) eres = other_mono - elidx = np.array([-1, 1, -1, -1, 6, -1], - dtype=np.int64) + elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) tm.assertIsInstance(other, Int64Index) self.assertTrue(res.equals(eres)) tm.assert_numpy_array_equal(lidx, elidx) @@ -3387,8 +3514,9 @@ def test_int_name_format(self): repr(df) def test_print_unicode_columns(self): - df = pd.DataFrame( - {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + "\u05d1": [4, 5, 6], + "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_repr_summary(self): @@ -3466,13 +3594,11 @@ def create_index(self): return RangeIndex(5) def test_binops(self): - ops = [operator.add, operator.sub, operator.mul, - operator.floordiv, operator.truediv, pow] + ops = [operator.add, operator.sub, operator.mul, operator.floordiv, + operator.truediv, pow] scalars = [-1, 1, 2] - idxs = [RangeIndex(0, 10, 1), - RangeIndex(0, 20, 2), - RangeIndex(-10, 10, 2), - RangeIndex(5, -5, -1)] + idxs = [RangeIndex(0, 10, 1), RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), RangeIndex(5, -5, -1)] for op in ops: for a, b in combinations(idxs, 2): result = op(a, b) @@ -3487,6 +3613,7 @@ def test_binops(self): def test_too_many_names(self): def testit(): self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) def test_constructor(self): @@ -3527,13 +3654,8 @@ def test_constructor(self): self.assertRaises(TypeError, lambda: Index(0, 1000)) # invalid args - for i in [Index(['a', 'b']), - Series(['a', 'b']), - np.array(['a', 'b']), - [], - 'foo', - datetime(2000, 1, 1, 0, 0), - np.arange(0, 10)]: + for i in [Index(['a', 'b']), Series(['a', 'b']), np.array(['a', 'b']), + [], 'foo', datetime(2000, 1, 1, 0, 0), np.arange(0, 10)]: self.assertRaises(TypeError, lambda: RangeIndex(i)) def test_constructor_same(self): @@ -3647,8 +3769,7 @@ def test_constructor_corner(self): self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) # invalid passed type - self.assertRaises(TypeError, - lambda: RangeIndex(1, 5, dtype='float64')) + self.assertRaises(TypeError, lambda: RangeIndex(1, 5, dtype='float64')) def test_copy(self): i = RangeIndex(5, name='Foo') @@ -3686,8 +3807,7 @@ def test_insert(self): result = idx[1:4] # test 0th element - self.assertTrue(idx[0:4].equals( - result.insert(0, idx[0]))) + self.assertTrue(idx[0:4].equals(result.insert(0, idx[0]))) def test_delete(self): @@ -3768,12 +3888,11 @@ def test_identical(self): self.assertTrue(same_values.identical(self.index.copy(dtype=object))) self.assertFalse(i.identical(self.index)) - self.assertTrue(Index(same_values, name='foo', dtype=object - ).identical(i)) + self.assertTrue(Index(same_values, name='foo', dtype=object).identical( + i)) - self.assertFalse( - self.index.copy(dtype=object) - .identical(self.index.copy(dtype='int64'))) + self.assertFalse(self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) def test_get_indexer(self): target = RangeIndex(10) @@ -3802,8 +3921,8 @@ def test_join_outer(self): noidx_res = self.index.join(other, how='outer') self.assertTrue(res.equals(noidx_res)) - eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25]) + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25]) elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, @@ -4039,8 +4158,9 @@ def test_take_preserve_name(self): self.assertEqual(index.name, taken.name) def test_print_unicode_columns(self): - df = pd.DataFrame( - {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + "\u05d1": [4, 5, 6], + "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_repr_roundtrip(self): @@ -4221,10 +4341,10 @@ def test_str(self): self.assertTrue("'foo'" in str(idx)) self.assertTrue(idx.__class__.__name__ in str(idx)) - if hasattr(idx,'tz'): + if hasattr(idx, 'tz'): if idx.tz is not None: self.assertTrue(idx.tz in str(idx)) - if hasattr(idx,'freq'): + if hasattr(idx, 'freq'): self.assertTrue("freq='%s'" % idx.freqstr in str(idx)) def test_view(self): @@ -4238,14 +4358,15 @@ def test_view(self): i_view = i.view(self._holder) result = self._holder(i) - tm.assert_index_equal(result, i) + tm.assert_index_equal(result, i_view) + class TestDatetimeIndex(DatetimeLike, tm.TestCase): _holder = DatetimeIndex _multiprocess_can_split_ = True def setUp(self): - self.indices = dict(index = tm.makeDateIndex(10)) + self.indices = dict(index=tm.makeDateIndex(10)) self.setup_indices() def create_index(self): @@ -4258,24 +4379,26 @@ def test_shift(self): drange = self.create_index() result = drange.shift(1) - expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', - '2013-01-06'], freq='D') + expected = DatetimeIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', + '2013-01-06'], freq='D') self.assert_index_equal(result, expected) result = drange.shift(-1) - expected = DatetimeIndex(['2012-12-31','2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04'], + expected = DatetimeIndex(['2012-12-31', '2013-01-01', '2013-01-02', + '2013-01-03', '2013-01-04'], freq='D') self.assert_index_equal(result, expected) result = drange.shift(3, freq='2D') - expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', '2013-01-10', - '2013-01-11'],freq='D') + expected = DatetimeIndex(['2013-01-07', '2013-01-08', '2013-01-09', + '2013-01-10', + '2013-01-11'], freq='D') self.assert_index_equal(result, expected) - def test_construction_with_alt(self): - i = pd.date_range('20130101',periods=5,freq='H',tz='US/Eastern') + i = pd.date_range('20130101', periods=5, freq='H', tz='US/Eastern') i2 = DatetimeIndex(i, dtype=i.dtype) self.assert_index_equal(i, i2) @@ -4285,7 +4408,8 @@ def test_construction_with_alt(self): i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype) self.assert_index_equal(i, i2) - i2 = DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) + i2 = DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz=i.dtype.tz) self.assert_index_equal(i, i2) # localize into the provided tz @@ -4298,7 +4422,8 @@ def test_construction_with_alt(self): self.assert_index_equal(i2, expected) # incompat tz/dtype - self.assertRaises(ValueError, lambda : DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) + self.assertRaises(ValueError, lambda: DatetimeIndex( + i.tz_localize(None).asi8, dtype=i.dtype, tz='US/Pacific')) def test_pickle_compat_construction(self): pass @@ -4306,16 +4431,21 @@ def test_pickle_compat_construction(self): def test_construction_index_with_mixed_timezones(self): # GH 11488 # no tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + result = Index( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) self.assertIsNone(result.tz) # same tz results in DatetimeIndex result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + ], tz='Asia/Tokyo', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) self.assertIsNotNone(result.tz) @@ -4323,8 +4453,10 @@ def test_construction_index_with_mixed_timezones(self): # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) @@ -4332,14 +4464,18 @@ def test_construction_index_with_mixed_timezones(self): self.assertEqual(result.tz, exp.tz) # different tz results in Index(dtype=object) - result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') + exp = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertFalse(isinstance(result, DatetimeIndex)) result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') @@ -4347,9 +4483,11 @@ def test_construction_index_with_mixed_timezones(self): self.assertFalse(isinstance(result, DatetimeIndex)) # passing tz results in DatetimeIndex - result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), Timestamp('2011-01-03 00:00')], + result = Index([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), + Timestamp('2011-01-03 00:00')], tz='Asia/Tokyo', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) @@ -4362,8 +4500,10 @@ def test_construction_index_with_mixed_timezones(self): self.assertIsNone(result.tz) # length = 1 with tz - result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx') + result = Index( + [Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', + name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) self.assertIsNotNone(result.tz) @@ -4381,9 +4521,12 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): # same tz results in DatetimeIndex result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx') + pd.NaT, Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + pd.NaT, Timestamp('2011-01-02 10:00')], + tz='Asia/Tokyo', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) self.assertIsNotNone(result.tz) @@ -4391,8 +4534,11 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): # same tz results in DatetimeIndex (DST) result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - pd.NaT, Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')], + pd.NaT, + Timestamp('2011-08-01 10:00', tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, + Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) @@ -4401,7 +4547,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): # different tz results in Index(dtype=object) result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + name='idx') exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') @@ -4409,7 +4557,8 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): self.assertFalse(isinstance(result, DatetimeIndex)) result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], dtype='object', name='idx') @@ -4418,8 +4567,9 @@ def test_construction_index_with_mixed_timezones_with_NaT(self): # passing tz results in DatetimeIndex result = Index([pd.NaT, Timestamp('2011-01-01 10:00'), - pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], - tz='Asia/Tokyo', name='idx') + pd.NaT, Timestamp('2011-01-02 10:00', + tz='US/Eastern')], + tz='Asia/Tokyo', name='idx') exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'), pd.NaT, Timestamp('2011-01-03 00:00')], tz='Asia/Tokyo', name='idx') @@ -4445,30 +4595,41 @@ def test_construction_dti_with_mixed_timezones(self): # GH 11488 (not changed, added explicit tests) # no tz results in DatetimeIndex - result = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + result = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) # same tz results in DatetimeIndex result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx') + Timestamp('2011-01-02 10:00', + tz='Asia/Tokyo')], + name='idx') + exp = DatetimeIndex( + [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00') + ], tz='Asia/Tokyo', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) # same tz results in DatetimeIndex (DST) result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'), - Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')], + Timestamp('2011-08-01 10:00', + tz='US/Eastern')], + name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-08-01 10:00')], tz='US/Eastern', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) # different tz coerces tz-naive to tz-awareIndex(dtype=object) result = DatetimeIndex([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') - exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')], + Timestamp('2011-01-02 10:00', + tz='US/Eastern')], name='idx') + exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), + Timestamp('2011-01-02 10:00')], tz='US/Eastern', name='idx') self.assert_index_equal(result, exp, exact=True) self.assertTrue(isinstance(result, DatetimeIndex)) @@ -4476,15 +4637,18 @@ def test_construction_dti_with_mixed_timezones(self): # tz mismatch affecting to tz-aware raises TypeError/ValueError with tm.assertRaises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx') + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + name='idx') with tm.assertRaises(TypeError): - DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], + DatetimeIndex([Timestamp('2011-01-01 10:00'), + Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='Asia/Tokyo', name='idx') with tm.assertRaises(ValueError): DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'), - Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx') + Timestamp('2011-01-02 10:00', tz='US/Eastern')], + tz='US/Eastern', name='idx') def test_get_loc(self): idx = pd.date_range('2000-01-01', periods=3) @@ -4512,8 +4676,7 @@ def test_get_loc(self): with tm.assertRaisesRegexp(ValueError, 'must be convertible'): idx.get_loc('2000-01-01T12', method='nearest', tolerance='foo') with tm.assertRaises(KeyError): - idx.get_loc('2000-01-01T03', method='nearest', - tolerance='2 hours') + idx.get_loc('2000-01-01T03', method='nearest', tolerance='2 hours') self.assertEqual(idx.get_loc('2000', method='nearest'), slice(0, 3)) self.assertEqual(idx.get_loc('2000-01', method='nearest'), slice(0, 3)) @@ -4547,12 +4710,16 @@ def test_get_indexer(self): idx = pd.date_range('2000-01-01', periods=3) tm.assert_numpy_array_equal(idx.get_indexer(idx), [0, 1, 2]) - target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) + target = idx[0] + pd.to_timedelta(['-1 hour', '12 hours', + '1 day 1 hour']) tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) tm.assert_numpy_array_equal( - idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 hour')), [0, -1, 1]) with tm.assertRaises(ValueError): idx.get_indexer(idx[[0]], method='nearest', tolerance='foo') @@ -4561,7 +4728,7 @@ def test_roundtrip_pickle_with_tz(self): # GH 8367 # round-trip of timezone - index=date_range('20130101',periods=3,tz='US/Eastern',name='foo') + index = date_range('20130101', periods=3, tz='US/Eastern', name='foo') unpickled = self.round_trip_pickle(index) self.assertTrue(index.equals(unpickled)) @@ -4575,7 +4742,7 @@ def test_time_loc(self): # GH8667 from datetime import time from pandas.index import _SIZE_CUTOFF - ns = _SIZE_CUTOFF + np.array([-100, 100],dtype=np.int64) + ns = _SIZE_CUTOFF + np.array([-100, 100], dtype=np.int64) key = time(15, 11, 30) start = key.hour * 3600 + key.minute * 60 + key.second step = 24 * 3600 @@ -4641,7 +4808,6 @@ def test_union(self): def test_nat(self): self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT) - def test_ufunc_coercions(self): idx = date_range('2011-01-01', periods=3, freq='2D', name='x') @@ -4677,34 +4843,47 @@ def test_ufunc_coercions(self): def test_fillna_datetime64(self): # GH 11343 for tz in ['US/Eastern', 'Asia/Tokyo']: - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00']) + idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, + '2011-01-01 11:00']) - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00']) - self.assert_index_equal(idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', + '2011-01-01 11:00']) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) # tz mismatch - exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), pd.Timestamp('2011-01-01 10:00', tz=tz), + exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), + pd.Timestamp('2011-01-01 10:00', tz=tz), pd.Timestamp('2011-01-01 11:00')], dtype=object) - self.assert_index_equal(idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) # object exp = pd.Index([pd.Timestamp('2011-01-01 09:00'), 'x', pd.Timestamp('2011-01-01 11:00')], dtype=object) self.assert_index_equal(idx.fillna('x'), exp) + idx = pd.DatetimeIndex( + ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz) - idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz) - - exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], tz=tz) - self.assert_index_equal(idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) + exp = pd.DatetimeIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], tz=tz) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp) - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), pd.Timestamp('2011-01-01 10:00'), - pd.Timestamp('2011-01-01 11:00', tz=tz)], dtype=object) - self.assert_index_equal(idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + pd.Timestamp('2011-01-01 10:00'), + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) + self.assert_index_equal( + idx.fillna(pd.Timestamp('2011-01-01 10:00')), exp) # object - exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), 'x', - pd.Timestamp('2011-01-01 11:00', tz=tz)], dtype=object) + exp = pd.Index([pd.Timestamp('2011-01-01 09:00', tz=tz), + 'x', + pd.Timestamp('2011-01-01 11:00', tz=tz)], + dtype=object) self.assert_index_equal(idx.fillna('x'), exp) @@ -4713,7 +4892,7 @@ class TestPeriodIndex(DatetimeLike, tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.indices = dict(index = tm.makePeriodIndex(10)) + self.indices = dict(index=tm.makePeriodIndex(10)) self.setup_indices() def create_index(self): @@ -4725,8 +4904,8 @@ def test_shift(self): # GH8083 drange = self.create_index() result = drange.shift(1) - expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', '2013-01-05', - '2013-01-06'], freq='D') + expected = PeriodIndex(['2013-01-02', '2013-01-03', '2013-01-04', + '2013-01-05', '2013-01-06'], freq='D') self.assert_index_equal(result, expected) def test_pickle_compat_construction(self): @@ -4737,9 +4916,11 @@ def test_get_loc(self): for method in [None, 'pad', 'backfill', 'nearest']: self.assertEqual(idx.get_loc(idx[1], method), 1) - self.assertEqual(idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) + self.assertEqual( + idx.get_loc(idx[1].asfreq('H', how='start'), method), 1) self.assertEqual(idx.get_loc(idx[1].to_timestamp(), method), 1) - self.assertEqual(idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) + self.assertEqual( + idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method), 1) self.assertEqual(idx.get_loc(str(idx[1]), method), 1) idx = pd.period_range('2000-01-01', periods=5)[::2] @@ -4767,8 +4948,10 @@ def test_get_indexer(self): target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12', '2000-01-02T01'], freq='H') tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) tm.assert_numpy_array_equal( idx.get_indexer(target, 'nearest', tolerance='1 hour'), [0, -1, 1]) @@ -4790,9 +4973,9 @@ def test_repeat(self): def test_period_index_indexer(self): - #GH4125 - idx = pd.period_range('2002-01','2003-12', freq='M') - df = pd.DataFrame(pd.np.random.randn(24,10), index=idx) + # GH4125 + idx = pd.period_range('2002-01', '2003-12', freq='M') + df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx) self.assert_frame_equal(df, df.ix[idx]) self.assert_frame_equal(df, df.ix[list(idx)]) self.assert_frame_equal(df, df.loc[list(idx)]) @@ -4801,16 +4984,22 @@ def test_period_index_indexer(self): def test_fillna_period(self): # GH 11343 - idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H') + idx = pd.PeriodIndex( + ['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H') - exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'], freq='H') - self.assert_index_equal(idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) + exp = pd.PeriodIndex( + ['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00' + ], freq='H') + self.assert_index_equal( + idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp) exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x', pd.Period('2011-01-01 11:00', freq='H')], dtype=object) self.assert_index_equal(idx.fillna('x'), exp) - with tm.assertRaisesRegexp(ValueError, 'Input has different freq=D from PeriodIndex\\(freq=H\\)'): + with tm.assertRaisesRegexp( + ValueError, + 'Input has different freq=D from PeriodIndex\\(freq=H\\)'): idx.fillna(pd.Period('2011-01-01', freq='D')) def test_no_millisecond_field(self): @@ -4820,12 +5009,13 @@ def test_no_millisecond_field(self): with self.assertRaises(AttributeError): DatetimeIndex([]).millisecond + class TestTimedeltaIndex(DatetimeLike, tm.TestCase): _holder = TimedeltaIndex _multiprocess_can_split_ = True def setUp(self): - self.indices = dict(index = tm.makeTimedeltaIndex(10)) + self.indices = dict(index=tm.makeTimedeltaIndex(10)) self.setup_indices() def create_index(self): @@ -4837,13 +5027,16 @@ def test_shift(self): drange = self.create_index() result = drange.shift(1) - expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', '3 days 01:00:00', - '4 days 01:00:00', '5 days 01:00:00'],freq='D') + expected = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00', + '3 days 01:00:00', + '4 days 01:00:00', '5 days 01:00:00'], + freq='D') self.assert_index_equal(result, expected) result = drange.shift(3, freq='2D 1s') - expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', '8 days 01:00:03', - '9 days 01:00:03', '10 days 01:00:03'],freq='D') + expected = TimedeltaIndex(['6 days 01:00:03', '7 days 01:00:03', + '8 days 01:00:03', '9 days 01:00:03', + '10 days 01:00:03'], freq='D') self.assert_index_equal(result, expected) def test_get_loc(self): @@ -4854,8 +5047,10 @@ def test_get_loc(self): self.assertEqual(idx.get_loc(idx[1].to_pytimedelta(), method), 1) self.assertEqual(idx.get_loc(str(idx[1]), method), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) - self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=pd.Timedelta(0)), 1) + self.assertEqual( + idx.get_loc(idx[1], 'pad', tolerance=np.timedelta64(0, 's')), 1) self.assertEqual(idx.get_loc(idx[1], 'pad', tolerance=timedelta(0)), 1) with tm.assertRaisesRegexp(ValueError, 'must be convertible'): @@ -4870,8 +5065,10 @@ def test_get_indexer(self): target = pd.to_timedelta(['-1 hour', '12 hours', '1 day 1 hour']) tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), [-1, 0, 1]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), [0, 1, 2]) - tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), [0, 1, 1]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'backfill'), [0, 1, 2]) + tm.assert_numpy_array_equal( + idx.get_indexer(target, 'nearest'), [0, 1, 1]) tm.assert_numpy_array_equal( idx.get_indexer(target, 'nearest', tolerance=pd.Timedelta('1 hour')), @@ -4879,9 +5076,8 @@ def test_get_indexer(self): def test_numeric_compat(self): - idx = self._holder(np.arange(5,dtype='int64')) - didx = self._holder(np.arange(5,dtype='int64')**2 - ) + idx = self._holder(np.arange(5, dtype='int64')) + didx = self._holder(np.arange(5, dtype='int64') ** 2) result = idx * 1 tm.assert_index_equal(result, idx) @@ -4905,9 +5101,8 @@ def test_numeric_compat(self): tm.assert_index_equal(result, didx) result = idx * Series(np.arange(5, dtype='float64') + 0.1) - tm.assert_index_equal(result, - self._holder(np.arange(5, dtype='float64') * ( - np.arange(5, dtype='float64') + 0.1))) + tm.assert_index_equal(result, self._holder(np.arange( + 5, dtype='float64') * (np.arange(5, dtype='float64') + 0.1))) # invalid self.assertRaises(TypeError, lambda: idx * idx) @@ -4920,7 +5115,7 @@ def test_pickle_compat_construction(self): def test_ufunc_coercions(self): # normal ops are also tested in tseries/test_timedeltas.py idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], - freq='2H', name='x') + freq='2H', name='x') for result in [idx * 2, np.multiply(idx, 2)]: tm.assertIsInstance(result, TimedeltaIndex) @@ -4938,7 +5133,7 @@ def test_ufunc_coercions(self): idx = TimedeltaIndex(['2H', '4H', '6H', '8H', '10H'], freq='2H', name='x') - for result in [ - idx, np.negative(idx)]: + for result in [-idx, np.negative(idx)]: tm.assertIsInstance(result, TimedeltaIndex) exp = TimedeltaIndex(['-2H', '-4H', '-6H', '-8H', '-10H'], freq='-2H', name='x') @@ -4947,7 +5142,7 @@ def test_ufunc_coercions(self): idx = TimedeltaIndex(['-2H', '-1H', '0H', '1H', '2H'], freq='H', name='x') - for result in [ abs(idx), np.absolute(idx)]: + for result in [abs(idx), np.absolute(idx)]: tm.assertIsInstance(result, TimedeltaIndex) exp = TimedeltaIndex(['2H', '1H', '0H', '1H', '2H'], freq=None, name='x') @@ -4964,7 +5159,8 @@ def test_fillna_timedelta(self): exp = pd.TimedeltaIndex(['1 day', '3 hour', '3 day']) idx.fillna(pd.Timedelta('3 hour')) - exp = pd.Index([pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) + exp = pd.Index( + [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) self.assert_index_equal(idx.fillna('x'), exp) @@ -4980,9 +5176,10 @@ def setUp(self): major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) self.index_names = ['first', 'second'] - self.indices = dict(index = MultiIndex(levels=[major_axis, minor_axis], - labels=[major_labels, minor_labels], - names=self.index_names, verify_integrity=False)) + self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis], + labels=[major_labels, minor_labels + ], names=self.index_names, + verify_integrity=False)) self.setup_indices() def create_index(self): @@ -4999,7 +5196,8 @@ def test_boolean_context_compat2(self): def f(): if common: pass - tm.assertRaisesRegexp(ValueError,'The truth value of a',f) + + tm.assertRaisesRegexp(ValueError, 'The truth value of a', f) def test_labels_dtypes(self): @@ -5008,16 +5206,16 @@ def test_labels_dtypes(self): self.assertTrue(i.labels[0].dtype == 'int8') self.assertTrue(i.labels[1].dtype == 'int8') - i = MultiIndex.from_product([['a'],range(40)]) + i = MultiIndex.from_product([['a'], range(40)]) self.assertTrue(i.labels[1].dtype == 'int8') - i = MultiIndex.from_product([['a'],range(400)]) + i = MultiIndex.from_product([['a'], range(400)]) self.assertTrue(i.labels[1].dtype == 'int16') - i = MultiIndex.from_product([['a'],range(40000)]) + i = MultiIndex.from_product([['a'], range(40000)]) self.assertTrue(i.labels[1].dtype == 'int32') - i = pd.MultiIndex.from_product([['a'],range(1000)]) - self.assertTrue((i.labels[0]>=0).all()) - self.assertTrue((i.labels[1]>=0).all()) + i = pd.MultiIndex.from_product([['a'], range(1000)]) + self.assertTrue((i.labels[0] >= 0).all()) + self.assertTrue((i.labels[1] >= 0).all()) def test_set_name_methods(self): # so long as these are synonyms, we don't need to test set_names @@ -5051,12 +5249,10 @@ def test_set_name_methods(self): self.assertIsNone(res) self.assertEqual(ind.names, new_names2) - def test_set_levels(self): - # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. - levels, labels = self.index.levels, self.index.labels + levels = self.index.levels new_levels = [[lev + 'a' for lev in level] for level in levels] def assert_matching(actual, expected): @@ -5108,7 +5304,8 @@ def assert_matching(actual, expected): # level changing multiple levels [w/ mutation] ind2 = self.index.copy() - inplace_return = ind2.set_levels(new_levels, level=[0, 1], inplace=True) + inplace_return = ind2.set_levels(new_levels, level=[0, 1], + inplace=True) self.assertIsNone(inplace_return) assert_matching(ind2.levels, new_levels) assert_matching(self.index.levels, levels) @@ -5116,7 +5313,7 @@ def assert_matching(actual, expected): def test_set_labels(self): # side note - you probably wouldn't want to use levels and labels # directly like this - but it is possible. - levels, labels = self.index.levels, self.index.labels + labels = self.index.labels major_labels, minor_labels = labels major_labels = [(x + 1) % 3 for x in major_labels] minor_labels = [(x + 1) % 1 for x in minor_labels] @@ -5171,7 +5368,8 @@ def assert_matching(actual, expected): # label changing multiple levels [w/ mutation] ind2 = self.index.copy() - inplace_return = ind2.set_labels(new_labels, level=[0, 1], inplace=True) + inplace_return = ind2.set_labels(new_labels, level=[0, 1], + inplace=True) self.assertIsNone(inplace_return) assert_matching(ind2.labels, new_labels) assert_matching(self.index.labels, labels) @@ -5295,9 +5493,7 @@ def test_set_value_keeps_names(self): # motivating example from #3742 lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe'] lev2 = ['1', '2', '3'] * 2 - idx = pd.MultiIndex.from_arrays( - [lev1, lev2], - names=['Name', 'Number']) + idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number']) df = pd.DataFrame( np.random.randn(6, 4), columns=['one', 'two', 'three', 'four'], @@ -5342,10 +5538,12 @@ def test_names(self): self.assertEqual(ind_names, level_names) def test_reference_duplicate_name(self): - idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['x', 'x']) + idx = MultiIndex.from_tuples( + [('a', 'b'), ('c', 'd')], names=['x', 'x']) self.assertTrue(idx._reference_duplicate_name('x')) - idx = MultiIndex.from_tuples([('a', 'b'), ('c', 'd')], names=['x', 'y']) + idx = MultiIndex.from_tuples( + [('a', 'b'), ('c', 'd')], names=['x', 'y']) self.assertFalse(idx._reference_duplicate_name('x')) def test_astype(self): @@ -5360,8 +5558,7 @@ def test_astype(self): def test_constructor_single_level(self): single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], - names=['first']) + labels=[[0, 1, 2, 3]], names=['first']) tm.assertIsInstance(single_level, Index) self.assertNotIsInstance(single_level, MultiIndex) self.assertEqual(single_level.name, 'first') @@ -5390,7 +5587,8 @@ def test_constructor_mismatched_label_levels(self): # important to check that it's looking at the right thing. with tm.assertRaisesRegexp(ValueError, length_error): - MultiIndex(levels=[['a'], ['b']], labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) + MultiIndex(levels=[['a'], ['b']], + labels=[[0, 1, 2, 3], [0, 3, 4, 1]]) with tm.assertRaisesRegexp(ValueError, label_error): MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]]) @@ -5412,7 +5610,6 @@ def test_constructor_mismatched_label_levels(self): with tm.assertRaisesRegexp(ValueError, label_error): self.index.copy().labels = [[0, 0, 0, 0], [0, 0]] - def assert_multiindex_copied(self, copy, original): # levels shoudl be (at least, shallow copied) assert_copy(copy.levels, original.levels) @@ -5494,9 +5691,11 @@ def test_from_arrays(self): self.assertEqual(list(result), list(self.index)) # infer correctly - result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], ['a', 'b']]) - self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101')]))) - self.assertTrue(result.levels[1].equals(Index(['a','b']))) + result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')], + ['a', 'b']]) + self.assertTrue(result.levels[0].equals(Index([Timestamp('20130101') + ]))) + self.assertTrue(result.levels[1].equals(Index(['a', 'b']))) def test_from_product(self): @@ -5505,9 +5704,9 @@ def test_from_product(self): names = ['first', 'second'] result = MultiIndex.from_product([first, second], names=names) - tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), - ('bar', 'a'), ('bar', 'b'), ('bar', 'c'), - ('buz', 'a'), ('buz', 'b'), ('buz', 'c')] + tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'), + ('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'), + ('buz', 'c')] expected = MultiIndex.from_tuples(tuples, names=names) tm.assert_numpy_array_equal(result, expected) @@ -5516,21 +5715,20 @@ def test_from_product(self): def test_from_product_datetimeindex(self): dt_index = date_range('2000-01-01', periods=2) mi = pd.MultiIndex.from_product([[1, 2], dt_index]) - etalon = pd.lib.list_to_object_array([(1, pd.Timestamp('2000-01-01')), - (1, pd.Timestamp('2000-01-02')), - (2, pd.Timestamp('2000-01-01')), - (2, pd.Timestamp('2000-01-02'))]) + etalon = pd.lib.list_to_object_array([(1, pd.Timestamp( + '2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp( + '2000-01-01')), (2, pd.Timestamp('2000-01-02'))]) tm.assert_numpy_array_equal(mi.values, etalon) def test_values_boxed(self): - tuples = [(1, pd.Timestamp('2000-01-01')), - (2, pd.NaT), + tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT), (3, pd.Timestamp('2000-01-03')), (1, pd.Timestamp('2000-01-04')), (2, pd.Timestamp('2000-01-02')), (3, pd.Timestamp('2000-01-03'))] mi = pd.MultiIndex.from_tuples(tuples) - tm.assert_numpy_array_equal(mi.values, pd.lib.list_to_object_array(tuples)) + tm.assert_numpy_array_equal(mi.values, + pd.lib.list_to_object_array(tuples)) # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(mi.values[:4], mi[:4].values) @@ -5558,13 +5756,12 @@ def test_get_level_values(self): tm.assert_numpy_array_equal(result, expected) # GH 10460 - index = MultiIndex(levels=[CategoricalIndex(['A', 'B']), - CategoricalIndex([1, 2, 3])], - labels=[np.array([0, 0, 0, 1, 1, 1]), - np.array([0, 1, 2, 0, 1, 2])]) + index = MultiIndex(levels=[CategoricalIndex( + ['A', 'B']), CategoricalIndex([1, 2, 3])], labels=[np.array( + [0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])]) exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B']) self.assert_index_equal(index.get_level_values(0), exp) - exp = CategoricalIndex([1, 2 ,3, 1, 2, 3]) + exp = CategoricalIndex([1, 2, 3, 1, 2, 3]) self.assert_index_equal(index.get_level_values(1), exp) def test_get_level_values_na(self): @@ -5586,7 +5783,7 @@ def test_get_level_values_na(self): expected = [np.nan, np.nan, np.nan] tm.assert_numpy_array_equal(values.values.astype(float), expected) values = index.get_level_values(1) - expected = np.array(['a', np.nan, 1],dtype=object) + expected = np.array(['a', np.nan, 1], dtype=object) tm.assert_numpy_array_equal(values.values, expected) arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])] @@ -5598,7 +5795,7 @@ def test_get_level_values_na(self): arrays = [[], []] index = pd.MultiIndex.from_arrays(arrays) values = index.get_level_values(0) - self.assertEqual(values.shape, (0,)) + self.assertEqual(values.shape, (0, )) def test_reorder_levels(self): # this blows up @@ -5658,7 +5855,10 @@ def test_roundtrip_pickle_with_tz(self): # GH 8367 # round-trip of timezone - index=MultiIndex.from_product([[1,2],['a','b'],date_range('20130101',periods=3,tz='US/Eastern')],names=['one','two','three']) + index = MultiIndex.from_product( + [[1, 2], ['a', 'b'], date_range('20130101', periods=3, + tz='US/Eastern') + ], names=['one', 'two', 'three']) unpickled = self.round_trip_pickle(index) self.assertTrue(index.equal_levels(unpickled)) @@ -5709,12 +5909,9 @@ def test_get_loc(self): method='nearest') # 3 levels - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) self.assertRaises(KeyError, index.get_loc, (1, 1)) self.assertEqual(index.get_loc((2, 0)), slice(3, 5)) @@ -5728,15 +5925,12 @@ def test_get_loc_duplicates(self): index = Index(['c', 'a', 'a', 'b', 'b']) rs = index.get_loc('c') xp = 0 - assert(rs == xp) + assert (rs == xp) def test_get_loc_level(self): - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) loc, new_index = index.get_loc_level((0, 1)) expected = slice(1, 2) @@ -5751,9 +5945,8 @@ def test_get_loc_level(self): self.assertRaises(KeyError, index.get_loc_level, (2, 2)) - index = MultiIndex(levels=[[2000], lrange(4)], - labels=[np.array([0, 0, 0, 0]), - np.array([0, 1, 2, 3])]) + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array( + [0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) expected = slice(None, None) self.assertEqual(result, expected) @@ -5793,12 +5986,9 @@ def test_slice_locs_with_type_mismatch(self): idx.slice_locs(df.index[1], (16, "a")) def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) assertRaisesRegexp(KeyError, "[Kk]ey length.*greater than MultiIndex" " lexsort depth", index.slice_locs, (1, 0, 1), @@ -5829,8 +6019,7 @@ def test_slice_locs_not_contained(self): index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]], labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3], - [0, 1, 2, 1, 2, 2, 0, 1, 2]], - sortorder=0) + [0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0) result = index.slice_locs((1, 0), (5, 2)) self.assertEqual(result, (3, 6)) @@ -5941,8 +6130,8 @@ def test_get_indexer(self): idx1 = Index(lrange(10) + lrange(10)) idx2 = Index(lrange(20)) assertRaisesRegexp(InvalidIndexError, "Reindexing only valid with" - " uniquely valued Index objects", - idx1.get_indexer, idx2) + " uniquely valued Index objects", idx1.get_indexer, + idx2) def test_get_indexer_nearest(self): midx = MultiIndex.from_tuples([('a', 1), ('b', 2)]) @@ -5957,24 +6146,20 @@ def test_format(self): def test_format_integer_names(self): index = MultiIndex(levels=[[0, 1], [0, 1]], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[0, 1]) + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]) index.format(names=True) def test_format_sparse_display(self): index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]], - labels=[[0, 0, 0, 1, 1, 1], - [0, 0, 1, 0, 0, 1], - [0, 1, 0, 0, 1, 0], - [0, 0, 0, 0, 0, 0]]) + labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1], + [0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]]) result = index.format() self.assertEqual(result[3], '1 0 0 0') def test_format_sparse_config(self): warn_filters = warnings.filters - warnings.filterwarnings('ignore', - category=FutureWarning, + warnings.filterwarnings('ignore', category=FutureWarning, module=".*format") # GH1538 pd.set_option('display.multi_sparse', False) @@ -5987,8 +6172,8 @@ def test_format_sparse_config(self): warnings.filters = warn_filters def test_to_hierarchical(self): - index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), - (2, 'one'), (2, 'two')]) + index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two')]) result = index.to_hierarchical(3) expected = MultiIndex(levels=[[1, 2], ['one', 'two']], labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], @@ -6010,8 +6195,10 @@ def test_to_hierarchical(self): names=['N1', 'N2']) result = index.to_hierarchical(2) - expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), (1, 'b'), - (2, 'a'), (2, 'a'), (2, 'b'), (2, 'b')], + expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'), + (1, 'b'), + (2, 'a'), (2, 'a'), + (2, 'b'), (2, 'b')], names=['N1', 'N2']) tm.assert_index_equal(result, expected) self.assertEqual(result.names, index.names) @@ -6028,15 +6215,11 @@ def test_equals(self): self.assertTrue(self.index.equals(self.index._tuple_index)) # different number of levels - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])]) - - index2 = MultiIndex(levels=index.levels[:-1], - labels=index.labels[:-1]) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) + + index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1]) self.assertFalse(index.equals(index2)) self.assertFalse(index.equal_levels(index2)) @@ -6132,16 +6315,16 @@ def test_union(self): # result = self.index[:4] | tuples[4:] # self.assertTrue(result.equals(tuples)) - # not valid for python 3 - # def test_union_with_regular_index(self): - # other = Index(['A', 'B', 'C']) + # not valid for python 3 + # def test_union_with_regular_index(self): + # other = Index(['A', 'B', 'C']) - # result = other.union(self.index) - # self.assertIn(('foo', 'one'), result) - # self.assertIn('B', result) + # result = other.union(self.index) + # self.assertIn(('foo', 'one'), result) + # self.assertIn('B', result) - # result2 = self.index.union(other) - # self.assertTrue(result.equals(result2)) + # result2 = self.index.union(other) + # self.assertTrue(result.equals(result2)) def test_intersection(self): piece1 = self.index[:5][::-1] @@ -6179,7 +6362,7 @@ def test_difference(self): with tm.assert_produces_warning(): self.index[-3:] - first.tolist() - self.assertRaises(TypeError, lambda : first.tolist() - self.index[-3:]) + self.assertRaises(TypeError, lambda: first.tolist() - self.index[-3:]) expected = MultiIndex.from_tuples(sorted(self.index[:-3].values), sortorder=0, @@ -6228,9 +6411,8 @@ def test_difference(self): # name from non-empty array result = first.difference([('foo', 'one')]) - expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), - ('foo', 'two'), ('qux', 'one'), - ('qux', 'two')]) + expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ( + 'foo', 'two'), ('qux', 'one'), ('qux', 'two')]) expected.names = first.names self.assertEqual(first.names, result.names) assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" @@ -6357,13 +6539,10 @@ def test_droplevel_with_names(self): dropped = index.droplevel(0) self.assertEqual(dropped.name, 'second') - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) dropped = index.droplevel(0) self.assertEqual(dropped.names, ('two', 'three')) @@ -6372,13 +6551,10 @@ def test_droplevel_with_names(self): self.assertTrue(dropped.equals(expected)) def test_droplevel_multiple(self): - index = MultiIndex(levels=[Index(lrange(4)), - Index(lrange(4)), - Index(lrange(4))], - labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), - np.array([0, 1, 0, 0, 0, 1, 0, 1]), - np.array([1, 0, 1, 1, 0, 0, 1, 0])], - names=['one', 'two', 'three']) + index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index( + lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( + [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], + names=['one', 'two', 'three']) dropped = index[:2].droplevel(['three', 'one']) expected = index[:2].droplevel(2).droplevel(0) @@ -6393,14 +6569,14 @@ def test_insert(self): # key not contained in all levels new_index = self.index.insert(0, ('abc', 'three')) tm.assert_numpy_array_equal(new_index.levels[0], - list(self.index.levels[0]) + ['abc']) + list(self.index.levels[0]) + ['abc']) tm.assert_numpy_array_equal(new_index.levels[1], - list(self.index.levels[1]) + ['three']) + list(self.index.levels[1]) + ['three']) self.assertEqual(new_index[0], ('abc', 'three')) # key wrong length assertRaisesRegexp(ValueError, "Item must have length equal to number" - " of levels", self.index.insert, 0, ('foo2',)) + " of levels", self.index.insert, 0, ('foo2', )) left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]], columns=['1st', '2nd', '3rd']) @@ -6421,15 +6597,15 @@ def test_insert(self): ts.loc[('a', 'w')] = 5 ts.loc['a', 'a'] = 6 - right = pd.DataFrame([['a', 'b', 0], - ['b', 'd', 1], - ['b', 'x', 2], + right = pd.DataFrame([['a', 'b', 0], + ['b', 'd', 1], + ['b', 'x', 2], ['b', 'a', -1], - ['b', 'b', 3], - ['a', 'x', 4], - ['a', 'w', 5], - ['a', 'a', 6]], - columns=['1st', '2nd', '3rd']) + ['b', 'b', 3], + ['a', 'x', 4], + ['a', 'w', 5], + ['a', 'a', 6]], + columns=['1st', '2nd', '3rd']) right.set_index(['1st', '2nd'], inplace=True) # FIXME data types changes to float because # of intermediate nan insertion; @@ -6438,8 +6614,8 @@ def test_insert(self): # GH9250 idx = [('test1', i) for i in range(5)] + \ - [('test2', i) for i in range(6)] + \ - [('test', 17), ('test', 18)] + [('test2', i) for i in range(6)] + \ + [('test', 17), ('test', 18)] left = pd.Series(np.linspace(0, 10, 11), pd.MultiIndex.from_tuples(idx[:-2])) @@ -6509,12 +6685,14 @@ def test_join_self(self): def test_join_multi(self): # GH 10665 - midx = pd.MultiIndex.from_product([np.arange(4), np.arange(4)], names=['a', 'b']) + midx = pd.MultiIndex.from_product( + [np.arange(4), np.arange(4)], names=['a', 'b']) idx = pd.Index([1, 2, 5], name='b') # inner jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True) - exp_idx = pd.MultiIndex.from_product([np.arange(4), [1, 2]], names=['a', 'b']) + exp_idx = pd.MultiIndex.from_product( + [np.arange(4), [1, 2]], names=['a', 'b']) exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14]) exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1]) self.assert_index_equal(jidx, exp_idx) @@ -6528,7 +6706,8 @@ def test_join_multi(self): # keep MultiIndex jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True) - exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1]) + exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0, + 1, -1]) self.assert_index_equal(jidx, midx) self.assertIsNone(lidx) self.assert_numpy_array_equal(ridx, exp_ridx) @@ -6569,16 +6748,15 @@ def test_reindex_level(self): self.index.reindex, self.index, method='pad', level='second') - assertRaisesRegexp(TypeError, "Fill method not supported", - idx.reindex, idx, method='bfill', level='first') + assertRaisesRegexp(TypeError, "Fill method not supported", idx.reindex, + idx, method='bfill', level='first') def test_duplicates(self): self.assertFalse(self.index.has_duplicates) self.assertTrue(self.index.append(self.index).has_duplicates) - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) self.assertTrue(index.has_duplicates) # GH 9075 @@ -6613,7 +6791,7 @@ def check(nlevels, with_nulls): labels[500] = -1 # common nan value labels = list(labels.copy() for i in range(nlevels)) for i in range(nlevels): - labels[i][500 + i - nlevels // 2 ] = -1 + labels[i][500 + i - nlevels // 2] = -1 labels += [np.array([-1, 1]).repeat(500)] else: @@ -6660,7 +6838,8 @@ def check(nlevels, with_nulls): mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]]) self.assertFalse(mi.has_duplicates) self.assertEqual(mi.get_duplicates(), []) - tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype='bool')) + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + 2, dtype='bool')) for n in range(1, 6): # 1st level shape for m in range(1, 5): # 2nd level shape @@ -6671,19 +6850,17 @@ def check(nlevels, with_nulls): self.assertEqual(len(mi), (n + 1) * (m + 1)) self.assertFalse(mi.has_duplicates) self.assertEqual(mi.get_duplicates(), []) - tm.assert_numpy_array_equal(mi.duplicated(), - np.zeros(len(mi), dtype='bool')) + tm.assert_numpy_array_equal(mi.duplicated(), np.zeros( + len(mi), dtype='bool')) def test_duplicate_meta_data(self): # GH 10115 - index = MultiIndex(levels=[[0, 1], [0, 1, 2]], - labels=[[0, 0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 0, 1, 2]]) + index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[ + [0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]) for idx in [index, index.set_names([None, None]), index.set_names([None, 'Num']), - index.set_names(['Upper','Num']), - ]: + index.set_names(['Upper', 'Num']), ]: self.assertTrue(idx.has_duplicates) self.assertEqual(idx.drop_duplicates().names, idx.names) @@ -6693,10 +6870,11 @@ def test_tolist(self): self.assertEqual(result, exp) def test_repr_with_unicode_data(self): - with pd.core.config.option_context("display.encoding",'UTF-8'): + with pd.core.config.option_context("display.encoding", 'UTF-8'): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index - self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped + self.assertFalse("\\u" in repr(index) + ) # we don't want unicode-escaped def test_repr_roundtrip(self): @@ -6710,10 +6888,13 @@ def test_repr_roundtrip(self): result = eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual(mi.get_level_values('first').inferred_type, 'string') - self.assertEqual(result.get_level_values('first').inferred_type, 'unicode') + self.assertEqual( + mi.get_level_values('first').inferred_type, 'string') + self.assertEqual( + result.get_level_values('first').inferred_type, 'unicode') - mi_u = MultiIndex.from_product([list(u'ab'),range(3)],names=['first','second']) + mi_u = MultiIndex.from_product( + [list(u'ab'), range(3)], names=['first', 'second']) result = eval(repr(mi_u)) tm.assert_index_equal(result, mi_u, exact=True) @@ -6734,10 +6915,13 @@ def test_repr_roundtrip(self): result = eval(repr(mi)) # string coerces to unicode tm.assert_index_equal(result, mi, exact=False) - self.assertEqual(mi.get_level_values('first').inferred_type, 'string') - self.assertEqual(result.get_level_values('first').inferred_type, 'unicode') + self.assertEqual( + mi.get_level_values('first').inferred_type, 'string') + self.assertEqual( + result.get_level_values('first').inferred_type, 'unicode') - mi = MultiIndex.from_product([list(u'abcdefg'),range(10)],names=['first','second']) + mi = MultiIndex.from_product( + [list(u'abcdefg'), range(10)], names=['first', 'second']) result = eval(repr(mi_u)) tm.assert_index_equal(result, mi_u, exact=True) @@ -6777,8 +6961,8 @@ def test_isnull_behavior(self): def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ - ['A', 'A', 'B', 'B', 'B'], - [1, 2, 1, 2, 3]]) + ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] + ]) assert ind.is_monotonic ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) @@ -6788,8 +6972,8 @@ def test_level_setting_resets_attributes(self): def test_isin(self): values = [('foo', 2), ('bar', 3), ('quux', 4)] - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], - np.arange(4)]) + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) result = idx.isin(values) expected = np.array([False, False, True, True]) tm.assert_numpy_array_equal(result, expected) @@ -6803,13 +6987,13 @@ def test_isin(self): def test_isin_nan(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), - [False, False]) + [False, False]) tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), - [False, False]) + [False, False]) def test_isin_level_kwarg(self): - idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], - np.arange(4)]) + idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( + 4)]) vals_0 = ['foo', 'bar', 'quux'] vals_1 = [2, 3, 10] @@ -6847,16 +7031,20 @@ def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) - self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, [None, None]) - self.assertEqual(idx.reindex(other_dtype.values)[0].names, [None, None]) + self.assertEqual( + idx.reindex(other_dtype.tolist())[0].names, [None, None]) + self.assertEqual( + idx.reindex(other_dtype.values)[0].names, [None, None]) idx.names = ['foo', 'bar'] self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) - self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + self.assertEqual( + idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) + self.assertEqual( + idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self): # GH7774 @@ -6905,8 +7093,7 @@ def test_equals_operator(self): def test_get_combined_index(): from pandas.core.index import _get_combined_index result = _get_combined_index([]) - assert(result.equals(Index([]))) - + assert (result.equals(Index([]))) if __name__ == '__main__': diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 5c3e4c01a965a..fc7a57ae2f179 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -30,7 +30,7 @@ _verbose = False -#------------------------------------------------------------------------------- +# ------------------------------------------------------------------------ # Indexing test cases @@ -42,10 +42,11 @@ def _generate_indices(f, values=False): axes = f.axes if values: - axes = [ lrange(len(a)) for a in axes ] + axes = [lrange(len(a)) for a in axes] return itertools.product(*axes) + def _get_value(f, i, values=False): """ return the value for the location i """ @@ -54,12 +55,13 @@ def _get_value(f, i, values=False): return f.values[i] # this is equiv of f[col][row]..... - #v = f - #for a in reversed(i): + # v = f + # for a in reversed(i): # v = v.__getitem__(a) - #return v + # return v return f.ix[i] + def _get_result(obj, method, key, axis): """ return the result for this obj with this key and this axis """ @@ -70,81 +72,90 @@ def _get_result(obj, method, key, axis): # so ix can work for comparisions if method == 'indexer': method = 'ix' - key = obj._get_axis(axis)[key] + key = obj._get_axis(axis)[key] # in case we actually want 0 index slicing try: - xp = getattr(obj, method).__getitem__(_axify(obj,key,axis)) + xp = getattr(obj, method).__getitem__(_axify(obj, key, axis)) except: - xp = getattr(obj, method).__getitem__(key) + xp = getattr(obj, method).__getitem__(key) return xp + def _axify(obj, key, axis): # create a tuple accessor - if axis is not None: - axes = [ slice(None) ] * obj.ndim - axes[axis] = key - return tuple(axes) - return k + axes = [slice(None)] * obj.ndim + axes[axis] = key + return tuple(axes) -def _mklbl(prefix,n): - return ["%s%s" % (prefix,i) for i in range(n)] +def _mklbl(prefix, n): + return ["%s%s" % (prefix, i) for i in range(n)] + class TestIndexing(tm.TestCase): _multiprocess_can_split_ = True - _objs = set(['series','frame','panel']) - _typs = set(['ints','labels','mixed','ts','floats','empty']) + _objs = set(['series', 'frame', 'panel']) + _typs = set(['ints', 'labels', 'mixed', 'ts', 'floats', 'empty']) def setUp(self): import warnings warnings.filterwarnings(action='ignore', category=FutureWarning) - self.series_ints = Series(np.random.rand(4), index=lrange(0,8,2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), index=lrange(0, 8, 2), columns=lrange(0,12,3)) - self.panel_ints = Panel(np.random.rand(4,4,4), items=lrange(0,8,2),major_axis=lrange(0,12,3),minor_axis=lrange(0,16,4)) + self.series_ints = Series(np.random.rand(4), index=lrange(0, 8, 2)) + self.frame_ints = DataFrame( + np.random.randn( + 4, 4), index=lrange(0, 8, 2), columns=lrange(0, 12, 3)) + self.panel_ints = Panel( + np.random.rand(4, 4, 4), items=lrange(0, 8, 2), + major_axis=lrange(0, 12, 3), minor_axis=lrange(0, 16, 4)) self.series_labels = Series(np.random.randn(4), index=list('abcd')) - self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) - self.panel_labels = Panel(np.random.randn(4,4,4), items=list('abcd'), major_axis=list('ABCD'), minor_axis=list('ZYXW')) - - self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) - self.frame_mixed = DataFrame(np.random.randn(4, 4), index=[2, 4, 'null', 8]) - self.panel_mixed = Panel(np.random.randn(4,4,4), items=[2,4,'null',8]) - - self.series_ts = Series(np.random.randn(4), index=date_range('20130101', periods=4)) - self.frame_ts = DataFrame(np.random.randn(4, 4), index=date_range('20130101', periods=4)) - self.panel_ts = Panel(np.random.randn(4, 4, 4), items=date_range('20130101', periods=4)) - - #self.series_floats = Series(np.random.randn(4), index=[1.00, 2.00, 3.00, 4.00]) - #self.frame_floats = DataFrame(np.random.randn(4, 4), columns=[1.00, 2.00, 3.00, 4.00]) - #self.panel_floats = Panel(np.random.rand(4,4,4), items = [1.00,2.00,3.00,4.00]) - - self.frame_empty = DataFrame({}) - self.series_empty = Series({}) - self.panel_empty = Panel({}) + self.frame_labels = DataFrame( + np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) + self.panel_labels = Panel( + np.random.randn(4, 4, 4), items=list('abcd'), + major_axis=list('ABCD'), minor_axis=list('ZYXW')) + + self.series_mixed = Series(np.random.randn(4), index=[2, 4, 'null', 8]) + self.frame_mixed = DataFrame( + np.random.randn(4, 4), index=[2, 4, 'null', 8]) + self.panel_mixed = Panel( + np.random.randn(4, 4, 4), items=[2, 4, 'null', 8]) + + self.series_ts = Series( + np.random.randn(4), index=date_range('20130101', periods=4)) + self.frame_ts = DataFrame( + np.random.randn(4, 4), index=date_range('20130101', periods=4)) + self.panel_ts = Panel( + np.random.randn(4, 4, 4), items=date_range('20130101', periods=4)) + + self.frame_empty = DataFrame({}) + self.series_empty = Series({}) + self.panel_empty = Panel({}) # form agglomerates for o in self._objs: d = dict() for t in self._typs: - d[t] = getattr(self,'%s_%s' % (o,t),None) + d[t] = getattr(self, '%s_%s' % (o, t), None) - setattr(self,o,d) + setattr(self, o, d) - def check_values(self, f, func, values = False): + def check_values(self, f, func, values=False): - if f is None: return + if f is None: + return axes = f.axes indicies = itertools.product(*axes) for i in indicies: - result = getattr(f,func)[i] + result = getattr(f, func)[i] # check agains values if values: @@ -156,33 +167,32 @@ def check_values(self, f, func, values = False): assert_almost_equal(result, expected) - - def check_result(self, name, method1, key1, method2, key2, typs = None, objs = None, axes = None, fails = None): - - + def check_result(self, name, method1, key1, method2, key2, typs=None, + objs=None, axes=None, fails=None): def _eq(t, o, a, obj, k1, k2): """ compare equal for these 2 keys """ - if a is not None and a > obj.ndim-1: + if a is not None and a > obj.ndim - 1: return - def _print(result, error = None): + def _print(result, error=None): if error is not None: error = str(error) - v = "%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % (name,result,t,o,method1,method2,a,error or '') + v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s," + "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" % + (name, result, t, o, method1, method2, a, error or '')) if _verbose: com.pprint_thing(v) try: - - ### good debug location ### - #if name == 'bool' and t == 'empty' and o == 'series' and method1 == 'loc': + # if (name == 'bool' and t == 'empty' and o == 'series' and + # method1 == 'loc'): # import pdb; pdb.set_trace() - rs = getattr(obj, method1).__getitem__(_axify(obj,k1,a)) + rs = getattr(obj, method1).__getitem__(_axify(obj, k1, a)) try: - xp = _get_result(obj,method2,k2,a) + xp = _get_result(obj, method2, k2, a) except: result = 'no comp' _print(result) @@ -192,11 +202,11 @@ def _print(result, error = None): if np.isscalar(rs) and np.isscalar(xp): self.assertEqual(rs, xp) elif xp.ndim == 1: - assert_series_equal(rs,xp) + assert_series_equal(rs, xp) elif xp.ndim == 2: - assert_frame_equal(rs,xp) + assert_frame_equal(rs, xp) elif xp.ndim == 3: - assert_panel_equal(rs,xp) + assert_panel_equal(rs, xp) result = 'ok' except (AssertionError): result = 'fail' @@ -223,7 +233,7 @@ def _print(result, error = None): return result = type(detail).__name__ - raise AssertionError(_print(result, error = detail)) + raise AssertionError(_print(result, error=detail)) if typs is None: typs = self._typs @@ -232,19 +242,19 @@ def _print(result, error = None): objs = self._objs if axes is not None: - if not isinstance(axes,(tuple,list)): - axes = [ axes ] + if not isinstance(axes, (tuple, list)): + axes = [axes] else: axes = list(axes) else: - axes = [ 0, 1, 2] + axes = [0, 1, 2] # check for o in objs: if o not in self._objs: continue - d = getattr(self,o) + d = getattr(self, o) for a in axes: for t in typs: if t not in self._typs: @@ -269,61 +279,59 @@ def test_indexer_caching(self): # setitem expected = Series(np.ones(n), index=index) s = Series(np.zeros(n), index=index) - s[s==0] = 1 - assert_series_equal(s,expected) + s[s == 0] = 1 + assert_series_equal(s, expected) def test_at_and_iat_get(self): - - def _check(f, func, values = False): + def _check(f, func, values=False): if f is not None: indicies = _generate_indices(f, values) for i in indicies: - result = getattr(f,func)[i] - expected = _get_value(f,i,values) + result = getattr(f, func)[i] + expected = _get_value(f, i, values) assert_almost_equal(result, expected) for o in self._objs: - d = getattr(self,o) + d = getattr(self, o) # iat - _check(d['ints'],'iat', values=True) - for f in [d['labels'],d['ts'],d['floats']]: + _check(d['ints'], 'iat', values=True) + for f in [d['labels'], d['ts'], d['floats']]: if f is not None: self.assertRaises(ValueError, self.check_values, f, 'iat') # at - _check(d['ints'], 'at') - _check(d['labels'],'at') - _check(d['ts'], 'at') - _check(d['floats'],'at') + _check(d['ints'], 'at') + _check(d['labels'], 'at') + _check(d['ts'], 'at') + _check(d['floats'], 'at') def test_at_and_iat_set(self): - - def _check(f, func, values = False): + def _check(f, func, values=False): if f is not None: indicies = _generate_indices(f, values) for i in indicies: - getattr(f,func)[i] = 1 - expected = _get_value(f,i,values) + getattr(f, func)[i] = 1 + expected = _get_value(f, i, values) assert_almost_equal(expected, 1) for t in self._objs: - d = getattr(self,t) + d = getattr(self, t) - _check(d['ints'],'iat',values=True) - for f in [d['labels'],d['ts'],d['floats']]: + _check(d['ints'], 'iat', values=True) + for f in [d['labels'], d['ts'], d['floats']]: if f is not None: self.assertRaises(ValueError, _check, f, 'iat') # at - _check(d['ints'], 'at') - _check(d['labels'],'at') - _check(d['ts'], 'at') - _check(d['floats'],'at') + _check(d['ints'], 'at') + _check(d['labels'], 'at') + _check(d['ts'], 'at') + _check(d['floats'], 'at') def test_at_iat_coercion(self): @@ -333,7 +341,7 @@ def test_at_iat_coercion(self): s = df['A'] result = s.at[dates[5]] - xp = s.values[5] + xp = s.values[5] self.assertEqual(result, xp) # GH 7729 @@ -341,14 +349,14 @@ def test_at_iat_coercion(self): s = Series(['2014-01-01', '2014-02-02'], dtype='datetime64[ns]') expected = Timestamp('2014-02-02') - for r in [ lambda : s.iat[1], lambda : s.iloc[1] ]: + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: result = r() self.assertEqual(result, expected) - s = Series(['1 days','2 days'], dtype='timedelta64[ns]') + s = Series(['1 days', '2 days'], dtype='timedelta64[ns]') expected = Timedelta('2 days') - for r in [ lambda : s.iat[1], lambda : s.iloc[1] ]: + for r in [lambda: s.iat[1], lambda: s.iloc[1]]: result = r() self.assertEqual(result, expected) @@ -360,149 +368,180 @@ def test_imethods_with_dups(self): # GH6493 # iat/iloc with dups - s = Series(range(5), index=[1,1,2,2,3], dtype='int64') + s = Series(range(5), index=[1, 1, 2, 2, 3], dtype='int64') result = s.iloc[2] - self.assertEqual(result,2) + self.assertEqual(result, 2) result = s.iat[2] - self.assertEqual(result,2) + self.assertEqual(result, 2) - self.assertRaises(IndexError, lambda : s.iat[10]) - self.assertRaises(IndexError, lambda : s.iat[-10]) + self.assertRaises(IndexError, lambda: s.iat[10]) + self.assertRaises(IndexError, lambda: s.iat[-10]) - result = s.iloc[[2,3]] - expected = Series([2,3],[2,2],dtype='int64') - assert_series_equal(result,expected) + result = s.iloc[[2, 3]] + expected = Series([2, 3], [2, 2], dtype='int64') + assert_series_equal(result, expected) df = s.to_frame() result = df.iloc[2] expected = Series(2, index=[0], name=2) assert_series_equal(result, expected) - result = df.iat[2,0] + result = df.iat[2, 0] expected = 2 - self.assertEqual(result,2) + self.assertEqual(result, 2) def test_repeated_getitem_dups(self): # GH 5678 # repeated gettitems on a dup index returing a ndarray - df = DataFrame(np.random.random_sample((20,5)), index=['ABCDE'[x%5] for x in range(20)]) - expected = df.loc['A',0] - result = df.loc[:,0].loc['A'] - assert_series_equal(result,expected) + df = DataFrame( + np.random.random_sample((20, 5)), + index=['ABCDE' [x % 5] for x in range(20)]) + expected = df.loc['A', 0] + result = df.loc[:, 0].loc['A'] + assert_series_equal(result, expected) def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds - df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE')) + df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE')) expected = df # lists of positions should raise IndexErrror! - with tm.assertRaisesRegexp(IndexError, 'positional indexers are out-of-bounds'): - df.iloc[:,[0,1,2,3,4,5]] - self.assertRaises(IndexError, lambda : df.iloc[[1,30]]) - self.assertRaises(IndexError, lambda : df.iloc[[1,-30]]) - self.assertRaises(IndexError, lambda : df.iloc[[100]]) + with tm.assertRaisesRegexp(IndexError, + 'positional indexers are out-of-bounds'): + df.iloc[:, [0, 1, 2, 3, 4, 5]] + self.assertRaises(IndexError, lambda: df.iloc[[1, 30]]) + self.assertRaises(IndexError, lambda: df.iloc[[1, -30]]) + self.assertRaises(IndexError, lambda: df.iloc[[100]]) s = df['A'] - self.assertRaises(IndexError, lambda : s.iloc[[100]]) - self.assertRaises(IndexError, lambda : s.iloc[[-100]]) + self.assertRaises(IndexError, lambda: s.iloc[[100]]) + self.assertRaises(IndexError, lambda: s.iloc[[-100]]) # still raise on a single indexer - with tm.assertRaisesRegexp(IndexError, 'single positional indexer is out-of-bounds'): + with tm.assertRaisesRegexp( + IndexError, 'single positional indexer is out-of-bounds'): df.iloc[30] - self.assertRaises(IndexError, lambda : df.iloc[-30]) + self.assertRaises(IndexError, lambda: df.iloc[-30]) # GH10779 - # single positive/negative indexer exceeding Series bounds should raise an IndexError - with tm.assertRaisesRegexp(IndexError, 'single positional indexer is out-of-bounds'): + # single positive/negative indexer exceeding Series bounds should raise + # an IndexError + with tm.assertRaisesRegexp( + IndexError, 'single positional indexer is out-of-bounds'): s.iloc[30] - self.assertRaises(IndexError, lambda : s.iloc[-30]) + self.assertRaises(IndexError, lambda: s.iloc[-30]) # slices are ok - result = df.iloc[:,4:10] # 0 < start < len < stop - expected = df.iloc[:,4:] - assert_frame_equal(result,expected) + result = df.iloc[:, 4:10] # 0 < start < len < stop + expected = df.iloc[:, 4:] + assert_frame_equal(result, expected) - result = df.iloc[:,-4:-10] # stop < 0 < start < len - expected = df.iloc[:,:0] - assert_frame_equal(result,expected) + result = df.iloc[:, -4:-10] # stop < 0 < start < len + expected = df.iloc[:, :0] + assert_frame_equal(result, expected) - result = df.iloc[:,10:4:-1] # 0 < stop < len < start (down) - expected = df.iloc[:,:4:-1] - assert_frame_equal(result,expected) + result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) + expected = df.iloc[:, :4:-1] + assert_frame_equal(result, expected) - result = df.iloc[:,4:-10:-1] # stop < 0 < start < len (down) - expected = df.iloc[:,4::-1] - assert_frame_equal(result,expected) + result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) + expected = df.iloc[:, 4::-1] + assert_frame_equal(result, expected) - result = df.iloc[:,-10:4] # start < 0 < stop < len - expected = df.iloc[:,:4] - assert_frame_equal(result,expected) + result = df.iloc[:, -10:4] # start < 0 < stop < len + expected = df.iloc[:, :4] + assert_frame_equal(result, expected) - result = df.iloc[:,10:4] # 0 < stop < len < start - expected = df.iloc[:,:0] - assert_frame_equal(result,expected) + result = df.iloc[:, 10:4] # 0 < stop < len < start + expected = df.iloc[:, :0] + assert_frame_equal(result, expected) - result = df.iloc[:,-10:-11:-1] # stop < start < 0 < len (down) - expected = df.iloc[:,:0] - assert_frame_equal(result,expected) + result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) + expected = df.iloc[:, :0] + assert_frame_equal(result, expected) - result = df.iloc[:,10:11] # 0 < len < start < stop - expected = df.iloc[:,:0] - assert_frame_equal(result,expected) + result = df.iloc[:, 10:11] # 0 < len < start < stop + expected = df.iloc[:, :0] + assert_frame_equal(result, expected) # slice bounds exceeding is ok result = s.iloc[18:30] expected = s.iloc[18:] - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s.iloc[30:] expected = s.iloc[:0] - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s.iloc[30::-1] expected = s.iloc[::-1] - assert_series_equal(result,expected) + assert_series_equal(result, expected) # doc example - def check(result,expected): + def check(result, expected): str(result) result.dtypes - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) - dfl = DataFrame(np.random.randn(5,2),columns=list('AB')) - check(dfl.iloc[:,2:3],DataFrame(index=dfl.index)) - check(dfl.iloc[:,1:3],dfl.iloc[:,[1]]) - check(dfl.iloc[4:6],dfl.iloc[[4]]) + dfl = DataFrame(np.random.randn(5, 2), columns=list('AB')) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) + check(dfl.iloc[4:6], dfl.iloc[[4]]) - self.assertRaises(IndexError, lambda : dfl.iloc[[4,5,6]]) - self.assertRaises(IndexError, lambda : dfl.iloc[:,4]) + self.assertRaises(IndexError, lambda: dfl.iloc[[4, 5, 6]]) + self.assertRaises(IndexError, lambda: dfl.iloc[:, 4]) def test_iloc_getitem_int(self): # integer - self.check_result('integer', 'iloc', 2, 'ix', { 0 : 4, 1: 6, 2: 8 }, typs = ['ints']) - self.check_result('integer', 'iloc', 2, 'indexer', 2, typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) + self.check_result('integer', 'iloc', 2, 'ix', {0: 4, + 1: 6, + 2: 8}, typs=['ints']) + self.check_result('integer', 'iloc', 2, 'indexer', 2, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) def test_iloc_getitem_neg_int(self): # neg integer - self.check_result('neg int', 'iloc', -1, 'ix', { 0 : 6, 1: 9, 2: 12 }, typs = ['ints']) - self.check_result('neg int', 'iloc', -1, 'indexer', -1, typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) + self.check_result('neg int', 'iloc', -1, 'ix', {0: 6, + 1: 9, + 2: 12}, typs=['ints']) + self.check_result('neg int', 'iloc', -1, 'indexer', -1, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) def test_iloc_getitem_list_int(self): # list of ints - self.check_result('list int', 'iloc', [0,1,2], 'ix', { 0 : [0,2,4], 1 : [0,3,6], 2: [0,4,8] }, typs = ['ints']) - self.check_result('list int', 'iloc', [2], 'ix', { 0 : [4], 1 : [6], 2: [8] }, typs = ['ints']) - self.check_result('list int', 'iloc', [0,1,2], 'indexer', [0,1,2], typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) - - # array of ints - # (GH5006), make sure that a single indexer is returning the correct type - self.check_result('array int', 'iloc', np.array([0,1,2]), 'ix', { 0 : [0,2,4], 1 : [0,3,6], 2: [0,4,8] }, typs = ['ints']) - self.check_result('array int', 'iloc', np.array([2]), 'ix', { 0 : [4], 1 : [6], 2: [8] }, typs = ['ints']) - self.check_result('array int', 'iloc', np.array([0,1,2]), 'indexer', [0,1,2], typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) + self.check_result('list int', 'iloc', [0, 1, 2], 'ix', {0: [0, 2, 4], + 1: [0, 3, 6], + 2: [0, 4, 8]}, + typs=['ints']) + self.check_result('list int', 'iloc', [2], 'ix', {0: [4], + 1: [6], + 2: [8]}, + typs=['ints']) + self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) + + # array of ints (GH5006), make sure that a single indexer is returning + # the correct type + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix', + {0: [0, 2, 4], + 1: [0, 3, 6], + 2: [0, 4, 8]}, typs=['ints']) + self.check_result('array int', 'iloc', np.array([2]), 'ix', {0: [4], + 1: [6], + 2: [8]}, + typs=['ints']) + self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer', + [0, 1, 2], + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) def test_iloc_getitem_neg_int_can_reach_first_index(self): # GH10547 and GH10779 @@ -534,160 +573,176 @@ def test_iloc_getitem_neg_int_can_reach_first_index(self): def test_iloc_getitem_dups(self): # no dups in panel (bug?) - self.check_result('list int (dups)', 'iloc', [0,1,1,3], 'ix', { 0 : [0,2,2,6], 1 : [0,3,3,9] }, objs = ['series','frame'], typs = ['ints']) + self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix', + {0: [0, 2, 2, 6], + 1: [0, 3, 3, 9 + ]}, objs=['series', 'frame'], typs=['ints']) # GH 6766 - df1 = DataFrame([{'A':None, 'B':1},{'A':2, 'B':2}]) - df2 = DataFrame([{'A':3, 'B':3},{'A':4, 'B':4}]) + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) df = concat([df1, df2], axis=1) # cross-sectional indexing - result = df.iloc[0,0] + result = df.iloc[0, 0] self.assertTrue(isnull(result)) - result = df.iloc[0,:] - expected = Series([np.nan, 1, 3, 3], index=['A','B','A','B'], name=0) - assert_series_equal(result,expected) + result = df.iloc[0, :] + expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], + name=0) + assert_series_equal(result, expected) def test_iloc_getitem_array(self): # array like - s = Series(index=lrange(1,4)) - self.check_result('array like', 'iloc', s.index, 'ix', { 0 : [2,4,6], 1 : [3,6,9], 2: [4,8,12] }, typs = ['ints']) + s = Series(index=lrange(1, 4)) + self.check_result('array like', 'iloc', s.index, 'ix', {0: [2, 4, 6], + 1: [3, 6, 9], + 2: [4, 8, 12]}, + typs=['ints']) def test_iloc_getitem_bool(self): # boolean indexers - b = [True,False,True,False,] - self.check_result('bool', 'iloc', b, 'ix', b, typs = ['ints']) - self.check_result('bool', 'iloc', b, 'ix', b, typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) + b = [True, False, True, False, ] + self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints']) + self.check_result('bool', 'iloc', b, 'ix', b, + typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) def test_iloc_getitem_slice(self): # slices - self.check_result('slice', 'iloc', slice(1,3), 'ix', { 0 : [2,4], 1: [3,6], 2: [4,8] }, typs = ['ints']) - self.check_result('slice', 'iloc', slice(1,3), 'indexer', slice(1,3), typs = ['labels','mixed','ts','floats','empty'], fails = IndexError) + self.check_result('slice', 'iloc', slice(1, 3), 'ix', {0: [2, 4], + 1: [3, 6], + 2: [4, 8]}, + typs=['ints']) + self.check_result('slice', 'iloc', slice(1, 3), 'indexer', slice( + 1, 3), typs=['labels', 'mixed', 'ts', 'floats', 'empty'], + fails=IndexError) def test_iloc_getitem_slice_dups(self): - df1 = DataFrame(np.random.randn(10,4),columns=['A','A','B','B']) - df2 = DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C']) + df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']) + df2 = DataFrame( + np.random.randint(0, 10, size=20).reshape(10, + 2), columns=['A', 'C']) # axis=1 - df = concat([df1,df2],axis=1) - assert_frame_equal(df.iloc[:,:4],df1) - assert_frame_equal(df.iloc[:,4:],df2) + df = concat([df1, df2], axis=1) + assert_frame_equal(df.iloc[:, :4], df1) + assert_frame_equal(df.iloc[:, 4:], df2) - df = concat([df2,df1],axis=1) - assert_frame_equal(df.iloc[:,:2],df2) - assert_frame_equal(df.iloc[:,2:],df1) + df = concat([df2, df1], axis=1) + assert_frame_equal(df.iloc[:, :2], df2) + assert_frame_equal(df.iloc[:, 2:], df1) - assert_frame_equal(df.iloc[:,0:3],concat([df2,df1.iloc[:,[0]]],axis=1)) + assert_frame_equal(df.iloc[:, 0:3], concat( + [df2, df1.iloc[:, [0]]], axis=1)) # axis=0 - df = concat([df,df],axis=0) - assert_frame_equal(df.iloc[0:10,:2],df2) - assert_frame_equal(df.iloc[0:10,2:],df1) - assert_frame_equal(df.iloc[10:,:2],df2) - assert_frame_equal(df.iloc[10:,2:],df1) + df = concat([df, df], axis=0) + assert_frame_equal(df.iloc[0:10, :2], df2) + assert_frame_equal(df.iloc[0:10, 2:], df1) + assert_frame_equal(df.iloc[10:, :2], df2) + assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_getitem_multiindex(self): + def test_iloc_getitem_multiindex2(self): + # TODO(wesm): fix this + raise nose.SkipTest('this test was being suppressed, ' + 'needs to be fixed') arr = np.random.randn(3, 3) - df = DataFrame(arr, - columns=[[2,2,4],[6,8,10]], - index=[[4,4,8],[8,10,12]]) + df = DataFrame(arr, columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) rs = df.iloc[2] - xp = Series(arr[2],index=df.columns) + xp = Series(arr[2], index=df.columns) assert_series_equal(rs, xp) - rs = df.iloc[:,2] - xp = Series(arr[:, 2],index=df.index) + rs = df.iloc[:, 2] + xp = Series(arr[:, 2], index=df.index) assert_series_equal(rs, xp) - rs = df.iloc[2,2] - xp = df.values[2,2] + rs = df.iloc[2, 2] + xp = df.values[2, 2] self.assertEqual(rs, xp) # for multiple items # GH 5528 - rs = df.iloc[[0,1]] - xp = df.xs(4,drop_level=False) - assert_frame_equal(rs,xp) + rs = df.iloc[[0, 1]] + xp = df.xs(4, drop_level=False) + assert_frame_equal(rs, xp) - tup = zip(*[['a','a','b','b'],['x','y','x','y']]) + tup = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) index = MultiIndex.from_tuples(tup) df = DataFrame(np.random.randn(4, 4), index=index) rs = df.iloc[[2, 3]] - xp = df.xs('b',drop_level=False) - assert_frame_equal(rs,xp) + xp = df.xs('b', drop_level=False) + assert_frame_equal(rs, xp) def test_iloc_setitem(self): df = self.frame_ints - df.iloc[1,1] = 1 - result = df.iloc[1,1] + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] self.assertEqual(result, 1) - df.iloc[:,2:3] = 0 - expected = df.iloc[:,2:3] - result = df.iloc[:,2:3] + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] assert_frame_equal(result, expected) # GH5771 - s = Series(0,index=[4,5,6]) + s = Series(0, index=[4, 5, 6]) s.iloc[1:2] += 1 - expected = Series([0,1,0],index=[4,5,6]) + expected = Series([0, 1, 0], index=[4, 5, 6]) assert_series_equal(s, expected) def test_ix_loc_setitem_consistency(self): # GH 5771 # loc with slice and series - s = Series(0,index=[4,5,6]) + s = Series(0, index=[4, 5, 6]) s.loc[4:5] += 1 - expected = Series([1,1,0],index=[4,5,6]) + expected = Series([1, 1, 0], index=[4, 5, 6]) assert_series_equal(s, expected) # GH 5928 # chained indexing assignment - df = DataFrame({'a' : [0,1,2] }) + df = DataFrame({'a': [0, 1, 2]}) expected = df.copy() - expected.ix[[0,1,2],'a'] = -expected.ix[[0,1,2],'a'] + expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a'] - df['a'].ix[[0,1,2]] = -df['a'].ix[[0,1,2]] - assert_frame_equal(df,expected) + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]] + assert_frame_equal(df, expected) - df = DataFrame({'a' : [0,1,2], 'b' :[0,1,2] }) - df['a'].ix[[0,1,2]] = -df['a'].ix[[0,1,2]].astype('float64') + 0.5 - expected = DataFrame({'a' : [0.5,-0.5,-1.5], 'b' : [0,1,2] }) - assert_frame_equal(df,expected) + df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]}) + df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype('float64') + 0.5 + expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]}) + assert_frame_equal(df, expected) # GH 8607 # ix setitem consistency - df = DataFrame( - {'timestamp':[1413840976, 1413842580, 1413760580], - 'delta':[1174, 904, 161], - 'elapsed':[7673, 9277, 1470] - }) - expected = DataFrame( - {'timestamp':pd.to_datetime([1413840976, 1413842580, 1413760580], unit='s'), - 'delta':[1174, 904, 161], - 'elapsed':[7673, 9277, 1470] - }) + df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) + expected = DataFrame({'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], unit='s'), + 'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470]}) df2 = df.copy() df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - assert_frame_equal(df2,expected) + assert_frame_equal(df2, expected) df2 = df.copy() - df2.loc[:,'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') - assert_frame_equal(df2,expected) + df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s') + assert_frame_equal(df2, expected) df2 = df.copy() - df2.ix[:,2] = pd.to_datetime(df['timestamp'], unit='s') - assert_frame_equal(df2,expected) + df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s') + assert_frame_equal(df2, expected) def test_ix_loc_consistency(self): @@ -702,26 +757,29 @@ def compare(result, expected): self.assertTrue(expected.equals(result)) # failure cases for .loc, but these work for .ix - df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD')) - for key in [ slice(1,3), tuple([slice(0,2),slice(0,2)]), tuple([slice(0,2),df.columns[0:2]]) ]: + df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD')) + for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]), + tuple([slice(0, 2), df.columns[0:2]])]: - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex, tm.makeTimedeltaIndex ]: + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex, + tm.makeTimedeltaIndex]: df.index = index(len(df.index)) df.ix[key] - self.assertRaises(TypeError, lambda : df.loc[key]) + self.assertRaises(TypeError, lambda: df.loc[key]) - df = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=pd.date_range('2012-01-01', periods=5)) + df = pd.DataFrame( + np.random.randn(5, 4), columns=list('ABCD'), + index=pd.date_range('2012-01-01', periods=5)) - for key in [ '2012-01-03', - '2012-01-31', - slice('2012-01-03','2012-01-03'), - slice('2012-01-03','2012-01-04'), - slice('2012-01-03','2012-01-06',2), - slice('2012-01-03','2012-01-31'), - tuple([[True,True,True,False,True]]), - ]: + for key in ['2012-01-03', + '2012-01-31', + slice('2012-01-03', '2012-01-03'), + slice('2012-01-03', '2012-01-04'), + slice('2012-01-03', '2012-01-06', 2), + slice('2012-01-03', '2012-01-31'), + tuple([[True, True, True, False, True]]), ]: # getitem @@ -729,7 +787,7 @@ def compare(result, expected): try: expected = df.ix[key] except KeyError: - self.assertRaises(KeyError, lambda : df.loc[key]) + self.assertRaises(KeyError, lambda: df.loc[key]) continue result = df.loc[key] @@ -744,27 +802,28 @@ def compare(result, expected): compare(df2, df1) # edge cases - s = Series([1,2,3,4], index=list('abde')) + s = Series([1, 2, 3, 4], index=list('abde')) result1 = s['a':'c'] result2 = s.ix['a':'c'] result3 = s.loc['a':'c'] - assert_series_equal(result1,result2) - assert_series_equal(result1,result3) + assert_series_equal(result1, result2) + assert_series_equal(result1, result3) # now work rather than raising KeyError - s = Series(range(5),[-2,-1,1,2,3]) + s = Series(range(5), [-2, -1, 1, 2, 3]) result1 = s.ix[-10:3] result2 = s.loc[-10:3] - assert_series_equal(result1,result2) + assert_series_equal(result1, result2) result1 = s.ix[0:3] result2 = s.loc[0:3] - assert_series_equal(result1,result2) + assert_series_equal(result1, result2) def test_setitem_multiindex(self): for index_fn in ('ix', 'loc'): + def check(target, indexers, value, compare_fn, expected=None): fn = getattr(target, index_fn) fn.__setitem__(indexers, value) @@ -773,33 +832,36 @@ def check(target, indexers, value, compare_fn, expected=None): expected = value compare_fn(result, expected) # GH7190 - index = pd.MultiIndex.from_product([np.arange(0,100), np.arange(0, 80)], names=['time', 'firm']) + index = pd.MultiIndex.from_product( + [np.arange(0, 100), np.arange(0, 80)], names=['time', 'firm']) t, n = 0, 2 - df = DataFrame(np.nan,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - check( - target=df, indexers=((t,n), 'X'), - value=0, compare_fn=self.assertEqual - ) - - df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - check( - target=df, indexers=((t,n), 'X'), - value=1, compare_fn=self.assertEqual - ) - - df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - check( - target=df, indexers=((t,n), 'X'), - value=2, compare_fn=self.assertEqual - ) + df = DataFrame( + np.nan, columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=0, + compare_fn=self.assertEqual) + + df = DataFrame( + -999, columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=1, + compare_fn=self.assertEqual) + + df = DataFrame( + columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], + index=index) + check(target=df, indexers=((t, n), 'X'), value=2, + compare_fn=self.assertEqual) # GH 7218, assinging with 0-dim arrays - df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index) - check( - target=df, indexers=((t,n), 'X'), - value=np.array(3), compare_fn=self.assertEqual, - expected=3, - ) + df = DataFrame( + -999, columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], + index=index) + check(target=df, + indexers=((t, n), 'X'), + value=np.array(3), + compare_fn=self.assertEqual, + expected=3, ) # GH5206 df = pd.DataFrame( @@ -812,67 +874,67 @@ def check(target, indexers, value, compare_fn, expected=None): df.ix[row_selection, col_selection] = df['F'] output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C']) assert_frame_equal(df.ix[row_selection, col_selection], output) - check( - target=df, indexers=(row_selection, col_selection), - value=df['F'], compare_fn=assert_frame_equal, - expected=output, - ) + check(target=df, + indexers=(row_selection, col_selection), + value=df['F'], + compare_fn=assert_frame_equal, + expected=output, ) # GH11372 idx = pd.MultiIndex.from_product([ - ['A', 'B', 'C'], - pd.date_range('2015-01-01', '2015-04-01', freq='MS') + ['A', 'B', 'C'], pd.date_range( + '2015-01-01', '2015-04-01', freq='MS') ]) cols = pd.MultiIndex.from_product([ - ['foo', 'bar'], - pd.date_range('2016-01-01', '2016-02-01', freq='MS') + ['foo', 'bar'], pd.date_range( + '2016-01-01', '2016-02-01', freq='MS') ]) - df = pd.DataFrame(np.random.random((12, 4)), index=idx, columns=cols) - subidx = pd.MultiIndex.from_tuples( - [('A', pd.Timestamp('2015-01-01')), ('A', pd.Timestamp('2015-02-01'))] - ) - subcols = pd.MultiIndex.from_tuples( - [('foo', pd.Timestamp('2016-01-01')), ('foo', pd.Timestamp('2016-02-01'))] - ) - vals = pd.DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols) - check( - target=df, indexers=(subidx, subcols), - value=vals, compare_fn=assert_frame_equal, - ) + df = pd.DataFrame( + np.random.random((12, 4)), index=idx, columns=cols) + subidx = pd.MultiIndex.from_tuples([('A', pd.Timestamp( + '2015-01-01')), ('A', pd.Timestamp('2015-02-01'))]) + subcols = pd.MultiIndex.from_tuples([('foo', pd.Timestamp( + '2016-01-01')), ('foo', pd.Timestamp('2016-02-01'))]) + vals = pd.DataFrame( + np.random.random((2, 2)), index=subidx, columns=subcols) + check(target=df, + indexers=(subidx, subcols), + value=vals, + compare_fn=assert_frame_equal, ) # set all columns - vals = pd.DataFrame(np.random.random((2, 4)), index=subidx, columns=cols) - check( - target=df, indexers=(subidx, slice(None, None, None)), - value=vals, compare_fn=assert_frame_equal, - ) + vals = pd.DataFrame( + np.random.random((2, 4)), index=subidx, columns=cols) + check(target=df, + indexers=(subidx, slice(None, None, None)), + value=vals, + compare_fn=assert_frame_equal, ) # identity copy = df.copy() - check( - target=df, indexers=(df.index, df.columns), - value=df, compare_fn=assert_frame_equal, - expected=copy - ) + check(target=df, indexers=(df.index, df.columns), value=df, + compare_fn=assert_frame_equal, expected=copy) def test_indexing_with_datetime_tz(self): # 8260 # support datetime64 with tz - idx = Index(date_range('20130101',periods=3,tz='US/Eastern'), + idx = Index(date_range('20130101', periods=3, tz='US/Eastern'), name='foo') - dr = date_range('20130110',periods=3) - df = DataFrame({'A' : idx, 'B' : dr}) + dr = date_range('20130110', periods=3) + df = DataFrame({'A': idx, 'B': dr}) df['C'] = idx - df.iloc[1,1] = pd.NaT - df.iloc[1,2] = pd.NaT + df.iloc[1, 1] = pd.NaT + df.iloc[1, 2] = pd.NaT # indexing result = df.iloc[1] - expected = Series([Timestamp('2013-01-02 00:00:00-0500', tz='US/Eastern'), np.nan, np.nan], + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], index=list('ABC'), dtype='object', name=1) assert_series_equal(result, expected) result = df.loc[1] - expected = Series([Timestamp('2013-01-02 00:00:00-0500', tz='US/Eastern'), np.nan, np.nan], + expected = Series([Timestamp('2013-01-02 00:00:00-0500', + tz='US/Eastern'), np.nan, np.nan], index=list('ABC'), dtype='object', name=1) assert_series_equal(result, expected) @@ -891,85 +953,91 @@ def test_indexing_with_datetime_tz(self): assert_frame_equal(result, expected) # indexing - setting an element - df = DataFrame( data = pd.to_datetime(['2015-03-30 20:12:32','2015-03-12 00:11:11']) ,columns=['time'] ) - df['new_col']=['new','old'] - df.time=df.set_index('time').index.tz_localize('UTC') - v = df[df.new_col=='new'].set_index('time').index.tz_convert('US/Pacific') + df = DataFrame(data=pd.to_datetime( + ['2015-03-30 20:12:32', '2015-03-12 00:11:11']), columns=['time']) + df['new_col'] = ['new', 'old'] + df.time = df.set_index('time').index.tz_localize('UTC') + v = df[df.new_col == 'new'].set_index('time').index.tz_convert( + 'US/Pacific') # trying to set a single element on a part of a different timezone def f(): - df.loc[df.new_col=='new','time'] = v + df.loc[df.new_col == 'new', 'time'] = v + self.assertRaises(ValueError, f) - v = df.loc[df.new_col=='new','time'] + pd.Timedelta('1s') - df.loc[df.new_col=='new','time'] = v - assert_series_equal(df.loc[df.new_col=='new','time'],v) + v = df.loc[df.new_col == 'new', 'time'] + pd.Timedelta('1s') + df.loc[df.new_col == 'new', 'time'] = v + assert_series_equal(df.loc[df.new_col == 'new', 'time'], v) def test_loc_setitem_dups(self): # GH 6541 - df_orig = DataFrame({'me' : list('rttti'), - 'foo': list('aaade'), - 'bar': np.arange(5,dtype='float64')*1.34+2, - 'bar2': np.arange(5,dtype='float64')*-.34+2}).set_index('me') + df_orig = DataFrame( + {'me': list('rttti'), + 'foo': list('aaade'), + 'bar': np.arange(5, dtype='float64') * 1.34 + 2, + 'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me') - indexer = tuple(['r',['bar','bar2']]) + indexer = tuple(['r', ['bar', 'bar2']]) df = df_orig.copy() - df.loc[indexer]*=2.0 - assert_series_equal(df.loc[indexer],2.0*df_orig.loc[indexer]) + df.loc[indexer] *= 2.0 + assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - indexer = tuple(['r','bar']) + indexer = tuple(['r', 'bar']) df = df_orig.copy() - df.loc[indexer]*=2.0 - self.assertEqual(df.loc[indexer],2.0*df_orig.loc[indexer]) + df.loc[indexer] *= 2.0 + self.assertEqual(df.loc[indexer], 2.0 * df_orig.loc[indexer]) - indexer = tuple(['t',['bar','bar2']]) + indexer = tuple(['t', ['bar', 'bar2']]) df = df_orig.copy() - df.loc[indexer]*=2.0 - assert_frame_equal(df.loc[indexer],2.0*df_orig.loc[indexer]) + df.loc[indexer] *= 2.0 + assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer]) def test_iloc_setitem_dups(self): # GH 6766 # iloc with a mask aligning from another iloc - df1 = DataFrame([{'A':None, 'B':1},{'A':2, 'B':2}]) - df2 = DataFrame([{'A':3, 'B':3},{'A':4, 'B':4}]) + df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}]) + df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}]) df = concat([df1, df2], axis=1) expected = df.fillna(3) expected['A'] = expected['A'].astype('float64') inds = np.isnan(df.iloc[:, 0]) mask = inds[inds].index - df.iloc[mask,0] = df.iloc[mask,2] + df.iloc[mask, 0] = df.iloc[mask, 2] assert_frame_equal(df, expected) # del a dup column across blocks - expected = DataFrame({ 0 : [1,2], 1 : [3,4] }) - expected.columns=['B','B'] + expected = DataFrame({0: [1, 2], 1: [3, 4]}) + expected.columns = ['B', 'B'] del df['A'] assert_frame_equal(df, expected) # assign back to self - df.iloc[[0,1],[0,1]] = df.iloc[[0,1],[0,1]] + df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] assert_frame_equal(df, expected) # reversed x 2 - df.iloc[[1,0],[0,1]] = df.iloc[[1,0],[0,1]].reset_index(drop=True) - df.iloc[[1,0],[0,1]] = df.iloc[[1,0],[0,1]].reset_index(drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) + df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index( + drop=True) assert_frame_equal(df, expected) def test_chained_getitem_with_lists(self): # GH6394 - # Regression in chained getitem indexing with embedded list-like from 0.12 + # Regression in chained getitem indexing with embedded list-like from + # 0.12 def check(result, expected): - tm.assert_numpy_array_equal(result,expected) + tm.assert_numpy_array_equal(result, expected) tm.assertIsInstance(result, np.ndarray) - - df = DataFrame({'A': 5*[np.zeros(3)], 'B':5*[np.ones(3)]}) + df = DataFrame({'A': 5 * [np.zeros(3)], 'B': 5 * [np.ones(3)]}) expected = df['A'].iloc[2] - result = df.loc[2,'A'] + result = df.loc[2, 'A'] check(result, expected) result2 = df.iloc[2]['A'] check(result2, expected) @@ -981,95 +1049,135 @@ def check(result, expected): def test_loc_getitem_int(self): # int label - self.check_result('int label', 'loc', 2, 'ix', 2, typs = ['ints'], axes = 0) - self.check_result('int label', 'loc', 3, 'ix', 3, typs = ['ints'], axes = 1) - self.check_result('int label', 'loc', 4, 'ix', 4, typs = ['ints'], axes = 2) - self.check_result('int label', 'loc', 2, 'ix', 2, typs = ['label'], fails = KeyError) + self.check_result('int label', 'loc', 2, 'ix', 2, typs=['ints'], + axes=0) + self.check_result('int label', 'loc', 3, 'ix', 3, typs=['ints'], + axes=1) + self.check_result('int label', 'loc', 4, 'ix', 4, typs=['ints'], + axes=2) + self.check_result('int label', 'loc', 2, 'ix', 2, typs=['label'], + fails=KeyError) def test_loc_getitem_label(self): # label - self.check_result('label', 'loc', 'c', 'ix', 'c', typs = ['labels'], axes=0) - self.check_result('label', 'loc', 'null', 'ix', 'null', typs = ['mixed'] , axes=0) - self.check_result('label', 'loc', 8, 'ix', 8, typs = ['mixed'] , axes=0) - self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, typs = ['ts'], axes=0) - self.check_result('label', 'loc', 'c', 'ix', 'c', typs = ['empty'], fails = KeyError) + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'], + axes=0) + self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'], + axes=0) + self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0) + self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1, + typs=['ts'], axes=0) + self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'], + fails=KeyError) def test_loc_getitem_label_out_of_range(self): # out of range label - self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['ints','labels','mixed','ts'], fails=KeyError) - self.check_result('label range', 'loc', 'f', 'ix', 'f', typs = ['floats'], fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ints','labels','mixed'], fails=KeyError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['ts'], axes=0, fails=TypeError) - self.check_result('label range', 'loc', 20, 'ix', 20, typs = ['floats'], axes=0, fails=TypeError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['ints', 'labels', 'mixed', 'ts'], + fails=KeyError) + self.check_result('label range', 'loc', 'f', 'ix', 'f', + typs=['floats'], fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, + typs=['ints', 'labels', 'mixed'], fails=KeyError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'], + axes=0, fails=TypeError) + self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'], + axes=0, fails=TypeError) def test_loc_getitem_label_list(self): # list of labels - self.check_result('list lbl', 'loc', [0,2,4], 'ix', [0,2,4], typs = ['ints'], axes=0) - self.check_result('list lbl', 'loc', [3,6,9], 'ix', [3,6,9], typs = ['ints'], axes=1) - self.check_result('list lbl', 'loc', [4,8,12], 'ix', [4,8,12], typs = ['ints'], axes=2) - self.check_result('list lbl', 'loc', ['a','b','d'], 'ix', ['a','b','d'], typs = ['labels'], axes=0) - self.check_result('list lbl', 'loc', ['A','B','C'], 'ix', ['A','B','C'], typs = ['labels'], axes=1) - self.check_result('list lbl', 'loc', ['Z','Y','W'], 'ix', ['Z','Y','W'], typs = ['labels'], axes=2) - self.check_result('list lbl', 'loc', [2,8,'null'], 'ix', [2,8,'null'], typs = ['mixed'], axes=0) - self.check_result('list lbl', 'loc', [Timestamp('20130102'),Timestamp('20130103')], 'ix', - [Timestamp('20130102'),Timestamp('20130103')], typs = ['ts'], axes=0) - - self.check_result('list lbl', 'loc', [0,1,2], 'indexer', [0,1,2], typs = ['empty'], fails = KeyError) - self.check_result('list lbl', 'loc', [0,2,3], 'ix', [0,2,3], typs = ['ints'], axes=0, fails = KeyError) - self.check_result('list lbl', 'loc', [3,6,7], 'ix', [3,6,7], typs = ['ints'], axes=1, fails = KeyError) - self.check_result('list lbl', 'loc', [4,8,10], 'ix', [4,8,10], typs = ['ints'], axes=2, fails = KeyError) + self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4], + typs=['ints'], axes=0) + self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9], + typs=['ints'], axes=1) + self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12], + typs=['ints'], axes=2) + self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix', + ['a', 'b', 'd'], typs=['labels'], axes=0) + self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix', + ['A', 'B', 'C'], typs=['labels'], axes=1) + self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix', + ['Z', 'Y', 'W'], typs=['labels'], axes=2) + self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix', + [2, 8, 'null'], typs=['mixed'], axes=0) + self.check_result('list lbl', 'loc', + [Timestamp('20130102'), Timestamp('20130103')], 'ix', + [Timestamp('20130102'), Timestamp('20130103')], + typs=['ts'], axes=0) + + self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2], + typs=['empty'], fails=KeyError) + self.check_result('list lbl', 'loc', [0, 2, 3], 'ix', [0, 2, 3], + typs=['ints'], axes=0, fails=KeyError) + self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7], + typs=['ints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10], + typs=['ints'], axes=2, fails=KeyError) # fails - self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=1, fails = KeyError) - self.check_result('list lbl', 'loc', [20,30,40], 'ix', [20,30,40], typs = ['ints'], axes=2, fails = KeyError) + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints'], axes=1, fails=KeyError) + self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40], + typs=['ints'], axes=2, fails=KeyError) # array like - self.check_result('array like', 'loc', Series(index=[0,2,4]).index, 'ix', [0,2,4], typs = ['ints'], axes=0) - self.check_result('array like', 'loc', Series(index=[3,6,9]).index, 'ix', [3,6,9], typs = ['ints'], axes=1) - self.check_result('array like', 'loc', Series(index=[4,8,12]).index, 'ix', [4,8,12], typs = ['ints'], axes=2) + self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index, + 'ix', [0, 2, 4], typs=['ints'], axes=0) + self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index, + 'ix', [3, 6, 9], typs=['ints'], axes=1) + self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index, + 'ix', [4, 8, 12], typs=['ints'], axes=2) def test_loc_getitem_bool(self): # boolean indexers - b = [True,False,True,False] - self.check_result('bool', 'loc', b, 'ix', b, typs = ['ints','labels','mixed','ts','floats']) - self.check_result('bool', 'loc', b, 'ix', b, typs = ['empty'], fails = KeyError) + b = [True, False, True, False] + self.check_result('bool', 'loc', b, 'ix', b, + typs=['ints', 'labels', 'mixed', 'ts', 'floats']) + self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'], + fails=KeyError) def test_loc_getitem_int_slice(self): # ok - self.check_result('int slice2', 'loc', slice(2,4), 'ix', [2,4], typs = ['ints'], axes = 0) - self.check_result('int slice2', 'loc', slice(3,6), 'ix', [3,6], typs = ['ints'], axes = 1) - self.check_result('int slice2', 'loc', slice(4,8), 'ix', [4,8], typs = ['ints'], axes = 2) + self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4], + typs=['ints'], axes=0) + self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6], + typs=['ints'], axes=1) + self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8], + typs=['ints'], axes=2) # GH 3053 # loc should treat integer slices like label slices from itertools import product - index = MultiIndex.from_tuples([t for t in product([6,7,8], ['a', 'b'])]) + index = MultiIndex.from_tuples([t for t in product( + [6, 7, 8], ['a', 'b'])]) df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[6:8,:] - expected = df.ix[6:8,:] - assert_frame_equal(result,expected) + result = df.loc[6:8, :] + expected = df.ix[6:8, :] + assert_frame_equal(result, expected) - index = MultiIndex.from_tuples([t for t in product([10, 20, 30], ['a', 'b'])]) + index = MultiIndex.from_tuples([t + for t in product( + [10, 20, 30], ['a', 'b'])]) df = DataFrame(np.random.randn(6, 6), index, index) - result = df.loc[20:30,:] - expected = df.ix[20:30,:] - assert_frame_equal(result,expected) + result = df.loc[20:30, :] + expected = df.ix[20:30, :] + assert_frame_equal(result, expected) # doc examples - result = df.loc[10,:] - expected = df.ix[10,:] - assert_frame_equal(result,expected) + result = df.loc[10, :] + expected = df.ix[10, :] + assert_frame_equal(result, expected) - result = df.loc[:,10] - #expected = df.ix[:,10] (this fails) + result = df.loc[:, 10] + # expected = df.ix[:,10] (this fails) expected = df[10] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_loc_to_fail(self): @@ -1079,7 +1187,8 @@ def test_loc_to_fail(self): columns=['e', 'f', 'g']) # raise a KeyError? - self.assertRaises(KeyError, df.loc.__getitem__, tuple([[1, 2], [1, 2]])) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([[1, 2], [1, 2]])) # GH 7496 # loc should not fallback @@ -1088,139 +1197,167 @@ def test_loc_to_fail(self): s.loc[1] = 1 s.loc['a'] = 2 - self.assertRaises(KeyError, lambda : s.loc[-1]) - self.assertRaises(KeyError, lambda : s.loc[[-1, -2]]) + self.assertRaises(KeyError, lambda: s.loc[-1]) + self.assertRaises(KeyError, lambda: s.loc[[-1, -2]]) - self.assertRaises(KeyError, lambda : s.loc[['4']]) + self.assertRaises(KeyError, lambda: s.loc[['4']]) s.loc[-1] = 3 - result = s.loc[[-1,-2]] - expected = Series([3,np.nan],index=[-1,-2]) + result = s.loc[[-1, -2]] + expected = Series([3, np.nan], index=[-1, -2]) assert_series_equal(result, expected) s['a'] = 2 - self.assertRaises(KeyError, lambda : s.loc[[-2]]) + self.assertRaises(KeyError, lambda: s.loc[[-2]]) del s['a'] + def f(): s.loc[[-2]] = 0 + self.assertRaises(KeyError, f) # inconsistency between .loc[values] and .loc[values,:] # GH 7999 - df = DataFrame([['a'],['b']],index=[1,2],columns=['value']) + df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value']) def f(): - df.loc[[3],:] + df.loc[[3], :] + self.assertRaises(KeyError, f) def f(): df.loc[[3]] + self.assertRaises(KeyError, f) # at should not fallback # GH 7814 - s = Series([1,2,3], index=list('abc')) + s = Series([1, 2, 3], index=list('abc')) result = s.at['a'] self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda : s.at[0]) + self.assertRaises(ValueError, lambda: s.at[0]) - df = DataFrame({'A' : [1,2,3]},index=list('abc')) - result = df.at['a','A'] + df = DataFrame({'A': [1, 2, 3]}, index=list('abc')) + result = df.at['a', 'A'] self.assertEqual(result, 1) - self.assertRaises(ValueError, lambda : df.at['a',0]) + self.assertRaises(ValueError, lambda: df.at['a', 0]) - s = Series([1,2,3], index=[3,2,1]) + s = Series([1, 2, 3], index=[3, 2, 1]) result = s.at[1] self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda : s.at['a']) + self.assertRaises(ValueError, lambda: s.at['a']) - df = DataFrame({0 : [1,2,3]},index=[3,2,1]) - result = df.at[1,0] + df = DataFrame({0: [1, 2, 3]}, index=[3, 2, 1]) + result = df.at[1, 0] self.assertEqual(result, 3) - self.assertRaises(ValueError, lambda : df.at['a',0]) + self.assertRaises(ValueError, lambda: df.at['a', 0]) def test_loc_getitem_label_slice(self): # label slices (with ints) - self.check_result('lab slice', 'loc', slice(1,3), 'ix', slice(1,3), typs = ['labels','mixed','empty','ts','floats'], fails=TypeError) + self.check_result('lab slice', 'loc', slice(1, 3), 'ix', slice( + 1, 3), typs=['labels', 'mixed', 'empty', 'ts', 'floats'], + fails=TypeError) # real label slices - self.check_result('lab slice', 'loc', slice('a','c'), 'ix', slice('a','c'), typs = ['labels'], axes=0) - self.check_result('lab slice', 'loc', slice('A','C'), 'ix', slice('A','C'), typs = ['labels'], axes=1) - self.check_result('lab slice', 'loc', slice('W','Z'), 'ix', slice('W','Z'), typs = ['labels'], axes=2) - - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=0) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=1, fails=TypeError) - self.check_result('ts slice', 'loc', slice('20130102','20130104'), 'ix', slice('20130102','20130104'), typs = ['ts'], axes=2, fails=TypeError) - - self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=0, fails=TypeError) - self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=1, fails=KeyError) - self.check_result('mixed slice', 'loc', slice(2,8), 'ix', slice(2,8), typs = ['mixed'], axes=2, fails=KeyError) - - self.check_result('mixed slice', 'loc', slice(2,4,2), 'ix', slice(2,4,2), typs = ['mixed'], axes=0, fails=TypeError) + self.check_result('lab slice', 'loc', slice('a', 'c'), 'ix', slice( + 'a', 'c'), typs=['labels'], axes=0) + self.check_result('lab slice', 'loc', slice('A', 'C'), 'ix', slice( + 'A', 'C'), typs=['labels'], axes=1) + self.check_result('lab slice', 'loc', slice('W', 'Z'), 'ix', slice( + 'W', 'Z'), typs=['labels'], axes=2) + + self.check_result('ts slice', 'loc', slice( + '20130102', '20130104'), 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=0) + self.check_result('ts slice', 'loc', slice( + '20130102', '20130104'), 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=1, fails=TypeError) + self.check_result('ts slice', 'loc', slice( + '20130102', '20130104'), 'ix', slice('20130102', '20130104'), + typs=['ts'], axes=2, fails=TypeError) + + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=0, fails=TypeError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=1, fails=KeyError) + self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8), + typs=['mixed'], axes=2, fails=KeyError) + + self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( + 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) def test_loc_general(self): - df = DataFrame(np.random.rand(4,4),columns=['A','B','C','D'], index=['A','B','C','D']) + df = DataFrame( + np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'], + index=['A', 'B', 'C', 'D']) # want this to work - result = df.loc[:,"A":"B"].iloc[0:2,:] - self.assertTrue((result.columns == ['A','B']).all() == True) - self.assertTrue((result.index == ['A','B']).all() == True) + result = df.loc[:, "A":"B"].iloc[0:2, :] + self.assertTrue((result.columns == ['A', 'B']).all()) + self.assertTrue((result.index == ['A', 'B']).all()) # mixed type - result = DataFrame({ 'a' : [Timestamp('20130101')], 'b' : [1] }).iloc[0] - expected = Series([ Timestamp('20130101'), 1], index=['a','b'], name=0) + result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0] + expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0) assert_series_equal(result, expected) self.assertEqual(result.dtype, object) def test_loc_setitem_consistency(self): - # GH 6149 # coerce similary for setitem and loc when rows have a null-slice - expected = DataFrame({ 'date': Series(0,index=range(5),dtype=np.int64), - 'val' : Series(range(5),dtype=np.int64) }) - - df = DataFrame({ 'date': date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - df.loc[:,'date'] = 0 - assert_frame_equal(df,expected) - - df = DataFrame({ 'date': date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - df.loc[:,'date'] = np.array(0,dtype=np.int64) - assert_frame_equal(df,expected) - - df = DataFrame({ 'date': date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - df.loc[:,'date'] = np.array([0,0,0,0,0],dtype=np.int64) - assert_frame_equal(df,expected) - - expected = DataFrame({ 'date': Series('foo',index=range(5)), - 'val' : Series(range(5),dtype=np.int64) }) - df = DataFrame({ 'date': date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - df.loc[:,'date'] = 'foo' - assert_frame_equal(df,expected) - - expected = DataFrame({ 'date': Series(1.0,index=range(5)), - 'val' : Series(range(5),dtype=np.int64) }) - df = DataFrame({ 'date': date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - df.loc[:,'date'] = 1.0 - assert_frame_equal(df,expected) + expected = DataFrame({'date': Series(0, index=range(5), + dtype=np.int64), + 'val': Series(range(5), dtype=np.int64)}) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 0 + assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array(0, dtype=np.int64) + assert_frame_equal(df, expected) + + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64) + assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series('foo', index=range(5)), + 'val': Series( + range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 'foo' + assert_frame_equal(df, expected) + + expected = DataFrame({'date': Series(1.0, index=range(5)), + 'val': Series( + range(5), dtype=np.int64)}) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + df.loc[:, 'date'] = 1.0 + assert_frame_equal(df, expected) # empty (essentially noops) expected = DataFrame(columns=['x', 'y']) expected['x'] = expected['x'].astype(np.int64) df = DataFrame(columns=['x', 'y']) df.loc[:, 'x'] = 1 - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) df = DataFrame(columns=['x', 'y']) df['x'] = 1 - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) # .loc[:,column] setting with slice == len of the column # GH10408 @@ -1232,109 +1369,122 @@ def test_loc_setitem_consistency(self): Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes, Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No""" - df = pd.read_csv(StringIO(data),header=[0,1], index_col=[0,1,2]) - df.loc[:,('Respondent','StartDate')] = pd.to_datetime(df.loc[:,('Respondent','StartDate')]) - df.loc[:,('Respondent','EndDate')] = pd.to_datetime(df.loc[:,('Respondent','EndDate')]) - df.loc[:,('Respondent','Duration')] = df.loc[:,('Respondent','EndDate')] - df.loc[:,('Respondent','StartDate')] + df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2]) + df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'StartDate')]) + df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, ( + 'Respondent', 'EndDate')]) + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')] - df.loc[:,('Respondent','Duration')] = df.loc[:,('Respondent','Duration')].astype('timedelta64[s]') - expected = Series([1380,720,840,2160.],index=df.index,name=('Respondent','Duration')) - assert_series_equal(df[('Respondent','Duration')],expected) + df.loc[:, ('Respondent', 'Duration')] = df.loc[:, ( + 'Respondent', 'Duration')].astype('timedelta64[s]') + expected = Series([1380, 720, 840, 2160.], index=df.index, + name=('Respondent', 'Duration')) + assert_series_equal(df[('Respondent', 'Duration')], expected) def test_loc_setitem_frame(self): df = self.frame_labels - result = df.iloc[0,0] + result = df.iloc[0, 0] - df.loc['a','A'] = 1 - result = df.loc['a','A'] + df.loc['a', 'A'] = 1 + result = df.loc['a', 'A'] self.assertEqual(result, 1) - result = df.iloc[0,0] + result = df.iloc[0, 0] self.assertEqual(result, 1) - df.loc[:,'B':'D'] = 0 - expected = df.loc[:,'B':'D'] - result = df.ix[:,1:] + df.loc[:, 'B':'D'] = 0 + expected = df.loc[:, 'B':'D'] + result = df.ix[:, 1:] assert_frame_equal(result, expected) # GH 6254 # setting issue df = DataFrame(index=[3, 5, 4], columns=['A']) - df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3],dtype='int64') - expected = DataFrame(dict(A = Series([1,2,3],index=[4, 3, 5]))).reindex(index=[3,5,4]) + df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64') + expected = DataFrame(dict(A=Series( + [1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4]) assert_frame_equal(df, expected) # GH 6252 # setting with an empty frame keys1 = ['@' + str(i) for i in range(5)] - val1 = np.arange(5,dtype='int64') + val1 = np.arange(5, dtype='int64') keys2 = ['@' + str(i) for i in range(4)] - val2 = np.arange(4,dtype='int64') + val2 = np.arange(4, dtype='int64') index = list(set(keys1).union(keys2)) - df = DataFrame(index = index) + df = DataFrame(index=index) df['A'] = nan df.loc[keys1, 'A'] = val1 df['B'] = nan df.loc[keys2, 'B'] = val2 - expected = DataFrame(dict(A = Series(val1,index=keys1), B = Series(val2,index=keys2))).reindex(index=index) + expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series( + val2, index=keys2))).reindex(index=index) assert_frame_equal(df, expected) # GH 8669 # invalid coercion of nan -> int - df = DataFrame({'A' : [1,2,3], 'B' : np.nan }) + df = DataFrame({'A': [1, 2, 3], 'B': np.nan}) df.loc[df.B > df.A, 'B'] = df.A - expected = DataFrame({'A' : [1,2,3], 'B' : np.nan}) + expected = DataFrame({'A': [1, 2, 3], 'B': np.nan}) assert_frame_equal(df, expected) # GH 6546 # setting with mixed labels - df = DataFrame({1:[1,2],2:[3,4],'a':['a','b']}) + df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']}) - result = df.loc[0, [1,2]] - expected = Series([1,3],index=[1,2],dtype=object, name=0) + result = df.loc[0, [1, 2]] + expected = Series([1, 3], index=[1, 2], dtype=object, name=0) assert_series_equal(result, expected) - expected = DataFrame({1:[5,2],2:[6,4],'a':['a','b']}) - df.loc[0, [1,2]] = [5,6] + expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']}) + df.loc[0, [1, 2]] = [5, 6] assert_frame_equal(df, expected) def test_loc_setitem_frame_multiples(self): # multiple setting - df = DataFrame({ 'A' : ['foo','bar','baz'], - 'B' : Series(range(3),dtype=np.int64) }) + df = DataFrame({'A': ['foo', 'bar', 'baz'], + 'B': Series( + range(3), dtype=np.int64)}) rhs = df.loc[1:2] rhs.index = df.index[0:2] df.loc[0:1] = rhs - expected = DataFrame({ 'A' : ['bar','baz','baz'], - 'B' : Series([1,2,2],dtype=np.int64) }) + expected = DataFrame({'A': ['bar', 'baz', 'baz'], + 'B': Series( + [1, 2, 2], dtype=np.int64)}) assert_frame_equal(df, expected) - # multiple setting with frame on rhs (with M8) - df = DataFrame({ 'date' : date_range('2000-01-01','2000-01-5'), - 'val' : Series(range(5),dtype=np.int64) }) - expected = DataFrame({ 'date' : [Timestamp('20000101'),Timestamp('20000102'),Timestamp('20000101'), - Timestamp('20000102'),Timestamp('20000103')], - 'val' : Series([0,1,0,1,2],dtype=np.int64) }) + df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'), + 'val': Series( + range(5), dtype=np.int64)}) + expected = DataFrame({'date': [Timestamp('20000101'), Timestamp( + '20000102'), Timestamp('20000101'), Timestamp('20000102'), + Timestamp('20000103')], + 'val': Series( + [0, 1, 0, 1, 2], dtype=np.int64)}) rhs = df.loc[0:2] rhs.index = df.index[2:5] df.loc[2:4] = rhs assert_frame_equal(df, expected) def test_iloc_getitem_frame(self): - df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), columns=lrange(0,8,2)) + df = DataFrame( + np.random.randn( + 10, 4), index=lrange(0, 20, 2), columns=lrange(0, 8, 2)) result = df.iloc[2] exp = df.ix[4] assert_series_equal(result, exp) - result = df.iloc[2,2] - exp = df.ix[4,4] + result = df.iloc[2, 2] + exp = df.ix[4, 4] self.assertEqual(result, exp) # slice @@ -1342,134 +1492,142 @@ def test_iloc_getitem_frame(self): expected = df.ix[8:14] assert_frame_equal(result, expected) - result = df.iloc[:,2:3] - expected = df.ix[:,4:5] + result = df.iloc[:, 2:3] + expected = df.ix[:, 4:5] assert_frame_equal(result, expected) # list of integers - result = df.iloc[[0,1,3]] - expected = df.ix[[0,2,6]] + result = df.iloc[[0, 1, 3]] + expected = df.ix[[0, 2, 6]] assert_frame_equal(result, expected) - result = df.iloc[[0,1,3],[0,1]] - expected = df.ix[[0,2,6],[0,2]] + result = df.iloc[[0, 1, 3], [0, 1]] + expected = df.ix[[0, 2, 6], [0, 2]] assert_frame_equal(result, expected) # neg indicies - result = df.iloc[[-1,1,3],[-1,1]] - expected = df.ix[[18,2,6],[6,2]] + result = df.iloc[[-1, 1, 3], [-1, 1]] + expected = df.ix[[18, 2, 6], [6, 2]] assert_frame_equal(result, expected) # dups indicies - result = df.iloc[[-1,-1,1,3],[-1,1]] - expected = df.ix[[18,18,2,6],[6,2]] + result = df.iloc[[-1, -1, 1, 3], [-1, 1]] + expected = df.ix[[18, 18, 2, 6], [6, 2]] assert_frame_equal(result, expected) # with index-like - s = Series(index=lrange(1,5)) + s = Series(index=lrange(1, 5)) result = df.iloc[s.index] - expected = df.ix[[2,4,6,8]] + expected = df.ix[[2, 4, 6, 8]] assert_frame_equal(result, expected) # try with labelled frame - df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD')) + df = DataFrame( + np.random.randn(10, + 4), index=list('abcdefghij'), columns=list('ABCD')) - result = df.iloc[1,1] - exp = df.ix['b','B'] + result = df.iloc[1, 1] + exp = df.ix['b', 'B'] self.assertEqual(result, exp) - result = df.iloc[:,2:3] - expected = df.ix[:,['C']] + result = df.iloc[:, 2:3] + expected = df.ix[:, ['C']] assert_frame_equal(result, expected) # negative indexing - result = df.iloc[-1,-1] - exp = df.ix['j','D'] + result = df.iloc[-1, -1] + exp = df.ix['j', 'D'] self.assertEqual(result, exp) # out-of-bounds exception - self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10,5])) + self.assertRaises(IndexError, df.iloc.__getitem__, tuple([10, 5])) # trying to use a label - self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j','D'])) + self.assertRaises(ValueError, df.iloc.__getitem__, tuple(['j', 'D'])) def test_iloc_getitem_panel(self): # GH 7189 - p = Panel(np.arange(4*3*2).reshape(4,3,2), - items=['A','B','C','D'], - major_axis=['a','b','c'], - minor_axis=['one','two']) + p = Panel(np.arange(4 * 3 * 2).reshape(4, 3, 2), + items=['A', 'B', 'C', 'D'], + major_axis=['a', 'b', 'c'], + minor_axis=['one', 'two']) result = p.iloc[1] expected = p.loc['B'] assert_frame_equal(result, expected) - result = p.iloc[1,1] - expected = p.loc['B','b'] + result = p.iloc[1, 1] + expected = p.loc['B', 'b'] assert_series_equal(result, expected) - result = p.iloc[1,1,1] - expected = p.loc['B','b','two'] - self.assertEqual(result,expected) + result = p.iloc[1, 1, 1] + expected = p.loc['B', 'b', 'two'] + self.assertEqual(result, expected) # slice result = p.iloc[1:3] - expected = p.loc[['B','C']] + expected = p.loc[['B', 'C']] assert_panel_equal(result, expected) - result = p.iloc[:,0:2] - expected = p.loc[:,['a','b']] + result = p.iloc[:, 0:2] + expected = p.loc[:, ['a', 'b']] assert_panel_equal(result, expected) # list of integers - result = p.iloc[[0,2]] - expected = p.loc[['A','C']] + result = p.iloc[[0, 2]] + expected = p.loc[['A', 'C']] assert_panel_equal(result, expected) # neg indicies - result = p.iloc[[-1,1],[-1,1]] - expected = p.loc[['D','B'],['c','b']] + result = p.iloc[[-1, 1], [-1, 1]] + expected = p.loc[['D', 'B'], ['c', 'b']] assert_panel_equal(result, expected) # dups indicies - result = p.iloc[[-1,-1,1],[-1,1]] - expected = p.loc[['D','D','B'],['c','b']] + result = p.iloc[[-1, -1, 1], [-1, 1]] + expected = p.loc[['D', 'D', 'B'], ['c', 'b']] assert_panel_equal(result, expected) # combined - result = p.iloc[0,[True,True],[0,1]] - expected = p.loc['A',['a','b'],['one','two']] + result = p.iloc[0, [True, True], [0, 1]] + expected = p.loc['A', ['a', 'b'], ['one', 'two']] assert_frame_equal(result, expected) # out-of-bounds exception - self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10,5])) + self.assertRaises(IndexError, p.iloc.__getitem__, tuple([10, 5])) + def f(): - p.iloc[0,[True,True],[0,1,2]] + p.iloc[0, [True, True], [0, 1, 2]] + self.assertRaises(IndexError, f) # trying to use a label - self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j','D'])) + self.assertRaises(ValueError, p.iloc.__getitem__, tuple(['j', 'D'])) # GH - p = Panel(np.random.rand(4,3,2), items=['A','B','C','D'], major_axis=['U','V','W'], minor_axis=['X','Y']) + p = Panel( + np.random.rand(4, 3, 2), items=['A', 'B', 'C', 'D'], + major_axis=['U', 'V', 'W'], minor_axis=['X', 'Y']) expected = p['A'] - result = p.iloc[0,:,:] + result = p.iloc[0, :, :] assert_frame_equal(result, expected) - result = p.iloc[0,[True,True,True],:] + result = p.iloc[0, [True, True, True], :] assert_frame_equal(result, expected) - result = p.iloc[0,[True,True,True],[0,1]] + result = p.iloc[0, [True, True, True], [0, 1]] assert_frame_equal(result, expected) def f(): - p.iloc[0,[True,True,True],[0,1,2]] + p.iloc[0, [True, True, True], [0, 1, 2]] + self.assertRaises(IndexError, f) def f(): - p.iloc[0,[True,True,True],[2]] + p.iloc[0, [True, True, True], [2]] + self.assertRaises(IndexError, f) # GH 7199 @@ -1480,124 +1638,134 @@ def f(): names=['UPPER', 'lower']) simple_index = [x[0] for x in multi_index] - wd1 = Panel(items=['First', 'Second'], - major_axis=['a', 'b', 'c', 'd'], + wd1 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], minor_axis=multi_index) - wd2 = Panel(items=['First', 'Second'], - major_axis=['a', 'b', 'c', 'd'], + wd2 = Panel(items=['First', 'Second'], major_axis=['a', 'b', 'c', 'd'], minor_axis=simple_index) expected1 = wd1['First'].iloc[[True, True, True, False], [0, 2]] result1 = wd1.iloc[0, [True, True, True, False], [0, 2]] # WRONG - assert_frame_equal(result1,expected1) + assert_frame_equal(result1, expected1) expected2 = wd2['First'].iloc[[True, True, True, False], [0, 2]] result2 = wd2.iloc[0, [True, True, True, False], [0, 2]] - assert_frame_equal(result2,expected2) + assert_frame_equal(result2, expected2) - expected1 = DataFrame(index=['a'],columns=multi_index,dtype='float64') - result1 = wd1.iloc[0,[0],[0,1,2]] - assert_frame_equal(result1,expected1) + expected1 = DataFrame(index=['a'], columns=multi_index, + dtype='float64') + result1 = wd1.iloc[0, [0], [0, 1, 2]] + assert_frame_equal(result1, expected1) - expected2 = DataFrame(index=['a'],columns=simple_index,dtype='float64') - result2 = wd2.iloc[0,[0],[0,1,2]] - assert_frame_equal(result2,expected2) + expected2 = DataFrame(index=['a'], columns=simple_index, + dtype='float64') + result2 = wd2.iloc[0, [0], [0, 1, 2]] + assert_frame_equal(result2, expected2) # GH 7516 - mi = MultiIndex.from_tuples([(0,'x'), (1,'y'), (2,'z')]) - p = Panel(np.arange(3*3*3,dtype='int64').reshape(3,3,3), items=['a','b','c'], major_axis=mi, minor_axis=['u','v','w']) + mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')]) + p = Panel( + np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3), + items=['a', 'b', 'c'], major_axis=mi, minor_axis=['u', 'v', 'w']) result = p.iloc[:, 1, 0] - expected = Series([3,12,21],index=['a','b','c'], name='u') - assert_series_equal(result,expected) + expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u') + assert_series_equal(result, expected) - result = p.loc[:, (1,'y'), 'u'] - assert_series_equal(result,expected) + result = p.loc[:, (1, 'y'), 'u'] + assert_series_equal(result, expected) def test_iloc_getitem_doc_issue(self): # multi axis slicing issue with single block # surfaced in GH 6059 - arr = np.random.randn(6,4) - index = date_range('20130101',periods=6) + arr = np.random.randn(6, 4) + index = date_range('20130101', periods=6) columns = list('ABCD') - df = DataFrame(arr,index=index,columns=columns) + df = DataFrame(arr, index=index, columns=columns) # defines ref_locs df.describe() - result = df.iloc[3:5,0:2] + result = df.iloc[3:5, 0:2] str(result) result.dtypes - expected = DataFrame(arr[3:5,0:2],index=index[3:5],columns=columns[0:2]) - assert_frame_equal(result,expected) + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + columns=columns[0:2]) + assert_frame_equal(result, expected) # for dups df.columns = list('aaaa') - result = df.iloc[3:5,0:2] + result = df.iloc[3:5, 0:2] str(result) result.dtypes - expected = DataFrame(arr[3:5,0:2],index=index[3:5],columns=list('aa')) - assert_frame_equal(result,expected) + expected = DataFrame(arr[3:5, 0:2], index=index[3:5], + columns=list('aa')) + assert_frame_equal(result, expected) # related - arr = np.random.randn(6,4) - index = list(range(0,12,2)) - columns = list(range(0,8,2)) - df = DataFrame(arr,index=index,columns=columns) + arr = np.random.randn(6, 4) + index = list(range(0, 12, 2)) + columns = list(range(0, 8, 2)) + df = DataFrame(arr, index=index, columns=columns) df._data.blocks[0].mgr_locs - result = df.iloc[1:5,2:4] + result = df.iloc[1:5, 2:4] str(result) result.dtypes - expected = DataFrame(arr[1:5,2:4],index=index[1:5],columns=columns[2:4]) - assert_frame_equal(result,expected) + expected = DataFrame(arr[1:5, 2:4], index=index[1:5], + columns=columns[2:4]) + assert_frame_equal(result, expected) def test_setitem_ndarray_1d(self): # GH5508 # len of indexer vs length of the 1d ndarray - df = DataFrame(index=Index(lrange(1,11))) + df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) # invalid def f(): - df.ix[2:5, 'bar'] = np.array([2.33j, 1.23+0.1j, 2.2]) + df.ix[2:5, 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2]) + self.assertRaises(ValueError, f) # valid - df.ix[2:5, 'bar'] = np.array([2.33j, 1.23+0.1j, 2.2, 1.0]) + df.ix[2:5, 'bar'] = np.array([2.33j, 1.23 + 0.1j, 2.2, 1.0]) result = df.ix[2:5, 'bar'] - expected = Series([2.33j, 1.23+0.1j, 2.2, 1.0], index=[2,3,4,5], name='bar') + expected = Series([2.33j, 1.23 + 0.1j, 2.2, 1.0], index=[2, 3, 4, 5], + name='bar') assert_series_equal(result, expected) # dtype getting changed? - df = DataFrame(index=Index(lrange(1,11))) + df = DataFrame(index=Index(lrange(1, 11))) df['foo'] = np.zeros(10, dtype=np.float64) df['bar'] = np.zeros(10, dtype=np.complex) def f(): - df[2:5] = np.arange(1,4)*1j + df[2:5] = np.arange(1, 4) * 1j + self.assertRaises(ValueError, f) def test_iloc_setitem_series(self): - df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD')) + df = DataFrame( + np.random.randn(10, + 4), index=list('abcdefghij'), columns=list('ABCD')) - df.iloc[1,1] = 1 - result = df.iloc[1,1] + df.iloc[1, 1] = 1 + result = df.iloc[1, 1] self.assertEqual(result, 1) - df.iloc[:,2:3] = 0 - expected = df.iloc[:,2:3] - result = df.iloc[:,2:3] + df.iloc[:, 2:3] = 0 + expected = df.iloc[:, 2:3] + result = df.iloc[:, 2:3] assert_frame_equal(result, expected) - s = Series(np.random.randn(10), index=lrange(0,20,2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) s.iloc[1] = 1 result = s.iloc[1] @@ -1608,36 +1776,39 @@ def test_iloc_setitem_series(self): result = s.iloc[:4] assert_series_equal(result, expected) - s= Series([-1]*6) - s.iloc[0::2]= [0,2,4] - s.iloc[1::2]= [1,3,5] - result = s - expected= Series([0,1,2,3,4,5]) + s = Series([-1] * 6) + s.iloc[0::2] = [0, 2, 4] + s.iloc[1::2] = [1, 3, 5] + result = s + expected = Series([0, 1, 2, 3, 4, 5]) assert_series_equal(result, expected) def test_iloc_setitem_list_of_lists(self): # GH 7551 # list-of-list is set incorrectly in mixed vs. single dtyped frames - df = DataFrame(dict(A = np.arange(5,dtype='int64'), B = np.arange(5,10,dtype='int64'))) - df.iloc[2:4] = [[10,11],[12,13]] - expected = DataFrame(dict(A = [0,1,10,12,4], B = [5,6,11,13,9])) + df = DataFrame(dict(A=np.arange(5, dtype='int64'), B=np.arange( + 5, 10, dtype='int64'))) + df.iloc[2:4] = [[10, 11], [12, 13]] + expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9])) assert_frame_equal(df, expected) - df = DataFrame(dict(A = list('abcde'), B = np.arange(5,10,dtype='int64'))) - df.iloc[2:4] = [['x',11],['y',13]] - expected = DataFrame(dict(A = ['a','b','x','y','e'], B = [5,6,11,13,9])) + df = DataFrame( + dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64'))) + df.iloc[2:4] = [['x', 11], ['y', 13]] + expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'], B=[5, 6, 11, 13, + 9])) assert_frame_equal(df, expected) def test_iloc_getitem_multiindex(self): mi_labels = DataFrame(np.random.randn(4, 3), columns=[['i', 'i', 'j'], ['A', 'A', 'B']], - index=[['i', 'i', 'j', 'k'], ['X', 'X', 'Y','Y']]) - - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2,2,4],[6,8,10]], - index=[[4,4,8],[8,10,12]]) + index=[['i', 'i', 'j', 'k'], + ['X', 'X', 'Y', 'Y']]) + mi_int = DataFrame(np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) # the first row rs = mi_int.iloc[0] @@ -1647,18 +1818,18 @@ def test_iloc_getitem_multiindex(self): self.assertEqual(xp.name, 8) # 2nd (last) columns - rs = mi_int.iloc[:,2] - xp = mi_int.ix[:,2] + rs = mi_int.iloc[:, 2] + xp = mi_int.ix[:, 2] assert_series_equal(rs, xp) # corner column - rs = mi_int.iloc[2,2] - xp = mi_int.ix[:,2].ix[2] + rs = mi_int.iloc[2, 2] + xp = mi_int.ix[:, 2].ix[2] self.assertEqual(rs, xp) # this is basically regular indexing - rs = mi_labels.iloc[2,2] - xp = mi_labels.ix['j'].ix[:,'j'].ix[0,0] + rs = mi_labels.iloc[2, 2] + xp = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0] self.assertEqual(rs, xp) def test_loc_multiindex(self): @@ -1667,9 +1838,9 @@ def test_loc_multiindex(self): ['A', 'A', 'B']], index=[['i', 'i', 'j'], ['X', 'X', 'Y']]) - mi_int = DataFrame(np.random.randn(3, 3), - columns=[[2,2,4],[6,8,10]], - index=[[4,4,8],[8,10,12]]) + mi_int = DataFrame(np.random.randn(3, 3), + columns=[[2, 2, 4], [6, 8, 10]], + index=[[4, 4, 8], [8, 10, 12]]) # the first row rs = mi_labels.loc['i'] @@ -1677,30 +1848,30 @@ def test_loc_multiindex(self): assert_frame_equal(rs, xp) # 2nd (last) columns - rs = mi_labels.loc[:,'j'] - xp = mi_labels.ix[:,'j'] + rs = mi_labels.loc[:, 'j'] + xp = mi_labels.ix[:, 'j'] assert_frame_equal(rs, xp) # corner column - rs = mi_labels.loc['j'].loc[:,'j'] - xp = mi_labels.ix['j'].ix[:,'j'] - assert_frame_equal(rs,xp) + rs = mi_labels.loc['j'].loc[:, 'j'] + xp = mi_labels.ix['j'].ix[:, 'j'] + assert_frame_equal(rs, xp) # with a tuple - rs = mi_labels.loc[('i','X')] - xp = mi_labels.ix[('i','X')] - assert_frame_equal(rs,xp) + rs = mi_labels.loc[('i', 'X')] + xp = mi_labels.ix[('i', 'X')] + assert_frame_equal(rs, xp) rs = mi_int.loc[4] xp = mi_int.ix[4] - assert_frame_equal(rs,xp) + assert_frame_equal(rs, xp) # GH6788 # multi-index indexer is None (meaning take all) attributes = ['Attribute' + str(i) for i in range(1)] attribute_values = ['Value' + str(i) for i in range(5)] - index = MultiIndex.from_product([attributes,attribute_values]) + index = MultiIndex.from_product([attributes, attribute_values]) df = 0.1 * np.random.randn(10, 1 * 5) + 0.5 df = DataFrame(df, columns=index) result = df[attributes] @@ -1708,15 +1879,19 @@ def test_loc_multiindex(self): # GH 7349 # loc with a multi-index seems to be doing fallback - df = DataFrame(np.arange(12).reshape(-1,1),index=pd.MultiIndex.from_product([[1,2,3,4],[1,2,3]])) + df = DataFrame( + np.arange(12).reshape(-1, 1), + index=pd.MultiIndex.from_product([[1, 2, 3, 4], [1, 2, 3]])) - expected = df.loc[([1,2],),:] - result = df.loc[[1,2]] + expected = df.loc[([1, 2], ), :] + result = df.loc[[1, 2]] assert_frame_equal(result, expected) # GH 7399 # incomplete indexers - s = pd.Series(np.arange(15,dtype='int64'),MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + s = pd.Series( + np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) expected = s.loc[:, 'a':'c'] result = s.loc[0:4, 'a':'c'] @@ -1733,8 +1908,10 @@ def test_loc_multiindex(self): # GH 7400 # multiindexer gettitem with list of indexers skips wrong element - s = pd.Series(np.arange(15,dtype='int64'),MultiIndex.from_product([range(5), ['a', 'b', 'c']])) - expected = s.iloc[[6,7,8,12,13,14]] + s = pd.Series( + np.arange(15, dtype='int64'), + MultiIndex.from_product([range(5), ['a', 'b', 'c']])) + expected = s.iloc[[6, 7, 8, 12, 13, 14]] result = s.loc[2:4:2, 'a':'c'] assert_series_equal(result, expected) @@ -1743,16 +1920,17 @@ def test_multiindex_perf_warn(self): if sys.version_info < (2, 7): raise nose.SkipTest('python version < 2.7') - df = DataFrame({'jim':[0, 0, 1, 1], - 'joe':['x', 'x', 'z', 'y'], - 'jolie':np.random.rand(4)}).set_index(['jim', 'joe']) + df = DataFrame({'jim': [0, 0, 1, 1], + 'joe': ['x', 'x', 'z', 'y'], + 'jolie': np.random.rand(4)}).set_index(['jim', 'joe']) - with tm.assert_produces_warning(PerformanceWarning, clear=[pd.core.index]): - _ = df.loc[(1, 'z')] + with tm.assert_produces_warning(PerformanceWarning, + clear=[pd.core.index]): + df.loc[(1, 'z')] - df = df.iloc[[2,1,3,0]] + df = df.iloc[[2, 1, 3, 0]] with tm.assert_produces_warning(PerformanceWarning): - _ = df.loc[(0,)] + df.loc[(0, )] @slow def test_multiindex_get_loc(self): # GH7724, GH2646 @@ -1771,25 +1949,26 @@ def validate(mi, df, key): mask &= df.iloc[:, i] == k if not mask.any(): - self.assertNotIn(key[:i+1], mi.index) + self.assertNotIn(key[:i + 1], mi.index) continue - self.assertIn(key[:i+1], mi.index) + self.assertIn(key[:i + 1], mi.index) right = df[mask].copy() if i + 1 != len(key): # partial key - right.drop(cols[:i+1], axis=1, inplace=True) - right.set_index(cols[i+1:-1], inplace=True) - assert_frame_equal(mi.loc[key[:i+1]], right) + right.drop(cols[:i + 1], axis=1, inplace=True) + right.set_index(cols[i + 1:-1], inplace=True) + assert_frame_equal(mi.loc[key[:i + 1]], right) else: # full key right.set_index(cols[:-1], inplace=True) if len(right) == 1: # single hit right = Series(right['jolia'].values, - name=right.index[0], index=['jolia']) - assert_series_equal(mi.loc[key[:i+1]], right) + name=right.index[0], + index=['jolia']) + assert_series_equal(mi.loc[key[:i + 1]], right) else: # multi hit - assert_frame_equal(mi.loc[key[:i+1]], right) + assert_frame_equal(mi.loc[key[:i + 1]], right) def loop(mi, df, keys): for key in keys: @@ -1797,17 +1976,19 @@ def loop(mi, df, keys): n, m = 1000, 50 - vals = [randint(0, 10, n), choice(list('abcdefghij'), n), - choice(pd.date_range('20141009', periods=10).tolist(), n), - choice(list('ZYXWVUTSRQ'), n), randn(n)] + vals = [randint(0, 10, n), choice( + list('abcdefghij'), n), choice( + pd.date_range('20141009', periods=10).tolist(), n), choice( + list('ZYXWVUTSRQ'), n), randn(n)] vals = list(map(tuple, zip(*vals))) # bunch of keys for testing - keys = [randint(0, 11, m), choice(list('abcdefghijk'), m), - choice(pd.date_range('20141009', periods=11).tolist(), m), - choice(list('ZYXWVUTSRQP'), m)] + keys = [randint(0, 11, m), choice( + list('abcdefghijk'), m), choice( + pd.date_range('20141009', periods=11).tolist(), m), choice( + list('ZYXWVUTSRQP'), m)] keys = list(map(tuple, zip(*keys))) - keys += list(map(lambda t: t[:-1], vals[::n//m])) + keys += list(map(lambda t: t[:-1], vals[::n // m])) # covers both unique index and non-unique index df = pd.DataFrame(vals, columns=cols) @@ -1815,7 +1996,8 @@ def loop(mi, df, keys): for frame in a, b: for i in range(5): # lexsort depth - df = frame.copy() if i == 0 else frame.sort_values(by=cols[:i]) + df = frame.copy() if i == 0 else frame.sort_values( + by=cols[:i]) mi = df.set_index(cols[:-1]) assert not mi.index.lexsort_depth < i loop(mi, df, keys) @@ -1825,37 +2007,36 @@ def test_series_getitem_multiindex(self): # GH 6018 # series regression getitem with a multi-index - s = Series([1,2,3]) - s.index = MultiIndex.from_tuples([(0,0),(1,1), (2,1)]) + s = Series([1, 2, 3]) + s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) - result = s[:,0] - expected = Series([1],index=[0]) - assert_series_equal(result,expected) + result = s[:, 0] + expected = Series([1], index=[0]) + assert_series_equal(result, expected) - result = s.ix[:,1] - expected = Series([2,3],index=[1,2]) - assert_series_equal(result,expected) + result = s.ix[:, 1] + expected = Series([2, 3], index=[1, 2]) + assert_series_equal(result, expected) # xs - result = s.xs(0,level=0) - expected = Series([1],index=[0]) - assert_series_equal(result,expected) + result = s.xs(0, level=0) + expected = Series([1], index=[0]) + assert_series_equal(result, expected) - result = s.xs(1,level=1) - expected = Series([2,3],index=[1,2]) - assert_series_equal(result,expected) + result = s.xs(1, level=1) + expected = Series([2, 3], index=[1, 2]) + assert_series_equal(result, expected) # GH6258 - s = Series([1,3,4,1,3,4], - index=MultiIndex.from_product([list('AB'), - list(date_range('20130903',periods=3))])) - result = s.xs('20130903',level=1) - expected = Series([1,1],index=list('AB')) - assert_series_equal(result,expected) + s = Series([1, 3, 4, 1, 3, 4], index=MultiIndex.from_product([list( + 'AB'), list(date_range('20130903', periods=3))])) + result = s.xs('20130903', level=1) + expected = Series([1, 1], index=list('AB')) + assert_series_equal(result, expected) # GH5684 - idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), - ('b', 'one'), ('b', 'two')]) + idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), + ('b', 'two')]) s = Series([1, 2, 3, 4], index=idx) s.index.set_names(['L1', 'L2'], inplace=True) result = s.xs('one', level='L2') @@ -1868,9 +2049,21 @@ def test_ix_general(self): # ix general issues # GH 2817 - data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444}, - 'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0}, - 'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}} + data = {'amount': {0: 700, + 1: 600, + 2: 222, + 3: 333, + 4: 444}, + 'col': {0: 3.5, + 1: 3.5, + 2: 4.0, + 3: 4.0, + 4: 4.0}, + 'year': {0: 2012, + 1: 2011, + 2: 2012, + 3: 2012, + 4: 2012}} df = DataFrame(data).set_index(keys=['col', 'year']) key = 4.0, 2012 @@ -1888,23 +2081,34 @@ def test_ix_general(self): tm.assert_frame_equal(res, expected) def test_ix_weird_slicing(self): - ## http://stackoverflow.com/q/17056560/1240268 - df = DataFrame({'one' : [1, 2, 3, np.nan, np.nan], 'two' : [1, 2, 3, 4, 5]}) - df.ix[df['one']>1, 'two'] = -df['two'] - - expected = DataFrame({'one': {0: 1.0, 1: 2.0, 2: 3.0, 3: nan, 4: nan}, - 'two': {0: 1, 1: -2, 2: -3, 3: 4, 4: 5}}) + # http://stackoverflow.com/q/17056560/1240268 + df = DataFrame({'one': [1, 2, 3, np.nan, np.nan], + 'two': [1, 2, 3, 4, 5]}) + df.ix[df['one'] > 1, 'two'] = -df['two'] + + expected = DataFrame({'one': {0: 1.0, + 1: 2.0, + 2: 3.0, + 3: nan, + 4: nan}, + 'two': {0: 1, + 1: -2, + 2: -3, + 3: 4, + 4: 5}}) assert_frame_equal(df, expected) def test_xs_multiindex(self): # GH2903 - columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), ('b', 'hello'), ('b', 'world')], names=['lvl0', 'lvl1']) + columns = MultiIndex.from_tuples( + [('a', 'foo'), ('a', 'bar'), ('b', 'hello'), + ('b', 'world')], names=['lvl0', 'lvl1']) df = DataFrame(np.random.randn(4, 4), columns=columns) - df.sortlevel(axis=1,inplace=True) + df.sortlevel(axis=1, inplace=True) result = df.xs('a', level='lvl0', axis=1) - expected = df.iloc[:,0:2].loc[:,'a'] - assert_frame_equal(result,expected) + expected = df.iloc[:, 0:2].loc[:, 'a'] + assert_frame_equal(result, expected) result = df.xs('foo', level='lvl1', axis=1) expected = df.iloc[:, 1:2].copy() @@ -1915,127 +2119,142 @@ def test_per_axis_per_level_getitem(self): # GH6134 # example test case - ix = MultiIndex.from_product([_mklbl('A',5),_mklbl('B',7),_mklbl('C',4),_mklbl('D',2)]) - df = DataFrame(np.arange(len(ix.get_values())),index=ix) - - result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:] - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]] + ix = MultiIndex.from_product([_mklbl('A', 5), _mklbl('B', 7), _mklbl( + 'C', 4), _mklbl('D', 2)]) + df = DataFrame(np.arange(len(ix.get_values())), index=ix) + + result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] assert_frame_equal(result, expected) - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C2' or c == 'C3')]] - result = df.loc[(slice('A1','A3'),slice(None), slice('C1','C3')),:] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C2' or c == 'C3')]] + result = df.loc[(slice('A1', 'A3'), slice(None), slice('C1', 'C3')), :] assert_frame_equal(result, expected) # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A',1),('A',2),('A',3),('B',1)], - names=['one','two']) - columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','foo'),('b','bah')], + index = MultiIndex.from_tuples([('A', 1), ('A', 2), + ('A', 3), ('B', 1)], + names=['one', 'two']) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(16,dtype='int64').reshape(4, 4), index=index, columns=columns) + df = DataFrame( + np.arange(16, dtype='int64').reshape( + 4, 4), index=index, columns=columns) df = df.sortlevel(axis=0).sortlevel(axis=1) # identity - result = df.loc[(slice(None),slice(None)),:] + result = df.loc[(slice(None), slice(None)), :] assert_frame_equal(result, df) - result = df.loc[(slice(None),slice(None)),(slice(None),slice(None))] + result = df.loc[(slice(None), slice(None)), (slice(None), slice(None))] assert_frame_equal(result, df) - result = df.loc[:,(slice(None),slice(None))] + result = df.loc[:, (slice(None), slice(None))] assert_frame_equal(result, df) # index - result = df.loc[(slice(None),[1]),:] - expected = df.iloc[[0,3]] + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] assert_frame_equal(result, expected) - result = df.loc[(slice(None),1),:] - expected = df.iloc[[0,3]] + result = df.loc[(slice(None), 1), :] + expected = df.iloc[[0, 3]] assert_frame_equal(result, expected) # columns - result = df.loc[:,(slice(None),['foo'])] - expected = df.iloc[:,[1,3]] + result = df.loc[:, (slice(None), ['foo'])] + expected = df.iloc[:, [1, 3]] assert_frame_equal(result, expected) # both - result = df.loc[(slice(None),1),(slice(None),['foo'])] - expected = df.iloc[[0,3],[1,3]] + result = df.loc[(slice(None), 1), (slice(None), ['foo'])] + expected = df.iloc[[0, 3], [1, 3]] assert_frame_equal(result, expected) - result = df.loc['A','a'] - expected = DataFrame(dict(bar = [1,5,9], foo = [0,4,8]), - index=Index([1,2,3],name='two'), - columns=Index(['bar','foo'],name='lvl1')) + result = df.loc['A', 'a'] + expected = DataFrame(dict(bar=[1, 5, 9], foo=[0, 4, 8]), + index=Index([1, 2, 3], name='two'), + columns=Index(['bar', 'foo'], name='lvl1')) assert_frame_equal(result, expected) - result = df.loc[(slice(None),[1,2]),:] - expected = df.iloc[[0,1,3]] + result = df.loc[(slice(None), [1, 2]), :] + expected = df.iloc[[0, 1, 3]] assert_frame_equal(result, expected) # multi-level series - s = Series(np.arange(len(ix.get_values())),index=ix) - result = s.loc['A1':'A3', :, ['C1','C3']] - expected = s.loc[[ tuple([a,b,c,d]) for a,b,c,d in s.index.values if ( - a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]] + s = Series(np.arange(len(ix.get_values())), index=ix) + result = s.loc['A1':'A3', :, ['C1', 'C3']] + expected = s.loc[[tuple([a, b, c, d]) + for a, b, c, d in s.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] assert_series_equal(result, expected) # boolean indexers - result = df.loc[(slice(None),df.loc[:,('a','bar')]>5),:] - expected = df.iloc[[2,3]] + result = df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] + expected = df.iloc[[2, 3]] assert_frame_equal(result, expected) def f(): - df.loc[(slice(None),np.array([True,False])),:] + df.loc[(slice(None), np.array([True, False])), :] + self.assertRaises(ValueError, f) # ambiguous cases # these can be multiply interpreted (e.g. in this case # as df.loc[slice(None),[1]] as well - self.assertRaises(KeyError, lambda : df.loc[slice(None),[1]]) + self.assertRaises(KeyError, lambda: df.loc[slice(None), [1]]) - result = df.loc[(slice(None),[1]),:] - expected = df.iloc[[0,3]] + result = df.loc[(slice(None), [1]), :] + expected = df.iloc[[0, 3]] assert_frame_equal(result, expected) # not lexsorted - self.assertEqual(df.index.lexsort_depth,2) - df = df.sortlevel(level=1,axis=0) - self.assertEqual(df.index.lexsort_depth,0) - with tm.assertRaisesRegexp(KeyError, 'MultiIndex Slicing requires the index to be fully lexsorted tuple len \(2\), lexsort depth \(0\)'): - df.loc[(slice(None),df.loc[:,('a','bar')]>5),:] + self.assertEqual(df.index.lexsort_depth, 2) + df = df.sortlevel(level=1, axis=0) + self.assertEqual(df.index.lexsort_depth, 0) + with tm.assertRaisesRegexp( + KeyError, + 'MultiIndex Slicing requires the index to be fully ' + 'lexsorted tuple len \(2\), lexsort depth \(0\)'): + df.loc[(slice(None), df.loc[:, ('a', 'bar')] > 5), :] def test_multiindex_slicers_non_unique(self): # GH 7106 # non-unique mi index support - df = DataFrame(dict(A = ['foo','foo','foo','foo'], - B = ['a','a','a','a'], - C = [1,2,1,3], - D = [1,2,3,4])).set_index(['A','B','C']).sortlevel() + df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], + B=['a', 'a', 'a', 'a'], + C=[1, 2, 1, 3], + D=[1, 2, 3, 4])) + .set_index(['A', 'B', 'C']).sortlevel()) self.assertFalse(df.index.is_unique) - expected = DataFrame(dict(A = ['foo','foo'], - B = ['a','a'], - C = [1,1], - D = [1,3])).set_index(['A','B','C']).sortlevel() - result = df.loc[(slice(None),slice(None),1),:] + expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], + C=[1, 1], D=[1, 3])) + .set_index(['A', 'B', 'C']).sortlevel()) + result = df.loc[(slice(None), slice(None), 1), :] assert_frame_equal(result, expected) # this is equivalent of an xs expression - result = df.xs(1,level=2,drop_level=False) + result = df.xs(1, level=2, drop_level=False) assert_frame_equal(result, expected) - df = DataFrame(dict(A = ['foo','foo','foo','foo'], - B = ['a','a','a','a'], - C = [1,2,1,2], - D = [1,2,3,4])).set_index(['A','B','C']).sortlevel() + df = (DataFrame(dict(A=['foo', 'foo', 'foo', 'foo'], + B=['a', 'a', 'a', 'a'], + C=[1, 2, 1, 2], + D=[1, 2, 3, 4])) + .set_index(['A', 'B', 'C']).sortlevel()) self.assertFalse(df.index.is_unique) - expected = DataFrame(dict(A = ['foo','foo'], - B = ['a','a'], - C = [1,1], - D = [1,3])).set_index(['A','B','C']).sortlevel() - result = df.loc[(slice(None),slice(None),1),:] + expected = (DataFrame(dict(A=['foo', 'foo'], B=['a', 'a'], + C=[1, 1], D=[1, 3])) + .set_index(['A', 'B', 'C']).sortlevel()) + result = df.loc[(slice(None), slice(None), 1), :] self.assertFalse(result.index.is_unique) assert_frame_equal(result, expected) @@ -2044,99 +2263,101 @@ def test_multiindex_slicers_datetimelike(self): # GH 7429 # buggy/inconsistent behavior when slicing with datetime-like import datetime - dates = [datetime.datetime(2012,1,1,12,12,12) + datetime.timedelta(days=i) for i in range(6)] - freq = [1,2] - index = MultiIndex.from_product([dates,freq], names=['date','frequency']) + dates = [datetime.datetime(2012, 1, 1, 12, 12, 12) + + datetime.timedelta(days=i) for i in range(6)] + freq = [1, 2] + index = MultiIndex.from_product( + [dates, freq], names=['date', 'frequency']) - df = DataFrame(np.arange(6*2*4,dtype='int64').reshape(-1,4),index=index,columns=list('ABCD')) + df = DataFrame( + np.arange(6 * 2 * 4, dtype='int64').reshape( + -1, 4), index=index, columns=list('ABCD')) # multi-axis slicing idx = pd.IndexSlice - expected = df.iloc[[0,2,4],[0,1]] - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),Timestamp('2012-01-03 12:12:12')),slice(1,1)), slice('A','B')] - assert_frame_equal(result,expected) + expected = df.iloc[[0, 2, 4], [0, 1]] + result = df.loc[(slice( + Timestamp('2012-01-01 12:12:12'), Timestamp( + '2012-01-03 12:12:12')), slice(1, 1)), slice('A', 'B')] + assert_frame_equal(result, expected) - result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):Timestamp('2012-01-03 12:12:12')],idx[1:1]), slice('A','B')] - assert_frame_equal(result,expected) + result = df.loc[(idx[Timestamp('2012-01-01 12:12:12'):Timestamp( + '2012-01-03 12:12:12')], idx[1:1]), slice('A', 'B')] + assert_frame_equal(result, expected) - result = df.loc[(slice(Timestamp('2012-01-01 12:12:12'),Timestamp('2012-01-03 12:12:12')),1), slice('A','B')] - assert_frame_equal(result,expected) + result = df.loc[(slice( + Timestamp('2012-01-01 12:12:12'), Timestamp( + '2012-01-03 12:12:12')), 1), slice('A', 'B')] + assert_frame_equal(result, expected) # with strings - result = df.loc[(slice('2012-01-01 12:12:12','2012-01-03 12:12:12'),slice(1,1)), slice('A','B')] - assert_frame_equal(result,expected) - - result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'],1), idx['A','B']] - assert_frame_equal(result,expected) + result = df.loc[(slice('2012-01-01 12:12:12', '2012-01-03 12:12:12'), + slice(1, 1)), slice('A', 'B')] + assert_frame_equal(result, expected) + result = df.loc[(idx['2012-01-01 12:12:12':'2012-01-03 12:12:12'], 1), + idx['A', 'B']] + assert_frame_equal(result, expected) def test_multiindex_slicers_edges(self): - # GH 8132 # various edge cases - df = DataFrame({'A': ['A0'] * 5 + ['A1']*5 + ['A2']*5, - 'B': ['B0','B0','B1','B1','B2'] * 3, - 'DATE': ["2013-06-11", - "2013-07-02", - "2013-07-09", - "2013-07-30", - "2013-08-06", - "2013-06-11", - "2013-07-02", - "2013-07-09", - "2013-07-30", - "2013-08-06", - "2013-09-03", - "2013-10-01", - "2013-07-09", - "2013-08-06", - "2013-09-03"], - 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3,4, 2]}) + df = DataFrame( + {'A': ['A0'] * 5 + ['A1'] * 5 + ['A2'] * 5, + 'B': ['B0', 'B0', 'B1', 'B1', 'B2'] * 3, + 'DATE': ["2013-06-11", "2013-07-02", "2013-07-09", "2013-07-30", + "2013-08-06", "2013-06-11", "2013-07-02", "2013-07-09", + "2013-07-30", "2013-08-06", "2013-09-03", "2013-10-01", + "2013-07-09", "2013-08-06", "2013-09-03"], + 'VALUES': [22, 35, 14, 9, 4, 40, 18, 4, 2, 5, 1, 2, 3, 4, 2]}) df['DATE'] = pd.to_datetime(df['DATE']) df1 = df.set_index(['A', 'B', 'DATE']) df1 = df1.sortlevel() - df2 = df.set_index('DATE') # A1 - Get all values under "A0" and "A1" - result = df1.loc[(slice('A1')),:] + result = df1.loc[(slice('A1')), :] expected = df1.iloc[0:10] assert_frame_equal(result, expected) # A2 - Get all values from the start to "A2" - result = df1.loc[(slice('A2')),:] + result = df1.loc[(slice('A2')), :] expected = df1 assert_frame_equal(result, expected) # A3 - Get all values under "B1" or "B2" - result = df1.loc[(slice(None),slice('B1','B2')),:] - expected = df1.iloc[[2,3,4,7,8,9,12,13,14]] + result = df1.loc[(slice(None), slice('B1', 'B2')), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13, 14]] assert_frame_equal(result, expected) # A4 - Get all values between 2013-07-02 and 2013-07-09 - result = df1.loc[(slice(None),slice(None),slice('20130702','20130709')),:] - expected = df1.iloc[[1,2,6,7,12]] + result = df1.loc[(slice(None), slice(None), slice('20130702', + '20130709')), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] assert_frame_equal(result, expected) # B1 - Get all values in B0 that are also under A0, A1 and A2 - result = df1.loc[(slice('A2'),slice('B0')),:] - expected = df1.iloc[[0,1,5,6,10,11]] + result = df1.loc[(slice('A2'), slice('B0')), :] + expected = df1.iloc[[0, 1, 5, 6, 10, 11]] assert_frame_equal(result, expected) - # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for the As) - result = df1.loc[(slice(None),slice('B2')),:] + # B2 - Get all values in B0, B1 and B2 (similar to what #2 is doing for + # the As) + result = df1.loc[(slice(None), slice('B2')), :] expected = df1 assert_frame_equal(result, expected) # B3 - Get all values from B1 to B2 and up to 2013-08-06 - result = df1.loc[(slice(None),slice('B1','B2'),slice('2013-08-06')),:] - expected = df1.iloc[[2,3,4,7,8,9,12,13]] + result = df1.loc[(slice(None), slice('B1', 'B2'), slice('2013-08-06') + ), :] + expected = df1.iloc[[2, 3, 4, 7, 8, 9, 12, 13]] assert_frame_equal(result, expected) # B4 - Same as A4 but the start of the date slice is not a key. # shows indexing on a partial selection slice - result = df1.loc[(slice(None),slice(None),slice('20130701','20130709')),:] - expected = df1.iloc[[1,2,6,7,12]] + result = df1.loc[(slice(None), slice(None), slice('20130701', + '20130709')), :] + expected = df1.iloc[[1, 2, 6, 7, 12]] assert_frame_equal(result, expected) def test_per_axis_per_level_doc_examples(self): @@ -2145,88 +2366,95 @@ def test_per_axis_per_level_doc_examples(self): idx = pd.IndexSlice # from indexing.rst / advanced - index = MultiIndex.from_product([_mklbl('A',4), - _mklbl('B',2), - _mklbl('C',4), - _mklbl('D',2)]) - columns = MultiIndex.from_tuples([('a','foo'),('a','bar'), - ('b','foo'),('b','bah')], + index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), + _mklbl('C', 4), _mklbl('D', 2)]) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index)*len(columns),dtype='int64').reshape((len(index),len(columns))), + df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') + .reshape((len(index), len(columns))), index=index, columns=columns) - result = df.loc[(slice('A1','A3'),slice(None), ['C1','C3']),:] - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]] + result = df.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] assert_frame_equal(result, expected) - result = df.loc[idx['A1':'A3',:,['C1','C3']],:] + result = df.loc[idx['A1':'A3', :, ['C1', 'C3']], :] assert_frame_equal(result, expected) - result = df.loc[(slice(None),slice(None), ['C1','C3']),:] - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - c == 'C1' or c == 'C3')]] + result = df.loc[(slice(None), slice(None), ['C1', 'C3']), :] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == 'C1' or c == 'C3')]] assert_frame_equal(result, expected) - result = df.loc[idx[:,:,['C1','C3']],:] + result = df.loc[idx[:, :, ['C1', 'C3']], :] assert_frame_equal(result, expected) # not sorted def f(): - df.loc['A1',(slice(None),'foo')] + df.loc['A1', (slice(None), 'foo')] + self.assertRaises(KeyError, f) df = df.sortlevel(axis=1) # slicing - df.loc['A1',(slice(None),'foo')] - df.loc[(slice(None),slice(None), ['C1','C3']),(slice(None),'foo')] + df.loc['A1', (slice(None), 'foo')] + df.loc[(slice(None), slice(None), ['C1', 'C3']), (slice(None), 'foo')] # setitem - df.loc(axis=0)[:,:,['C1','C3']] = -10 + df.loc(axis=0)[:, :, ['C1', 'C3']] = -10 def test_loc_arguments(self): - index = MultiIndex.from_product([_mklbl('A',4), - _mklbl('B',2), - _mklbl('C',4), - _mklbl('D',2)]) - columns = MultiIndex.from_tuples([('a','foo'),('a','bar'), - ('b','foo'),('b','bah')], + index = MultiIndex.from_product([_mklbl('A', 4), _mklbl('B', 2), + _mklbl('C', 4), _mklbl('D', 2)]) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], names=['lvl0', 'lvl1']) - df = DataFrame(np.arange(len(index)*len(columns),dtype='int64').reshape((len(index),len(columns))), + df = DataFrame(np.arange(len(index) * len(columns), dtype='int64') + .reshape((len(index), len(columns))), index=index, columns=columns).sortlevel().sortlevel(axis=1) - # axis 0 - result = df.loc(axis=0)['A1':'A3',:,['C1','C3']] - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - a == 'A1' or a == 'A2' or a == 'A3') and (c == 'C1' or c == 'C3')]] + result = df.loc(axis=0)['A1':'A3', :, ['C1', 'C3']] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (a == 'A1' or a == 'A2' or a == 'A3') and ( + c == 'C1' or c == 'C3')]] assert_frame_equal(result, expected) - result = df.loc(axis='index')[:,:,['C1','C3']] - expected = df.loc[[ tuple([a,b,c,d]) for a,b,c,d in df.index.values if ( - c == 'C1' or c == 'C3')]] + result = df.loc(axis='index')[:, :, ['C1', 'C3']] + expected = df.loc[[tuple([a, b, c, d]) + for a, b, c, d in df.index.values + if (c == 'C1' or c == 'C3')]] assert_frame_equal(result, expected) # axis 1 - result = df.loc(axis=1)[:,'foo'] - expected = df.loc[:,(slice(None),'foo')] + result = df.loc(axis=1)[:, 'foo'] + expected = df.loc[:, (slice(None), 'foo')] assert_frame_equal(result, expected) - result = df.loc(axis='columns')[:,'foo'] - expected = df.loc[:,(slice(None),'foo')] + result = df.loc(axis='columns')[:, 'foo'] + expected = df.loc[:, (slice(None), 'foo')] assert_frame_equal(result, expected) # invalid axis def f(): - df.loc(axis=-1)[:,:,['C1','C3']] + df.loc(axis=-1)[:, :, ['C1', 'C3']] + self.assertRaises(ValueError, f) def f(): - df.loc(axis=2)[:,:,['C1','C3']] + df.loc(axis=2)[:, :, ['C1', 'C3']] + self.assertRaises(ValueError, f) def f(): - df.loc(axis='foo')[:,:,['C1','C3']] + df.loc(axis='foo')[:, :, ['C1', 'C3']] + self.assertRaises(ValueError, f) def test_per_axis_per_level_setitem(self): @@ -2235,119 +2463,132 @@ def test_per_axis_per_level_setitem(self): idx = pd.IndexSlice # test multi-index slicing with per axis and per index controls - index = MultiIndex.from_tuples([('A',1),('A',2),('A',3),('B',1)], - names=['one','two']) - columns = MultiIndex.from_tuples([('a','foo'),('a','bar'),('b','foo'),('b','bah')], + index = MultiIndex.from_tuples([('A', 1), ('A', 2), + ('A', 3), ('B', 1)], + names=['one', 'two']) + columns = MultiIndex.from_tuples([('a', 'foo'), ('a', 'bar'), + ('b', 'foo'), ('b', 'bah')], names=['lvl0', 'lvl1']) - df_orig = DataFrame(np.arange(16,dtype='int64').reshape(4, 4), index=index, columns=columns) + df_orig = DataFrame( + np.arange(16, dtype='int64').reshape( + 4, 4), index=index, columns=columns) df_orig = df_orig.sortlevel(axis=0).sortlevel(axis=1) # identity df = df_orig.copy() - df.loc[(slice(None),slice(None)),:] = 100 + df.loc[(slice(None), slice(None)), :] = 100 expected = df_orig.copy() - expected.iloc[:,:] = 100 + expected.iloc[:, :] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc(axis=0)[:,:] = 100 + df.loc(axis=0)[:, :] = 100 expected = df_orig.copy() - expected.iloc[:,:] = 100 + expected.iloc[:, :] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[(slice(None),slice(None)),(slice(None),slice(None))] = 100 + df.loc[(slice(None), slice(None)), (slice(None), slice(None))] = 100 expected = df_orig.copy() - expected.iloc[:,:] = 100 + expected.iloc[:, :] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[:,(slice(None),slice(None))] = 100 + df.loc[:, (slice(None), slice(None))] = 100 expected = df_orig.copy() - expected.iloc[:,:] = 100 + expected.iloc[:, :] = 100 assert_frame_equal(df, expected) # index df = df_orig.copy() - df.loc[(slice(None),[1]),:] = 100 + df.loc[(slice(None), [1]), :] = 100 expected = df_orig.copy() - expected.iloc[[0,3]] = 100 + expected.iloc[[0, 3]] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[(slice(None),1),:] = 100 + df.loc[(slice(None), 1), :] = 100 expected = df_orig.copy() - expected.iloc[[0,3]] = 100 + expected.iloc[[0, 3]] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc(axis=0)[:,1] = 100 + df.loc(axis=0)[:, 1] = 100 expected = df_orig.copy() - expected.iloc[[0,3]] = 100 + expected.iloc[[0, 3]] = 100 assert_frame_equal(df, expected) # columns df = df_orig.copy() - df.loc[:,(slice(None),['foo'])] = 100 + df.loc[:, (slice(None), ['foo'])] = 100 expected = df_orig.copy() - expected.iloc[:,[1,3]] = 100 + expected.iloc[:, [1, 3]] = 100 assert_frame_equal(df, expected) # both df = df_orig.copy() - df.loc[(slice(None),1),(slice(None),['foo'])] = 100 + df.loc[(slice(None), 1), (slice(None), ['foo'])] = 100 expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] = 100 + expected.iloc[[0, 3], [1, 3]] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[idx[:,1],idx[:,['foo']]] = 100 + df.loc[idx[:, 1], idx[:, ['foo']]] = 100 expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] = 100 + expected.iloc[[0, 3], [1, 3]] = 100 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc['A','a'] = 100 + df.loc['A', 'a'] = 100 expected = df_orig.copy() - expected.iloc[0:3,0:2] = 100 + expected.iloc[0:3, 0:2] = 100 assert_frame_equal(df, expected) # setting with a list-like df = df_orig.copy() - df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([[100, 100], [100, 100]],dtype='int64') + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [[100, 100], [100, 100]], dtype='int64') expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] = 100 + expected.iloc[[0, 3], [1, 3]] = 100 assert_frame_equal(df, expected) # not enough values df = df_orig.copy() + def f(): - df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([[100], [100, 100]],dtype='int64') + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [[100], [100, 100]], dtype='int64') + self.assertRaises(ValueError, f) + def f(): - df.loc[(slice(None),1),(slice(None),['foo'])] = np.array([100, 100, 100, 100],dtype='int64') + df.loc[(slice(None), 1), (slice(None), ['foo'])] = np.array( + [100, 100, 100, 100], dtype='int64') + self.assertRaises(ValueError, f) # with an alignable rhs df = df_orig.copy() - df.loc[(slice(None),1),(slice(None),['foo'])] = df.loc[(slice(None),1),(slice(None),['foo'])] * 5 + df.loc[(slice(None), 1), (slice(None), ['foo'])] = df.loc[(slice( + None), 1), (slice(None), ['foo'])] * 5 expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] = expected.iloc[[0,3],[1,3]] * 5 + expected.iloc[[0, 3], [1, 3]] = expected.iloc[[0, 3], [1, 3]] * 5 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[(slice(None),1),(slice(None),['foo'])] *= df.loc[(slice(None),1),(slice(None),['foo'])] + df.loc[(slice(None), 1), (slice(None), ['foo'])] *= df.loc[(slice( + None), 1), (slice(None), ['foo'])] expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] *= expected.iloc[[0,3],[1,3]] + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] assert_frame_equal(df, expected) - rhs = df_orig.loc[(slice(None),1),(slice(None),['foo'])].copy() - rhs.loc[:,('c','bah')] = 10 + rhs = df_orig.loc[(slice(None), 1), (slice(None), ['foo'])].copy() + rhs.loc[:, ('c', 'bah')] = 10 df = df_orig.copy() - df.loc[(slice(None),1),(slice(None),['foo'])] *= rhs + df.loc[(slice(None), 1), (slice(None), ['foo'])] *= rhs expected = df_orig.copy() - expected.iloc[[0,3],[1,3]] *= expected.iloc[[0,3],[1,3]] + expected.iloc[[0, 3], [1, 3]] *= expected.iloc[[0, 3], [1, 3]] assert_frame_equal(df, expected) def test_multiindex_setitem(self): @@ -2362,118 +2603,128 @@ def test_multiindex_setitem(self): index=arrays, columns=['A', 'B', 'C']).sort_index() - expected = df_orig.loc[['bar']]*2 + expected = df_orig.loc[['bar']] * 2 df = df_orig.copy() df.loc[['bar']] *= 2 - assert_frame_equal(df.loc[['bar']],expected) + assert_frame_equal(df.loc[['bar']], expected) # raise because these have differing levels def f(): df.loc['bar'] *= 2 + self.assertRaises(TypeError, f) # from SO - #http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation + # http://stackoverflow.com/questions/24572040/pandas-access-the-level-of-multiindex-for-inplace-operation df_orig = DataFrame.from_dict({'price': { ('DE', 'Coal', 'Stock'): 2, ('DE', 'Gas', 'Stock'): 4, ('DE', 'Elec', 'Demand'): 1, ('FR', 'Gas', 'Stock'): 5, ('FR', 'Solar', 'SupIm'): 0, - ('FR', 'Wind', 'SupIm'): 0}}) - df_orig.index = MultiIndex.from_tuples(df_orig.index, names=['Sit', 'Com', 'Type']) + ('FR', 'Wind', 'SupIm'): 0 + }}) + df_orig.index = MultiIndex.from_tuples(df_orig.index, + names=['Sit', 'Com', 'Type']) expected = df_orig.copy() - expected.iloc[[0,2,3]] *= 2 + expected.iloc[[0, 2, 3]] *= 2 idx = pd.IndexSlice df = df_orig.copy() - df.loc[idx[:,:,'Stock'],:] *= 2 + df.loc[idx[:, :, 'Stock'], :] *= 2 assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[idx[:,:,'Stock'],'price'] *= 2 + df.loc[idx[:, :, 'Stock'], 'price'] *= 2 assert_frame_equal(df, expected) def test_getitem_multiindex(self): - - # GH 5725 - # the 'A' happens to be a valid Timestamp so the doesn't raise the appropriate - # error, only in PY3 of course! - index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise + # the appropriate error, only in PY3 of course! + index = MultiIndex(levels=[['D', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, + 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=['tag', 'day']) - arr = np.random.randn(len(index),1) - df = DataFrame(arr,index=index,columns=['val']) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=['val']) result = df.val['D'] - expected = Series(arr.ravel()[0:3],name='val',index=Index([26,37,57],name='day')) - assert_series_equal(result,expected) + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + assert_series_equal(result, expected) def f(): df.val['A'] + self.assertRaises(KeyError, f) def f(): df.val['X'] + self.assertRaises(KeyError, f) # A is treated as a special Timestamp - index = MultiIndex(levels=[['A', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, 82]], - labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + index = MultiIndex(levels=[['A', 'B', 'C'], [0, 26, 27, 37, 57, 67, 75, + 82]], + labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], names=['tag', 'day']) - df = DataFrame(arr,index=index,columns=['val']) + df = DataFrame(arr, index=index, columns=['val']) result = df.val['A'] - expected = Series(arr.ravel()[0:3],name='val',index=Index([26,37,57],name='day')) - assert_series_equal(result,expected) + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + assert_series_equal(result, expected) def f(): df.val['X'] - self.assertRaises(KeyError, f) + self.assertRaises(KeyError, f) # GH 7866 # multi-index slicing with missing indexers - s = pd.Series(np.arange(9,dtype='int64'), - index=pd.MultiIndex.from_product([['A','B','C'],['foo','bar','baz']], - names=['one','two']) - ).sortlevel() + s = pd.Series(np.arange(9, dtype='int64'), + index=pd.MultiIndex.from_product( + [['A', 'B', 'C'], ['foo', 'bar', 'baz']], + names=['one', 'two'])).sortlevel() - expected = pd.Series(np.arange(3,dtype='int64'), - index=pd.MultiIndex.from_product([['A'],['foo','bar','baz']], - names=['one','two']) - ).sortlevel() + expected = pd.Series(np.arange(3, dtype='int64'), + index=pd.MultiIndex.from_product( + [['A'], ['foo', 'bar', 'baz']], + names=['one', 'two'])).sortlevel() result = s.loc[['A']] - assert_series_equal(result,expected) - result = s.loc[['A','D']] - assert_series_equal(result,expected) + assert_series_equal(result, expected) + result = s.loc[['A', 'D']] + assert_series_equal(result, expected) # not any values found - self.assertRaises(KeyError, lambda : s.loc[['D']]) + self.assertRaises(KeyError, lambda: s.loc[['D']]) # empty ok result = s.loc[[]] expected = s.iloc[[]] - assert_series_equal(result,expected) + assert_series_equal(result, expected) idx = pd.IndexSlice - expected = pd.Series([0,3,6], - index=pd.MultiIndex.from_product([['A','B','C'],['foo']], - names=['one','two']) - ).sortlevel() + expected = pd.Series([0, 3, 6], index=pd.MultiIndex.from_product( + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sortlevel() - result = s.loc[idx[:,['foo']]] - assert_series_equal(result,expected) - result = s.loc[idx[:,['foo','bah']]] - assert_series_equal(result,expected) + result = s.loc[idx[:, ['foo']]] + assert_series_equal(result, expected) + result = s.loc[idx[:, ['foo', 'bah']]] + assert_series_equal(result, expected) # GH 8737 # empty indexer - multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], ['alpha', 'beta'])) - df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + multi_index = pd.MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame( + np.random.randn(5, 6), index=range(5), columns=multi_index) df = df.sortlevel(0, axis=1) - expected = DataFrame(index=range(5),columns=multi_index.reindex([])[0]) + expected = DataFrame(index=range(5), + columns=multi_index.reindex([])[0]) result1 = df.loc[:, ([], slice(None))] result2 = df.loc[:, (['foo'], [])] assert_frame_equal(result1, expected) @@ -2481,12 +2732,12 @@ def f(): # regression from < 0.14.0 # GH 7914 - df = DataFrame([[np.mean, np.median],['mean','median']], - columns=MultiIndex.from_tuples([('functs','mean'), - ('functs','median')]), + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), index=['function', 'name']) - result = df.loc['function',('functs','mean')] - self.assertEqual(result,np.mean) + result = df.loc['function', ('functs', 'mean')] + self.assertEqual(result, np.mean) def test_setitem_dtype_upcast(self): @@ -2495,33 +2746,34 @@ def test_setitem_dtype_upcast(self): df['c'] = np.nan self.assertEqual(df['c'].dtype, np.float64) - df.ix[0,'c'] = 'foo' - expected = DataFrame([{"a": 1, "c" : 'foo'}, {"a": 3, "b": 2, "c" : np.nan}]) - assert_frame_equal(df,expected) + df.ix[0, 'c'] = 'foo' + expected = DataFrame([{"a": 1, + "c": 'foo'}, {"a": 3, + "b": 2, + "c": np.nan}]) + assert_frame_equal(df, expected) # GH10280 - df = DataFrame(np.arange(6,dtype='int64').reshape(2, 3), + df = DataFrame(np.arange(6, dtype='int64').reshape(2, 3), index=list('ab'), columns=['foo', 'bar', 'baz']) for val in [3.14, 'wxyz']: left = df.copy() left.loc['a', 'bar'] = val - right = DataFrame([[0, val, 2], [3, 4, 5]], - index=list('ab'), + right = DataFrame([[0, val, 2], [3, 4, 5]], index=list('ab'), columns=['foo', 'bar', 'baz']) assert_frame_equal(left, right) self.assertTrue(com.is_integer_dtype(left['foo'])) self.assertTrue(com.is_integer_dtype(left['baz'])) - left = DataFrame(np.arange(6,dtype='int64').reshape(2, 3) / 10.0, + left = DataFrame(np.arange(6, dtype='int64').reshape(2, 3) / 10.0, index=list('ab'), columns=['foo', 'bar', 'baz']) left.loc['a', 'bar'] = 'wxyz' - right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], - index=list('ab'), + right = DataFrame([[0, 'wxyz', .2], [.3, .4, .5]], index=list('ab'), columns=['foo', 'bar', 'baz']) assert_frame_equal(left, right) @@ -2530,67 +2782,83 @@ def test_setitem_dtype_upcast(self): def test_setitem_iloc(self): - # setitem with an iloc list - df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]) - df.iloc[[0,1],[1,2]] - df.iloc[[0,1],[1,2]] += 100 + df = DataFrame( + np.arange(9).reshape((3, 3)), index=["A", "B", "C"], + columns=["A", "B", "C"]) + df.iloc[[0, 1], [1, 2]] + df.iloc[[0, 1], [1, 2]] += 100 - expected = DataFrame(np.array([0,101,102,3,104,105,6,7,8]).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]) - assert_frame_equal(df,expected) + expected = DataFrame( + np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), + index=["A", "B", "C"], columns=["A", "B", "C"]) + assert_frame_equal(df, expected) def test_dups_fancy_indexing(self): # GH 3455 from pandas.util.testing import makeCustomDataframe as mkdf - df= mkdf(10, 3) - df.columns = ['a','a','b'] - cols = ['b','a'] - result = df[['b','a']].columns - expected = Index(['b','a','a']) + df = mkdf(10, 3) + df.columns = ['a', 'a', 'b'] + result = df[['b', 'a']].columns + expected = Index(['b', 'a', 'a']) self.assertTrue(result.equals(expected)) # across dtypes - df = DataFrame([[1,2,1.,2.,3.,'foo','bar']], columns=list('aaaaaaa')) + df = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']], + columns=list('aaaaaaa')) df.head() str(df) - result = DataFrame([[1,2,1.,2.,3.,'foo','bar']]) + result = DataFrame([[1, 2, 1., 2., 3., 'foo', 'bar']]) result.columns = list('aaaaaaa') - df_v = df.iloc[:,4] - res_v = result.iloc[:,4] + # TODO(wesm): unused? + df_v = df.iloc[:, 4] # noqa + res_v = result.iloc[:, 4] # noqa - assert_frame_equal(df,result) + assert_frame_equal(df, result) # GH 3561, dups not in selected order - df = DataFrame({'test': [5,7,9,11], 'test1': [4.,5,6,7], 'other': list('abcd') }, index=['A', 'A', 'B', 'C']) + df = DataFrame( + {'test': [5, 7, 9, 11], + 'test1': [4., 5, 6, 7], + 'other': list('abcd')}, index=['A', 'A', 'B', 'C']) rows = ['C', 'B'] - expected = DataFrame({'test' : [11,9], 'test1': [ 7., 6], 'other': ['d','c']},index=rows) + expected = DataFrame( + {'test': [11, 9], + 'test1': [7., 6], + 'other': ['d', 'c']}, index=rows) result = df.ix[rows] assert_frame_equal(result, expected) result = df.ix[Index(rows)] assert_frame_equal(result, expected) - rows = ['C','B','E'] - expected = DataFrame({'test' : [11,9,np.nan], 'test1': [7.,6,np.nan], 'other': ['d','c',np.nan]},index=rows) + rows = ['C', 'B', 'E'] + expected = DataFrame( + {'test': [11, 9, np.nan], + 'test1': [7., 6, np.nan], + 'other': ['d', 'c', np.nan]}, index=rows) result = df.ix[rows] assert_frame_equal(result, expected) # see GH5553, make sure we use the right indexer - rows = ['F','G','H','C','B','E'] - expected = DataFrame({'test' : [np.nan,np.nan,np.nan,11,9,np.nan], - 'test1': [np.nan,np.nan,np.nan,7.,6,np.nan], - 'other': [np.nan,np.nan,np.nan,'d','c',np.nan]},index=rows) + rows = ['F', 'G', 'H', 'C', 'B', 'E'] + expected = DataFrame({'test': [np.nan, np.nan, np.nan, 11, 9, np.nan], + 'test1': [np.nan, np.nan, np.nan, 7., 6, np.nan], + 'other': [np.nan, np.nan, np.nan, + 'd', 'c', np.nan]}, + index=rows) result = df.ix[rows] assert_frame_equal(result, expected) - # inconsistent returns for unique/duplicate indices when values are missing - df = DataFrame(randn(4,3),index=list('ABCD')) + # inconsistent returns for unique/duplicate indices when values are + # missing + df = DataFrame(randn(4, 3), index=list('ABCD')) expected = df.ix[['E']] - dfnu = DataFrame(randn(5,3),index=list('AABCD')) + dfnu = DataFrame(randn(5, 3), index=list('AABCD')) result = dfnu.ix[['E']] assert_frame_equal(result, expected) @@ -2603,146 +2871,216 @@ def test_dups_fancy_indexing(self): assert_frame_equal(result, expected, check_index_type=False) df = DataFrame({"A": list('abc')}) - result = df.ix[[0,8,0]] + result = df.ix[[0, 8, 0]] expected = DataFrame({"A": ['a', np.nan, 'a']}, index=[0, 8, 0]) assert_frame_equal(result, expected, check_index_type=False) # non unique with non unique selector df = DataFrame({'test': [5, 7, 9, 11]}, index=['A', 'A', 'B', 'C']) - expected = DataFrame({'test' : [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) + expected = DataFrame( + {'test': [5, 7, 5, 7, np.nan]}, index=['A', 'A', 'A', 'A', 'E']) result = df.ix[['A', 'A', 'E']] assert_frame_equal(result, expected) # GH 5835 # dups on index and missing values - df = DataFrame(np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) + df = DataFrame( + np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A']) - expected = pd.concat([df.ix[:,['A','B']],DataFrame(np.nan,columns=['C'],index=df.index)],axis=1) - result = df.ix[:,['A','B','C']] + expected = pd.concat( + [df.ix[:, ['A', 'B']], DataFrame(np.nan, columns=['C'], + index=df.index)], axis=1) + result = df.ix[:, ['A', 'B', 'C']] assert_frame_equal(result, expected) # GH 6504, multi-axis indexing - df = DataFrame(np.random.randn(9,2), index=[1,1,1,2,2,2,3,3,3], columns=['a', 'b']) + df = DataFrame( + np.random.randn( + 9, 2), index=[1, 1, 1, 2, 2, 2, 3, 3, 3], columns=['a', 'b']) expected = df.iloc[0:6] result = df.loc[[1, 2]] assert_frame_equal(result, expected) expected = df - result = df.loc[:,['a', 'b']] + result = df.loc[:, ['a', 'b']] assert_frame_equal(result, expected) - expected = df.iloc[0:6,:] + expected = df.iloc[0:6, :] result = df.loc[[1, 2], ['a', 'b']] assert_frame_equal(result, expected) def test_indexing_mixed_frame_bug(self): # GH3492 - df=DataFrame({'a':{1:'aaa',2:'bbb',3:'ccc'},'b':{1:111,2:222,3:333}}) + df = DataFrame({'a': {1: 'aaa', + 2: 'bbb', + 3: 'ccc'}, + 'b': {1: 111, + 2: 222, + 3: 333}}) # this works, new column is created correctly - df['test']=df['a'].apply(lambda x: '_' if x=='aaa' else x) + df['test'] = df['a'].apply(lambda x: '_' if x == 'aaa' else x) # this does not work, ie column test is not changed - idx=df['test']=='_' - temp=df.ix[idx,'a'].apply(lambda x: '-----' if x=='aaa' else x) - df.ix[idx,'test']=temp - self.assertEqual(df.iloc[0,2], '-----') + idx = df['test'] == '_' + temp = df.ix[idx, 'a'].apply(lambda x: '-----' if x == 'aaa' else x) + df.ix[idx, 'test'] = temp + self.assertEqual(df.iloc[0, 2], '-----') - #if I look at df, then element [0,2] equals '_'. If instead I type df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I get '_'. + # if I look at df, then element [0,2] equals '_'. If instead I type + # df.ix[idx,'test'], I get '-----', finally by typing df.iloc[0,2] I + # get '_'. def test_multitype_list_index_access(self): - #GH 10610 - df = pd.DataFrame(np.random.random((10, 5)), columns=["a"] + [20, 21, 22, 23]) + # GH 10610 + df = pd.DataFrame(np.random.random((10, 5)), + columns=["a"] + [20, 21, 22, 23]) with self.assertRaises(IndexError): - vals = df[[22, 26, -8]] + df[[22, 26, -8]] self.assertEqual(df[21].shape[0], df.shape[0]) def test_set_index_nan(self): # GH 3586 - df = DataFrame({'PRuid': {17: 'nonQC', 18: 'nonQC', 19: 'nonQC', 20: '10', 21: '11', 22: '12', 23: '13', - 24: '24', 25: '35', 26: '46', 27: '47', 28: '48', 29: '59', 30: '10'}, - 'QC': {17: 0.0, 18: 0.0, 19: 0.0, 20: nan, 21: nan, 22: nan, 23: nan, 24: 1.0, 25: nan, - 26: nan, 27: nan, 28: nan, 29: nan, 30: nan}, - 'data': {17: 7.9544899999999998, 18: 8.0142609999999994, 19: 7.8591520000000008, 20: 0.86140349999999999, - 21: 0.87853110000000001, 22: 0.8427041999999999, 23: 0.78587700000000005, 24: 0.73062459999999996, - 25: 0.81668560000000001, 26: 0.81927080000000008, 27: 0.80705009999999999, 28: 0.81440240000000008, - 29: 0.80140849999999997, 30: 0.81307740000000006}, - 'year': {17: 2006, 18: 2007, 19: 2008, 20: 1985, 21: 1985, 22: 1985, 23: 1985, - 24: 1985, 25: 1985, 26: 1985, 27: 1985, 28: 1985, 29: 1985, 30: 1986}}).reset_index() - - result = df.set_index(['year','PRuid','QC']).reset_index().reindex(columns=df.columns) - assert_frame_equal(result,df) + df = DataFrame({'PRuid': {17: 'nonQC', + 18: 'nonQC', + 19: 'nonQC', + 20: '10', + 21: '11', + 22: '12', + 23: '13', + 24: '24', + 25: '35', + 26: '46', + 27: '47', + 28: '48', + 29: '59', + 30: '10'}, + 'QC': {17: 0.0, + 18: 0.0, + 19: 0.0, + 20: nan, + 21: nan, + 22: nan, + 23: nan, + 24: 1.0, + 25: nan, + 26: nan, + 27: nan, + 28: nan, + 29: nan, + 30: nan}, + 'data': {17: 7.9544899999999998, + 18: 8.0142609999999994, + 19: 7.8591520000000008, + 20: 0.86140349999999999, + 21: 0.87853110000000001, + 22: 0.8427041999999999, + 23: 0.78587700000000005, + 24: 0.73062459999999996, + 25: 0.81668560000000001, + 26: 0.81927080000000008, + 27: 0.80705009999999999, + 28: 0.81440240000000008, + 29: 0.80140849999999997, + 30: 0.81307740000000006}, + 'year': {17: 2006, + 18: 2007, + 19: 2008, + 20: 1985, + 21: 1985, + 22: 1985, + 23: 1985, + 24: 1985, + 25: 1985, + 26: 1985, + 27: 1985, + 28: 1985, + 29: 1985, + 30: 1986}}).reset_index() + + result = df.set_index(['year', 'PRuid', 'QC']).reset_index().reindex( + columns=df.columns) + assert_frame_equal(result, df) def test_multi_nan_indexing(self): # GH 3588 - df = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]}) - result = df.set_index(['a','b'], drop=False) - expected = DataFrame({"a":['R1', 'R2', np.nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, np.nan , 20]}, - index = [Index(['R1','R2',np.nan,'R4'],name='a'),Index(['C1','C2','C3','C4'],name='b')]) - assert_frame_equal(result,expected) - + df = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], + 'b': ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20]}) + result = df.set_index(['a', 'b'], drop=False) + expected = DataFrame({"a": ['R1', 'R2', np.nan, 'R4'], + 'b': ["C1", "C2", "C3", "C4"], + "c": [10, 15, np.nan, 20]}, + index=[Index(['R1', 'R2', np.nan, 'R4'], + name='a'), + Index(['C1', 'C2', 'C3', 'C4'], name='b')]) + assert_frame_equal(result, expected) def test_iloc_panel_issue(self): # GH 3617 p = Panel(randn(4, 4, 4)) - self.assertEqual(p.iloc[:3, :3, :3].shape, (3,3,3)) - self.assertEqual(p.iloc[1, :3, :3].shape, (3,3)) - self.assertEqual(p.iloc[:3, 1, :3].shape, (3,3)) - self.assertEqual(p.iloc[:3, :3, 1].shape, (3,3)) - self.assertEqual(p.iloc[1, 1, :3].shape, (3,)) - self.assertEqual(p.iloc[1, :3, 1].shape, (3,)) - self.assertEqual(p.iloc[:3, 1, 1].shape, (3,)) + self.assertEqual(p.iloc[:3, :3, :3].shape, (3, 3, 3)) + self.assertEqual(p.iloc[1, :3, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, 1, :3].shape, (3, 3)) + self.assertEqual(p.iloc[:3, :3, 1].shape, (3, 3)) + self.assertEqual(p.iloc[1, 1, :3].shape, (3, )) + self.assertEqual(p.iloc[1, :3, 1].shape, (3, )) + self.assertEqual(p.iloc[:3, 1, 1].shape, (3, )) def test_panel_getitem(self): - # GH4016, date selection returns a frame when a partial string selection + # GH4016, date selection returns a frame when a partial string + # selection ind = date_range(start="2000", freq="D", periods=1000) - df = DataFrame(np.random.randn(len(ind), 5), index=ind, columns=list('ABCDE')) - panel = Panel(dict([ ('frame_'+c,df) for c in list('ABC') ])) + df = DataFrame( + np.random.randn( + len(ind), 5), index=ind, columns=list('ABCDE')) + panel = Panel(dict([('frame_' + c, df) for c in list('ABC')])) test2 = panel.ix[:, "2002":"2002-12-31"] test1 = panel.ix[:, "2002"] - tm.assert_panel_equal(test1,test2) + tm.assert_panel_equal(test1, test2) # GH8710 # multi-element getting with a list panel = tm.makePanel() - expected = panel.iloc[[0,1]] + expected = panel.iloc[[0, 1]] - result = panel.loc[['ItemA','ItemB']] - tm.assert_panel_equal(result,expected) + result = panel.loc[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) - result = panel.loc[['ItemA','ItemB'],:,:] - tm.assert_panel_equal(result,expected) + result = panel.loc[['ItemA', 'ItemB'], :, :] + tm.assert_panel_equal(result, expected) - result = panel[['ItemA','ItemB']] - tm.assert_panel_equal(result,expected) + result = panel[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) result = panel.loc['ItemA':'ItemB'] - tm.assert_panel_equal(result,expected) + tm.assert_panel_equal(result, expected) result = panel.ix['ItemA':'ItemB'] - tm.assert_panel_equal(result,expected) + tm.assert_panel_equal(result, expected) - result = panel.ix[['ItemA','ItemB']] - tm.assert_panel_equal(result,expected) + result = panel.ix[['ItemA', 'ItemB']] + tm.assert_panel_equal(result, expected) # with an object-like # GH 9140 class TestObject: + def __str__(self): return "TestObject" obj = TestObject() - p = Panel(np.random.randn(1,5,4), items=[obj], - major_axis = date_range('1/1/2000', periods=5), + p = Panel(np.random.randn(1, 5, 4), items=[obj], + major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) expected = p.iloc[0] @@ -2754,16 +3092,19 @@ def test_panel_setitem(self): # GH 7763 # loc and setitem have setting differences np.random.seed(0) - index=range(3) + index = range(3) columns = list('abc') - panel = Panel({'A' : DataFrame(np.random.randn(3, 3), index=index, columns=columns), - 'B' : DataFrame(np.random.randn(3, 3), index=index, columns=columns), - 'C' : DataFrame(np.random.randn(3, 3), index=index, columns=columns) - }) + panel = Panel( + {'A': DataFrame( + np.random.randn(3, 3), index=index, columns=columns), + 'B': DataFrame( + np.random.randn(3, 3), index=index, columns=columns), + 'C': DataFrame( + np.random.randn(3, 3), index=index, columns=columns)}) - replace = DataFrame(np.eye(3,3), index=range(3), columns=columns) - expected = Panel({ 'A' : replace, 'B' : replace, 'C' : replace }) + replace = DataFrame(np.eye(3, 3), index=range(3), columns=columns) + expected = Panel({'A': replace, 'B': replace, 'C': replace}) p = panel.copy() for idx in list('ABC'): @@ -2772,113 +3113,138 @@ def test_panel_setitem(self): p = panel.copy() for idx in list('ABC'): - p.loc[idx,:,:] = replace + p.loc[idx, :, :] = replace tm.assert_panel_equal(p, expected) - def test_panel_setitem_with_multiindex(self): # 10360 # failing with a multi-index - arr = np.array([[[1,2,3],[0,0,0]],[[0,0,0],[0,0,0]]],dtype=np.float64) + arr = np.array( + [[[1, 2, 3], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) # reg index - axes = dict(items=['A', 'B'], major_axis=[0, 1], minor_axis=['X', 'Y' ,'Z']) + axes = dict(items=['A', 'B'], major_axis=[0, 1], + minor_axis=['X', 'Y', 'Z']) p1 = Panel(0., **axes) p1.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p1, expected) # multi-indexes - axes['items'] = pd.MultiIndex.from_tuples([('A','a'), ('B','b')]) + axes['items'] = pd.MultiIndex.from_tuples([('A', 'a'), ('B', 'b')]) p2 = Panel(0., **axes) p2.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p2, expected) - axes['major_axis']=pd.MultiIndex.from_tuples([('A',1),('A',2)]) + axes['major_axis'] = pd.MultiIndex.from_tuples([('A', 1), ('A', 2)]) p3 = Panel(0., **axes) p3.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p3, expected) - axes['minor_axis']=pd.MultiIndex.from_product([['X'],range(3)]) + axes['minor_axis'] = pd.MultiIndex.from_product([['X'], range(3)]) p4 = Panel(0., **axes) p4.iloc[0, 0, :] = [1, 2, 3] expected = Panel(arr, **axes) tm.assert_panel_equal(p4, expected) - arr = np.array([[[1,0,0],[2,0,0]],[[0,0,0],[0,0,0]]],dtype=np.float64) + arr = np.array( + [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]], dtype=np.float64) p5 = Panel(0., **axes) p5.iloc[0, :, 0] = [1, 2] expected = Panel(arr, **axes) tm.assert_panel_equal(p5, expected) def test_panel_assignment(self): - # GH3777 - wp = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) - wp2 = Panel(randn(2, 5, 4), items=['Item1', 'Item2'], major_axis=date_range('1/1/2000', periods=5), minor_axis=['A', 'B', 'C', 'D']) - expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + wp = Panel( + randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + wp2 = Panel( + randn(2, 5, 4), items=['Item1', 'Item2'], + major_axis=date_range('1/1/2000', periods=5), + minor_axis=['A', 'B', 'C', 'D']) + + # TODO: unused? + # expected = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] def f(): - wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] + wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[ + ['Item1', 'Item2'], :, ['A', 'B']] + self.assertRaises(NotImplementedError, f) - #wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] - #result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] - #tm.assert_panel_equal(result,expected) + # to_assign = wp2.loc[['Item1', 'Item2'], :, ['A', 'B']] + # wp.loc[['Item1', 'Item2'], :, ['A', 'B']] = to_assign + # result = wp.loc[['Item1', 'Item2'], :, ['A', 'B']] + # tm.assert_panel_equal(result,expected) def test_multiindex_assignment(self): # GH3777 part 2 # mixed dtype - df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3), + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), columns=list('abc'), - index=[[4,4,8],[8,10,12]]) + index=[[4, 4, 8], [8, 10, 12]]) df['d'] = np.nan - arr = np.array([0.,1.]) + arr = np.array([0., 1.]) - df.ix[4,'d'] = arr - assert_series_equal(df.ix[4,'d'],Series(arr,index=[8,10],name='d')) + df.ix[4, 'd'] = arr + assert_series_equal(df.ix[4, 'd'], Series(arr, index=[8, 10], + name='d')) # single dtype - df = DataFrame(np.random.randint(5,10,size=9).reshape(3, 3), + df = DataFrame(np.random.randint(5, 10, size=9).reshape(3, 3), columns=list('abc'), - index=[[4,4,8],[8,10,12]]) + index=[[4, 4, 8], [8, 10, 12]]) - df.ix[4,'c'] = arr - assert_series_equal(df.ix[4,'c'],Series(arr,index=[8,10],name='c',dtype='int64')) + df.ix[4, 'c'] = arr + assert_series_equal(df.ix[4, 'c'], Series(arr, index=[8, 10], name='c', + dtype='int64')) # scalar ok - df.ix[4,'c'] = 10 - assert_series_equal(df.ix[4,'c'],Series(10,index=[8,10],name='c',dtype='int64')) + df.ix[4, 'c'] = 10 + assert_series_equal(df.ix[4, 'c'], Series(10, index=[8, 10], name='c', + dtype='int64')) # invalid assignments def f(): - df.ix[4,'c'] = [0,1,2,3] + df.ix[4, 'c'] = [0, 1, 2, 3] + self.assertRaises(ValueError, f) def f(): - df.ix[4,'c'] = [0] + df.ix[4, 'c'] = [0] + self.assertRaises(ValueError, f) # groupby example NUM_ROWS = 100 NUM_COLS = 10 - col_names = ['A'+num for num in map(str,np.arange(NUM_COLS).tolist())] + col_names = ['A' + num + for num in map(str, np.arange(NUM_COLS).tolist())] index_cols = col_names[:5] - df = DataFrame(np.random.randint(5, size=(NUM_ROWS,NUM_COLS)), dtype=np.int64, columns=col_names) + df = DataFrame( + np.random.randint(5, size=(NUM_ROWS, NUM_COLS)), dtype=np.int64, + columns=col_names) df = df.set_index(index_cols).sort_index() grp = df.groupby(level=index_cols[:4]) df['new_col'] = np.nan f_index = np.arange(5) - def f(name,df2): - return Series(np.arange(df2.shape[0]),name=df2.index.values[0]).reindex(f_index) - new_df = pd.concat([ f(name,df2) for name, df2 in grp ],axis=1).T + + def f(name, df2): + return Series( + np.arange(df2.shape[0]), + name=df2.index.values[0]).reindex(f_index) + + # TODO(wesm): unused? + # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T # we are actually operating on a copy here # but in this case, that's ok @@ -2889,49 +3255,49 @@ def f(name,df2): def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df - df = DataFrame({'FC':['a','b','a','b','a','b'], - 'PF':[0,0,0,0,1,1], - 'col1':lrange(6), - 'col2':lrange(6,12)}) - df.ix[1,0]=np.nan + df = DataFrame({'FC': ['a', 'b', 'a', 'b', 'a', 'b'], + 'PF': [0, 0, 0, 0, 1, 1], + 'col1': lrange(6), + 'col2': lrange(6, 12)}) + df.ix[1, 0] = np.nan df2 = df.copy() - mask=~df2.FC.isnull() - cols=['col1', 'col2'] + mask = ~df2.FC.isnull() + cols = ['col1', 'col2'] dft = df2 * 2 - dft.ix[3,3] = np.nan - - expected = DataFrame({'FC':['a',np.nan,'a','b','a','b'], - 'PF':[0,0,0,0,1,1], - 'col1':Series([0,1,4,6,8,10]), - 'col2':[12,7,16,np.nan,20,22]}) + dft.ix[3, 3] = np.nan + expected = DataFrame({'FC': ['a', np.nan, 'a', 'b', 'a', 'b'], + 'PF': [0, 0, 0, 0, 1, 1], + 'col1': Series([0, 1, 4, 6, 8, 10]), + 'col2': [12, 7, 16, np.nan, 20, 22]}) # frame on rhs - df2.ix[mask, cols]= dft.ix[mask, cols] - assert_frame_equal(df2,expected) + df2.ix[mask, cols] = dft.ix[mask, cols] + assert_frame_equal(df2, expected) - df2.ix[mask, cols]= dft.ix[mask, cols] - assert_frame_equal(df2,expected) + df2.ix[mask, cols] = dft.ix[mask, cols] + assert_frame_equal(df2, expected) # with an ndarray on rhs df2 = df.copy() - df2.ix[mask, cols]= dft.ix[mask, cols].values - assert_frame_equal(df2,expected) - df2.ix[mask, cols]= dft.ix[mask, cols].values - assert_frame_equal(df2,expected) + df2.ix[mask, cols] = dft.ix[mask, cols].values + assert_frame_equal(df2, expected) + df2.ix[mask, cols] = dft.ix[mask, cols].values + assert_frame_equal(df2, expected) # broadcasting on the rhs is required - df = DataFrame(dict(A = [1,2,0,0,0],B=[0,0,0,10,11],C=[0,0,0,10,11],D=[3,4,5,6,7])) + df = DataFrame(dict(A=[1, 2, 0, 0, 0], B=[0, 0, 0, 10, 11], C=[ + 0, 0, 0, 10, 11], D=[3, 4, 5, 6, 7])) expected = df.copy() mask = expected['A'] == 0 - for col in ['A','B']: - expected.loc[mask,col] = df['D'] + for col in ['A', 'B']: + expected.loc[mask, col] = df['D'] - df.loc[df['A']==0,['A','B']] = df['D'] - assert_frame_equal(df,expected) + df.loc[df['A'] == 0, ['A', 'B']] = df['D'] + assert_frame_equal(df, expected) def test_ix_assign_column_mixed(self): # GH #1142 @@ -2943,37 +3309,38 @@ def test_ix_assign_column_mixed(self): assert_series_equal(df.B, orig + 1) # GH 3668, mixed frame with series value - df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'}) + df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'}) expected = df.copy() for i in range(5): - indexer = i*2 - v = 1000 + i*200 + indexer = i * 2 + v = 1000 + i * 200 expected.ix[indexer, 'y'] = v self.assertEqual(expected.ix[indexer, 'y'], v) df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100 - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) # GH 4508, making sure consistency of assignments - df = DataFrame({'a':[1,2,3],'b':[0,1,2]}) - df.ix[[0,2,],'b'] = [100,-100] - expected = DataFrame({'a' : [1,2,3], 'b' : [100,1,-100] }) - assert_frame_equal(df,expected) + df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]}) + df.ix[[0, 2, ], 'b'] = [100, -100] + expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]}) + assert_frame_equal(df, expected) - df = pd.DataFrame({'a': lrange(4) }) + df = pd.DataFrame({'a': lrange(4)}) df['b'] = np.nan - df.ix[[1,3],'b'] = [100,-100] - expected = DataFrame({'a' : [0,1,2,3], 'b' : [np.nan,100,np.nan,-100] }) - assert_frame_equal(df,expected) + df.ix[[1, 3], 'b'] = [100, -100] + expected = DataFrame({'a': [0, 1, 2, 3], + 'b': [np.nan, 100, np.nan, -100]}) + assert_frame_equal(df, expected) # ok, but chained assignments are dangerous # if we turn off chained assignement it will work - with option_context('chained_assignment',None): - df = pd.DataFrame({'a': lrange(4) }) + with option_context('chained_assignment', None): + df = pd.DataFrame({'a': lrange(4)}) df['b'] = np.nan - df['b'].ix[[1,3]] = [100,-100] - assert_frame_equal(df,expected) + df['b'].ix[[1, 3]] = [100, -100] + assert_frame_equal(df, expected) def test_ix_get_set_consistency(self): @@ -2999,41 +3366,46 @@ def test_setitem_list(self): # GH 6043 # ix with a list - df = DataFrame(index=[0,1], columns=[0]) - df.ix[1,0] = [1,2,3] - df.ix[1,0] = [1,2] + df = DataFrame(index=[0, 1], columns=[0]) + df.ix[1, 0] = [1, 2, 3] + df.ix[1, 0] = [1, 2] - result = DataFrame(index=[0,1], columns=[0]) - result.ix[1,0] = [1,2] + result = DataFrame(index=[0, 1], columns=[0]) + result.ix[1, 0] = [1, 2] - assert_frame_equal(result,df) + assert_frame_equal(result, df) # ix with an object class TO(object): + def __init__(self, value): self.value = value + def __str__(self): return "[{0}]".format(self.value) + __repr__ = __str__ + def __eq__(self, other): return self.value == other.value + def view(self): return self - df = DataFrame(index=[0,1], columns=[0]) - df.ix[1,0] = TO(1) - df.ix[1,0] = TO(2) + df = DataFrame(index=[0, 1], columns=[0]) + df.ix[1, 0] = TO(1) + df.ix[1, 0] = TO(2) - result = DataFrame(index=[0,1], columns=[0]) - result.ix[1,0] = TO(2) + result = DataFrame(index=[0, 1], columns=[0]) + result.ix[1, 0] = TO(2) - assert_frame_equal(result,df) + assert_frame_equal(result, df) # remains object dtype even after setting it back - df = DataFrame(index=[0,1], columns=[0]) - df.ix[1,0] = TO(1) - df.ix[1,0] = np.nan - result = DataFrame(index=[0,1], columns=[0]) + df = DataFrame(index=[0, 1], columns=[0]) + df.ix[1, 0] = TO(1) + df.ix[1, 0] = np.nan + result = DataFrame(index=[0, 1], columns=[0]) assert_frame_equal(result, df) @@ -3041,37 +3413,40 @@ def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) - mask = (df.a%2 == 0) + mask = (df.a % 2 == 0) self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) mask.index = lrange(len(mask)) - self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask])) + self.assertRaises(NotImplementedError, df.iloc.__getitem__, + tuple([mask])) # ndarray ok - result = df.iloc[np.array([True] * len(mask),dtype=bool)] - assert_frame_equal(result,df) + result = df.iloc[np.array([True] * len(mask), dtype=bool)] + assert_frame_equal(result, df) # the possibilities locs = np.arange(4) - nums = 2**locs + nums = 2 ** locs reps = lmap(bin, nums) - df = DataFrame({'locs':locs, 'nums':nums}, reps) + df = DataFrame({'locs': locs, 'nums': nums}, reps) expected = { - (None,'') : '0b1100', - (None,'.loc') : '0b1100', - (None,'.iloc') : '0b1100', - ('index','') : '0b11', - ('index','.loc') : '0b11', - ('index','.iloc') : 'iLocation based boolean indexing cannot use an indexable as a mask', - ('locs','') : 'Unalignable boolean Series key provided', - ('locs','.loc') : 'Unalignable boolean Series key provided', - ('locs','.iloc') : 'iLocation based boolean indexing on an integer type is not available', - } + (None, ''): '0b1100', + (None, '.loc'): '0b1100', + (None, '.iloc'): '0b1100', + ('index', ''): '0b11', + ('index', '.loc'): '0b11', + ('index', '.iloc'): ('iLocation based boolean indexing ' + 'cannot use an indexable as a mask'), + ('locs', ''): 'Unalignable boolean Series key provided', + ('locs', '.loc'): 'Unalignable boolean Series key provided', + ('locs', '.iloc'): ('iLocation based boolean indexing on an ' + 'integer type is not available'), + } warnings.filterwarnings(action='ignore', category=UserWarning) result = dict() for idx in [None, 'index', 'locs']: - mask = (df.nums>2).values + mask = (df.nums > 2).values if idx: mask = Series(mask, list(reversed(getattr(df, idx)))) for method in ['', '.loc', '.iloc']: @@ -3084,53 +3459,75 @@ def test_iloc_mask(self): except Exception as e: ans = str(e) - key = tuple([idx,method]) + key = tuple([idx, method]) r = expected.get(key) if r != ans: - raise AssertionError("[%s] does not match [%s], received [%s]" % - (key,ans,r)) + raise AssertionError( + "[%s] does not match [%s], received [%s]" + % (key, ans, r)) warnings.filterwarnings(action='always', category=UserWarning) def test_ix_slicing_strings(self): - ##GH3836 - data = {'Classification': ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], - 'Random': [1,2,3,4,5], - 'X': ['correct', 'wrong','correct', 'correct','wrong']} + # GH3836 + data = {'Classification': + ['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'], + 'Random': [1, 2, 3, 4, 5], + 'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']} df = DataFrame(data) - x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'])] - df.ix[x.index,'X'] = df['Classification'] - - expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', 1: 'bbb', - 2: 'SA EQUITY', 3: 'SA SSF', 4: 'aaa'}, - 'Random': {0: 1, 1: 2, 2: 3, 3: 4, 4: 5}, - 'X': {0: 'correct', 1: 'bbb', 2: 'correct', - 3: 'correct', 4: 'aaa'}}) # bug was 4: 'bbb' + x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF' + ])] + df.ix[x.index, 'X'] = df['Classification'] + + expected = DataFrame({'Classification': {0: 'SA EQUITY CFD', + 1: 'bbb', + 2: 'SA EQUITY', + 3: 'SA SSF', + 4: 'aaa'}, + 'Random': {0: 1, + 1: 2, + 2: 3, + 3: 4, + 4: 5}, + 'X': {0: 'correct', + 1: 'bbb', + 2: 'correct', + 3: 'correct', + 4: 'aaa'}}) # bug was 4: 'bbb' assert_frame_equal(df, expected) def test_non_unique_loc(self): - ## GH3659 - ## non-unique indexer with loc slice - ## https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs + # GH3659 + # non-unique indexer with loc slice + # https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs # these are going to raise becuase the we are non monotonic - df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(0,None)])) - self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1,2)])) + df = DataFrame( + {'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(1, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, + tuple([slice(0, None)])) + self.assertRaises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)])) # monotonic are ok - df = DataFrame({'A' : [1,2,3,4,5,6], 'B' : [3,4,5,6,7,8]}, index = [0,1,0,1,2,3]).sort_index(axis=0) + df = DataFrame( + {'A': [1, 2, 3, 4, 5, 6], + 'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3]).sort_index( + axis=0) result = df.loc[1:] - expected = DataFrame({'A' : [2,4,5,6], 'B' : [4, 6,7,8]}, index = [1,1,2,3]) - assert_frame_equal(result,expected) + expected = DataFrame( + {'A': [2, 4, 5, 6], + 'B': [4, 6, 7, 8]}, index=[1, 1, 2, 3]) + assert_frame_equal(result, expected) result = df.loc[0:] - assert_frame_equal(result,df) + assert_frame_equal(result, df) result = df.loc[1:2] - expected = DataFrame({'A' : [2,4,5], 'B' : [4,6,7]}, index = [1,1,2]) - assert_frame_equal(result,expected) + expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]}, index=[1, 1, 2]) + assert_frame_equal(result, expected) def test_loc_name(self): # GH 3880 @@ -3147,30 +3544,31 @@ def test_loc_name(self): def test_iloc_non_unique_indexing(self): - #GH 4017, non-unique indexing (on the axis) - df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) + # GH 4017, non-unique indexing (on the axis) + df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000}) idx = np.array(lrange(30)) * 99 expected = df.iloc[idx] - df3 = pd.concat([df, 2*df, 3*df]) + df3 = pd.concat([df, 2 * df, 3 * df]) result = df3.iloc[idx] assert_frame_equal(result, expected) - df2 = DataFrame({'A' : [0.1] * 1000, 'B' : [1] * 1000}) - df2 = pd.concat([df2, 2*df2, 3*df2]) + df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000}) + df2 = pd.concat([df2, 2 * df2, 3 * df2]) sidx = df2.index.to_series() - expected = df2.iloc[idx[idx<=sidx.max()]] + expected = df2.iloc[idx[idx <= sidx.max()]] new_list = [] for r, s in expected.iterrows(): new_list.append(s) - new_list.append(s*2) - new_list.append(s*3) + new_list.append(s * 2) + new_list.append(s * 3) expected = DataFrame(new_list) - expected = pd.concat([ expected, DataFrame(index=idx[idx>sidx.max()]) ]) + expected = pd.concat([expected, DataFrame(index=idx[idx > sidx.max()]) + ]) result = df2.loc[idx] assert_frame_equal(result, expected, check_index_type=False) @@ -3186,27 +3584,31 @@ def test_mi_access(self): 5 f B 6 A2 6 """ - df = pd.read_csv(StringIO(data),sep='\s+',index_col=0) + df = pd.read_csv(StringIO(data), sep='\s+', index_col=0) df2 = df.set_index(['main', 'sub']).T.sort_index(1) - index = Index(['h1','h3','h5']) - columns = MultiIndex.from_tuples([('A','A1')],names=['main','sub']) - expected = DataFrame([['a',1,1]],index=columns,columns=index).T + index = Index(['h1', 'h3', 'h5']) + columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) + expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T - result = df2.loc[:,('A','A1')] - assert_frame_equal(result,expected) + result = df2.loc[:, ('A', 'A1')] + assert_frame_equal(result, expected) - result = df2[('A','A1')] - assert_frame_equal(result,expected) + result = df2[('A', 'A1')] + assert_frame_equal(result, expected) # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index - # as of 4879, this returns a Series (which is similar to what happens with a non-unique) - expected = Series(['a',1,1], index=['h1','h3','h5'], name='A1') + # as of 4879, this returns a Series (which is similar to what happens + # with a non-unique) + expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') result = df2['A']['A1'] assert_series_equal(result, expected) # selecting a non_unique from the 2nd level - expected = DataFrame([['d',4,4],['e',5,5]],index=Index(['B2','B2'],name='sub'),columns=['h1','h3','h5'],).T + expected = DataFrame([['d', 4, 4], ['e', 5, 5]], + index=Index( + ['B2', 'B2'], name='sub'), + columns=['h1', 'h3', 'h5'], ).T result = df2['A']['B2'] assert_frame_equal(result, expected) @@ -3216,97 +3618,108 @@ def test_non_unique_loc_memory_error(self): # non_unique index with a large selection triggers a memory error columns = list('ABCDEFG') - def gen_test(l,l2): - return pd.concat([ DataFrame(randn(l,len(columns)),index=lrange(l),columns=columns), - DataFrame(np.ones((l2,len(columns))),index=[0]*l2,columns=columns) ]) + def gen_test(l, l2): + return pd.concat([DataFrame( + randn(l, len(columns)), index=lrange( + l), columns=columns), DataFrame( + np.ones((l2, len(columns) + )), index=[0] * l2, columns=columns)]) - def gen_expected(df,mask): + def gen_expected(df, mask): l = len(mask) - return pd.concat([ - df.take([0],convert=False), - DataFrame(np.ones((l,len(columns))),index=[0]*l,columns=columns), - df.take(mask[1:],convert=False) ]) + return pd.concat([df.take([0], convert=False), + DataFrame(np.ones((l, len(columns))), + index=[0] * l, + columns=columns), + df.take(mask[1:], convert=False)]) - df = gen_test(900,100) + df = gen_test(900, 100) self.assertFalse(df.index.is_unique) mask = np.arange(100) result = df.loc[mask] - expected = gen_expected(df,mask) - assert_frame_equal(result,expected) + expected = gen_expected(df, mask) + assert_frame_equal(result, expected) - df = gen_test(900000,100000) + df = gen_test(900000, 100000) self.assertFalse(df.index.is_unique) mask = np.arange(100000) result = df.loc[mask] - expected = gen_expected(df,mask) - assert_frame_equal(result,expected) + expected = gen_expected(df, mask) + assert_frame_equal(result, expected) def test_astype_assignment(self): # GH4312 (iloc) - df_orig = DataFrame([['1','2','3','.4',5,6.,'foo']],columns=list('ABCDEFG')) + df_orig = DataFrame( + [['1', '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) df = df_orig.copy() - df.iloc[:,0:2] = df.iloc[:,0:2].astype(np.int64) - expected = DataFrame([[1,2,'3','.4',5,6.,'foo']],columns=list('ABCDEFG')) - assert_frame_equal(df,expected) + df.iloc[:, 0:2] = df.iloc[:, 0:2].astype(np.int64) + expected = DataFrame( + [[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) + assert_frame_equal(df, expected) df = df_orig.copy() - df.iloc[:,0:2] = df.iloc[:,0:2]._convert(datetime=True, numeric=True) - expected = DataFrame([[1,2,'3','.4',5,6.,'foo']],columns=list('ABCDEFG')) - assert_frame_equal(df,expected) + df.iloc[:, 0:2] = df.iloc[:, 0:2]._convert(datetime=True, numeric=True) + expected = DataFrame( + [[1, 2, '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) + assert_frame_equal(df, expected) # GH5702 (loc) df = df_orig.copy() - df.loc[:,'A'] = df.loc[:,'A'].astype(np.int64) - expected = DataFrame([[1,'2','3','.4',5,6.,'foo']],columns=list('ABCDEFG')) - assert_frame_equal(df,expected) + df.loc[:, 'A'] = df.loc[:, 'A'].astype(np.int64) + expected = DataFrame( + [[1, '2', '3', '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) + assert_frame_equal(df, expected) df = df_orig.copy() - df.loc[:,['B','C']] = df.loc[:,['B','C']].astype(np.int64) - expected = DataFrame([['1',2,3,'.4',5,6.,'foo']],columns=list('ABCDEFG')) - assert_frame_equal(df,expected) + df.loc[:, ['B', 'C']] = df.loc[:, ['B', 'C']].astype(np.int64) + expected = DataFrame( + [['1', 2, 3, '.4', 5, 6., 'foo']], columns=list('ABCDEFG')) + assert_frame_equal(df, expected) # full replacements / no nans df = DataFrame({'A': [1., 2., 3., 4.]}) df.iloc[:, 0] = df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) df = DataFrame({'A': [1., 2., 3., 4.]}) df.loc[:, 'A'] = df['A'].astype(np.int64) expected = DataFrame({'A': [1, 2, 3, 4]}) - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) def test_astype_assignment_with_dups(self): # GH 4686 # assignment with dups that has a dtype change df = DataFrame( - np.arange(3).reshape((1,3)), + np.arange(3).reshape((1, 3)), columns=pd.MultiIndex.from_tuples( [('A', '1'), ('B', '1'), ('A', '2')] - ), + ), dtype=object - ) + ) index = df.index.copy() df['A'] = df['A'].astype(np.float64) - result = df.get_dtype_counts().sort_index() - expected = Series({ 'float64' : 2, 'object' : 1 }).sort_index() self.assertTrue(df.index.equals(index)) + # TODO(wesm): unused variables + # result = df.get_dtype_counts().sort_index() + # expected = Series({'float64': 2, 'object': 1}).sort_index() + def test_dups_loc(self): # GH4726 # dup indexing with iloc/loc df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]], - columns=['a','a','a','a','a'], index=[1]) + columns=['a', 'a', 'a', 'a', 'a'], index=[1]) expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')], - index=['a','a','a','a','a'], name=1) + index=['a', 'a', 'a', 'a', 'a'], name=1) result = df.iloc[0] assert_series_equal(result, expected) @@ -3318,141 +3731,160 @@ def test_partial_setting(self): # GH2578, allow ix and friends to partially set - ### series ### - s_orig = Series([1,2,3]) + # series + s_orig = Series([1, 2, 3]) s = s_orig.copy() s[5] = 5 - expected = Series([1,2,3,5],index=[0,1,2,5]) - assert_series_equal(s,expected) + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5 - expected = Series([1,2,3,5],index=[0,1,2,5]) - assert_series_equal(s,expected) + expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) + assert_series_equal(s, expected) s = s_orig.copy() s[5] = 5. - expected = Series([1,2,3,5.],index=[0,1,2,5]) - assert_series_equal(s,expected) + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5. - expected = Series([1,2,3,5.],index=[0,1,2,5]) - assert_series_equal(s,expected) + expected = Series([1, 2, 3, 5.], index=[0, 1, 2, 5]) + assert_series_equal(s, expected) # iloc/iat raise s = s_orig.copy() + def f(): s.iloc[3] = 5. + self.assertRaises(IndexError, f) + def f(): s.iat[3] = 5. + self.assertRaises(IndexError, f) - ### frame ### + # ## frame ## - df_orig = DataFrame(np.arange(6).reshape(3,2),columns=['A','B'],dtype='int64') + df_orig = DataFrame( + np.arange(6).reshape(3, 2), columns=['A', 'B'], dtype='int64') # iloc/iat raise df = df_orig.copy() + def f(): - df.iloc[4,2] = 5. + df.iloc[4, 2] = 5. + self.assertRaises(IndexError, f) + def f(): - df.iat[4,2] = 5. + df.iat[4, 2] = 5. + self.assertRaises(IndexError, f) # row setting where it exists - expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] })) + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) df = df_orig.copy() df.iloc[1] = df.iloc[2] - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) - expected = DataFrame(dict({ 'A' : [0,4,4], 'B' : [1,5,5] })) + expected = DataFrame(dict({'A': [0, 4, 4], 'B': [1, 5, 5]})) df = df_orig.copy() df.loc[1] = df.loc[2] - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) # like 2578, partial setting with dtype preservation - expected = DataFrame(dict({ 'A' : [0,2,4,4], 'B' : [1,3,5,5] })) + expected = DataFrame(dict({'A': [0, 2, 4, 4], 'B': [1, 3, 5, 5]})) df = df_orig.copy() df.loc[3] = df.loc[2] - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) # single dtype frame, overwrite - expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : [0,2,4] })) + expected = DataFrame(dict({'A': [0, 2, 4], 'B': [0, 2, 4]})) df = df_orig.copy() - df.ix[:,'B'] = df.ix[:,'A'] - assert_frame_equal(df,expected) + df.ix[:, 'B'] = df.ix[:, 'A'] + assert_frame_equal(df, expected) # mixed dtype frame, overwrite - expected = DataFrame(dict({ 'A' : [0,2,4], 'B' : Series([0,2,4]) })) + expected = DataFrame(dict({'A': [0, 2, 4], 'B': Series([0, 2, 4])})) df = df_orig.copy() df['B'] = df['B'].astype(np.float64) - df.ix[:,'B'] = df.ix[:,'A'] - assert_frame_equal(df,expected) + df.ix[:, 'B'] = df.ix[:, 'A'] + assert_frame_equal(df, expected) # single dtype frame, partial setting expected = df_orig.copy() expected['C'] = df['A'] df = df_orig.copy() - df.ix[:,'C'] = df.ix[:,'A'] - assert_frame_equal(df,expected) + df.ix[:, 'C'] = df.ix[:, 'A'] + assert_frame_equal(df, expected) # mixed frame, partial setting expected = df_orig.copy() expected['C'] = df['A'] df = df_orig.copy() - df.ix[:,'C'] = df.ix[:,'A'] - assert_frame_equal(df,expected) + df.ix[:, 'C'] = df.ix[:, 'A'] + assert_frame_equal(df, expected) - ### panel ### - p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64') + # ## panel ## + p_orig = Panel( + np.arange(16).reshape(2, 4, 2), items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') # panel setting via item - p_orig = Panel(np.arange(16).reshape(2,4,2),items=['Item1','Item2'],major_axis=pd.date_range('2001/1/12',periods=4),minor_axis=['A','B'],dtype='float64') + p_orig = Panel( + np.arange(16).reshape(2, 4, 2), items=['Item1', 'Item2'], + major_axis=pd.date_range('2001/1/12', periods=4), + minor_axis=['A', 'B'], dtype='float64') expected = p_orig.copy() expected['Item3'] = expected['Item1'] p = p_orig.copy() p.loc['Item3'] = p['Item1'] - assert_panel_equal(p,expected) + assert_panel_equal(p, expected) # panel with aligned series expected = p_orig.copy() - expected = expected.transpose(2,1,0) - expected['C'] = DataFrame({ 'Item1' : [30,30,30,30], 'Item2' : [32,32,32,32] },index=p_orig.major_axis) - expected = expected.transpose(2,1,0) + expected = expected.transpose(2, 1, 0) + expected['C'] = DataFrame( + {'Item1': [30, 30, 30, 30], + 'Item2': [32, 32, 32, 32]}, index=p_orig.major_axis) + expected = expected.transpose(2, 1, 0) p = p_orig.copy() - p.loc[:,:,'C'] = Series([30,32],index=p_orig.items) - assert_panel_equal(p,expected) + p.loc[:, :, 'C'] = Series([30, 32], index=p_orig.items) + assert_panel_equal(p, expected) # GH 8473 dates = date_range('1/1/2000', periods=8) - df_orig = DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) + df_orig = DataFrame( + np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) - expected = pd.concat([df_orig,DataFrame({'A' : 7},index=[dates[-1]+1])]) + expected = pd.concat([df_orig, DataFrame( + {'A': 7}, index=[dates[-1] + 1])]) df = df_orig.copy() - df.loc[dates[-1]+1, 'A'] = 7 - assert_frame_equal(df,expected) + df.loc[dates[-1] + 1, 'A'] = 7 + assert_frame_equal(df, expected) df = df_orig.copy() - df.at[dates[-1]+1, 'A'] = 7 - assert_frame_equal(df,expected) + df.at[dates[-1] + 1, 'A'] = 7 + assert_frame_equal(df, expected) - expected = pd.concat([df_orig,DataFrame({0 : 7},index=[dates[-1]+1])],axis=1) + expected = pd.concat( + [df_orig, DataFrame({0: 7}, index=[dates[-1] + 1])], axis=1) df = df_orig.copy() - df.loc[dates[-1]+1, 0] = 7 - assert_frame_equal(df,expected) + df.loc[dates[-1] + 1, 0] = 7 + assert_frame_equal(df, expected) df = df_orig.copy() - df.at[dates[-1]+1, 0] = 7 - assert_frame_equal(df,expected) + df.at[dates[-1] + 1, 0] = 7 + assert_frame_equal(df, expected) def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes # by appending - df = DataFrame([[True, 1],[False, 2]], - columns = ["female","fitness"]) + df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) s = df.loc[1].copy() s.name = 2 @@ -3462,34 +3894,39 @@ def test_partial_setting_mixed_dtype(self): assert_frame_equal(df, expected) # columns will align - df = DataFrame(columns=['A','B']) - df.loc[0] = Series(1,index=range(4)) - assert_frame_equal(df,DataFrame(columns=['A','B'],index=[0])) + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=range(4)) + assert_frame_equal(df, DataFrame(columns=['A', 'B'], index=[0])) # columns will align - df = DataFrame(columns=['A','B']) - df.loc[0] = Series(1,index=['B']) - assert_frame_equal(df,DataFrame([[np.nan, 1]], columns=['A','B'],index=[0],dtype='float64')) + df = DataFrame(columns=['A', 'B']) + df.loc[0] = Series(1, index=['B']) + assert_frame_equal(df, DataFrame( + [[np.nan, 1]], columns=['A', 'B'], index=[0], dtype='float64')) # list-like must conform - df = DataFrame(columns=['A','B']) + df = DataFrame(columns=['A', 'B']) + def f(): - df.loc[0] = [1,2,3] + df.loc[0] = [1, 2, 3] + self.assertRaises(ValueError, f) # these are coerced to float unavoidably (as its a list-like to begin) - df = DataFrame(columns=['A','B']) - df.loc[3] = [6,7] - assert_frame_equal(df,DataFrame([[6,7]],index=[3],columns=['A','B'],dtype='float64')) + df = DataFrame(columns=['A', 'B']) + df.loc[3] = [6, 7] + assert_frame_equal(df, DataFrame( + [[6, 7]], index=[3], columns=['A', 'B'], dtype='float64')) def test_partial_setting_with_datetimelike_dtype(self): # GH9478 # a datetimeindex alignment issue with partial setting - df = pd.DataFrame(np.arange(6.).reshape(3,2), columns=list('AB'), - index=pd.date_range('1/1/2000', periods=3, freq='1H')) + df = pd.DataFrame(np.arange(6.).reshape(3, 2), columns=list('AB'), + index=pd.date_range('1/1/2000', periods=3, + freq='1H')) expected = df.copy() - expected['C'] = [expected.index[0]] + [pd.NaT,pd.NaT] + expected['C'] = [expected.index[0]] + [pd.NaT, pd.NaT] mask = df.A < 1 df.loc[mask, 'C'] = df.loc[mask].index @@ -3505,10 +3942,10 @@ def test_loc_setitem_datetime(self): lambda x: x.to_pydatetime(), lambda x: np.datetime64(x)]: df = pd.DataFrame() - df.loc[conv(dt1),'one'] = 100 - df.loc[conv(dt2),'one'] = 200 + df.loc[conv(dt1), 'one'] = 100 + df.loc[conv(dt2), 'one'] = 200 - expected = DataFrame({'one' : [100.0, 200.0]},index=[dt1, dt2]) + expected = DataFrame({'one': [100.0, 200.0]}, index=[dt1, dt2]) assert_frame_equal(df, expected) def test_series_partial_set(self): @@ -3534,7 +3971,7 @@ def test_series_partial_set(self): assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index - self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]]) + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) result = ser.loc[[2, 2, 3]] @@ -3545,19 +3982,23 @@ def test_series_partial_set(self): assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 3, 3]] + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 3, 3]] assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[5, 4, 4]] + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[5, 4, 4]] assert_series_equal(result, expected, check_index_type=True) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]).loc[[7, 2, 2]] + result = Series([0.1, 0.2, 0.3, 0.4], + index=[4, 5, 6, 7]).loc[[7, 2, 2]] assert_series_equal(result, expected, check_index_type=True) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - result = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]).loc[[4, 5, 5]] + result = Series([0.1, 0.2, 0.3, 0.4], + index=[1, 2, 3, 4]).loc[[4, 5, 5]] assert_series_equal(result, expected, check_index_type=True) # iloc @@ -3578,7 +4019,8 @@ def test_series_partial_set_with_name(self): assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([3, 2, 3, 'x'], dtype='object', name='idx') - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name='s') + expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, + name='s') result = ser.loc[[3, 2, 3, 'x']] assert_series_equal(result, expected, check_index_type=True) @@ -3593,7 +4035,7 @@ def test_series_partial_set_with_name(self): assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index - self.assertRaises(KeyError, lambda : ser.loc[[3, 3, 3]]) + self.assertRaises(KeyError, lambda: ser.loc[[3, 3, 3]]) exp_idx = Index([2, 2, 3], dtype='int64', name='idx') expected = Series([0.2, 0.2, np.nan], index=exp_idx, name='s') @@ -3609,31 +4051,35 @@ def test_series_partial_set_with_name(self): exp_idx = Index([5, 3, 3], dtype='int64', name='idx') expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[5, 3, 3]] + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 3, 3]] assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([5, 4, 4], dtype='int64', name='idx') expected = Series([np.nan, 0.4, 0.4], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[5, 4, 4]] + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[5, 4, 4]] assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([7, 2, 2], dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([4, 5, 6, 7], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[7, 2, 2]] + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[7, 2, 2]] assert_series_equal(result, expected, check_index_type=True) exp_idx = Index([4, 5, 5], dtype='int64', name='idx') expected = Series([0.4, np.nan, np.nan], index=exp_idx, name='s') idx = Index([1, 2, 3, 4], dtype='int64', name='idx') - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name='s').loc[[4, 5, 5]] + result = Series([0.1, 0.2, 0.3, 0.4], index=idx, + name='s').loc[[4, 5, 5]] assert_series_equal(result, expected, check_index_type=True) # iloc exp_idx = Index([2, 2, 1, 1], dtype='int64', name='idx') expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name='s') - result = ser.iloc[[1,1,0,0]] + result = ser.iloc[[1, 1, 0, 0]] assert_series_equal(result, expected, check_index_type=True) def test_series_partial_set_datetime(self): @@ -3646,12 +4092,16 @@ def test_series_partial_set_datetime(self): exp = Series([0.1, 0.2], index=idx, name='s') assert_series_equal(result, exp, check_index_type=True) - keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), Timestamp('2011-01-01')] - exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), name='s') + keys = [Timestamp('2011-01-02'), Timestamp('2011-01-02'), + Timestamp('2011-01-01')] + exp = Series([0.2, 0.2, 0.1], index=pd.DatetimeIndex(keys, name='idx'), + name='s') assert_series_equal(ser.loc[keys], exp, check_index_type=True) - keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), Timestamp('2011-01-03')] - exp = Series([np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name='idx'), name='s') + keys = [Timestamp('2011-01-03'), Timestamp('2011-01-02'), + Timestamp('2011-01-03')] + exp = Series([np.nan, 0.2, np.nan], + index=pd.DatetimeIndex(keys, name='idx'), name='s') assert_series_equal(ser.loc[keys], exp, check_index_type=True) def test_series_partial_set_period(self): @@ -3660,18 +4110,23 @@ def test_series_partial_set_period(self): idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') ser = Series([0.1, 0.2], index=idx, name='s') - result = ser.loc[[pd.Period('2011-01-01', freq='D'), pd.Period('2011-01-02', freq='D')]] + result = ser.loc[[pd.Period('2011-01-01', freq='D'), pd.Period( + '2011-01-02', freq='D')]] exp = Series([0.1, 0.2], index=idx, name='s') assert_series_equal(result, exp, check_index_type=True) - keys = [pd.Period('2011-01-02', freq='D'), pd.Period('2011-01-02', freq='D'), + keys = [pd.Period('2011-01-02', freq='D'), + pd.Period('2011-01-02', freq='D'), pd.Period('2011-01-01', freq='D')] - exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), name='s') + exp = Series([0.2, 0.2, 0.1], index=pd.PeriodIndex(keys, name='idx'), + name='s') assert_series_equal(ser.loc[keys], exp, check_index_type=True) - keys = [pd.Period('2011-01-03', freq='D'), pd.Period('2011-01-02', freq='D'), + keys = [pd.Period('2011-01-03', freq='D'), + pd.Period('2011-01-02', freq='D'), pd.Period('2011-01-03', freq='D')] - exp = Series([np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name='idx'), name='s') + exp = Series([np.nan, 0.2, np.nan], + index=pd.PeriodIndex(keys, name='idx'), name='s') assert_series_equal(ser.loc[keys], exp, check_index_type=True) def test_partial_set_invalid(self): @@ -3684,20 +4139,26 @@ def test_partial_set_invalid(self): # don't allow not string inserts def f(): df.loc[100.0, :] = df.ix[0] + self.assertRaises(TypeError, f) + def f(): - df.loc[100,:] = df.ix[0] + df.loc[100, :] = df.ix[0] + self.assertRaises(TypeError, f) def f(): df.ix[100.0, :] = df.ix[0] + self.assertRaises(ValueError, f) + def f(): - df.ix[100,:] = df.ix[0] + df.ix[100, :] = df.ix[0] + self.assertRaises(ValueError, f) # allow object conversion here - df.loc['a',:] = df.ix[0] + df.loc['a', :] = df.ix[0] def test_partial_set_empty(self): @@ -3707,23 +4168,23 @@ def test_partial_set_empty(self): # series s = Series() s.loc[1] = 1 - assert_series_equal(s,Series([1],index=[1])) + assert_series_equal(s, Series([1], index=[1])) s.loc[3] = 3 - assert_series_equal(s,Series([1,3],index=[1,3])) + assert_series_equal(s, Series([1, 3], index=[1, 3])) s = Series() s.loc[1] = 1. - assert_series_equal(s,Series([1.],index=[1])) + assert_series_equal(s, Series([1.], index=[1])) s.loc[3] = 3. - assert_series_equal(s,Series([1.,3.],index=[1,3])) + assert_series_equal(s, Series([1., 3.], index=[1, 3])) s = Series() s.loc['foo'] = 1 - assert_series_equal(s,Series([1],index=['foo'])) + assert_series_equal(s, Series([1], index=['foo'])) s.loc['bar'] = 3 - assert_series_equal(s,Series([1,3],index=['foo','bar'])) + assert_series_equal(s, Series([1, 3], index=['foo', 'bar'])) s.loc[3] = 4 - assert_series_equal(s,Series([1,3,4],index=['foo','bar',3])) + assert_series_equal(s, Series([1, 3, 4], index=['foo', 'bar', 3])) # partially set with an empty object # frame @@ -3731,77 +4192,98 @@ def test_partial_set_empty(self): def f(): df.loc[1] = 1 + self.assertRaises(ValueError, f) + def f(): - df.loc[1] = Series([1],index=['foo']) + df.loc[1] = Series([1], index=['foo']) + self.assertRaises(ValueError, f) + def f(): - df.loc[:,1] = 1 + df.loc[:, 1] = 1 + self.assertRaises(ValueError, f) # these work as they don't really change # anything but the index # GH5632 - expected = DataFrame(columns=['foo'], index=pd.Index([], dtype='int64')) + expected = DataFrame(columns=['foo'], index=pd.Index( + [], dtype='int64')) + def f(): df = DataFrame() df['foo'] = Series([], dtype='object') return df + assert_frame_equal(f(), expected) + def f(): df = DataFrame() df['foo'] = Series(df.index) return df + assert_frame_equal(f(), expected) + def f(): df = DataFrame() df['foo'] = df.index return df + assert_frame_equal(f(), expected) - expected = DataFrame(columns=['foo'], index=pd.Index([], dtype='int64')) + expected = DataFrame(columns=['foo'], index=pd.Index( + [], dtype='int64')) expected['foo'] = expected['foo'].astype('float64') + def f(): df = DataFrame() df['foo'] = [] return df + assert_frame_equal(f(), expected) + def f(): df = DataFrame() df['foo'] = Series(range(len(df))) return df + assert_frame_equal(f(), expected) + def f(): df = DataFrame() df['foo'] = range(len(df)) return df + assert_frame_equal(f(), expected) df = DataFrame() df2 = DataFrame() df2[1] = Series([1], index=['foo']) - df.loc[:,1] = Series([1], index=['foo']) - assert_frame_equal(df,DataFrame([[1]], index=['foo'], columns=[1])) - assert_frame_equal(df,df2) + df.loc[:, 1] = Series([1], index=['foo']) + assert_frame_equal(df, DataFrame([[1]], index=['foo'], columns=[1])) + assert_frame_equal(df, df2) # no index to start - expected = DataFrame({ 0 : Series(1,index=range(4)) }, columns=['A','B',0]) + expected = DataFrame( + {0: Series(1, index=range(4))}, columns=['A', 'B', 0]) - df = DataFrame(columns=['A','B']) + df = DataFrame(columns=['A', 'B']) df[0] = Series(1, index=range(4)) df.dtypes str(df) - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) - df = DataFrame(columns=['A','B']) - df.loc[:,0] = Series(1,index=range(4)) + df = DataFrame(columns=['A', 'B']) + df.loc[:, 0] = Series(1, index=range(4)) df.dtypes str(df) - assert_frame_equal(df,expected) + assert_frame_equal(df, expected) # GH5720, GH5744 # don't create rows when empty - expected = DataFrame(columns=['A', 'B', 'New'], index=pd.Index([], dtype='int64')) + expected = DataFrame(columns=['A', 'B', 'New'], index=pd.Index( + [], dtype='int64')) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['New'] = expected['New'].astype('float64') @@ -3809,69 +4291,71 @@ def f(): y = df[df.A > 5] y['New'] = np.nan assert_frame_equal(y, expected) - #assert_frame_equal(y,expected) + # assert_frame_equal(y,expected) expected = DataFrame(columns=['a', 'b', 'c c', 'd']) expected['d'] = expected['d'].astype('int64') df = DataFrame(columns=['a', 'b', 'c c']) df['d'] = 3 assert_frame_equal(df, expected) - assert_series_equal(df['c c'],Series(name='c c',dtype=object)) + assert_series_equal(df['c c'], Series(name='c c', dtype=object)) # reindex columns is ok df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] - result = y.reindex(columns=['A','B','C']) - expected = DataFrame(columns=['A','B','C'], index=pd.Index([], dtype='int64')) + result = y.reindex(columns=['A', 'B', 'C']) + expected = DataFrame(columns=['A', 'B', 'C'], index=pd.Index( + [], dtype='int64')) expected['A'] = expected['A'].astype('int64') expected['B'] = expected['B'].astype('float64') expected['C'] = expected['C'].astype('float64') - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # GH 5756 # setting with empty Series df = DataFrame(Series()) - assert_frame_equal(df, DataFrame({ 0 : Series() })) + assert_frame_equal(df, DataFrame({0: Series()})) df = DataFrame(Series(name='foo')) - assert_frame_equal(df, DataFrame({ 'foo' : Series() })) + assert_frame_equal(df, DataFrame({'foo': Series()})) # GH 5932 # copy on empty with assignment fails df = DataFrame(index=[0]) df = df.copy() df['a'] = 0 - expected = DataFrame(0,index=[0],columns=['a']) + expected = DataFrame(0, index=[0], columns=['a']) assert_frame_equal(df, expected) # GH 6171 # consistency on empty frames df = DataFrame(columns=['x', 'y']) df['x'] = [1, 2] - expected = DataFrame(dict(x = [1,2], y = [np.nan,np.nan])) + expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) assert_frame_equal(df, expected, check_dtype=False) df = DataFrame(columns=['x', 'y']) df['x'] = ['1', '2'] - expected = DataFrame(dict(x = ['1','2'], y = [np.nan,np.nan]),dtype=object) + expected = DataFrame( + dict(x=['1', '2'], y=[np.nan, np.nan]), dtype=object) assert_frame_equal(df, expected) df = DataFrame(columns=['x', 'y']) df.loc[0, 'x'] = 1 - expected = DataFrame(dict(x = [1], y = [np.nan])) + expected = DataFrame(dict(x=[1], y=[np.nan])) assert_frame_equal(df, expected, check_dtype=False) def test_cache_updating(self): # GH 4939, make sure to update the cache on setitem df = tm.makeDataFrame() - df['A'] # cache series + df['A'] # cache series df.ix["Hello Friend"] = df.ix[0] self.assertIn("Hello Friend", df['A'].index) self.assertIn("Hello Friend", df['B'].index) panel = tm.makePanel() - panel.ix[0] # get first item into cache + panel.ix[0] # get first item into cache panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 self.assertIn("A+1", panel.ix[0].columns) self.assertIn("A+1", panel.ix[1].columns) @@ -3887,33 +4371,38 @@ def test_cache_updating(self): # setting via chained assignment # but actually works, since everything is a view df.loc[0]['z'].iloc[0] = 1. - result = df.loc[(0,0),'z'] + result = df.loc[(0, 0), 'z'] self.assertEqual(result, 1) # correct setting - df.loc[(0,0),'z'] = 2 - result = df.loc[(0,0),'z'] + df.loc[(0, 0), 'z'] = 2 + result = df.loc[(0, 0), 'z'] self.assertEqual(result, 2) # 10264 - df = DataFrame(np.zeros((5,5),dtype='int64'),columns=['a','b','c','d','e'],index=range(5)) + df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e'], index=range(5)) df['f'] = 0 df.f.values[3] = 1 - y = df.iloc[np.arange(2,len(df))] + + # TODO(wesm): unused? + # y = df.iloc[np.arange(2, len(df))] + df.f.values[3] = 2 - expected = DataFrame(np.zeros((5,6),dtype='int64'),columns=['a','b','c','d','e','f'],index=range(5)) - expected.at[3,'f'] = 2 + expected = DataFrame(np.zeros((5, 6), dtype='int64'), columns=[ + 'a', 'b', 'c', 'd', 'e', 'f'], index=range(5)) + expected.at[3, 'f'] = 2 assert_frame_equal(df, expected) - expected = Series([0,0,0,2,0],name='f') + expected = Series([0, 0, 0, 2, 0], name='f') assert_series_equal(df.f, expected) def test_slice_consolidate_invalidate_item_cache(self): # this is chained assignment, but will 'work' - with option_context('chained_assignment',None): + with option_context('chained_assignment', None): # #3970 - df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) + df = DataFrame({"aa": lrange(5), "bb": [2.2] * 5}) # Creates a second float block df["cc"] = 0.0 @@ -3931,39 +4420,44 @@ def test_slice_consolidate_invalidate_item_cache(self): def test_setitem_cache_updating(self): # GH 5424 - cont = ['one', 'two','three', 'four', 'five', 'six', 'seven'] + cont = ['one', 'two', 'three', 'four', 'five', 'six', 'seven'] - for do_ref in [False,False]: - df = DataFrame({'a' : cont, "b":cont[3:]+cont[:3] ,'c' : np.arange(7)}) + for do_ref in [False, False]: + df = DataFrame({'a': cont, + "b": cont[3:] + cont[:3], + 'c': np.arange(7)}) # ref the cache if do_ref: - df.ix[0,"c"] + df.ix[0, "c"] # set it - df.ix[7,'c'] = 1 + df.ix[7, 'c'] = 1 - self.assertEqual(df.ix[0,'c'], 0.0) - self.assertEqual(df.ix[7,'c'], 1.0) + self.assertEqual(df.ix[0, 'c'], 0.0) + self.assertEqual(df.ix[7, 'c'], 1.0) # GH 7084 # not updating cache on series setting with slices - expected = DataFrame({'A': [600, 600, 600]}, index=date_range('5/7/2014', '5/9/2014')) - out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + expected = DataFrame({'A': [600, 600, 600]}, + index=date_range('5/7/2014', '5/9/2014')) + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) df = DataFrame({'C': ['A', 'A', 'A'], 'D': [100, 200, 300]}) - #loop through df to update out + # loop through df to update out six = Timestamp('5/7/2014') eix = Timestamp('5/9/2014') for ix, row in df.iterrows(): - out.loc[six:eix,row['C']] = out.loc[six:eix,row['C']] + row['D'] + out.loc[six:eix, row['C']] = out.loc[six:eix, row['C']] + row['D'] assert_frame_equal(out, expected) assert_series_equal(out['A'], expected['A']) # try via a chain indexing # this actually works - out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) for ix, row in df.iterrows(): v = out[row['C']][six:eix] + row['D'] out[row['C']][six:eix] = v @@ -3971,9 +4465,10 @@ def test_setitem_cache_updating(self): assert_frame_equal(out, expected) assert_series_equal(out['A'], expected['A']) - out = DataFrame({'A': [0, 0, 0]}, index=date_range('5/7/2014', '5/9/2014')) + out = DataFrame({'A': [0, 0, 0]}, + index=date_range('5/7/2014', '5/9/2014')) for ix, row in df.iterrows(): - out.loc[six:eix,row['C']] += row['D'] + out.loc[six:eix, row['C']] += row['D'] assert_frame_equal(out, expected) assert_series_equal(out['A'], expected['A']) @@ -3988,89 +4483,109 @@ def test_setitem_chained_setfault(self): df = DataFrame({'response': np.array(data)}) mask = df.response == 'timeout' df.response[mask] = 'none' - assert_frame_equal(df, DataFrame({'response': mdata })) + assert_frame_equal(df, DataFrame({'response': mdata})) recarray = np.rec.fromarrays([data], names=['response']) df = DataFrame(recarray) mask = df.response == 'timeout' df.response[mask] = 'none' - assert_frame_equal(df, DataFrame({'response': mdata })) + assert_frame_equal(df, DataFrame({'response': mdata})) - df = DataFrame({'response': data, 'response1' : data }) + df = DataFrame({'response': data, 'response1': data}) mask = df.response == 'timeout' df.response[mask] = 'none' - assert_frame_equal(df, DataFrame({'response': mdata, 'response1' : data })) + assert_frame_equal(df, DataFrame({'response': mdata, + 'response1': data})) # GH 6056 - expected = DataFrame(dict(A = [np.nan,'bar','bah','foo','bar'])) - df = DataFrame(dict(A = np.array(['foo','bar','bah','foo','bar']))) + expected = DataFrame(dict(A=[np.nan, 'bar', 'bah', 'foo', 'bar'])) + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) df['A'].iloc[0] = np.nan result = df.head() assert_frame_equal(result, expected) - df = DataFrame(dict(A = np.array(['foo','bar','bah','foo','bar']))) + df = DataFrame(dict(A=np.array(['foo', 'bar', 'bah', 'foo', 'bar']))) df.A.iloc[0] = np.nan result = df.head() assert_frame_equal(result, expected) def test_detect_chained_assignment(self): - pd.set_option('chained_assignment','raise') + pd.set_option('chained_assignment', 'raise') # work with the chain - expected = DataFrame([[-5,1],[-6,3]],columns=list('AB')) - df = DataFrame(np.arange(4).reshape(2,2),columns=list('AB'),dtype='int64') + expected = DataFrame([[-5, 1], [-6, 3]], columns=list('AB')) + df = DataFrame( + np.arange(4).reshape(2, 2), columns=list('AB'), dtype='int64') self.assertIsNone(df.is_copy) df['A'][0] = -5 df['A'][1] = -6 assert_frame_equal(df, expected) # test with the chaining - df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) + df = DataFrame({'A': Series( + range(2), dtype='int64'), + 'B': np.array( + np.arange(2, 4), dtype=np.float64)}) self.assertIsNone(df.is_copy) + def f(): df['A'][0] = -5 + self.assertRaises(com.SettingWithCopyError, f) + def f(): df['A'][1] = np.nan + self.assertRaises(com.SettingWithCopyError, f) self.assertIsNone(df['A'].is_copy) # using a copy (the chain), fails - df = DataFrame({ 'A' : Series(range(2),dtype='int64'), 'B' : np.array(np.arange(2,4),dtype=np.float64)}) + df = DataFrame({'A': Series( + range(2), dtype='int64'), + 'B': np.array( + np.arange(2, 4), dtype=np.float64)}) + def f(): df.loc[0]['A'] = -5 + self.assertRaises(com.SettingWithCopyError, f) # doc example - df = DataFrame({'a' : ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c' : Series(range(7),dtype='int64') }) + df = DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six' + ], + 'c': Series( + range(7), dtype='int64')}) self.assertIsNone(df.is_copy) - expected = DataFrame({'a' : ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c' : [42,42,2,3,4,42,6]}) + expected = DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', + 'six'], + 'c': [42, 42, 2, 3, 4, 42, 6]}) def f(): indexer = df.a.str.startswith('o') df[indexer]['c'] = 42 + self.assertRaises(com.SettingWithCopyError, f) - expected = DataFrame({'A':[111,'bbb','ccc'],'B':[1,2,3]}) - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) + expected = DataFrame({'A': [111, 'bbb', 'ccc'], 'B': [1, 2, 3]}) + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + def f(): df['A'][0] = 111 + self.assertRaises(com.SettingWithCopyError, f) + def f(): df.loc[0]['A'] = 111 + self.assertRaises(com.SettingWithCopyError, f) - df.loc[0,'A'] = 111 - assert_frame_equal(df,expected) + df.loc[0, 'A'] = 111 + assert_frame_equal(df, expected) # make sure that is_copy is picked up reconstruction # GH5475 - df = DataFrame({"A": [1,2]}) + df = DataFrame({"A": [1, 2]}) self.assertIsNone(df.is_copy) with tm.ensure_clean('__tmp__pickle') as path: df.to_pickle(path) @@ -4085,7 +4600,7 @@ def f(): def random_text(nobs=100): df = [] for i in range(nobs): - idx= np.random.randint(len(letters), size=2) + idx = np.random.randint(len(letters), size=2) idx.sort() df.append([letters[idx[0]:idx[1]]]) @@ -4094,30 +4609,30 @@ def random_text(nobs=100): df = random_text(100000) # always a copy - x = df.iloc[[0,1,2]] + x = df.iloc[[0, 1, 2]] self.assertIsNotNone(x.is_copy) - x = df.iloc[[0,1,2,4]] + x = df.iloc[[0, 1, 2, 4]] self.assertIsNotNone(x.is_copy) # explicity copy - indexer = df.letters.apply(lambda x : len(x) > 10) + indexer = df.letters.apply(lambda x: len(x) > 10) df = df.ix[indexer].copy() self.assertIsNone(df.is_copy) df['letters'] = df['letters'].apply(str.lower) # implicity take df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) + indexer = df.letters.apply(lambda x: len(x) > 10) df = df.ix[indexer] self.assertIsNotNone(df.is_copy) df['letters'] = df['letters'].apply(str.lower) # implicity take 2 df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) + indexer = df.letters.apply(lambda x: len(x) > 10) df = df.ix[indexer] self.assertIsNotNone(df.is_copy) - df.loc[:,'letters'] = df['letters'].apply(str.lower) + df.loc[:, 'letters'] = df['letters'].apply(str.lower) # should be ok even though it's a copy! self.assertIsNone(df.is_copy) @@ -4125,77 +4640,99 @@ def random_text(nobs=100): self.assertIsNone(df.is_copy) df = random_text(100000) - indexer = df.letters.apply(lambda x : len(x) > 10) - df.ix[indexer,'letters'] = df.ix[indexer,'letters'].apply(str.lower) + indexer = df.letters.apply(lambda x: len(x) > 10) + df.ix[indexer, 'letters'] = df.ix[indexer, 'letters'].apply(str.lower) # an identical take, so no copy - df = DataFrame({'a' : [1]}).dropna() + df = DataFrame({'a': [1]}).dropna() self.assertIsNone(df.is_copy) df['a'] += 1 # inplace ops - # original from: http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug + # original from: + # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] b = [123, None] c = [1234, 2345] d = [12345, 23456] - tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), ('ears', 'right')] - events = {('eyes', 'left'): a, ('eyes', 'right'): b, ('ears', 'left'): c, ('ears', 'right'): d} + tuples = [('eyes', 'left'), ('eyes', 'right'), ('ears', 'left'), + ('ears', 'right')] + events = {('eyes', 'left'): a, + ('eyes', 'right'): b, + ('ears', 'left'): c, + ('ears', 'right'): d} multiind = MultiIndex.from_tuples(tuples, names=['part', 'side']) zed = DataFrame(events, index=['a', 'b'], columns=multiind) + def f(): zed['eyes']['right'].fillna(value=555, inplace=True) + self.assertRaises(com.SettingWithCopyError, f) - df = DataFrame(np.random.randn(10,4)) - s = df.iloc[:,0].sort_values() - assert_series_equal(s,df.iloc[:,0].sort_values()) - assert_series_equal(s,df[0].sort_values()) + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0].sort_values() + assert_series_equal(s, df.iloc[:, 0].sort_values()) + assert_series_equal(s, df[0].sort_values()) # false positives GH6025 - df = DataFrame ({'column1':['a', 'a', 'a'], 'column2': [4,8,9] }) + df = DataFrame({'column1': ['a', 'a', 'a'], 'column2': [4, 8, 9]}) str(df) df['column1'] = df['column1'] + 'b' str(df) - df = df [df['column2']!=8] + df = df[df['column2'] != 8] str(df) df['column1'] = df['column1'] + 'c' str(df) - # from SO: http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc - df = DataFrame(np.arange(0,9), columns=['count']) + # from SO: + # http://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc + df = DataFrame(np.arange(0, 9), columns=['count']) df['group'] = 'b' + def f(): df.iloc[0:5]['group'] = 'a' + self.assertRaises(com.SettingWithCopyError, f) # mixed type setting # same dtype & changing dtype - df = DataFrame(dict(A=date_range('20130101',periods=5),B=np.random.randn(5),C=np.arange(5,dtype='int64'),D=list('abcde'))) + df = DataFrame(dict(A=date_range('20130101', periods=5), + B=np.random.randn(5), + C=np.arange(5, dtype='int64'), + D=list('abcde'))) def f(): df.ix[2]['D'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): df.ix[2]['C'] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) + def f(): df['C'][2] = 'foo' + self.assertRaises(com.SettingWithCopyError, f) def test_setting_with_copy_bug(self): # operating on a copy - df = pd.DataFrame({'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}) + df = pd.DataFrame({'a': list(range(4)), + 'b': list('ab..'), + 'c': ['a', 'b', np.nan, 'd']}) mask = pd.isnull(df.c) def f(): df[['c']][mask] = df[['b']][mask] + self.assertRaises(com.SettingWithCopyError, f) # invalid warning as we are returning a new object # GH 8730 - df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])}) + df1 = DataFrame({'x': Series(['a', 'b', 'c']), + 'y': Series(['d', 'e', 'f'])}) df2 = df1[['x']] # this should not raise @@ -4204,24 +4741,173 @@ def f(): def test_detect_chained_assignment_warnings(self): # warnings - with option_context('chained_assignment','warn'): - df = DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - with tm.assert_produces_warning(expected_warning=com.SettingWithCopyWarning): + with option_context('chained_assignment', 'warn'): + df = DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + with tm.assert_produces_warning( + expected_warning=com.SettingWithCopyWarning): df.loc[0]['A'] = 111 def test_float64index_slicing_bug(self): # GH 5557, related to slicing a float index - ser = {256: 2321.0, 1: 78.0, 2: 2716.0, 3: 0.0, 4: 369.0, 5: 0.0, 6: 269.0, 7: 0.0, 8: 0.0, 9: 0.0, 10: 3536.0, 11: 0.0, 12: 24.0, 13: 0.0, 14: 931.0, 15: 0.0, 16: 101.0, 17: 78.0, 18: 9643.0, 19: 0.0, 20: 0.0, 21: 0.0, 22: 63761.0, 23: 0.0, 24: 446.0, 25: 0.0, 26: 34773.0, 27: 0.0, 28: 729.0, 29: 78.0, 30: 0.0, 31: 0.0, 32: 3374.0, 33: 0.0, 34: 1391.0, 35: 0.0, 36: 361.0, 37: 0.0, 38: 61808.0, 39: 0.0, 40: 0.0, 41: 0.0, 42: 6677.0, 43: 0.0, 44: 802.0, 45: 0.0, 46: 2691.0, 47: 0.0, 48: 3582.0, 49: 0.0, 50: 734.0, 51: 0.0, 52: 627.0, 53: 70.0, 54: 2584.0, 55: 0.0, 56: 324.0, 57: 0.0, 58: 605.0, 59: 0.0, 60: 0.0, 61: 0.0, 62: 3989.0, 63: 10.0, 64: 42.0, 65: 0.0, 66: 904.0, 67: 0.0, 68: 88.0, 69: 70.0, 70: 8172.0, 71: 0.0, 72: 0.0, 73: 0.0, 74: 64902.0, 75: 0.0, 76: 347.0, 77: 0.0, 78: 36605.0, 79: 0.0, 80: 379.0, 81: 70.0, 82: 0.0, 83: 0.0, 84: 3001.0, 85: 0.0, 86: 1630.0, 87: 7.0, 88: 364.0, 89: 0.0, 90: 67404.0, 91: 9.0, 92: 0.0, 93: 0.0, 94: 7685.0, 95: 0.0, 96: 1017.0, 97: 0.0, 98: 2831.0, 99: 0.0, 100: 2963.0, 101: 0.0, 102: 854.0, 103: 0.0, 104: 0.0, 105: 0.0, 106: 0.0, 107: 0.0, 108: 0.0, 109: 0.0, 110: 0.0, 111: 0.0, 112: 0.0, 113: 0.0, 114: 0.0, 115: 0.0, 116: 0.0, 117: 0.0, 118: 0.0, 119: 0.0, 120: 0.0, 121: 0.0, 122: 0.0, 123: 0.0, 124: 0.0, 125: 0.0, 126: 67744.0, 127: 22.0, 128: 264.0, 129: 0.0, 260: 197.0, 268: 0.0, 265: 0.0, 269: 0.0, 261: 0.0, 266: 1198.0, 267: 0.0, 262: 2629.0, 258: 775.0, 257: 0.0, 263: 0.0, 259: 0.0, 264: 163.0, 250: 10326.0, 251: 0.0, 252: 1228.0, 253: 0.0, 254: 2769.0, 255: 0.0} + ser = {256: 2321.0, + 1: 78.0, + 2: 2716.0, + 3: 0.0, + 4: 369.0, + 5: 0.0, + 6: 269.0, + 7: 0.0, + 8: 0.0, + 9: 0.0, + 10: 3536.0, + 11: 0.0, + 12: 24.0, + 13: 0.0, + 14: 931.0, + 15: 0.0, + 16: 101.0, + 17: 78.0, + 18: 9643.0, + 19: 0.0, + 20: 0.0, + 21: 0.0, + 22: 63761.0, + 23: 0.0, + 24: 446.0, + 25: 0.0, + 26: 34773.0, + 27: 0.0, + 28: 729.0, + 29: 78.0, + 30: 0.0, + 31: 0.0, + 32: 3374.0, + 33: 0.0, + 34: 1391.0, + 35: 0.0, + 36: 361.0, + 37: 0.0, + 38: 61808.0, + 39: 0.0, + 40: 0.0, + 41: 0.0, + 42: 6677.0, + 43: 0.0, + 44: 802.0, + 45: 0.0, + 46: 2691.0, + 47: 0.0, + 48: 3582.0, + 49: 0.0, + 50: 734.0, + 51: 0.0, + 52: 627.0, + 53: 70.0, + 54: 2584.0, + 55: 0.0, + 56: 324.0, + 57: 0.0, + 58: 605.0, + 59: 0.0, + 60: 0.0, + 61: 0.0, + 62: 3989.0, + 63: 10.0, + 64: 42.0, + 65: 0.0, + 66: 904.0, + 67: 0.0, + 68: 88.0, + 69: 70.0, + 70: 8172.0, + 71: 0.0, + 72: 0.0, + 73: 0.0, + 74: 64902.0, + 75: 0.0, + 76: 347.0, + 77: 0.0, + 78: 36605.0, + 79: 0.0, + 80: 379.0, + 81: 70.0, + 82: 0.0, + 83: 0.0, + 84: 3001.0, + 85: 0.0, + 86: 1630.0, + 87: 7.0, + 88: 364.0, + 89: 0.0, + 90: 67404.0, + 91: 9.0, + 92: 0.0, + 93: 0.0, + 94: 7685.0, + 95: 0.0, + 96: 1017.0, + 97: 0.0, + 98: 2831.0, + 99: 0.0, + 100: 2963.0, + 101: 0.0, + 102: 854.0, + 103: 0.0, + 104: 0.0, + 105: 0.0, + 106: 0.0, + 107: 0.0, + 108: 0.0, + 109: 0.0, + 110: 0.0, + 111: 0.0, + 112: 0.0, + 113: 0.0, + 114: 0.0, + 115: 0.0, + 116: 0.0, + 117: 0.0, + 118: 0.0, + 119: 0.0, + 120: 0.0, + 121: 0.0, + 122: 0.0, + 123: 0.0, + 124: 0.0, + 125: 0.0, + 126: 67744.0, + 127: 22.0, + 128: 264.0, + 129: 0.0, + 260: 197.0, + 268: 0.0, + 265: 0.0, + 269: 0.0, + 261: 0.0, + 266: 1198.0, + 267: 0.0, + 262: 2629.0, + 258: 775.0, + 257: 0.0, + 263: 0.0, + 259: 0.0, + 264: 163.0, + 250: 10326.0, + 251: 0.0, + 252: 1228.0, + 253: 0.0, + 254: 2769.0, + 255: 0.0} # smoke test for the repr s = Series(ser) - result = s.value_counts() + result = s.value_counts() str(result) def test_floating_index_doc_example(self): index = Index([1.5, 2, 3, 4.5, 5]) - s = Series(range(5),index=index) + s = Series(range(5), index=index) self.assertEqual(s[3], 2) self.assertEqual(s.ix[3], 2) self.assertEqual(s.loc[3], 2) @@ -4258,20 +4944,20 @@ def test_floating_index(self): # value not found (and no fallbacking at all) # scalar integers - self.assertRaises(KeyError, lambda : s.loc[4]) - self.assertRaises(KeyError, lambda : s.ix[4]) - self.assertRaises(KeyError, lambda : s[4]) + self.assertRaises(KeyError, lambda: s.loc[4]) + self.assertRaises(KeyError, lambda: s.ix[4]) + self.assertRaises(KeyError, lambda: s[4]) # fancy floats/integers create the correct entry (as nan) # fancy tests expected = Series([2, 0], index=Float64Index([5.0, 0.0])) - for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float + for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float assert_series_equal(s[fancy_idx], expected) assert_series_equal(s.loc[fancy_idx], expected) assert_series_equal(s.ix[fancy_idx], expected) expected = Series([2, 0], index=Index([5, 0], dtype='int64')) - for fancy_idx in [[5, 0], np.array([5, 0])]: #int + for fancy_idx in [[5, 0], np.array([5, 0])]: # int assert_series_equal(s[fancy_idx], expected) assert_series_equal(s.loc[fancy_idx], expected) assert_series_equal(s.ix[fancy_idx], expected) @@ -4311,39 +4997,41 @@ def test_floating_index(self): assert_series_equal(result1, result3) # list selection - result1 = s[[0.0,5,10]] - result2 = s.loc[[0.0,5,10]] - result3 = s.ix[[0.0,5,10]] - result4 = s.iloc[[0,2,4]] + result1 = s[[0.0, 5, 10]] + result2 = s.loc[[0.0, 5, 10]] + result3 = s.ix[[0.0, 5, 10]] + result4 = s.iloc[[0, 2, 4]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) assert_series_equal(result1, result4) - result1 = s[[1.6,5,10]] - result2 = s.loc[[1.6,5,10]] - result3 = s.ix[[1.6,5,10]] + result1 = s[[1.6, 5, 10]] + result2 = s.loc[[1.6, 5, 10]] + result3 = s.ix[[1.6, 5, 10]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) - assert_series_equal(result1, Series([np.nan,2,4],index=[1.6,5,10])) + assert_series_equal(result1, Series( + [np.nan, 2, 4], index=[1.6, 5, 10])) - result1 = s[[0,1,2]] - result2 = s.ix[[0,1,2]] - result3 = s.loc[[0,1,2]] + result1 = s[[0, 1, 2]] + result2 = s.ix[[0, 1, 2]] + result3 = s.loc[[0, 1, 2]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) - assert_series_equal(result1, Series([0.0,np.nan,np.nan],index=[0,1,2])) + assert_series_equal(result1, Series( + [0.0, np.nan, np.nan], index=[0, 1, 2])) result1 = s.loc[[2.5, 5]] result2 = s.ix[[2.5, 5]] assert_series_equal(result1, result2) - assert_series_equal(result1, Series([1,2],index=[2.5,5.0])) + assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0])) result1 = s[[2.5]] result2 = s.ix[[2.5]] result3 = s.loc[[2.5]] assert_series_equal(result1, result2) assert_series_equal(result1, result3) - assert_series_equal(result1, Series([1],index=[2.5])) + assert_series_equal(result1, Series([1], index=[2.5])) def test_scalar_indexer(self): # float indexing checked above @@ -4368,8 +5056,8 @@ def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): self.assertRaises(getitem, lambda: s[3.5]) for index in [tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeIntIndex, tm.makeRangeIndex, - tm.makeDateIndex, tm.makePeriodIndex]: + tm.makeIntIndex, tm.makeRangeIndex, tm.makeDateIndex, + tm.makePeriodIndex]: check_invalid(index()) check_invalid(Index(np.arange(5) * 2.5), loc=KeyError, @@ -4378,7 +5066,7 @@ def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): def check_index(index, error): index = index() - s = Series(np.arange(len(index)),index=index) + s = Series(np.arange(len(index)), index=index) # positional selection result1 = s[5] @@ -4387,8 +5075,8 @@ def check_index(index, error): result4 = s.iloc[5.0] # by value - self.assertRaises(error, lambda : s.loc[5]) - self.assertRaises(error, lambda : s.loc[5.0]) + self.assertRaises(error, lambda: s.loc[5]) + self.assertRaises(error, lambda: s.loc[5.0]) # this is fallback, so it works result5 = s.ix[5] @@ -4401,15 +5089,16 @@ def check_index(index, error): self.assertEqual(result1, result6) # string-like - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex ]: + for index in [tm.makeStringIndex, tm.makeUnicodeIndex]: check_index(index, KeyError) # datetimelike - for index in [ tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + for index in [tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex]: check_index(index, TypeError) # exact indexing when found on IntIndex - s = Series(np.arange(10),dtype='int64') + s = Series(np.arange(10), dtype='int64') result1 = s[5.0] result2 = s.loc[5.0] @@ -4424,24 +5113,23 @@ def check_index(index, error): self.assertEqual(result1, result6) def test_slice_indexer(self): - def check_iloc_compat(s): # invalid type for iloc (but works with a warning) # check_stacklevel=False -> impossible to get it right for all # index types - with self.assert_produces_warning( - FutureWarning, check_stacklevel=False): + with self.assert_produces_warning(FutureWarning, + check_stacklevel=False): s.iloc[6.0:8] - with self.assert_produces_warning( - FutureWarning, check_stacklevel=False): + with self.assert_produces_warning(FutureWarning, + check_stacklevel=False): s.iloc[6.0:8.0] - with self.assert_produces_warning( - FutureWarning, check_stacklevel=False): + with self.assert_produces_warning(FutureWarning, + check_stacklevel=False): s.iloc[6:8.0] def check_slicing_positional(index): - s = Series(np.arange(len(index))+10,index=index) + s = Series(np.arange(len(index)) + 10, index=index) # these are all positional result1 = s[2:5] @@ -4451,26 +5139,27 @@ def check_slicing_positional(index): assert_series_equal(result1, result3) # loc will fail - self.assertRaises(TypeError, lambda : s.loc[2:5]) + self.assertRaises(TypeError, lambda: s.loc[2:5]) # make all float slicing fail - self.assertRaises(TypeError, lambda : s[2.0:5]) - self.assertRaises(TypeError, lambda : s[2.0:5.0]) - self.assertRaises(TypeError, lambda : s[2:5.0]) + self.assertRaises(TypeError, lambda: s[2.0:5]) + self.assertRaises(TypeError, lambda: s[2.0:5.0]) + self.assertRaises(TypeError, lambda: s[2:5.0]) - self.assertRaises(TypeError, lambda : s.ix[2.0:5]) - self.assertRaises(TypeError, lambda : s.ix[2.0:5.0]) - self.assertRaises(TypeError, lambda : s.ix[2:5.0]) + self.assertRaises(TypeError, lambda: s.ix[2.0:5]) + self.assertRaises(TypeError, lambda: s.ix[2.0:5.0]) + self.assertRaises(TypeError, lambda: s.ix[2:5.0]) - self.assertRaises(TypeError, lambda : s.loc[2.0:5]) - self.assertRaises(TypeError, lambda : s.loc[2.0:5.0]) - self.assertRaises(TypeError, lambda : s.loc[2:5.0]) + self.assertRaises(TypeError, lambda: s.loc[2.0:5]) + self.assertRaises(TypeError, lambda: s.loc[2.0:5.0]) + self.assertRaises(TypeError, lambda: s.loc[2:5.0]) check_iloc_compat(s) # all index types except int, float - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex]: check_slicing_positional(index()) ############ @@ -4524,30 +5213,32 @@ def check_slicing_positional(index): # these are valid for all methods # these are treated like labels (e.g. the rhs IS included) def compare(slicers, expected): - for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix ]: + for method in [lambda x: x, lambda x: x.loc, lambda x: x.ix]: for slices in slicers: result = method(s)[slices] assert_series_equal(result, expected) - compare([slice(6.0,8),slice(6.0,8.0),slice(6,8.0)], - s[(s.index>=6.0)&(s.index<=8)]) - compare([slice(6.5,8),slice(6.5,8.5)], - s[(s.index>=6.5)&(s.index<=8.5)]) - compare([slice(6,8.5)], - s[(s.index>=6.0)&(s.index<=8.5)]) - compare([slice(6.5,6.5)], - s[(s.index>=6.5)&(s.index<=6.5)]) + compare([slice(6.0, 8), slice(6.0, 8.0), slice(6, 8.0)], + s[(s.index >= 6.0) & (s.index <= 8)]) + compare([slice(6.5, 8), slice(6.5, 8.5)], + s[(s.index >= 6.5) & (s.index <= 8.5)]) + compare([slice(6, 8.5)], s[(s.index >= 6.0) & (s.index <= 8.5)]) + compare([slice(6.5, 6.5)], s[(s.index >= 6.5) & (s.index <= 6.5)]) check_iloc_compat(s) def test_set_ix_out_of_bounds_axis_0(self): - df = pd.DataFrame(randn(2, 5), index=["row%s" % i for i in range(2)], columns=["col%s" % i for i in range(5)]) + df = pd.DataFrame( + randn(2, 5), index=["row%s" % i for i in range(2)], + columns=["col%s" % i for i in range(5)]) self.assertRaises(ValueError, df.ix.__setitem__, (2, 0), 100) def test_set_ix_out_of_bounds_axis_1(self): - df = pd.DataFrame(randn(5, 2), index=["row%s" % i for i in range(5)], columns=["col%s" % i for i in range(2)]) - self.assertRaises(ValueError, df.ix.__setitem__, (0 , 2), 100) + df = pd.DataFrame( + randn(5, 2), index=["row%s" % i for i in range(5)], + columns=["col%s" % i for i in range(2)]) + self.assertRaises(ValueError, df.ix.__setitem__, (0, 2), 100) def test_iloc_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf @@ -4559,8 +5250,8 @@ def test_iloc_empty_list_indexer_is_ok(self): assert_frame_equal(df.iloc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True) # horizontal empty - assert_frame_equal(df.iloc[[]], df.iloc[:0, :], - check_index_type=True, check_column_type=True) + assert_frame_equal(df.iloc[[]], df.iloc[:0, :], check_index_type=True, + check_column_type=True) def test_loc_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf @@ -4572,21 +5263,21 @@ def test_loc_empty_list_indexer_is_ok(self): assert_frame_equal(df.loc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True) # horizontal empty - assert_frame_equal(df.loc[[]], df.iloc[:0, :], - check_index_type=True, check_column_type=True) + assert_frame_equal(df.loc[[]], df.iloc[:0, :], check_index_type=True, + check_column_type=True) def test_ix_empty_list_indexer_is_ok(self): from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) # vertical empty - assert_frame_equal(df.ix[:, []], df.iloc[:, :0], - check_index_type=True, check_column_type=True) + assert_frame_equal(df.ix[:, []], df.iloc[:, :0], check_index_type=True, + check_column_type=True) # horizontal empty - assert_frame_equal(df.ix[[], :], df.iloc[:0, :], - check_index_type=True, check_column_type=True) + assert_frame_equal(df.ix[[], :], df.iloc[:0, :], check_index_type=True, + check_column_type=True) # horizontal empty - assert_frame_equal(df.ix[[]], df.iloc[:0, :], - check_index_type=True, check_column_type=True) + assert_frame_equal(df.ix[[]], df.iloc[:0, :], check_index_type=True, + check_column_type=True) def test_deprecate_float_indexers(self): @@ -4600,29 +5291,34 @@ def test_deprecate_float_indexers(self): def check_index(index): i = index(5) - for s in [ Series(np.arange(len(i)),index=i), DataFrame(np.random.randn(len(i),len(i)),index=i,columns=i) ]: - self.assertRaises(FutureWarning, lambda : - s.iloc[3.0]) + for s in [Series( + np.arange(len(i)), index=i), DataFrame( + np.random.randn( + len(i), len(i)), index=i, columns=i)]: + self.assertRaises(FutureWarning, lambda: s.iloc[3.0]) # setting def f(): s.iloc[3.0] = 0 + self.assertRaises(FutureWarning, f) # fallsback to position selection ,series only - s = Series(np.arange(len(i)),index=i) + s = Series(np.arange(len(i)), index=i) s[3] - self.assertRaises(FutureWarning, lambda : s[3.0]) + self.assertRaises(FutureWarning, lambda: s[3.0]) - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makeTimedeltaIndex, tm.makePeriodIndex ]: + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makeTimedeltaIndex, + tm.makePeriodIndex]: check_index(index) # ints i = index(5) - for s in [ Series(np.arange(len(i))), DataFrame(np.random.randn(len(i),len(i)),index=i,columns=i) ]: - self.assertRaises(FutureWarning, lambda : - s.iloc[3.0]) + for s in [Series(np.arange(len(i))), DataFrame( + np.random.randn( + len(i), len(i)), index=i, columns=i)]: + self.assertRaises(FutureWarning, lambda: s.iloc[3.0]) # on some arch's this doesn't provide a warning (and thus raise) # and some it does @@ -4634,20 +5330,23 @@ def f(): # setting def f(): s.iloc[3.0] = 0 + self.assertRaises(FutureWarning, f) # floats: these are all ok! i = np.arange(5.) - for s in [ Series(np.arange(len(i)),index=i), DataFrame(np.random.randn(len(i),len(i)),index=i,columns=i) ]: + for s in [Series( + np.arange(len(i)), index=i), DataFrame( + np.random.randn( + len(i), len(i)), index=i, columns=i)]: with tm.assert_produces_warning(False): s[3.0] with tm.assert_produces_warning(False): s[3] - self.assertRaises(FutureWarning, lambda : - s.iloc[3.0]) + self.assertRaises(FutureWarning, lambda: s.iloc[3.0]) with tm.assert_produces_warning(False): s.iloc[3] @@ -4660,6 +5359,7 @@ def f(): def f(): s.iloc[3.0] = 0 + self.assertRaises(FutureWarning, f) # slices @@ -4668,33 +5368,35 @@ def f(): tm.makeDateIndex, tm.makePeriodIndex]: index = index(5) - for s in [Series(range(5), index=index), - DataFrame(np.random.randn(5, 2), index=index)]: + for s in [Series( + range(5), index=index), DataFrame( + np.random.randn(5, 2), index=index)]: # getitem - self.assertRaises(FutureWarning, lambda: - s.iloc[3.0:4]) - self.assertRaises(FutureWarning, lambda: - s.iloc[3.0:4.0]) - self.assertRaises(FutureWarning, lambda: - s.iloc[3:4.0]) + self.assertRaises(FutureWarning, lambda: s.iloc[3.0:4]) + self.assertRaises(FutureWarning, lambda: s.iloc[3.0:4.0]) + self.assertRaises(FutureWarning, lambda: s.iloc[3:4.0]) # setitem def f(): s.iloc[3.0:4] = 0 + self.assertRaises(FutureWarning, f) + def f(): s.iloc[3:4.0] = 0 + self.assertRaises(FutureWarning, f) + def f(): s.iloc[3.0:4.0] = 0 + self.assertRaises(FutureWarning, f) warnings.filterwarnings(action='ignore', category=FutureWarning) def test_float_index_to_mixed(self): - df = DataFrame({0.0: np.random.rand(10), - 1.0: np.random.rand(10)}) + df = DataFrame({0.0: np.random.rand(10), 1.0: np.random.rand(10)}) df['a'] = 10 tm.assert_frame_equal(DataFrame({0.0: df[0.0], 1.0: df[1.0], @@ -4709,15 +5411,15 @@ def test_duplicate_ix_returns_series(self): tm.assert_series_equal(r, e) def test_float_index_non_scalar_assignment(self): - df = DataFrame({'a': [1,2,3], 'b': [3,4,5]},index=[1.,2.,3.]) + df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df.loc[df.index[:2]] = 1 - expected = DataFrame({'a':[1,1,3],'b':[1,1,5]},index=df.index) + expected = DataFrame({'a': [1, 1, 3], 'b': [1, 1, 5]}, index=df.index) tm.assert_frame_equal(expected, df) - df = DataFrame({'a': [1,2,3], 'b': [3,4,5]},index=[1.,2.,3.]) + df = DataFrame({'a': [1, 2, 3], 'b': [3, 4, 5]}, index=[1., 2., 3.]) df2 = df.copy() df.loc[df.index] = df.loc[df.index] - tm.assert_frame_equal(df,df2) + tm.assert_frame_equal(df, df2) def test_float_index_at_iat(self): s = pd.Series([1, 2, 3], index=[0.1, 0.2, 0.3]) @@ -4759,7 +5461,7 @@ def run_tests(df, rhs, right): df = pd.DataFrame(xs, columns=cols, index=list('abcde')) # right hand side; permute the indices and multiplpy by -2 - rhs = - 2 * df.iloc[3:0:-1, 2:0:-1] + rhs = -2 * df.iloc[3:0:-1, 2:0:-1] # expected `right` result; just multiply by -2 right = df.copy() @@ -4808,15 +5510,15 @@ def assert_slices_equivalent(l_slc, i_slc): assert_slices_equivalent(SLC[::-1], SLC[::-1]) assert_slices_equivalent(SLC['d'::-1], SLC[15::-1]) - assert_slices_equivalent(SLC[('d',)::-1], SLC[15::-1]) + assert_slices_equivalent(SLC[('d', )::-1], SLC[15::-1]) assert_slices_equivalent(SLC[:'d':-1], SLC[:11:-1]) - assert_slices_equivalent(SLC[:('d',):-1], SLC[:11:-1]) + assert_slices_equivalent(SLC[:('d', ):-1], SLC[:11:-1]) assert_slices_equivalent(SLC['d':'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d',):'b':-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC['d':('b',):-1], SLC[15:3:-1]) - assert_slices_equivalent(SLC[('d',):('b',):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):'b':-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC['d':('b', ):-1], SLC[15:3:-1]) + assert_slices_equivalent(SLC[('d', ):('b', ):-1], SLC[15:3:-1]) assert_slices_equivalent(SLC['b':'d':-1], SLC[:0]) assert_slices_equivalent(SLC[('c', 2)::-1], SLC[10::-1]) @@ -4844,12 +5546,12 @@ def test_indexing_assignment_dict_already_exists(self): def test_indexing_dtypes_on_empty(self): # Check that .iloc and .ix return correct dtypes GH9983 - df = DataFrame({'a':[1,2,3],'b':['b','b2','b3']}) - df2 = df.ix[[],:] + df = DataFrame({'a': [1, 2, 3], 'b': ['b', 'b2', 'b3']}) + df2 = df.ix[[], :] - self.assertEqual(df2.loc[:,'a'].dtype, np.int64) - assert_series_equal(df2.loc[:,'a'], df2.iloc[:,0]) - assert_series_equal(df2.loc[:,'a'], df2.ix[:,0]) + self.assertEqual(df2.loc[:, 'a'].dtype, np.int64) + assert_series_equal(df2.loc[:, 'a'], df2.iloc[:, 0]) + assert_series_equal(df2.loc[:, 'a'], df2.ix[:, 0]) def test_range_in_series_indexing(self): # range can cause an indexing error @@ -4857,24 +5559,24 @@ def test_range_in_series_indexing(self): for x in [5, 999999, 1000000]: s = pd.Series(index=range(x)) s.loc[range(1)] = 42 - assert_series_equal(s.loc[range(1)],Series(42.0,index=[0])) + assert_series_equal(s.loc[range(1)], Series(42.0, index=[0])) s.loc[range(2)] = 43 - assert_series_equal(s.loc[range(2)],Series(43.0,index=[0,1])) + assert_series_equal(s.loc[range(2)], Series(43.0, index=[0, 1])) @slow def test_large_dataframe_indexing(self): - #GH10692 - result = DataFrame({'x': range(10**6)},dtype='int64') + # GH10692 + result = DataFrame({'x': range(10 ** 6)}, dtype='int64') result.loc[len(result)] = len(result) + 1 - expected = DataFrame({'x': range(10**6 + 1)},dtype='int64') + expected = DataFrame({'x': range(10 ** 6 + 1)}, dtype='int64') assert_frame_equal(result, expected) @slow def test_large_mi_dataframe_indexing(self): - #GH10645 - result = MultiIndex.from_arrays([range(10**6), range(10**6)]) - assert(not (10**6, 0) in result) + # GH10645 + result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)]) + assert (not (10 ** 6, 0) in result) def test_non_reducing_slice(self): df = pd.DataFrame([[0, 1], [2, 3]]) @@ -4923,76 +5625,93 @@ class TestCategoricalIndex(tm.TestCase): def setUp(self): - self.df = DataFrame({'A' : np.arange(6,dtype='int64'), - 'B' : Series(list('aabbca')).astype('category',categories=list('cab')) }).set_index('B') - self.df2 = DataFrame({'A' : np.arange(6,dtype='int64'), - 'B' : Series(list('aabbca')).astype('category',categories=list('cabe')) }).set_index('B') - self.df3 = DataFrame({'A' : np.arange(6,dtype='int64'), - 'B' : Series([1,1,2,1,3,2]).astype('category',categories=[3,2,1],ordered=True) }).set_index('B') - self.df4 = DataFrame({'A' : np.arange(6,dtype='int64'), - 'B' : Series([1,1,2,1,3,2]).astype('category',categories=[3,2,1],ordered=False) }).set_index('B') - + self.df = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')).astype( + 'category', categories=list( + 'cab'))}).set_index('B') + self.df2 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': Series(list('aabbca')).astype( + 'category', categories=list( + 'cabe'))}).set_index('B') + self.df3 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': (Series([1, 1, 2, 1, 3, 2]) + .astype('category', categories=[3, 2, 1], + ordered=True))}).set_index('B') + self.df4 = DataFrame({'A': np.arange(6, dtype='int64'), + 'B': (Series([1, 1, 2, 1, 3, 2]) + .astype('category', categories=[3, 2, 1], + ordered=False))}).set_index('B') def test_loc_scalar(self): - result = self.df.loc['a'] - expected = DataFrame({'A' : [0,1,5], - 'B' : Series(list('aaa')).astype('category',categories=list('cab')) }).set_index('B') + expected = (DataFrame({'A': [0, 1, 5], + 'B': (Series(list('aaa')) + .astype('category', + categories=list('cab')))}) + .set_index('B')) assert_frame_equal(result, expected) - df = self.df.copy() df.loc['a'] = 20 - expected = DataFrame({'A' : [20,20,2,3,4,20], - 'B' : Series(list('aabbca')).astype('category',categories=list('cab')) }).set_index('B') + expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20], + 'B': (Series(list('aabbca')) + .astype('category', + categories=list('cab')))}) + .set_index('B')) assert_frame_equal(df, expected) # value not in the categories - self.assertRaises(KeyError, lambda : df.loc['d']) + self.assertRaises(KeyError, lambda: df.loc['d']) def f(): df.loc['d'] = 10 + self.assertRaises(TypeError, f) def f(): - df.loc['d','A'] = 10 + df.loc['d', 'A'] = 10 + self.assertRaises(TypeError, f) def f(): - df.loc['d','C'] = 10 + df.loc['d', 'C'] = 10 + self.assertRaises(TypeError, f) def test_loc_listlike(self): # list of labels - result = self.df.loc[['c','a']] - expected = self.df.iloc[[4,0,1,5]] + result = self.df.loc[['c', 'a']] + expected = self.df.iloc[[4, 0, 1, 5]] assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.loc[['a','b','e']] - exp_index = pd.CategoricalIndex(list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A' : [0,1,5,2,3,np.nan]}, index=exp_index) + result = self.df2.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) assert_frame_equal(result, expected, check_index_type=True) # element in the categories but not in the values - self.assertRaises(KeyError, lambda : self.df2.loc['e']) + self.assertRaises(KeyError, lambda: self.df2.loc['e']) # assign is ok df = self.df2.copy() df.loc['e'] = 20 - result = df.loc[['a','b','e']] - exp_index = pd.CategoricalIndex(list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A' : [0, 1, 5, 2, 3, 20]}, index=exp_index) + result = df.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index) assert_frame_equal(result, expected) df = self.df2.copy() - result = df.loc[['a','b','e']] - exp_index = pd.CategoricalIndex(list('aaabbe'), categories=list('cabe'), name='B') - expected = DataFrame({'A' : [0, 1, 5, 2, 3, np.nan]}, index=exp_index) + result = df.loc[['a', 'b', 'e']] + exp_index = pd.CategoricalIndex( + list('aaabbe'), categories=list('cabe'), name='B') + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index) assert_frame_equal(result, expected, check_index_type=True) # not all labels in the categories - self.assertRaises(KeyError, lambda : self.df2.loc[['a','d']]) + self.assertRaises(KeyError, lambda: self.df2.loc[['a', 'd']]) def test_loc_listlike_dtypes(self): # GH 11586 @@ -5003,15 +5722,21 @@ def test_loc_listlike_dtypes(self): # unique slice res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b'])) + exp = DataFrame({'A': [1, 2], + 'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] - exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=pd.CategoricalIndex(['a', 'a', 'b'])) + exp = DataFrame({'A': [1, 1, 2], + 'B': [4, 4, 5]}, + index=pd.CategoricalIndex(['a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp(KeyError, 'a list-indexer must only include values that are in the categories'): + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values that are ' + 'in the categories'): df.loc[['a', 'x']] # duplicated categories and codes @@ -5020,38 +5745,53 @@ def test_loc_listlike_dtypes(self): # unique slice res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], 'B': [4, 6, 5]}, index=pd.CategoricalIndex(['a', 'a', 'b'])) + exp = DataFrame({'A': [1, 3, 2], + 'B': [4, 6, 5]}, + index=pd.CategoricalIndex(['a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] - exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [4, 6, 4, 6, 5]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) + exp = DataFrame( + {'A': [1, 3, 1, 3, 2], + 'B': [4, 6, 4, 6, 5 + ]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'])) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp(KeyError, 'a list-indexer must only include values that are in the categories'): + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values ' + 'that are in the categories'): df.loc[['a', 'x']] # contains unused category - index = pd.CategoricalIndex(['a', 'b', 'a', 'c'], categories=list('abcde')) + index = pd.CategoricalIndex( + ['a', 'b', 'a', 'c'], categories=list('abcde')) df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index) res = df.loc[['a', 'b']] - exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]}, - index=pd.CategoricalIndex(['a', 'a', 'b'], categories=list('abcde'))) + exp = DataFrame({'A': [1, 3, 2], + 'B': [5, 7, 6]}, index=pd.CategoricalIndex( + ['a', 'a', 'b'], categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) res = df.loc[['a', 'e']] exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]}, - index=pd.CategoricalIndex(['a', 'a', 'e'], categories=list('abcde'))) + index=pd.CategoricalIndex(['a', 'a', 'e'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) # duplicated slice res = df.loc[['a', 'a', 'b']] exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]}, - index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], categories=list('abcde'))) + index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'], + categories=list('abcde'))) tm.assert_frame_equal(res, exp, check_index_type=True) - with tm.assertRaisesRegexp(KeyError, 'a list-indexer must only include values that are in the categories'): + with tm.assertRaisesRegexp( + KeyError, + 'a list-indexer must only include values ' + 'that are in the categories'): df.loc[['a', 'x']] def test_read_only_source(self): @@ -5063,99 +5803,109 @@ def test_read_only_source(self): ro_array.setflags(write=False) ro_df = DataFrame(ro_array) - assert_frame_equal(rw_df.iloc[[1,2,3]],ro_df.iloc[[1,2,3]]) - assert_frame_equal(rw_df.iloc[[1]],ro_df.iloc[[1]]) - assert_series_equal(rw_df.iloc[1],ro_df.iloc[1]) - assert_frame_equal(rw_df.iloc[1:3],ro_df.iloc[1:3]) + assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]]) + assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]]) + assert_series_equal(rw_df.iloc[1], ro_df.iloc[1]) + assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3]) - assert_frame_equal(rw_df.loc[[1,2,3]],ro_df.loc[[1,2,3]]) - assert_frame_equal(rw_df.loc[[1]],ro_df.loc[[1]]) - assert_series_equal(rw_df.loc[1],ro_df.loc[1]) - assert_frame_equal(rw_df.loc[1:3],ro_df.loc[1:3]) + assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]]) + assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]]) + assert_series_equal(rw_df.loc[1], ro_df.loc[1]) + assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3]) def test_reindexing(self): # reindexing # convert to a regular index - result = self.df2.reindex(['a','b','e']) - expected = DataFrame({'A' : [0,1,5,2,3,np.nan], - 'B' : Series(list('aaabbe')) }).set_index('B') + result = self.df2.reindex(['a', 'b', 'e']) + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], + 'B': Series(list('aaabbe'))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(['a','b']) - expected = DataFrame({'A' : [0,1,5,2,3], - 'B' : Series(list('aaabb')) }).set_index('B') + result = self.df2.reindex(['a', 'b']) + expected = DataFrame({'A': [0, 1, 5, 2, 3], + 'B': Series(list('aaabb'))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['e']) - expected = DataFrame({'A' : [np.nan], - 'B' : Series(['e']) }).set_index('B') + expected = DataFrame({'A': [np.nan], + 'B': Series(['e'])}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['d']) - expected = DataFrame({'A' : [np.nan], - 'B' : Series(['d']) }).set_index('B') + expected = DataFrame({'A': [np.nan], + 'B': Series(['d'])}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # since we are actually reindexing with a Categorical # then return a Categorical cats = list('cabe') - result = self.df2.reindex(pd.Categorical(['a','d'],categories=cats)) - expected = DataFrame({'A' : [0,1,5,np.nan], - 'B' : Series(list('aaad')).astype('category',categories=cats) }).set_index('B') + result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats)) + expected = DataFrame({'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype( + 'category', categories=cats)}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical(['a'],categories=cats)) - expected = DataFrame({'A' : [0,1,5], - 'B' : Series(list('aaa')).astype('category',categories=cats) }).set_index('B') + result = self.df2.reindex(pd.Categorical(['a'], categories=cats)) + expected = DataFrame({'A': [0, 1, 5], + 'B': Series(list('aaa')).astype( + 'category', categories=cats)}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(['a','b','e']) - expected = DataFrame({'A' : [0,1,5,2,3,np.nan], - 'B' : Series(list('aaabbe')) }).set_index('B') + result = self.df2.reindex(['a', 'b', 'e']) + expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan], + 'B': Series(list('aaabbe'))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(['a','b']) - expected = DataFrame({'A' : [0,1,5,2,3], - 'B' : Series(list('aaabb')) }).set_index('B') + result = self.df2.reindex(['a', 'b']) + expected = DataFrame({'A': [0, 1, 5, 2, 3], + 'B': Series(list('aaabb'))}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) result = self.df2.reindex(['e']) - expected = DataFrame({'A' : [np.nan], - 'B' : Series(['e']) }).set_index('B') + expected = DataFrame({'A': [np.nan], + 'B': Series(['e'])}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # give back the type of categorical that we received - result = self.df2.reindex(pd.Categorical(['a','d'],categories=cats,ordered=True)) - expected = DataFrame({'A' : [0,1,5,np.nan], - 'B' : Series(list('aaad')).astype('category',categories=cats,ordered=True) }).set_index('B') + result = self.df2.reindex(pd.Categorical( + ['a', 'd'], categories=cats, ordered=True)) + expected = DataFrame( + {'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype('category', categories=cats, + ordered=True)}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) - result = self.df2.reindex(pd.Categorical(['a','d'],categories=['a','d'])) - expected = DataFrame({'A' : [0,1,5,np.nan], - 'B' : Series(list('aaad')).astype('category',categories=['a','d']) }).set_index('B') + result = self.df2.reindex(pd.Categorical( + ['a', 'd'], categories=['a', 'd'])) + expected = DataFrame({'A': [0, 1, 5, np.nan], + 'B': Series(list('aaad')).astype( + 'category', categories=['a', 'd' + ])}).set_index('B') assert_frame_equal(result, expected, check_index_type=True) # passed duplicate indexers are not allowed - self.assertRaises(ValueError, lambda : self.df2.reindex(['a','a'])) + self.assertRaises(ValueError, lambda: self.df2.reindex(['a', 'a'])) # args NotImplemented ATM - self.assertRaises(NotImplementedError, lambda : self.df2.reindex(['a'],method='ffill')) - self.assertRaises(NotImplementedError, lambda : self.df2.reindex(['a'],level=1)) - self.assertRaises(NotImplementedError, lambda : self.df2.reindex(['a'],limit=2)) + self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], method='ffill')) + self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], level=1)) + self.assertRaises(NotImplementedError, + lambda: self.df2.reindex(['a'], limit=2)) def test_loc_slice(self): - # slicing # not implemented ATM # GH9748 - self.assertRaises(TypeError, lambda : self.df.loc[1:5]) + self.assertRaises(TypeError, lambda: self.df.loc[1:5]) - #result = df.loc[1:5] - #expected = df.iloc[[1,2,3,4]] - #assert_frame_equal(result, expected) + # result = df.loc[1:5] + # expected = df.iloc[[1,2,3,4]] + # assert_frame_equal(result, expected) def test_boolean_selection(self): @@ -5164,19 +5914,19 @@ def test_boolean_selection(self): result = df3[df3.index == 'a'] expected = df3.iloc[[]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) result = df4[df4.index == 'a'] expected = df4.iloc[[]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) result = df3[df3.index == 1] - expected = df3.iloc[[0,1,3]] - assert_frame_equal(result,expected) + expected = df3.iloc[[0, 1, 3]] + assert_frame_equal(result, expected) result = df4[df4.index == 1] - expected = df4.iloc[[0,1,3]] - assert_frame_equal(result,expected) + expected = df4.iloc[[0, 1, 3]] + assert_frame_equal(result, expected) # since we have an ordered categorical @@ -5186,11 +5936,11 @@ def test_boolean_selection(self): # name=u'B') result = df3[df3.index < 2] expected = df3.iloc[[4]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) result = df3[df3.index > 1] expected = df3.iloc[[]] - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # unordered # cannot be compared @@ -5199,8 +5949,9 @@ def test_boolean_selection(self): # categories=[3, 2, 1], # ordered=False, # name=u'B') - self.assertRaises(TypeError, lambda : df4[df4.index < 2]) - self.assertRaises(TypeError, lambda : df4[df4.index > 1]) + self.assertRaises(TypeError, lambda: df4[df4.index < 2]) + self.assertRaises(TypeError, lambda: df4[df4.index > 1]) + class TestSeriesNoneCoercion(tm.TestCase): EXPECTED_RESULTS = [ @@ -5224,9 +5975,9 @@ def test_coercion_with_setitem(self): expected_series = Series(expected_result) assert_attr_equal('dtype', start_series, expected_series) - tm.assert_numpy_array_equal( - start_series.values, - expected_series.values, strict_nan=True) + tm.assert_numpy_array_equal(start_series.values, + expected_series.values, + strict_nan=True) def test_coercion_with_loc_setitem(self): for start_data, expected_result in self.EXPECTED_RESULTS: @@ -5236,9 +5987,9 @@ def test_coercion_with_loc_setitem(self): expected_series = Series(expected_result) assert_attr_equal('dtype', start_series, expected_series) - tm.assert_numpy_array_equal( - start_series.values, - expected_series.values, strict_nan=True) + tm.assert_numpy_array_equal(start_series.values, + expected_series.values, + strict_nan=True) def test_coercion_with_setitem_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: @@ -5248,9 +5999,9 @@ def test_coercion_with_setitem_and_series(self): expected_series = Series(expected_result) assert_attr_equal('dtype', start_series, expected_series) - tm.assert_numpy_array_equal( - start_series.values, - expected_series.values, strict_nan=True) + tm.assert_numpy_array_equal(start_series.values, + expected_series.values, + strict_nan=True) def test_coercion_with_loc_and_series(self): for start_data, expected_result in self.EXPECTED_RESULTS: @@ -5260,9 +6011,9 @@ def test_coercion_with_loc_and_series(self): expected_series = Series(expected_result) assert_attr_equal('dtype', start_series, expected_series) - tm.assert_numpy_array_equal( - start_series.values, - expected_series.values, strict_nan=True) + tm.assert_numpy_array_equal(start_series.values, + expected_series.values, + strict_nan=True) class TestDataframeNoneCoercion(tm.TestCase): @@ -5286,54 +6037,63 @@ def test_coercion_with_loc(self): expected_dataframe = DataFrame({'foo': expected_result}) - assert_attr_equal('dtype', start_dataframe['foo'], expected_dataframe['foo']) - tm.assert_numpy_array_equal( - start_dataframe['foo'].values, - expected_dataframe['foo'].values, strict_nan=True) + assert_attr_equal('dtype', start_dataframe['foo'], + expected_dataframe['foo']) + tm.assert_numpy_array_equal(start_dataframe['foo'].values, + expected_dataframe['foo'].values, + strict_nan=True) def test_coercion_with_setitem_and_dataframe(self): for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS: start_dataframe = DataFrame({'foo': start_data}) - start_dataframe[start_dataframe['foo'] == start_dataframe['foo'][0]] = None + start_dataframe[start_dataframe['foo'] == start_dataframe['foo'][ + 0]] = None expected_dataframe = DataFrame({'foo': expected_result}) - assert_attr_equal('dtype', start_dataframe['foo'], expected_dataframe['foo']) - tm.assert_numpy_array_equal( - start_dataframe['foo'].values, - expected_dataframe['foo'].values, strict_nan=True) + assert_attr_equal('dtype', start_dataframe['foo'], + expected_dataframe['foo']) + tm.assert_numpy_array_equal(start_dataframe['foo'].values, + expected_dataframe['foo'].values, + strict_nan=True) def test_none_coercion_loc_and_dataframe(self): for start_data, expected_result, in self.EXPECTED_SINGLE_ROW_RESULTS: start_dataframe = DataFrame({'foo': start_data}) - start_dataframe.loc[start_dataframe['foo'] == start_dataframe['foo'][0]] = None + start_dataframe.loc[start_dataframe['foo'] == start_dataframe[ + 'foo'][0]] = None expected_dataframe = DataFrame({'foo': expected_result}) - assert_attr_equal('dtype', start_dataframe['foo'], expected_dataframe['foo']) - tm.assert_numpy_array_equal( - start_dataframe['foo'].values, - expected_dataframe['foo'].values, strict_nan=True) + assert_attr_equal('dtype', start_dataframe['foo'], + expected_dataframe['foo']) + tm.assert_numpy_array_equal(start_dataframe['foo'].values, + expected_dataframe['foo'].values, + strict_nan=True) def test_none_coercion_mixed_dtypes(self): start_dataframe = DataFrame({ 'a': [1, 2, 3], 'b': [1.0, 2.0, 3.0], - 'c': [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)], - 'd': ['a', 'b', 'c']}) + 'c': [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, + 3)], + 'd': ['a', 'b', 'c'] + }) start_dataframe.iloc[0] = None expected_dataframe = DataFrame({ 'a': [np.nan, 2, 3], 'b': [np.nan, 2.0, 3.0], 'c': [NaT, datetime(2000, 1, 2), datetime(2000, 1, 3)], - 'd': [None, 'b', 'c']}) + 'd': [None, 'b', 'c'] + }) for column in expected_dataframe.columns: - assert_attr_equal('dtype', start_dataframe[column], expected_dataframe[column]) - tm.assert_numpy_array_equal( - start_dataframe[column].values, - expected_dataframe[column].values, strict_nan=True) + assert_attr_equal('dtype', start_dataframe[column], + expected_dataframe[column]) + tm.assert_numpy_array_equal(start_dataframe[column].values, + expected_dataframe[column].values, + strict_nan=True) if __name__ == '__main__': diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 23e8aad01bf52..69e05e1f4e7ca 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -8,31 +8,30 @@ import re import itertools -from pandas import Index, MultiIndex, DataFrame, DatetimeIndex, Series, Categorical +from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, + Series, Categorical) from pandas.compat import OrderedDict, lrange from pandas.sparse.array import SparseArray -from pandas.core.internals import (BlockPlacement, SingleBlockManager, make_block, - BlockManager) +from pandas.core.internals import (BlockPlacement, SingleBlockManager, + make_block, BlockManager) import pandas.core.common as com -import pandas.core.internals as internals import pandas.util.testing as tm import pandas as pd -from pandas.util.testing import ( - assert_almost_equal, assert_frame_equal, randn, assert_series_equal) +from pandas.util.testing import (assert_almost_equal, assert_frame_equal, + randn, assert_series_equal) from pandas.compat import zip, u def assert_block_equal(left, right): assert_almost_equal(left.values, right.values) - assert(left.dtype == right.dtype) + assert (left.dtype == right.dtype) assert_almost_equal(left.mgr_locs, right.mgr_locs) def get_numeric_mat(shape): arr = np.arange(shape[0]) - return np.lib.stride_tricks.as_strided( - x=arr, shape=shape, - strides=(arr.itemsize,) + (0,) * (len(shape) - 1)).copy() + return np.lib.stride_tricks.as_strided(x=arr, shape=shape, strides=( + arr.itemsize, ) + (0, ) * (len(shape) - 1)).copy() N = 10 @@ -59,14 +58,13 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): num_items = len(placement) if item_shape is None: - item_shape = (N,) + item_shape = (N, ) - shape = (num_items,) + item_shape + shape = (num_items, ) + item_shape mat = get_numeric_mat(shape) - if typestr in ('float', 'f8', 'f4', 'f2', - 'int', 'i8', 'i4', 'i2', 'i1', + if typestr in ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1', 'uint', 'u8', 'u4', 'u2', 'u1'): values = mat.astype(typestr) + num_offset elif typestr in ('complex', 'c16', 'c8'): @@ -74,7 +72,7 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): elif typestr in ('object', 'string', 'O'): values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset], shape) - elif typestr in ('b','bool',): + elif typestr in ('b', 'bool', ): values = np.ones(shape, dtype=np.bool_) elif typestr in ('datetime', 'dt', 'M8[ns]'): values = (mat * 1e9).astype('M8[ns]') @@ -87,10 +85,11 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): values = DatetimeIndex(np.arange(N) * 1e9, tz=tz) elif typestr in ('timedelta', 'td', 'm8[ns]'): values = (mat * 1).astype('m8[ns]') - elif typestr in ('category',): - values = Categorical([1,1,2,2,3,3,3,3,4,4]) - elif typestr in ('category2',): - values = Categorical(['a','a','a','a','b','b','c','c','c','d']) + elif typestr in ('category', ): + values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) + elif typestr in ('category2', ): + values = Categorical(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd' + ]) elif typestr in ('sparse', 'sparse_na'): # FIXME: doesn't support num_rows != 10 assert shape[-1] == 10 @@ -140,7 +139,7 @@ def create_mgr(descr, item_shape=None): """ if item_shape is None: - item_shape = (N,) + item_shape = (N, ) offset = 0 mgr_items = [] @@ -167,15 +166,16 @@ def create_mgr(descr, item_shape=None): num_offset = 0 for blockstr, placement in block_placements.items(): typestr = blockstr.split('-')[0] - blocks.append(create_block(typestr, placement, item_shape=item_shape, - num_offset=num_offset,)) + blocks.append(create_block(typestr, + placement, + item_shape=item_shape, + num_offset=num_offset, )) num_offset += len(placement) return BlockManager(sorted(blocks, key=lambda b: b.mgr_locs[0]), [mgr_items] + [np.arange(n) for n in item_shape]) - class TestBlock(tm.TestCase): _multiprocess_can_split_ = True @@ -198,7 +198,6 @@ def test_constructor(self): self.assertEqual(int32block.dtype, np.int32) def test_pickle(self): - def _check(blk): assert_block_equal(self.round_trip_pickle(blk), blk) @@ -221,10 +220,8 @@ def test_merge(self): ref_cols = Index(['e', 'a', 'b', 'd', 'f']) - ablock = make_block(avals, - ref_cols.get_indexer(['e', 'b'])) - bblock = make_block(bvals, - ref_cols.get_indexer(['a', 'd'])) + ablock = make_block(avals, ref_cols.get_indexer(['e', 'b'])) + bblock = make_block(bvals, ref_cols.get_indexer(['a', 'd'])) merged = ablock.merge(bblock) assert_almost_equal(merged.mgr_locs, [0, 1, 2, 3]) assert_almost_equal(merged.values[[0, 2]], avals) @@ -284,21 +281,9 @@ def test_split_block_at(self): self.assertEqual(len(bs), 1) self.assertTrue(np.array_equal(bs[0].items, ['a', 'c'])) - bblock = get_bool_ex(['f']) - bs = list(bblock.split_block_at('f')) - self.assertEqual(len(bs), 0) - - def test_get(self): - pass - - def test_set(self): - pass - - def test_fillna(self): - pass - - def test_repr(self): - pass + # bblock = get_bool_ex(['f']) + # bs = list(bblock.split_block_at('f')) + # self.assertEqual(len(bs), 0) class TestDatetimeBlock(tm.TestCase): @@ -312,8 +297,7 @@ def test_try_coerce_arg(self): self.assertTrue(pd.Timestamp(none_coerced) is pd.NaT) # coerce different types of date bojects - vals = (np.datetime64('2010-10-10'), - datetime(2010, 10, 10), + vals = (np.datetime64('2010-10-10'), datetime(2010, 10, 10), date(2010, 10, 10)) for val in vals: coerced = block._try_coerce_args(block.values, val)[2] @@ -325,9 +309,10 @@ class TestBlockManager(tm.TestCase): _multiprocess_can_split_ = True def setUp(self): - self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;' - 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' - 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') + self.mgr = create_mgr( + 'a: f8; b: object; c: f8; d: object; e: f8;' + 'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;' + 'k: M8[ns, US/Eastern]; l: M8[ns, CET];') def test_constructor_corner(self): pass @@ -352,8 +337,8 @@ def test_is_indexed_like(self): self.assertTrue(mgr1._is_indexed_like(mgr2)) self.assertTrue(mgr1._is_indexed_like(mgr3)) - self.assertFalse(mgr1._is_indexed_like( - mgr1.get_slice(slice(-1), axis=1))) + self.assertFalse(mgr1._is_indexed_like(mgr1.get_slice( + slice(-1), axis=1))) def test_duplicate_ref_loc_failure(self): tmp_mgr = create_mgr('a:bool; a: f8') @@ -421,8 +406,7 @@ def test_get_scalar(self): def test_get(self): cols = Index(list('abc')) values = np.random.rand(3, 3) - block = make_block(values=values.copy(), - placement=np.arange(3)) + block = make_block(values=values.copy(), placement=np.arange(3)) mgr = BlockManager(blocks=[block], axes=[cols, np.arange(3)]) assert_almost_equal(mgr.get('a', fastpath=False), values[0]) @@ -433,7 +417,7 @@ def test_get(self): assert_almost_equal(mgr.get('c').internal_values(), values[2]) def test_set(self): - mgr = create_mgr('a,b,c: int', item_shape=(3,)) + mgr = create_mgr('a,b,c: int', item_shape=(3, )) mgr.set('d', np.array(['foo'] * 3)) mgr.set('b', np.array(['bar'] * 3)) @@ -467,16 +451,17 @@ def test_set_change_dtype(self): mgr2.set('quux', randn(N)) self.assertEqual(mgr2.get('quux').dtype, np.float_) - def test_set_change_dtype_slice(self): # GH8850 - cols = MultiIndex.from_tuples([('1st','a'), ('2nd','b'), ('3rd','c')]) + def test_set_change_dtype_slice(self): # GH8850 + cols = MultiIndex.from_tuples([('1st', 'a'), ('2nd', 'b'), ('3rd', 'c') + ]) df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) df['2nd'] = df['2nd'] * 2.0 self.assertEqual(sorted(df.blocks.keys()), ['float64', 'int64']) - assert_frame_equal(df.blocks['float64'], - DataFrame([[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) - assert_frame_equal(df.blocks['int64'], - DataFrame([[3], [6]], columns=cols[2:])) + assert_frame_equal(df.blocks['float64'], DataFrame( + [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) + assert_frame_equal(df.blocks['int64'], DataFrame( + [[3], [6]], columns=cols[2:])) def test_copy(self): cp = self.mgr.copy(deep=False) @@ -489,14 +474,14 @@ def test_copy(self): cp = self.mgr.copy(deep=True) for blk, cp_blk in zip(self.mgr.blocks, cp.blocks): - # copy assertion - # we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz), - # but was copied + # copy assertion we either have a None for a base or in case of + # some blocks it is an array (e.g. datetimetz), but was copied self.assertTrue(cp_blk.equals(blk)) if cp_blk.values.base is not None and blk.values.base is not None: self.assertFalse(cp_blk.values.base is blk.values.base) else: - self.assertTrue(cp_blk.values.base is None and blk.values.base is None) + self.assertTrue(cp_blk.values.base is None and blk.values.base + is None) def test_sparse(self): mgr = create_mgr('a: sparse-1; b: sparse-2') @@ -592,11 +577,11 @@ def _compare(old_mgr, new_mgr): # noops mgr = create_mgr('f: i8; g: f8') new_mgr = mgr.convert() - _compare(mgr,new_mgr) + _compare(mgr, new_mgr) mgr = create_mgr('a, b: object; f: i8; g: f8') new_mgr = mgr.convert() - _compare(mgr,new_mgr) + _compare(mgr, new_mgr) # convert mgr = create_mgr('a,b,foo: object; f: i8; g: f8') @@ -628,53 +613,53 @@ def _compare(old_mgr, new_mgr): def test_interleave(self): - # self - for dtype in ['f8','i8','object','bool','complex','M8[ns]','m8[ns]']: + for dtype in ['f8', 'i8', 'object', 'bool', 'complex', 'M8[ns]', + 'm8[ns]']: mgr = create_mgr('a: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype,dtype) + self.assertEqual(mgr.as_matrix().dtype, dtype) mgr = create_mgr('a: {0}; b: {0}'.format(dtype)) - self.assertEqual(mgr.as_matrix().dtype,dtype) + self.assertEqual(mgr.as_matrix().dtype, dtype) # will be converted according the actual dtype of the underlying mgr = create_mgr('a: category') - self.assertEqual(mgr.as_matrix().dtype,'i8') + self.assertEqual(mgr.as_matrix().dtype, 'i8') mgr = create_mgr('a: category; b: category') - self.assertEqual(mgr.as_matrix().dtype,'i8'), + self.assertEqual(mgr.as_matrix().dtype, 'i8'), mgr = create_mgr('a: category; b: category2') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: category2') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: category2; b: category2') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') # combinations mgr = create_mgr('a: f8') - self.assertEqual(mgr.as_matrix().dtype,'f8') + self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f8; b: i8') - self.assertEqual(mgr.as_matrix().dtype,'f8') + self.assertEqual(mgr.as_matrix().dtype, 'f8') mgr = create_mgr('a: f4; b: i8') - self.assertEqual(mgr.as_matrix().dtype,'f4') + self.assertEqual(mgr.as_matrix().dtype, 'f4') mgr = create_mgr('a: f4; b: i8; d: object') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: bool; b: i8') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: complex') - self.assertEqual(mgr.as_matrix().dtype,'complex') + self.assertEqual(mgr.as_matrix().dtype, 'complex') mgr = create_mgr('a: f8; b: category') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: M8[ns]; b: category') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: M8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: M8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: m8[ns]; b: bool') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: m8[ns]; b: i8') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') mgr = create_mgr('a: M8[ns]; b: m8[ns]') - self.assertEqual(mgr.as_matrix().dtype,'object') + self.assertEqual(mgr.as_matrix().dtype, 'object') def test_interleave_non_unique_cols(self): df = DataFrame([ @@ -718,20 +703,32 @@ def test_reindex_items(self): reindexed = mgr.reindex_axis(['g', 'c', 'a', 'd'], axis=0) self.assertEqual(reindexed.nblocks, 2) assert_almost_equal(reindexed.items, ['g', 'c', 'a', 'd']) - assert_almost_equal(mgr.get('g',fastpath=False), reindexed.get('g',fastpath=False)) - assert_almost_equal(mgr.get('c',fastpath=False), reindexed.get('c',fastpath=False)) - assert_almost_equal(mgr.get('a',fastpath=False), reindexed.get('a',fastpath=False)) - assert_almost_equal(mgr.get('d',fastpath=False), reindexed.get('d',fastpath=False)) - assert_almost_equal(mgr.get('g').internal_values(), reindexed.get('g').internal_values()) - assert_almost_equal(mgr.get('c').internal_values(), reindexed.get('c').internal_values()) - assert_almost_equal(mgr.get('a').internal_values(), reindexed.get('a').internal_values()) - assert_almost_equal(mgr.get('d').internal_values(), reindexed.get('d').internal_values()) + assert_almost_equal( + mgr.get('g', fastpath=False), reindexed.get('g', fastpath=False)) + assert_almost_equal( + mgr.get('c', fastpath=False), reindexed.get('c', fastpath=False)) + assert_almost_equal( + mgr.get('a', fastpath=False), reindexed.get('a', fastpath=False)) + assert_almost_equal( + mgr.get('d', fastpath=False), reindexed.get('d', fastpath=False)) + assert_almost_equal( + mgr.get('g').internal_values(), + reindexed.get('g').internal_values()) + assert_almost_equal( + mgr.get('c').internal_values(), + reindexed.get('c').internal_values()) + assert_almost_equal( + mgr.get('a').internal_values(), + reindexed.get('a').internal_values()) + assert_almost_equal( + mgr.get('d').internal_values(), + reindexed.get('d').internal_values()) def test_multiindex_xs(self): mgr = create_mgr('a,b,c: f8; d,e,f: i8') - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) @@ -745,48 +742,63 @@ def test_multiindex_xs(self): def test_get_numeric_data(self): mgr = create_mgr('int: int; float: float; complex: complex;' 'str: object; bool: bool; obj: object; dt: datetime', - item_shape=(3,)) + item_shape=(3, )) mgr.set('obj', np.array([1, 2, 3], dtype=np.object_)) numeric = mgr.get_numeric_data() assert_almost_equal(numeric.items, ['int', 'float', 'complex', 'bool']) - assert_almost_equal(mgr.get('float',fastpath=False), numeric.get('float',fastpath=False)) - assert_almost_equal(mgr.get('float').internal_values(), numeric.get('float').internal_values()) + assert_almost_equal( + mgr.get('float', fastpath=False), numeric.get('float', + fastpath=False)) + assert_almost_equal( + mgr.get('float').internal_values(), + numeric.get('float').internal_values()) # Check sharing numeric.set('float', np.array([100., 200., 300.])) - assert_almost_equal(mgr.get('float',fastpath=False), np.array([100., 200., 300.])) - assert_almost_equal(mgr.get('float').internal_values(), np.array([100., 200., 300.])) + assert_almost_equal( + mgr.get('float', fastpath=False), np.array([100., 200., 300.])) + assert_almost_equal( + mgr.get('float').internal_values(), np.array([100., 200., 300.])) numeric2 = mgr.get_numeric_data(copy=True) assert_almost_equal(numeric.items, ['int', 'float', 'complex', 'bool']) numeric2.set('float', np.array([1000., 2000., 3000.])) - assert_almost_equal(mgr.get('float',fastpath=False), np.array([100., 200., 300.])) - assert_almost_equal(mgr.get('float').internal_values(), np.array([100., 200., 300.])) + assert_almost_equal( + mgr.get('float', fastpath=False), np.array([100., 200., 300.])) + assert_almost_equal( + mgr.get('float').internal_values(), np.array([100., 200., 300.])) def test_get_bool_data(self): mgr = create_mgr('int: int; float: float; complex: complex;' 'str: object; bool: bool; obj: object; dt: datetime', - item_shape=(3,)) + item_shape=(3, )) mgr.set('obj', np.array([True, False, True], dtype=np.object_)) bools = mgr.get_bool_data() assert_almost_equal(bools.items, ['bool']) - assert_almost_equal(mgr.get('bool',fastpath=False), bools.get('bool',fastpath=False)) - assert_almost_equal(mgr.get('bool').internal_values(), bools.get('bool').internal_values()) + assert_almost_equal( + mgr.get('bool', fastpath=False), bools.get('bool', fastpath=False)) + assert_almost_equal( + mgr.get('bool').internal_values(), + bools.get('bool').internal_values()) bools.set('bool', np.array([True, False, True])) - assert_almost_equal(mgr.get('bool',fastpath=False), [True, False, True]) - assert_almost_equal(mgr.get('bool').internal_values(), [True, False, True]) + assert_almost_equal( + mgr.get('bool', fastpath=False), [True, False, True]) + assert_almost_equal( + mgr.get('bool').internal_values(), [True, False, True]) # Check sharing bools2 = mgr.get_bool_data(copy=True) bools2.set('bool', np.array([False, True, False])) - assert_almost_equal(mgr.get('bool',fastpath=False), [True, False, True]) - assert_almost_equal(mgr.get('bool').internal_values(), [True, False, True]) + assert_almost_equal( + mgr.get('bool', fastpath=False), [True, False, True]) + assert_almost_equal( + mgr.get('bool').internal_values(), [True, False, True]) def test_unicode_repr_doesnt_raise(self): - str_repr = repr(create_mgr(u('b,\u05d0: object'))) + repr(create_mgr(u('b,\u05d0: object'))) def test_missing_unicode_key(self): df = DataFrame({"a": [1]}) @@ -809,12 +821,12 @@ def test_equals_block_order_different_dtypes(self): # GH 9330 mgr_strings = [ - "a:i8;b:f8", # basic case - "a:i8;b:f8;c:c8;d:b", # many types - "a:i8;e:dt;f:td;g:string", # more types - "a:i8;b:category;c:category2;d:category2", # categories - "c:sparse;d:sparse_na;b:f8", # sparse - ] + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # more types + "a:i8;b:category;c:category2;d:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ] for mgr_string in mgr_strings: bm = create_mgr(mgr_string) @@ -841,7 +853,7 @@ class TestIndexing(object): MANAGERS = [ create_single_mgr('f8', N), create_single_mgr('i8', N), - #create_single_mgr('sparse', N), + # create_single_mgr('sparse', N), create_single_mgr('sparse_na', N), # 2-dim @@ -849,7 +861,7 @@ class TestIndexing(object): create_mgr('a,b,c,d,e,f: i8', item_shape=(N,)), create_mgr('a,b: f8; c,d: i8; e,f: string', item_shape=(N,)), create_mgr('a,b: f8; c,d: i8; e,f: f8', item_shape=(N,)), - #create_mgr('a: sparse', item_shape=(N,)), + # create_mgr('a: sparse', item_shape=(N,)), create_mgr('a: sparse_na', item_shape=(N,)), # 3-dim @@ -872,9 +884,10 @@ def assert_slice_ok(mgr, axis, slobj): if isinstance(slobj, np.ndarray): ax = mgr.axes[axis] if len(ax) and len(slobj) and len(slobj) != len(ax): - slobj = np.concatenate([slobj, np.zeros(len(ax)-len(slobj),dtype=bool)]) + slobj = np.concatenate([slobj, np.zeros( + len(ax) - len(slobj), dtype=bool)]) sliced = mgr.get_slice(slobj, axis=axis) - mat_slobj = (slice(None),) * axis + (slobj,) + mat_slobj = (slice(None), ) * axis + (slobj, ) assert_almost_equal(mat[mat_slobj], sliced.as_matrix()) assert_almost_equal(mgr.axes[axis][slobj], sliced.axes[axis]) @@ -897,8 +910,8 @@ def assert_slice_ok(mgr, axis, slobj): if mgr.shape[ax] >= 3: yield (assert_slice_ok, mgr, ax, np.arange(mgr.shape[ax]) % 3 == 0) - yield (assert_slice_ok, mgr, ax, - np.array([True, True, False], dtype=np.bool_)) + yield (assert_slice_ok, mgr, ax, np.array( + [True, True, False], dtype=np.bool_)) # fancy indexer yield assert_slice_ok, mgr, ax, [] @@ -912,10 +925,8 @@ def test_take(self): def assert_take_ok(mgr, axis, indexer): mat = mgr.as_matrix() taken = mgr.take(indexer, axis) - assert_almost_equal(np.take(mat, indexer, axis), - taken.as_matrix()) - assert_almost_equal(mgr.axes[axis].take(indexer), - taken.axes[axis]) + assert_almost_equal(np.take(mat, indexer, axis), taken.as_matrix()) + assert_almost_equal(mgr.axes[axis].take(indexer), taken.axes[axis]) for mgr in self.MANAGERS: for ax in range(mgr.ndim): @@ -929,8 +940,7 @@ def assert_take_ok(mgr, axis, indexer): yield assert_take_ok, mgr, ax, [-1, -2, -3] def test_reindex_axis(self): - def assert_reindex_axis_is_ok(mgr, axis, new_labels, - fill_value): + def assert_reindex_axis_is_ok(mgr, axis, new_labels, fill_value): mat = mgr.as_matrix() indexer = mgr.axes[axis].get_indexer_for(new_labels) @@ -945,8 +955,8 @@ def assert_reindex_axis_is_ok(mgr, axis, new_labels, for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): yield assert_reindex_axis_is_ok, mgr, ax, [], fill_value - yield (assert_reindex_axis_is_ok, mgr, ax, - mgr.axes[ax], fill_value) + yield (assert_reindex_axis_is_ok, mgr, ax, mgr.axes[ax], + fill_value) yield (assert_reindex_axis_is_ok, mgr, ax, mgr.axes[ax][[0, 0, 0]], fill_value) yield (assert_reindex_axis_is_ok, mgr, ax, @@ -976,20 +986,18 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, for mgr in self.MANAGERS: for ax in range(mgr.ndim): for fill_value in (None, np.nan, 100.): - yield (assert_reindex_indexer_is_ok, mgr, ax, - [], [], fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - ['foo'] * mgr.shape[ax], np.arange(mgr.shape[ax]), + yield (assert_reindex_indexer_is_ok, mgr, ax, [], [], fill_value) + yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax], + np.arange(mgr.shape[ax]), fill_value) + yield (assert_reindex_indexer_is_ok, mgr, ax, ['foo'] * + mgr.shape[ax], np.arange(mgr.shape[ax]), fill_value) yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax][::-1], np.arange(mgr.shape[ax]), fill_value) - yield (assert_reindex_indexer_is_ok, mgr, ax, - mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], - fill_value) + yield (assert_reindex_indexer_is_ok, mgr, ax, mgr.axes[ax], + np.arange(mgr.shape[ax])[::-1], fill_value) yield (assert_reindex_indexer_is_ok, mgr, ax, ['foo', 'bar', 'baz'], [0, 0, 0], fill_value) yield (assert_reindex_indexer_is_ok, mgr, ax, @@ -1002,7 +1010,6 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, yield (assert_reindex_indexer_is_ok, mgr, ax, ['foo', 'bar', 'baz'], [0, 1, 2], fill_value) - # test_get_slice(slice_like, axis) # take(indexer, axis) # reindex_axis(new_labels, axis) @@ -1092,7 +1099,7 @@ def test_slice_iter(self): self.assertEqual(list(BlockPlacement(slice(3, 0, -1))), [3, 2, 1]) self.assertEqual(list(BlockPlacement(slice(3, None, -1))), - [3, 2, 1, 0]) + [3, 2, 1, 0]) def test_slice_to_array_conversion(self): def assert_as_array_equals(slc, asarray): @@ -1111,15 +1118,12 @@ def assert_as_array_equals(slc, asarray): def test_blockplacement_add(self): bpl = BlockPlacement(slice(0, 5)) self.assertEqual(bpl.add(1).as_slice, slice(1, 6, 1)) - self.assertEqual(bpl.add(np.arange(5)).as_slice, - slice(0, 10, 2)) - self.assertEqual(list(bpl.add(np.arange(5, 0, -1))), - [5, 5, 5, 5, 5]) + self.assertEqual(bpl.add(np.arange(5)).as_slice, slice(0, 10, 2)) + self.assertEqual(list(bpl.add(np.arange(5, 0, -1))), [5, 5, 5, 5, 5]) def test_blockplacement_add_int(self): def assert_add_equals(val, inc, result): - self.assertEqual(list(BlockPlacement(val).add(inc)), - result) + self.assertEqual(list(BlockPlacement(val).add(inc)), result) assert_add_equals(slice(0, 0), 0, []) assert_add_equals(slice(1, 4), 0, [1, 2, 3]) @@ -1145,13 +1149,7 @@ def assert_add_equals(val, inc, result): self.assertRaises(ValueError, lambda: BlockPlacement(slice(2, None, -1)).add(-1)) - # def test_blockplacement_array_add(self): - - # assert_add_equals(slice(0, 2), [0, 1, 1], [0, 2, 3]) - # assert_add_equals(slice(2, None, -1), [1, 1, 0], [3, 2, 0]) - if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_lib.py b/pandas/tests/test_lib.py index a24f71482c404..fc0030718f2c9 100644 --- a/pandas/tests/test_lib.py +++ b/pandas/tests/test_lib.py @@ -53,16 +53,19 @@ def test_maybe_indices_to_slice_left_edge(self): indices = np.arange(0, end, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice - for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]: + for case in [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], + [2, 0, -2]]: indices = np.array(case, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertFalse(isinstance(maybe_slice, slice)) @@ -78,13 +81,15 @@ def test_maybe_indices_to_slice_right_edge(self): indices = np.arange(start, 99, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice indices = np.array([97, 98, 99, 100], dtype=np.int64) @@ -145,13 +150,15 @@ def test_maybe_indices_to_slice_middle(self): indices = np.arange(start, end, step, dtype=np.int64) maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # reverse indices = indices[::-1] maybe_slice = lib.maybe_indices_to_slice(indices, len(target)) self.assertTrue(isinstance(maybe_slice, slice)) - self.assert_numpy_array_equal(target[indices], target[maybe_slice]) + self.assert_numpy_array_equal(target[indices], + target[maybe_slice]) # not slice for case in [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]: @@ -162,7 +169,7 @@ def test_maybe_indices_to_slice_middle(self): self.assert_numpy_array_equal(target[indices], target[maybe_slice]) def test_isinf_scalar(self): - #GH 11352 + # GH 11352 self.assertTrue(lib.isposinf_scalar(float('inf'))) self.assertTrue(lib.isposinf_scalar(np.inf)) self.assertFalse(lib.isposinf_scalar(-np.inf)) @@ -175,6 +182,7 @@ def test_isinf_scalar(self): self.assertFalse(lib.isneginf_scalar(1)) self.assertFalse(lib.isneginf_scalar('a')) + class Testisscalar(tm.TestCase): def test_isscalar_builtin_scalars(self): @@ -197,7 +205,7 @@ def test_isscalar_builtin_nonscalars(self): self.assertFalse(lib.isscalar([])) self.assertFalse(lib.isscalar([1])) self.assertFalse(lib.isscalar(())) - self.assertFalse(lib.isscalar((1,))) + self.assertFalse(lib.isscalar((1, ))) self.assertFalse(lib.isscalar(slice(None))) self.assertFalse(lib.isscalar(Ellipsis)) @@ -213,8 +221,7 @@ def test_isscalar_numpy_array_scalars(self): self.assertTrue(lib.isscalar(np.timedelta64(1, 'h'))) def test_isscalar_numpy_zerodim_arrays(self): - for zerodim in [np.array(1), - np.array('foobar'), + for zerodim in [np.array(1), np.array('foobar'), np.array(np.datetime64('2014-01-01')), np.array(np.timedelta64(1, 'h'))]: self.assertFalse(lib.isscalar(zerodim)) diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/test_msgpack/test_case.py index 187668b242495..5e8bbff390d07 100644 --- a/pandas/tests/test_msgpack/test_case.py +++ b/pandas/tests/test_msgpack/test_case.py @@ -10,68 +10,75 @@ def check(length, obj): "%r length should be %r but get %r" % (obj, length, len(v)) assert unpackb(v, use_list=0) == obj + def test_1(): for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, - -((1<<5)-1), -(1<<5)]: + -((1 << 5) - 1), -(1 << 5)]: check(1, o) + def test_2(): - for o in [1 << 7, (1 << 8) - 1, - -((1<<5)+1), -(1<<7) - ]: + for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: check(2, o) + def test_3(): - for o in [1 << 8, (1 << 16) - 1, - -((1<<7)+1), -(1<<15)]: + for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: check(3, o) + def test_5(): - for o in [1 << 16, (1 << 32) - 1, - -((1<<15)+1), -(1<<31)]: + for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: check(5, o) + def test_9(): - for o in [1 << 32, (1 << 64) - 1, - -((1<<31)+1), -(1<<63), - 1.0, 0.1, -0.1, -1.0]: + for o in [1 << 32, (1 << 64) - 1, -((1 << 31) + 1), -(1 << 63), 1.0, 0.1, + -0.1, -1.0]: check(9, o) def check_raw(overhead, num): check(num + overhead, b" " * num) + def test_fixraw(): check_raw(1, 0) - check_raw(1, (1<<5) - 1) + check_raw(1, (1 << 5) - 1) + def test_raw16(): - check_raw(3, 1<<5) - check_raw(3, (1<<16) - 1) + check_raw(3, 1 << 5) + check_raw(3, (1 << 16) - 1) + def test_raw32(): - check_raw(5, 1<<16) + check_raw(5, 1 << 16) def check_array(overhead, num): - check(num + overhead, (None,) * num) + check(num + overhead, (None, ) * num) + def test_fixarray(): check_array(1, 0) check_array(1, (1 << 4) - 1) + def test_array16(): check_array(3, 1 << 4) - check_array(3, (1<<16)-1) + check_array(3, (1 << 16) - 1) + def test_array32(): - check_array(5, (1<<16)) + check_array(5, (1 << 16)) def match(obj, buf): assert packb(obj) == buf assert unpackb(buf, use_list=0) == obj + def test_match(): cases = [ (None, b'\xc0'), @@ -84,19 +91,26 @@ def test_match(): (-1, b'\xff'), (-33, b'\xd0\xdf'), (-129, b'\xd1\xff\x7f'), - ({1:1}, b'\x81\x01\x01'), + ({1: 1}, b'\x81\x01\x01'), (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), ((), b'\x90'), - (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), - (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), + (tuple(range(15)), (b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" + b"\x0a\x0b\x0c\x0d\x0e")), + (tuple(range(16)), (b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07" + b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f")), ({}, b'\x80'), - (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), - (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), - ] + (dict([(x, x) for x in range(15)]), + (b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07' + b'\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e')), + (dict([(x, x) for x in range(16)]), + (b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06' + b'\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e' + b'\x0f\x0f')), + ] for v, p in cases: match(v, p) + def test_unicode(): assert unpackb(packb('foobar'), use_list=1) == b'foobar' - diff --git a/pandas/tests/test_msgpack/test_except.py b/pandas/tests/test_msgpack/test_except.py index a0239336ca20d..79290ebb891fd 100644 --- a/pandas/tests/test_msgpack/test_except.py +++ b/pandas/tests/test_msgpack/test_except.py @@ -2,14 +2,13 @@ # coding: utf-8 import unittest -import nose - -import datetime from pandas.msgpack import packb, unpackb + class DummyException(Exception): pass + class TestExceptions(unittest.TestCase): def test_raise_on_find_unsupported_value(self): @@ -19,11 +18,17 @@ def test_raise_on_find_unsupported_value(self): def test_raise_from_object_hook(self): def hook(obj): raise DummyException + self.assertRaises(DummyException, unpackb, packb({}), object_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) - self.assertRaises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), + object_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), + object_pairs_hook=hook) + self.assertRaises(DummyException, unpackb, + packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) + self.assertRaises(DummyException, unpackb, + packb({'fizz': {'buzz': 'spam'}}), + object_pairs_hook=hook) def test_invalidvalue(self): self.assertRaises(ValueError, unpackb, b'\xd9\x97#DL_') diff --git a/pandas/tests/test_msgpack/test_extension.py b/pandas/tests/test_msgpack/test_extension.py index 3172605c0aae1..97f0962a753d9 100644 --- a/pandas/tests/test_msgpack/test_extension.py +++ b/pandas/tests/test_msgpack/test_extension.py @@ -9,40 +9,42 @@ def p(s): packer = msgpack.Packer() packer.pack_ext_type(0x42, s) return packer.bytes() - assert p(b'A') == b'\xd4\x42A' # fixext 1 - assert p(b'AB') == b'\xd5\x42AB' # fixext 2 - assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 - assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 - assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 - assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 - assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 - assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 + + assert p(b'A') == b'\xd4\x42A' # fixext 1 + assert p(b'AB') == b'\xd5\x42AB' # fixext 2 + assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 + assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 + assert p(b'A' * 16) == b'\xd8\x42' + b'A' * 16 # fixext 16 + assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 + assert p(b'A' * 0x0123) == b'\xc8\x01\x23\x42' + b'A' * 0x0123 # ext 16 + assert (p(b'A' * 0x00012345) == + b'\xc9\x00\x01\x23\x45\x42' + b'A' * 0x00012345) # ext 32 def test_unpack_ext_type(): def check(b, expected): assert msgpack.unpackb(b) == expected - check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 - check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 - check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 - check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 - check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 - check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 - check(b'\xc8\x01\x23\x42' + b'A'*0x0123, - ExtType(0x42, b'A'*0x0123)) # ext 16 - check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, - ExtType(0x42, b'A'*0x00012345)) # ext 32 + check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 + check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 + check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 + check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 + check(b'\xd8\x42' + b'A' * 16, ExtType(0x42, b'A' * 16)) # fixext 16 + check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 + check(b'\xc8\x01\x23\x42' + b'A' * 0x0123, + ExtType(0x42, b'A' * 0x0123)) # ext 16 + check(b'\xc9\x00\x01\x23\x45\x42' + b'A' * 0x00012345, + ExtType(0x42, b'A' * 0x00012345)) # ext 32 def test_extension_type(): def default(obj): print('default called', obj) if isinstance(obj, array.array): - typecode = 123 # application specific typecode + typecode = 123 # application specific typecode data = obj.tostring() return ExtType(typecode, data) - raise TypeError("Unknwon type object %r" % (obj,)) + raise TypeError("Unknwon type object %r" % (obj, )) def ext_hook(code, data): print('ext_hook called', code, data) diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/test_msgpack/test_format.py index 706c48436d7d3..203726ae6a5f9 100644 --- a/pandas/tests/test_msgpack/test_format.py +++ b/pandas/tests/test_msgpack/test_format.py @@ -3,68 +3,90 @@ from pandas.msgpack import unpackb + def check(src, should, use_list=0): assert unpackb(src, use_list=use_list) == should + def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", - (None, False, True,)) + check(b"\x93\xc0\xc2\xc3", (None, False, True, )) + def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", - ((0,64,127,), (-32,-16,-1,),) - ) + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, + 64, + 127, ), + (-32, + -16, + -1, ), )) + def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", - ((),((None,),),), - ) + check(b"\x92\x90\x91\x91\xc0", ((), ((None, ), ), ), ) + def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", - (b"", b"a", b"bc", b"def",), - ) + check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def", ), ) + def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", - {False: {None: None}, True:{None:{}}}, - ) + check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", + {False: {None: None}, + True: {None: {}}}, ) + def testUnsignedInt(): - check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + check(b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), - ) + (0, + 128, + 255, + 0, + 32768, + 65535, + 0, + 2147483648, + 4294967295, ), ) + def testSignedInt(): check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) + b"\xd2\xff\xff\xff\xff", (0, + -128, + -1, + 0, + -32768, + -1, + 0, + -2147483648, + -1, )) + def testRaw(): check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" - b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab")) + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + (b"", b"a", b"ab", b"", b"a", b"ab")) + def testArray(): check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" - b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" - b"\xc2\xc3", - ((), (None,), (False,True), (), (None,), (False,True)) - ) + b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" + b"\xc2\xc3", ((), (None, ), (False, True), (), (None, ), + (False, True))) + def testMap(): - check( - b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ({}, {None: False}, {True: False, None: False}, {}, - {None: False}, {True: False, None: False})) + check(b"\x96" + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", ({}, {None: False}, + {True: False, + None: False}, {}, + {None: False}, + {True: False, + None: False})) diff --git a/pandas/tests/test_msgpack/test_limits.py b/pandas/tests/test_msgpack/test_limits.py index d9aa957182d65..2cf52aae65f2a 100644 --- a/pandas/tests/test_msgpack/test_limits.py +++ b/pandas/tests/test_msgpack/test_limits.py @@ -1,33 +1,33 @@ #!/usr/bin/env python # coding: utf-8 -from __future__ import absolute_import, division, print_function, unicode_literals +from __future__ import (absolute_import, division, print_function, + unicode_literals) import pandas.util.testing as tm from pandas.msgpack import packb, unpackb, Packer, Unpacker, ExtType + class TestLimits(tm.TestCase): + def test_integer(self): x = -(2 ** 63) assert unpackb(packb(x)) == x - self.assertRaises((OverflowError, ValueError), packb, x-1) + self.assertRaises((OverflowError, ValueError), packb, x - 1) x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - self.assertRaises((OverflowError, ValueError), packb, x+1) - + self.assertRaises((OverflowError, ValueError), packb, x + 1) def test_array_header(self): packer = Packer() - packer.pack_array_header(2**32-1) + packer.pack_array_header(2 ** 32 - 1) self.assertRaises((OverflowError, ValueError), - packer.pack_array_header, 2**32) - + packer.pack_array_header, 2 ** 32) def test_map_header(self): packer = Packer() - packer.pack_map_header(2**32-1) + packer.pack_map_header(2 ** 32 - 1) self.assertRaises((OverflowError, ValueError), - packer.pack_array_header, 2**32) - + packer.pack_array_header, 2 ** 32) def test_max_str_len(self): d = 'x' * 3 @@ -41,7 +41,6 @@ def test_max_str_len(self): unpacker.feed(packed) self.assertRaises(ValueError, unpacker.unpack) - def test_max_bin_len(self): d = b'x' * 3 packed = packb(d, use_bin_type=True) @@ -54,7 +53,6 @@ def test_max_bin_len(self): unpacker.feed(packed) self.assertRaises(ValueError, unpacker.unpack) - def test_max_array_len(self): d = [1, 2, 3] packed = packb(d) @@ -67,7 +65,6 @@ def test_max_array_len(self): unpacker.feed(packed) self.assertRaises(ValueError, unpacker.unpack) - def test_max_map_len(self): d = {1: 2, 3: 4, 5: 6} packed = packb(d) @@ -80,7 +77,6 @@ def test_max_map_len(self): unpacker.feed(packed) self.assertRaises(ValueError, unpacker.unpack) - def test_max_ext_len(self): d = ExtType(42, b"abc") packed = packb(d) diff --git a/pandas/tests/test_msgpack/test_newspec.py b/pandas/tests/test_msgpack/test_newspec.py index 8532ab8cfb1a4..4eb9a0425c57b 100644 --- a/pandas/tests/test_msgpack/test_newspec.py +++ b/pandas/tests/test_msgpack/test_newspec.py @@ -66,23 +66,27 @@ def test_bin32(): assert b[5:] == data assert unpackb(b) == data + def test_ext(): def check(ext, packed): assert packb(ext) == packed assert unpackb(packed) == ext - check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 - check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 - check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 - check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 - check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 + + check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 + check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 + check(ExtType(0x42, b'Z' * 4), b'\xd6\x42' + b'Z' * 4) # fixext 4 + check(ExtType(0x42, b'Z' * 8), b'\xd7\x42' + b'Z' * 8) # fixext 8 + check(ExtType(0x42, b'Z' * 16), b'\xd8\x42' + b'Z' * 16) # fixext 16 # ext 8 check(ExtType(0x42, b''), b'\xc7\x00\x42') - check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) + check(ExtType(0x42, b'Z' * 255), b'\xc7\xff\x42' + b'Z' * 255) # ext 16 - check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) - check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) + check(ExtType(0x42, b'Z' * 256), b'\xc8\x01\x00\x42' + b'Z' * 256) + check(ExtType(0x42, b'Z' * 0xffff), b'\xc8\xff\xff\x42' + b'Z' * 0xffff) # ext 32 - check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) + check( + ExtType(0x42, b'Z' * + 0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z' * 0x10000) # needs large memory - #check(ExtType(0x42, b'Z'*0xffffffff), + # check(ExtType(0x42, b'Z'*0xffffffff), # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/test_msgpack/test_obj.py index 886fec522d4f3..bcc76929fe8f8 100644 --- a/pandas/tests/test_msgpack/test_obj.py +++ b/pandas/tests/test_msgpack/test_obj.py @@ -1,14 +1,13 @@ # coding: utf-8 import unittest -import nose - -import datetime from pandas.msgpack import packb, unpackb + class DecodeError(Exception): pass + class TestObj(unittest.TestCase): def _arr_to_str(self, arr): @@ -28,32 +27,37 @@ def _encode_complex(self, obj): return obj def test_encode_hook(self): - packed = packb([3, 1+2j], default=self._encode_complex) + packed = packb([3, 1 + 2j], default=self._encode_complex) unpacked = unpackb(packed, use_list=1) assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2} def test_decode_hook(self): packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) - unpacked = unpackb(packed, object_hook=self._decode_complex, use_list=1) - assert unpacked[1] == 1+2j + unpacked = unpackb(packed, object_hook=self._decode_complex, + use_list=1) + assert unpacked[1] == 1 + 2j def test_decode_pairs_hook(self): packed = packb([3, {1: 2, 3: 4}]) prod_sum = 1 * 2 + 3 * 4 - unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1) + unpacked = unpackb( + packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), + use_list=1) assert unpacked[1] == prod_sum def test_only_one_obj_hook(self): - self.assertRaises(TypeError, unpackb, b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + self.assertRaises(TypeError, unpackb, b'', object_hook=lambda x: x, + object_pairs_hook=lambda x: x) def test_bad_hook(self): def f(): - packed = packb([3, 1+2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) + packed = packb([3, 1 + 2j], default=lambda o: o) + unpacked = unpackb(packed, use_list=1) # noqa + self.assertRaises(TypeError, f) def test_array_hook(self): - packed = packb([1,2,3]) + packed = packb([1, 2, 3]) unpacked = unpackb(packed, list_hook=self._arr_to_str, use_list=1) assert unpacked == '123' @@ -61,11 +65,12 @@ def test_an_exception_in_objecthook1(self): def f(): packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) unpackb(packed, object_hook=self.bad_complex_decoder) - self.assertRaises(DecodeError, f) + self.assertRaises(DecodeError, f) def test_an_exception_in_objecthook2(self): def f(): packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1) + self.assertRaises(DecodeError, f) diff --git a/pandas/tests/test_msgpack/test_pack.py b/pandas/tests/test_msgpack/test_pack.py index 22df6df5e2e45..99c7453212b8b 100644 --- a/pandas/tests/test_msgpack/test_pack.py +++ b/pandas/tests/test_msgpack/test_pack.py @@ -2,13 +2,13 @@ # coding: utf-8 import unittest -import nose import struct from pandas import compat from pandas.compat import u, OrderedDict from pandas.msgpack import packb, unpackb, Unpacker, Packer + class TestPack(unittest.TestCase): def check(self, data, use_list=False): @@ -20,25 +20,25 @@ def testPack(self): 0, 1, 127, 128, 255, 256, 65535, 65536, -1, -32, -33, -128, -129, -32768, -32769, 1.0, - b"", b"a", b"a"*31, b"a"*32, + b"", b"a", b"a" * 31, b"a" * 32, None, True, False, (), ((),), ((), None,), {None: 0}, - (1<<23), - ] + (1 << 23), + ] for td in test_data: self.check(td) def testPackUnicode(self): - test_data = [ - u(""), u("abcd"), [u("defgh")], u("Русский текст"), - ] + test_data = [u(""), u("abcd"), [u("defgh")], u("Русский текст"), ] for td in test_data: - re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') + re = unpackb( + packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') assert re == td packer = Packer(encoding='utf-8') data = packer.pack(td) - re = Unpacker(compat.BytesIO(data), encoding='utf-8', use_list=1).unpack() + re = Unpacker( + compat.BytesIO(data), encoding='utf-8', use_list=1).unpack() assert re == td def testPackUTF32(self): @@ -47,30 +47,36 @@ def testPackUTF32(self): compat.u("abcd"), [compat.u("defgh")], compat.u("Русский текст"), - ] + ] for td in test_data: - re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') + re = unpackb( + packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') assert re == td def testPackBytes(self): - test_data = [ - b"", b"abcd", (b"defgh",), - ] + test_data = [b"", b"abcd", (b"defgh", ), ] for td in test_data: self.check(td) def testIgnoreUnicodeErrors(self): - re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) + re = unpackb( + packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', + use_list=1) assert re == "abcdef" def testStrictUnicodeUnpack(self): - self.assertRaises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'), encoding='utf-8', use_list=1) + self.assertRaises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'), + encoding='utf-8', use_list=1) def testStrictUnicodePack(self): - self.assertRaises(UnicodeEncodeError, packb, compat.u("abc\xeddef"), encoding='ascii', unicode_errors='strict') + self.assertRaises(UnicodeEncodeError, packb, compat.u("abc\xeddef"), + encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(self): - re = unpackb(packb(compat.u("abcФФФdef"), encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1) + re = unpackb( + packb( + compat.u("abcФФФdef"), encoding='ascii', + unicode_errors='ignore'), encoding='utf-8', use_list=1) assert re == compat.u("abcdef") def testNoEncoding(self): @@ -81,8 +87,10 @@ def testDecodeBinary(self): assert re == b"abc" def testPackFloat(self): - assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack('>f', 1.0) - assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack('>d', 1.0) + assert packb(1.0, + use_single_float=True) == b'\xca' + struct.pack('>f', 1.0) + assert packb( + 1.0, use_single_float=False) == b'\xcb' + struct.pack('>d', 1.0) def testArraySize(self, sizes=[0, 5, 50, 1000]): bio = compat.BytesIO() @@ -118,23 +126,24 @@ def testMapSize(self, sizes=[0, 5, 50, 1000]): for size in sizes: bio.write(packer.pack_map_header(size)) for i in range(size): - bio.write(packer.pack(i)) # key - bio.write(packer.pack(i * 2)) # value + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value bio.seek(0) unpacker = Unpacker(bio) for size in sizes: assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) - def test_odict(self): seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] od = OrderedDict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) + def pair_hook(seq): return list(seq) - assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq + assert unpackb( + packb(od), object_pairs_hook=pair_hook, use_list=1) == seq def test_pairlist(self): pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')] diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/test_msgpack/test_read_size.py index 7cbb9c9807201..965e97a7007de 100644 --- a/pandas/tests/test_msgpack/test_read_size.py +++ b/pandas/tests/test_msgpack/test_read_size.py @@ -2,6 +2,7 @@ from pandas.msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError + def test_read_array_header(): unpacker = Unpacker() unpacker.feed(packb(['a', 'b', 'c'])) @@ -28,6 +29,7 @@ def test_read_map_header(): except OutOfData: assert 1, 'okay' + def test_incorrect_type_array(): unpacker = Unpacker() unpacker.feed(packb(1)) @@ -37,6 +39,7 @@ def test_incorrect_type_array(): except UnexpectedTypeException: assert 1, 'okay' + def test_incorrect_type_map(): unpacker = Unpacker() unpacker.feed(packb(1)) @@ -46,6 +49,7 @@ def test_incorrect_type_map(): except UnexpectedTypeException: assert 1, 'okay' + def test_correct_type_nested_array(): unpacker = Unpacker() unpacker.feed(packb({'a': ['b', 'c', 'd']})) @@ -55,6 +59,7 @@ def test_correct_type_nested_array(): except UnexpectedTypeException: assert 1, 'okay' + def test_incorrect_type_nested_map(): unpacker = Unpacker() unpacker.feed(packb([{'a': 'b'}])) @@ -63,4 +68,3 @@ def test_incorrect_type_nested_map(): assert 0, 'should raise exception' except UnexpectedTypeException: assert 1, 'okay' - diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/test_msgpack/test_seq.py index 464ff6d0174af..76a21b98f22da 100644 --- a/pandas/tests/test_msgpack/test_seq.py +++ b/pandas/tests/test_msgpack/test_seq.py @@ -4,9 +4,9 @@ import io import pandas.msgpack as msgpack - binarydata = bytes(bytearray(range(256))) + def gen_binary_data(idx): return binarydata[:idx % 300] @@ -18,10 +18,16 @@ def test_exceeding_unpacker_read_size(): NUMBER_OF_STRINGS = 6 read_size = 16 - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or + # corruption (fasttop): + + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: + # double free or corruption (!prev) + + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** + # python: double free or corruption (!prev): for idx in range(NUMBER_OF_STRINGS): data = gen_binary_data(idx) diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/test_msgpack/test_sequnpack.py index 72ceed0471437..2f496b9fbbafa 100644 --- a/pandas/tests/test_msgpack/test_sequnpack.py +++ b/pandas/tests/test_msgpack/test_sequnpack.py @@ -2,12 +2,12 @@ # coding: utf-8 import unittest -import nose from pandas import compat from pandas.msgpack import Unpacker, BufferFull from pandas.msgpack import OutOfData + class TestPack(unittest.TestCase): def test_partialdata(self): @@ -89,8 +89,8 @@ def test_issue124(self): assert tuple(unpacker) == (b'?', b'!') assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b'?',) + assert tuple(unpacker) == (b'?', ) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == (b'!',) + assert tuple(unpacker) == (b'!', ) assert tuple(unpacker) == () diff --git a/pandas/tests/test_msgpack/test_subtype.py b/pandas/tests/test_msgpack/test_subtype.py index 0934b31cebeda..c89b36717a159 100644 --- a/pandas/tests/test_msgpack/test_subtype.py +++ b/pandas/tests/test_msgpack/test_subtype.py @@ -1,20 +1,25 @@ #!/usr/bin/env python # coding: utf-8 -from pandas.msgpack import packb, unpackb +from pandas.msgpack import packb from collections import namedtuple + class MyList(list): pass + class MyDict(dict): pass + class MyTuple(tuple): pass + MyNamedTuple = namedtuple('MyNamedTuple', 'x y') + def test_types(): assert packb(MyDict()) == packb(dict()) assert packb(MyList()) == packb(list()) diff --git a/pandas/tests/test_msgpack/test_unpack.py b/pandas/tests/test_msgpack/test_unpack.py index fe840083ae1c2..a182c676adb3b 100644 --- a/pandas/tests/test_msgpack/test_unpack.py +++ b/pandas/tests/test_msgpack/test_unpack.py @@ -4,9 +4,11 @@ import pandas.util.testing as tm import nose + class TestUnpack(tm.TestCase): + def test_unpack_array_header_from_file(self): - f = BytesIO(packb([1,2,3,4])) + f = BytesIO(packb([1, 2, 3, 4])) unpacker = Unpacker(f) assert unpacker.read_array_header() == 4 assert unpacker.unpack() == 1 @@ -15,7 +17,6 @@ def test_unpack_array_header_from_file(self): assert unpacker.unpack() == 4 self.assertRaises(OutOfData, unpacker.unpack) - def test_unpacker_hook_refcnt(self): if not hasattr(sys, 'getrefcount'): raise nose.SkipTest('no sys.getrefcount()') @@ -41,9 +42,7 @@ def hook(x): assert sys.getrefcount(hook) == basecnt - def test_unpacker_ext_hook(self): - class MyUnpacker(Unpacker): def __init__(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5b00ea163d85f..6302c011a4491 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -10,14 +10,12 @@ from pandas.core.index import Index, MultiIndex from pandas import Panel, DataFrame, Series, notnull, isnull, Timestamp -from pandas.util.testing import (assert_almost_equal, - assert_series_equal, - assert_frame_equal, - assertRaisesRegexp) +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_frame_equal, assertRaisesRegexp) import pandas.core.common as com import pandas.util.testing as tm -from pandas.compat import (range, lrange, StringIO, lzip, u, - product as cart_product, zip) +from pandas.compat import (range, lrange, StringIO, lzip, u, product as + cart_product, zip) import pandas as pd import pandas.index as _index @@ -29,8 +27,8 @@ class TestMultiLevel(tm.TestCase): def setUp(self): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) @@ -38,8 +36,7 @@ def setUp(self): columns=Index(['A', 'B', 'C'], name='exp')) self.single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], - names=['first']) + labels=[[0, 1, 2, 3]], names=['first']) # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], @@ -57,10 +54,9 @@ def setUp(self): # use Int64Index, to make sure things work self.ymd.index.set_levels([lev.astype('i8') - for lev in self.ymd.index.levels], - inplace=True) - self.ymd.index.set_names(['year', 'month', 'day'], - inplace=True) + for lev in self.ymd.index.levels], + inplace=True) + self.ymd.index.set_names(['year', 'month', 'day'], inplace=True) def test_append(self): a, b = self.frame[:5], self.frame[5:] @@ -75,7 +71,8 @@ def test_append_index(self): tm._skip_if_no_pytz() idx1 = Index([1.1, 1.2, 1.3]) - idx2 = pd.date_range('2011-01-01', freq='D', periods=3, tz='Asia/Tokyo') + idx2 = pd.date_range('2011-01-01', freq='D', periods=3, + tz='Asia/Tokyo') idx3 = Index(['A', 'B', 'C']) midx_lv2 = MultiIndex.from_arrays([idx1, idx2]) @@ -97,7 +94,8 @@ def test_append_index(self): self.assertTrue(result.equals(expected)) result = midx_lv2.append(midx_lv2) - expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)]) + expected = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2) + ]) self.assertTrue(result.equals(expected)) result = midx_lv2.append(midx_lv3) @@ -108,7 +106,7 @@ def test_append_index(self): np.array([(1.1, datetime.datetime(2011, 1, 1, tzinfo=tz), 'A'), (1.2, datetime.datetime(2011, 1, 2, tzinfo=tz), 'B'), (1.3, datetime.datetime(2011, 1, 3, tzinfo=tz), 'C')] - + expected_tuples), None) + + expected_tuples), None) self.assertTrue(result.equals(expected)) def test_dataframe_constructor(self): @@ -124,16 +122,15 @@ def test_dataframe_constructor(self): tm.assertIsInstance(multi.columns, MultiIndex) def test_series_constructor(self): - multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), - np.array(['x', 'y', 'x', 'y'])]) + multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), np.array( + ['x', 'y', 'x', 'y'])]) tm.assertIsInstance(multi.index, MultiIndex) - multi = Series(1., index=[['a', 'a', 'b', 'b'], - ['x', 'y', 'x', 'y']]) + multi = Series(1., index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) tm.assertIsInstance(multi.index, MultiIndex) multi = Series(lrange(4), index=[['a', 'a', 'b', 'b'], - ['x', 'y', 'x', 'y']]) + ['x', 'y', 'x', 'y']]) tm.assertIsInstance(multi.index, MultiIndex) def test_reindex_level(self): @@ -168,8 +165,8 @@ def _check_op(opname): # Series op = getattr(Series, opname) result = op(self.ymd['A'], month_sums['A'], level='month') - broadcasted = self.ymd['A'].groupby( - level='month').transform(np.sum) + broadcasted = self.ymd['A'].groupby(level='month').transform( + np.sum) expected = op(self.ymd['A'], broadcasted) expected.name = 'A' assert_series_equal(result, expected) @@ -180,7 +177,6 @@ def _check_op(opname): _check_op('div') def test_pickle(self): - def _test_roundtrip(frame): unpickled = self.round_trip_pickle(frame) assert_frame_equal(frame, unpickled) @@ -217,38 +213,40 @@ def test_sort_index_preserve_levels(self): def test_sorting_repr_8017(self): np.random.seed(0) - data = np.random.randn(3,4) + data = np.random.randn(3, 4) - for gen, extra in [([1.,3.,2.,5.],4.), - ([1,3,2,5],4), - ([Timestamp('20130101'),Timestamp('20130103'),Timestamp('20130102'),Timestamp('20130105')],Timestamp('20130104')), - (['1one','3one','2one','5one'],'4one')]: + for gen, extra in [([1., 3., 2., 5.], 4.), ([1, 3, 2, 5], 4), + ([Timestamp('20130101'), Timestamp('20130103'), + Timestamp('20130102'), Timestamp('20130105')], + Timestamp('20130104')), + (['1one', '3one', '2one', '5one'], '4one')]: columns = MultiIndex.from_tuples([('red', i) for i in gen]) df = DataFrame(data, index=list('def'), columns=columns) - df2 = pd.concat([df,DataFrame('world', - index=list('def'), - columns=MultiIndex.from_tuples([('red', extra)]))],axis=1) + df2 = pd.concat([df, + DataFrame('world', index=list('def'), + columns=MultiIndex.from_tuples( + [('red', extra)]))], axis=1) # check that the repr is good # make sure that we have a correct sparsified repr # e.g. only 1 header of read - self.assertEqual(str(df2).splitlines()[0].split(),['red']) + self.assertEqual(str(df2).splitlines()[0].split(), ['red']) # GH 8017 # sorting fails after columns added # construct single-dtype then sort result = df.copy().sort_index(axis=1) - expected = df.iloc[:,[0,2,1,3]] + expected = df.iloc[:, [0, 2, 1, 3]] assert_frame_equal(result, expected) result = df2.sort_index(axis=1) - expected = df2.iloc[:,[0,2,1,4,3]] + expected = df2.iloc[:, [0, 2, 1, 4, 3]] assert_frame_equal(result, expected) # setitem then sort result = df.copy() - result[('red',extra)] = 'world' + result[('red', extra)] = 'world' result = result.sort_index(axis=1) assert_frame_equal(result, expected) @@ -285,7 +283,10 @@ def test_series_getitem(self): s = self.ymd['A'] result = s[2000, 3] - result2 = s.ix[2000, 3] + + # TODO(wesm): unused? + # result2 = s.ix[2000, 3] + expected = s.reindex(s.index[42:65]) expected.index = expected.index.droplevel(0).droplevel(0) assert_series_equal(result, expected) @@ -389,10 +390,8 @@ def test_frame_getitem_setitem_multislice(self): assert_frame_equal(df, result) def test_frame_getitem_multicolumn_empty_level(self): - f = DataFrame({'a': ['1', '2', '3'], - 'b': ['2', '3', '4']}) - f.columns = [['level1 item1', 'level1 item2'], - ['', 'level2 item2'], + f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) + f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], ['level3 item1', 'level3 item2']] result = f['level1 item1'] @@ -413,7 +412,7 @@ def test_frame_setitem_multi_column(self): cp['a'] = cp['b'].values assert_frame_equal(cp['a'], cp['b']) - #---------------------------------------- + # --------------------------------------- # #1803 columns = MultiIndex.from_tuples([('A', '1'), ('A', '2'), ('B', '1')]) df = DataFrame(index=[1, 3, 5], columns=columns) @@ -482,18 +481,20 @@ def test_xs(self): # GH 6574 # missing values in returned index should be preserrved acc = [ - ('a','abcde',1), - ('b','bbcde',2), - ('y','yzcde',25), - ('z','xbcde',24), - ('z',None,26), - ('z','zbcde',25), - ('z','ybcde',26), - ] - df = DataFrame(acc, columns=['a1','a2','cnt']).set_index(['a1','a2']) - expected = DataFrame({ 'cnt' : [24,26,25,26] }, index=Index(['xbcde',np.nan,'zbcde','ybcde'],name='a2')) - - result = df.xs('z',level='a1') + ('a', 'abcde', 1), + ('b', 'bbcde', 2), + ('y', 'yzcde', 25), + ('z', 'xbcde', 24), + ('z', None, 26), + ('z', 'zbcde', 25), + ('z', 'ybcde', 26), + ] + df = DataFrame(acc, + columns=['a1', 'a2', 'cnt']).set_index(['a1', 'a2']) + expected = DataFrame({'cnt': [24, 26, 25, 26]}, index=Index( + ['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2')) + + result = df.xs('z', level='a1') assert_frame_equal(result, expected) def test_xs_partial(self): @@ -510,8 +511,8 @@ def test_xs_partial(self): # ex from #1796 index = MultiIndex(levels=[['foo', 'bar'], ['one', 'two'], [-1, 1]], labels=[[0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 1, 1, 0, 0, 1, 1], - [0, 1, 0, 1, 0, 1, 0, 1]]) + [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, + 0, 1]]) df = DataFrame(np.random.randn(8, 4), index=index, columns=list('abcd')) @@ -526,8 +527,8 @@ def test_xs_level(self): assert_frame_equal(result, expected) - index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), - ('p', 'q', 'r')]) + index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'), ( + 'p', 'q', 'r')]) df = DataFrame(np.random.randn(3, 5), index=index) result = df.xs('c', level=2) expected = df[1:2] @@ -541,6 +542,7 @@ def test_xs_level(self): # as we are trying to write a view def f(x): x[:] = 10 + self.assertRaises(com.SettingWithCopyError, f, result) def test_xs_level_multiple(self): @@ -564,6 +566,7 @@ def test_xs_level_multiple(self): # as we are trying to write a view def f(x): x[:] = 10 + self.assertRaises(com.SettingWithCopyError, f, result) # GH2107 @@ -638,8 +641,7 @@ def test_getitem_toplevel(self): def test_getitem_setitem_slice_integers(self): index = MultiIndex(levels=[[0, 1, 2], [0, 2]], - labels=[[0, 0, 1, 1, 2, 2], - [0, 1, 0, 1, 0, 1]]) + labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) frame = DataFrame(np.random.randn(len(index), 4), index=index, columns=['a', 'b', 'c', 'd']) @@ -761,27 +763,27 @@ def test_sortlevel(self): def test_sortlevel_large_cardinality(self): # #2684 (int64) - index = MultiIndex.from_arrays([np.arange(4000)]*3) - df = DataFrame(np.random.randn(4000), index=index, dtype = np.int64) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) # it works! result = df.sortlevel(0) self.assertTrue(result.index.lexsort_depth == 3) # #2684 (int32) - index = MultiIndex.from_arrays([np.arange(4000)]*3) - df = DataFrame(np.random.randn(4000), index=index, dtype = np.int32) + index = MultiIndex.from_arrays([np.arange(4000)] * 3) + df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) # it works! result = df.sortlevel(0) - self.assertTrue((result.dtypes.values == df.dtypes.values).all() == True) + self.assertTrue((result.dtypes.values == df.dtypes.values).all()) self.assertTrue(result.index.lexsort_depth == 3) def test_delevel_infer_dtype(self): - tuples = [tuple for tuple in cart_product(['foo', 'bar'], - [10, 20], [1.0, 1.1])] - index = MultiIndex.from_tuples(tuples, - names=['prm0', 'prm1', 'prm2']) + tuples = [tuple + for tuple in cart_product( + ['foo', 'bar'], [10, 20], [1.0, 1.1])] + index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2']) df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'], index=index) deleveled = df.reset_index() @@ -850,10 +852,9 @@ def _check_counts(frame, axis=0): assert_almost_equal(result.columns, ['A', 'B', 'C']) def test_count_level_series(self): - index = MultiIndex(levels=[['foo', 'bar', 'baz'], - ['one', 'two', 'three', 'four']], - labels=[[0, 0, 0, 2, 2], - [2, 0, 1, 1, 2]]) + index = MultiIndex(levels=[['foo', 'bar', 'baz'], ['one', 'two', + 'three', 'four']], + labels=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]]) s = Series(np.random.randn(len(index)), index=index) @@ -888,17 +889,17 @@ def test_get_level_number_out_of_bounds(self): def test_unstack(self): # just check that it works for now unstacked = self.ymd.unstack() - unstacked2 = unstacked.unstack() + unstacked.unstack() # test that ints work - unstacked = self.ymd.astype(int).unstack() + self.ymd.astype(int).unstack() # test that int32 work - unstacked = self.ymd.astype(np.int32).unstack() + self.ymd.astype(np.int32).unstack() def test_unstack_multiple_no_empty_columns(self): - index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0), - (1, 'baz', 1), (1, 'qux', 1)]) + index = MultiIndex.from_tuples([(0, 'foo', 0), (0, 'bar', 0), ( + 1, 'baz', 1), (1, 'qux', 1)]) s = Series(np.random.randn(4), index=index) @@ -973,16 +974,17 @@ def check(left, right): columns=['1st', '2nd', '3rd']) mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd', '3rd']], - labels=[np.tile(np.arange(2).repeat(3), 2), - np.tile(np.arange(3), 4)]) + labels=[np.tile( + np.arange(2).repeat(3), 2), np.tile( + np.arange(3), 4)]) left, right = df.stack(), Series(np.arange(12), index=mi) check(left, right) df.columns = ['1st', '2nd', '1st'] - mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd']], - labels=[np.tile(np.arange(2).repeat(3), 2), - np.tile([0, 1, 0], 4)]) + mi = MultiIndex(levels=[['a', 'b'], ['1st', '2nd']], labels=[np.tile( + np.arange(2).repeat(3), 2), np.tile( + [0, 1, 0], 4)]) left, right = df.stack(), Series(np.arange(12), index=mi) check(left, right) @@ -990,9 +992,10 @@ def check(left, right): tpls = ('a', 2), ('b', 1), ('a', 1), ('b', 2) df.index = MultiIndex.from_tuples(tpls) mi = MultiIndex(levels=[['a', 'b'], [1, 2], ['1st', '2nd']], - labels=[np.tile(np.arange(2).repeat(3), 2), - np.repeat([1, 0, 1], [3, 6, 3]), - np.tile([0, 1, 0], 4)]) + labels=[np.tile( + np.arange(2).repeat(3), 2), np.repeat( + [1, 0, 1], [3, 6, 3]), np.tile( + [0, 1, 0], 4)]) left, right = df.stack(), Series(np.arange(12), index=mi) check(left, right) @@ -1031,8 +1034,8 @@ def test_stack_mixed_dtype(self): self.assertEqual(stacked['bar'].dtype, np.float_) def test_unstack_bug(self): - df = DataFrame({'state': ['naive', 'naive', 'naive', - 'activ', 'activ', 'activ'], + df = DataFrame({'state': ['naive', 'naive', 'naive', 'activ', 'activ', + 'activ'], 'exp': ['a', 'b', 'b', 'b', 'a', 'a'], 'barcode': [1, 2, 3, 4, 1, 3], 'v': ['hi', 'hi', 'bye', 'bye', 'bye', 'peace'], @@ -1072,8 +1075,7 @@ def test_stack_unstack_multiple(self): unstacked = self.ymd.unstack(['year', 'month']) expected = self.ymd.unstack('year').unstack('month') assert_frame_equal(unstacked, expected) - self.assertEqual(unstacked.columns.names, - expected.columns.names) + self.assertEqual(unstacked.columns.names, expected.columns.names) # series s = self.ymd['A'] @@ -1126,7 +1128,8 @@ def test_unstack_period_series(self): result2 = s.unstack(level=1) result3 = s.unstack(level=0) - e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period') + e_idx = pd.PeriodIndex( + ['2013-01', '2013-02', '2013-03'], freq='M', name='period') expected = DataFrame({'A': [1, 3, 5], 'B': [2, 4, 6]}, index=e_idx, columns=['A', 'B']) expected.columns.name = 'str' @@ -1147,9 +1150,11 @@ def test_unstack_period_series(self): result2 = s.unstack(level=1) result3 = s.unstack(level=0) - e_idx = pd.PeriodIndex(['2013-01', '2013-02', '2013-03'], freq='M', name='period1') + e_idx = pd.PeriodIndex( + ['2013-01', '2013-02', '2013-03'], freq='M', name='period1') e_cols = pd.PeriodIndex(['2013-07', '2013-08', '2013-09', '2013-10', - '2013-11', '2013-12'], freq='M', name='period2') + '2013-11', '2013-12'], + freq='M', name='period2') expected = DataFrame([[np.nan, np.nan, np.nan, np.nan, 2, 1], [np.nan, np.nan, 4, 3, np.nan, np.nan], [6, 5, np.nan, np.nan, np.nan, np.nan]], @@ -1161,9 +1166,11 @@ def test_unstack_period_series(self): def test_unstack_period_frame(self): # GH 4342 - idx1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-02', '2014-02', '2014-01', '2014-01'], + idx1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-02', '2014-02', + '2014-01', '2014-01'], freq='M', name='period1') - idx2 = pd.PeriodIndex(['2013-12', '2013-12', '2014-02', '2013-10', '2013-10', '2014-02'], + idx2 = pd.PeriodIndex(['2013-12', '2013-12', '2014-02', '2013-10', + '2013-10', '2014-02'], freq='M', name='period2') value = {'A': [1, 2, 3, 4, 5, 6], 'B': [6, 5, 4, 3, 2, 1]} idx = pd.MultiIndex.from_arrays([idx1, idx2]) @@ -1185,7 +1192,8 @@ def test_unstack_period_frame(self): e_1 = pd.PeriodIndex(['2014-01', '2014-02', '2014-01', '2014-02'], freq='M', name='period1') - e_2 = pd.PeriodIndex(['2013-10', '2013-12', '2014-02'], freq='M', name='period2') + e_2 = pd.PeriodIndex( + ['2013-10', '2013-12', '2014-02'], freq='M', name='period2') e_cols = pd.MultiIndex.from_arrays(['A A B B'.split(), e_1]) expected = DataFrame([[5, 4, 2, 3], [1, 2, 6, 5], [6, 3, 1, 4]], index=e_2, columns=e_cols) @@ -1212,9 +1220,7 @@ def test_stack_multiple_bug(self): def test_stack_dropna(self): # GH #3997 - df = pd.DataFrame({'A': ['a1', 'a2'], - 'B': ['b1', 'b2'], - 'C': [1, 1]}) + df = pd.DataFrame({'A': ['a1', 'a2'], 'B': ['b1', 'b2'], 'C': [1, 1]}) df = df.set_index(['A', 'B']) stacked = df.unstack().stack(dropna=False) @@ -1225,8 +1231,8 @@ def test_stack_dropna(self): def test_unstack_multiple_hierarchical(self): df = DataFrame(index=[[0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 1, 1, 0, 0, 1, 1], - [0, 1, 0, 1, 0, 1, 0, 1]], + [0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, 0, 1 + ]], columns=[[0, 0, 1, 1], [0, 1, 0, 1]]) df.index.names = ['a', 'b', 'c'] @@ -1280,7 +1286,8 @@ def test_unstack_unobserved_keys(self): def test_groupby_corner(self): midx = MultiIndex(levels=[['foo'], ['bar'], ['baz']], - labels=[[0], [0], [0]], names=['one', 'two', 'three']) + labels=[[0], [0], [0]], + names=['one', 'two', 'three']) df = DataFrame([np.random.rand(4)], columns=['a', 'b', 'c', 'd'], index=midx) # should work @@ -1288,9 +1295,8 @@ def test_groupby_corner(self): def test_groupby_level_no_obs(self): # #1697 - midx = MultiIndex.from_tuples([('f1', 's1'), ('f1', 's2'), - ('f2', 's1'), ('f2', 's2'), - ('f3', 's1'), ('f3', 's2')]) + midx = MultiIndex.from_tuples([('f1', 's1'), ('f1', 's2'), ( + 'f2', 's1'), ('f2', 's2'), ('f3', 's1'), ('f3', 's2')]) df = DataFrame( [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) df1 = df.select(lambda u: u[0] in ['f2', 'f3'], axis=1) @@ -1309,7 +1315,8 @@ def test_join(self): self.assertFalse(np.isnan(joined.values).all()) - assert_frame_equal(joined, expected, check_names=False) # TODO what should join do with names ? + assert_frame_equal(joined, expected, check_names=False + ) # TODO what should join do with names ? def test_swaplevel(self): swapped = self.frame['A'].swaplevel(0, 1) @@ -1328,8 +1335,7 @@ def test_swaplevel(self): assert_frame_equal(swapped, exp) def test_swaplevel_panel(self): - panel = Panel({'ItemA': self.frame, - 'ItemB': self.frame * 2}) + panel = Panel({'ItemA': self.frame, 'ItemB': self.frame * 2}) result = panel.swaplevel(0, 1, axis='major') expected = panel.copy() @@ -1362,11 +1368,11 @@ def test_insert_index(self): self.assertTrue((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): - x = Series(data=[1, 2, 3], - index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])) + x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ( + "A", 2), ("B", 3)])) - y = Series(data=[4, 5, 6], - index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])) + y = Series(data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ( + "Z", 2), ("B", 3)])) res = x - y exp_index = x.index.union(y.index) @@ -1383,18 +1389,15 @@ def test_is_lexsorted(self): levels = [[0, 1], [0, 1, 2]] index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 1, 2]]) + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]) self.assertTrue(index.is_lexsorted()) index = MultiIndex(levels=levels, - labels=[[0, 0, 0, 1, 1, 1], - [0, 1, 2, 0, 2, 1]]) + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]) self.assertFalse(index.is_lexsorted()) index = MultiIndex(levels=levels, - labels=[[0, 0, 1, 0, 1, 1], - [0, 1, 0, 2, 2, 1]]) + labels=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]) self.assertFalse(index.is_lexsorted()) self.assertEqual(index.lexsort_depth, 0) @@ -1414,6 +1417,7 @@ def test_frame_getitem_view(self): def f(): df['foo']['one'] = 2 return df + self.assertRaises(com.SettingWithCopyError, f) try: @@ -1445,7 +1449,7 @@ def test_frame_getitem_not_sorted(self): def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] tuples = lzip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) @@ -1491,8 +1495,7 @@ def test_count(self): 'mad', 'std', 'var', 'sem'] def test_series_group_min_max(self): - for op, level, skipna in cart_product(self.AGG_FUNCTIONS, - lrange(2), + for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) @@ -1520,6 +1523,7 @@ def test_frame_group_ops(self): def aggf(x): pieces.append(x) return getattr(x, op)(skipna=skipna, axis=axis) + leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) @@ -1555,8 +1559,8 @@ def test_frame_any_all_group(self): assert_frame_equal(result, ex) def test_std_var_pass_ddof(self): - index = MultiIndex.from_arrays([np.arange(5).repeat(10), - np.tile(np.arange(10), 5)]) + index = MultiIndex.from_arrays([np.arange(5).repeat(10), np.tile( + np.arange(10), 5)]) df = DataFrame(np.random.randn(len(index), 5), index=index) for meth in ['var', 'std']: @@ -1588,7 +1592,8 @@ def test_groupby_multilevel(self): expected = self.ymd.groupby([k1, k2]).mean() - assert_frame_equal(result, expected, check_names=False) # TODO groupby with level_values drops names + assert_frame_equal(result, expected, check_names=False + ) # TODO groupby with level_values drops names self.assertEqual(result.index.names, self.ymd.index.names[:2]) result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean() @@ -1598,8 +1603,8 @@ def test_groupby_multilevel_with_transform(self): pass def test_multilevel_consolidate(self): - index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), - ('bar', 'one'), ('bar', 'two')]) + index = MultiIndex.from_tuples([('foo', 'one'), ('foo', 'two'), ( + 'bar', 'one'), ('bar', 'two')]) df = DataFrame(np.random.randn(4, 4), index=index, columns=index) df['Totals', ''] = df.sum(1) df = df.consolidate() @@ -1658,8 +1663,7 @@ def test_unstack_group_index_overflow(self): # test roundtrip stacked = result.stack() - assert_series_equal(s, - stacked.reindex(s.index)) + assert_series_equal(s, stacked.reindex(s.index)) # put it at beginning index = MultiIndex(levels=[[0, 1]] + [level] * 8, @@ -1671,8 +1675,8 @@ def test_unstack_group_index_overflow(self): # put it in middle index = MultiIndex(levels=[level] * 4 + [[0, 1]] + [level] * 4, - labels=([labels] * 4 + [np.arange(2).repeat(500)] - + [labels] * 4)) + labels=([labels] * 4 + [np.arange(2).repeat(500)] + + [labels] * 4)) s = Series(np.arange(1000), index=index) result = s.unstack(4) @@ -1682,12 +1686,11 @@ def test_getitem_lowerdim_corner(self): self.assertRaises(KeyError, self.frame.ix.__getitem__, (('bar', 'three'), 'B')) - # in theory should be inserting in a sorted space???? - self.frame.ix[('bar','three'),'B'] = 0 - self.assertEqual(self.frame.sortlevel().ix[('bar','three'),'B'], 0) + self.frame.ix[('bar', 'three'), 'B'] = 0 + self.assertEqual(self.frame.sortlevel().ix[('bar', 'three'), 'B'], 0) - #---------------------------------------------------------------------- + # --------------------------------------------------------------------- # AMBIGUOUS CASES! def test_partial_ix_missing(self): @@ -1706,7 +1709,7 @@ def test_partial_ix_missing(self): self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6)) self.assertRaises(Exception, self.ymd.ix.__getitem__, (2000, 6), 0) - #---------------------------------------------------------------------- + # --------------------------------------------------------------------- def test_to_html(self): self.ymd.columns.name = 'foo' @@ -1714,10 +1717,9 @@ def test_to_html(self): self.ymd.T.to_html() def test_level_with_tuples(self): - index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0), - ('foo', 'qux', 0)], - [0, 1]], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) + index = MultiIndex(levels=[[('foo', 'bar', 0), ('foo', 'baz', 0), ( + 'foo', 'qux', 0)], [0, 1]], + labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) @@ -1738,10 +1740,9 @@ def test_level_with_tuples(self): assert_frame_equal(result, expected) assert_frame_equal(result2, expected) - index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), - ('foo', 'qux')], - [0, 1]], - labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) + index = MultiIndex(levels=[[('foo', 'bar'), ('foo', 'baz'), ( + 'foo', 'qux')], [0, 1]], + labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]]) series = Series(np.random.randn(6), index=index) frame = DataFrame(np.random.randn(6, 4), index=index) @@ -1978,8 +1979,8 @@ def test_unicode_repr_level_names(self): def test_dataframe_insert_column_all_na(self): # GH #1534 - mix = MultiIndex.from_tuples( - [('1a', '2a'), ('1a', '2b'), ('1a', '2c')]) + mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c') + ]) df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix) s = Series({(1, 1): 1, (1, 2): 2}) df['new'] = s @@ -2006,21 +2007,23 @@ def test_set_column_scalar_with_ix(self): self.assertTrue((self.frame.ix[subset, 'B'] == 97).all()) def test_frame_dict_constructor_empty_series(self): - s1 = Series([1, 2, 3, 4], index=MultiIndex.from_tuples([(1, 2), (1, 3), - (2, 2), (2, 4)])) - s2 = Series([1, 2, 3, 4], - index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) + s1 = Series([ + 1, 2, 3, 4 + ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (2, 2), (2, 4)])) + s2 = Series([ + 1, 2, 3, 4 + ], index=MultiIndex.from_tuples([(1, 2), (1, 3), (3, 2), (3, 4)])) s3 = Series() # it works! - df = DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) - df = DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2}) + DataFrame({'foo': s1, 'bar': s2, 'baz': s3}) + DataFrame.from_dict({'foo': s1, 'baz': s3, 'bar': s2}) def test_indexing_ambiguity_bug_1678(self): - columns = MultiIndex.from_tuples([('Ohio', 'Green'), ('Ohio', 'Red'), - ('Colorado', 'Green')]) - index = MultiIndex.from_tuples( - [('a', 1), ('a', 2), ('b', 1), ('b', 2)]) + columns = MultiIndex.from_tuples([('Ohio', 'Green'), ('Ohio', 'Red'), ( + 'Colorado', 'Green')]) + index = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2) + ]) frame = DataFrame(np.arange(12).reshape((4, 3)), index=index, columns=columns) @@ -2090,8 +2093,8 @@ def test_assign_index_sequences(self): def test_tuples_have_na(self): index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], - labels=[[1, 1, 1, 1, -1, 0, 0, 0], - [0, 1, 2, 3, 0, 1, 2, 3]]) + labels=[[1, 1, 1, 1, -1, 0, 0, 0], [0, 1, 2, 3, 0, + 1, 2, 3]]) self.assertTrue(isnull(index[4][0])) self.assertTrue(isnull(index.values[4][0])) @@ -2099,9 +2102,9 @@ def test_tuples_have_na(self): def test_duplicate_groupby_issues(self): idx_tp = [('600809', '20061231'), ('600809', '20070331'), ('600809', '20070630'), ('600809', '20070331')] - dt = ['demo','demo','demo','demo'] + dt = ['demo', 'demo', 'demo', 'demo'] - idx = MultiIndex.from_tuples(idx_tp,names = ['STK_ID','RPT_Date']) + idx = MultiIndex.from_tuples(idx_tp, names=['STK_ID', 'RPT_Date']) s = Series(dt, index=idx) result = s.groupby(s.index).first() @@ -2109,39 +2112,43 @@ def test_duplicate_groupby_issues(self): def test_duplicate_mi(self): # GH 4516 - df = DataFrame([['foo','bar',1.0,1],['foo','bar',2.0,2],['bah','bam',3.0,3], - ['bah','bam',4.0,4],['foo','bar',5.0,5],['bah','bam',6.0,6]], + df = DataFrame([['foo', 'bar', 1.0, 1], ['foo', 'bar', 2.0, 2], + ['bah', 'bam', 3.0, 3], + ['bah', 'bam', 4.0, 4], ['foo', 'bar', 5.0, 5], + ['bah', 'bam', 6.0, 6]], columns=list('ABCD')) - df = df.set_index(['A','B']) + df = df.set_index(['A', 'B']) df = df.sortlevel(0) - expected = DataFrame([['foo','bar',1.0,1],['foo','bar',2.0,2],['foo','bar',5.0,5]], - columns=list('ABCD')).set_index(['A','B']) - result = df.loc[('foo','bar')] - assert_frame_equal(result,expected) + expected = DataFrame([['foo', 'bar', 1.0, 1], ['foo', 'bar', 2.0, 2], + ['foo', 'bar', 5.0, 5]], + columns=list('ABCD')).set_index(['A', 'B']) + result = df.loc[('foo', 'bar')] + assert_frame_equal(result, expected) def test_duplicated_drop_duplicates(self): # GH 4060 - idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2 ,3], [1, 1, 1, 1, 2, 2])) + idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2])) - expected = np.array([False, False, False, True, False, False], dtype=bool) + expected = np.array( + [False, False, False, True, False, False], dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([1, 2, 3, 2 ,3], [1, 1, 1, 2, 2])) + expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(), expected) expected = np.array([True, False, False, False, False, False]) duplicated = idx.duplicated(keep='last') tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2])) + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep='last'), expected) expected = np.array([True, False, False, True, False, False]) duplicated = idx.duplicated(keep=False) tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 2 ,3], [1, 1, 2, 2])) + expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2])) tm.assert_index_equal(idx.drop_duplicates(keep=False), expected) # deprecate take_last @@ -2150,9 +2157,10 @@ def test_duplicated_drop_duplicates(self): duplicated = idx.duplicated(take_last=True) tm.assert_numpy_array_equal(duplicated, expected) self.assertTrue(duplicated.dtype == bool) - expected = MultiIndex.from_arrays(([2, 3, 1, 2 ,3], [1, 1, 1, 2, 2])) + expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2])) with tm.assert_produces_warning(FutureWarning): - tm.assert_index_equal(idx.drop_duplicates(take_last=True), expected) + tm.assert_index_equal( + idx.drop_duplicates(take_last=True), expected) def test_multiindex_set_index(self): # segfault in #3308 @@ -2166,11 +2174,16 @@ def test_multiindex_set_index(self): df.set_index(index) def test_datetimeindex(self): - idx1 = pd.DatetimeIndex(['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00'] * 2, tz='Asia/Tokyo') - idx2 = pd.date_range('2010/01/01', periods=6, freq='M', tz='US/Eastern') + idx1 = pd.DatetimeIndex( + ['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00' + ] * 2, tz='Asia/Tokyo') + idx2 = pd.date_range('2010/01/01', periods=6, freq='M', + tz='US/Eastern') idx = MultiIndex.from_arrays([idx1, idx2]) - expected1 = pd.DatetimeIndex(['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00'], tz='Asia/Tokyo') + expected1 = pd.DatetimeIndex( + ['2013-04-01 9:00', '2013-04-02 9:00', '2013-04-03 9:00' + ], tz='Asia/Tokyo') self.assertTrue(idx.levels[0].equals(expected1)) self.assertTrue(idx.levels[1].equals(idx2)) @@ -2181,10 +2194,11 @@ def test_datetimeindex(self): date2 = datetime.datetime.today() date3 = Timestamp.today() - for d1, d2 in itertools.product([date1,date2,date3],[date1,date2,date3]): - index = pd.MultiIndex.from_product([[d1],[d2]]) - self.assertIsInstance(index.levels[0],pd.DatetimeIndex) - self.assertIsInstance(index.levels[1],pd.DatetimeIndex) + for d1, d2 in itertools.product( + [date1, date2, date3], [date1, date2, date3]): + index = pd.MultiIndex.from_product([[d1], [d2]]) + self.assertIsInstance(index.levels[0], pd.DatetimeIndex) + self.assertIsInstance(index.levels[1], pd.DatetimeIndex) def test_constructor_with_tz(self): @@ -2203,15 +2217,18 @@ def test_constructor_with_tz(self): def test_set_index_datetime(self): # GH 3950 - df = pd.DataFrame({'label':['a', 'a', 'a', 'b', 'b', 'b'], - 'datetime':['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'], - 'value':range(6)}) + df = pd.DataFrame( + {'label': ['a', 'a', 'a', 'b', 'b', 'b'], + 'datetime': ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00', '2011-07-19 07:00:00', + '2011-07-19 08:00:00', '2011-07-19 09:00:00'], + 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') - expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00']) + expected = pd.DatetimeIndex( + ['2011-07-19 07:00:00', '2011-07-19 08:00:00', + '2011-07-19 09:00:00']) expected = expected.tz_localize('UTC').tz_convert('US/Pacific') df = df.set_index('label', append=True) @@ -2222,13 +2239,14 @@ def test_set_index_datetime(self): self.assertTrue(df.index.levels[0].equals(pd.Index(['a', 'b']))) self.assertTrue(df.index.levels[1].equals(expected)) - df = DataFrame(np.random.random(6)) idx1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', '2011-07-19 09:00:00', '2011-07-19 07:00:00', - '2011-07-19 08:00:00', '2011-07-19 09:00:00'], tz='US/Eastern') - idx2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-01 09:00', '2012-04-01 09:00', - '2012-04-02 09:00', '2012-04-02 09:00', '2012-04-02 09:00'], + '2011-07-19 08:00:00', '2011-07-19 09:00:00'], + tz='US/Eastern') + idx2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-01 09:00', + '2012-04-01 09:00', '2012-04-02 09:00', + '2012-04-02 09:00', '2012-04-02 09:00'], tz='US/Eastern') idx3 = pd.date_range('2011-01-01 09:00', periods=6, tz='Asia/Tokyo') @@ -2236,9 +2254,11 @@ def test_set_index_datetime(self): df = df.set_index(idx2, append=True) df = df.set_index(idx3, append=True) - expected1 = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', - '2011-07-19 09:00:00'], tz='US/Eastern') - expected2 = pd.DatetimeIndex(['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern') + expected1 = pd.DatetimeIndex(['2011-07-19 07:00:00', + '2011-07-19 08:00:00', + '2011-07-19 09:00:00'], tz='US/Eastern') + expected2 = pd.DatetimeIndex( + ['2012-04-01 09:00', '2012-04-02 09:00'], tz='US/Eastern') self.assertTrue(df.index.levels[0].equals(expected1)) self.assertTrue(df.index.levels[1].equals(expected2)) @@ -2252,69 +2272,90 @@ def test_set_index_datetime(self): def test_reset_index_datetime(self): # GH 3950 for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern']: - idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, name='idx1') - idx2 = pd.Index(range(5), name='idx2',dtype='int64') + idx1 = pd.date_range('1/1/2011', periods=5, freq='D', tz=tz, + name='idx1') + idx2 = pd.Index(range(5), name='idx2', dtype='int64') idx = pd.MultiIndex.from_arrays([idx1, idx2]) - df = pd.DataFrame({'a': np.arange(5,dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + df = pd.DataFrame( + {'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), datetime.datetime(2011, 1, 2), datetime.datetime(2011, 1, 3), datetime.datetime(2011, 1, 4), datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5,dtype='int64'), - 'a': np.arange(5,dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'a', 'b']) - expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz)) + 'idx2': np.arange(5, dtype='int64'), + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'a', 'b']) + expected['idx1'] = expected['idx1'].apply( + lambda d: pd.Timestamp(d, tz=tz)) assert_frame_equal(df.reset_index(), expected) - idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', tz='Europe/Paris', name='idx3') + idx3 = pd.date_range('1/1/2012', periods=5, freq='MS', + tz='Europe/Paris', name='idx3') idx = pd.MultiIndex.from_arrays([idx1, idx2, idx3]) - df = pd.DataFrame({'a': np.arange(5,dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) + df = pd.DataFrame( + {'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx) expected = pd.DataFrame({'idx1': [datetime.datetime(2011, 1, 1), datetime.datetime(2011, 1, 2), datetime.datetime(2011, 1, 3), datetime.datetime(2011, 1, 4), datetime.datetime(2011, 1, 5)], - 'idx2': np.arange(5,dtype='int64'), + 'idx2': np.arange(5, dtype='int64'), 'idx3': [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 2, 1), datetime.datetime(2012, 3, 1), datetime.datetime(2012, 4, 1), datetime.datetime(2012, 5, 1)], - 'a': np.arange(5,dtype='int64'), 'b': ['A', 'B', 'C', 'D', 'E']}, - columns=['idx1', 'idx2', 'idx3', 'a', 'b']) - expected['idx1'] = expected['idx1'].apply(lambda d: pd.Timestamp(d, tz=tz)) - expected['idx3'] = expected['idx3'].apply(lambda d: pd.Timestamp(d, tz='Europe/Paris')) + 'a': np.arange(5, dtype='int64'), + 'b': ['A', 'B', 'C', 'D', 'E']}, + columns=['idx1', 'idx2', 'idx3', 'a', 'b']) + expected['idx1'] = expected['idx1'].apply( + lambda d: pd.Timestamp(d, tz=tz)) + expected['idx3'] = expected['idx3'].apply( + lambda d: pd.Timestamp(d, tz='Europe/Paris')) assert_frame_equal(df.reset_index(), expected) # GH 7793 - idx = pd.MultiIndex.from_product([['a','b'], pd.date_range('20130101', periods=3, tz=tz)]) - df = pd.DataFrame(np.arange(6,dtype='int64').reshape(6,1), columns=['a'], index=idx) + idx = pd.MultiIndex.from_product([['a', 'b'], pd.date_range( + '20130101', periods=3, tz=tz)]) + df = pd.DataFrame( + np.arange(6, dtype='int64').reshape( + 6, 1), columns=['a'], index=idx) expected = pd.DataFrame({'level_0': 'a a a b b b'.split(), - 'level_1': [datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 2), - datetime.datetime(2013, 1, 3)] * 2, + 'level_1': [ + datetime.datetime(2013, 1, 1), + datetime.datetime(2013, 1, 2), + datetime.datetime(2013, 1, 3)] * 2, 'a': np.arange(6, dtype='int64')}, - columns=['level_0', 'level_1', 'a']) - expected['level_1'] = expected['level_1'].apply(lambda d: pd.Timestamp(d, offset='D', tz=tz)) + columns=['level_0', 'level_1', 'a']) + expected['level_1'] = expected['level_1'].apply( + lambda d: pd.Timestamp(d, offset='D', tz=tz)) assert_frame_equal(df.reset_index(), expected) def test_reset_index_period(self): # GH 7746 - idx = pd.MultiIndex.from_product([pd.period_range('20130101', periods=3, freq='M'), - ['a','b','c']], names=['month', 'feature']) - - df = pd.DataFrame(np.arange(9,dtype='int64').reshape(-1,1), index=idx, columns=['a']) - expected = pd.DataFrame({'month': [pd.Period('2013-01', freq='M')] * 3 + - [pd.Period('2013-02', freq='M')] * 3 + - [pd.Period('2013-03', freq='M')] * 3, - 'feature': ['a', 'b', 'c'] * 3, - 'a': np.arange(9, dtype='int64')}, - columns=['month', 'feature', 'a']) + idx = pd.MultiIndex.from_product([pd.period_range('20130101', + periods=3, freq='M'), + ['a', 'b', 'c']], + names=['month', 'feature']) + + df = pd.DataFrame(np.arange(9, dtype='int64') + .reshape(-1, 1), + index=idx, columns=['a']) + expected = pd.DataFrame({ + 'month': ([pd.Period('2013-01', freq='M')] * 3 + + [pd.Period('2013-02', freq='M')] * 3 + + [pd.Period('2013-03', freq='M')] * 3), + 'feature': ['a', 'b', 'c'] * 3, + 'a': np.arange(9, dtype='int64') + }, columns=['month', 'feature', 'a']) assert_frame_equal(df.reset_index(), expected) def test_set_index_period(self): @@ -2344,15 +2385,12 @@ def test_set_index_period(self): def test_repeat(self): # GH 9361 # fixed by # GH 7891 - m_idx = pd.MultiIndex.from_tuples([(1, 2), (3, 4), - (5, 6), (7, 8)]) + m_idx = pd.MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)]) data = ['a', 'b', 'c', 'd'] m_df = pd.Series(data, index=m_idx) - assert m_df.repeat(3).shape == (3 * len(data),) + assert m_df.repeat(3).shape == (3 * len(data), ) if __name__ == '__main__': - - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b9db95fe06a43..ecd3fa6ed53ee 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -12,6 +12,7 @@ use_bn = nanops._USE_BOTTLENECK + class TestnanopsDataFrame(tm.TestCase): def setUp(self): @@ -22,7 +23,7 @@ def setUp(self): self.arr_float = np.random.randn(*self.arr_shape) self.arr_float1 = np.random.randn(*self.arr_shape) - self.arr_complex = self.arr_float + self.arr_float1*1j + self.arr_complex = self.arr_float + self.arr_float1 * 1j self.arr_int = np.random.randint(-10, 10, self.arr_shape) self.arr_bool = np.random.randint(0, 2, self.arr_shape) == 0 self.arr_str = np.abs(self.arr_float).astype('S') @@ -38,37 +39,31 @@ def setUp(self): self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1]) self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan]) - self.arr_inf = self.arr_float*np.inf + self.arr_inf = self.arr_float * np.inf self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf]) self.arr_float1_inf = np.vstack([self.arr_float1, self.arr_inf]) self.arr_inf_float1 = np.vstack([self.arr_inf, self.arr_float1]) self.arr_inf_inf = np.vstack([self.arr_inf, self.arr_inf]) self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf]) - self.arr_float_nan_inf = np.vstack([self.arr_float, - self.arr_nan, + self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan, self.arr_inf]) - self.arr_nan_float1_inf = np.vstack([self.arr_float, - self.arr_inf, + self.arr_nan_float1_inf = np.vstack([self.arr_float, self.arr_inf, self.arr_nan]) - self.arr_nan_nan_inf = np.vstack([self.arr_nan, - self.arr_nan, + self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan, self.arr_inf]) - self.arr_obj = np.vstack([self.arr_float.astype('O'), - self.arr_int.astype('O'), - self.arr_bool.astype('O'), - self.arr_complex.astype('O'), - self.arr_str.astype('O'), - self.arr_utf.astype('O'), - self.arr_date.astype('O'), - self.arr_tdelta.astype('O')]) - - self.arr_nan_nanj = self.arr_nan + self.arr_nan*1j + self.arr_obj = np.vstack([self.arr_float.astype( + 'O'), self.arr_int.astype('O'), self.arr_bool.astype( + 'O'), self.arr_complex.astype('O'), self.arr_str.astype( + 'O'), self.arr_utf.astype('O'), self.arr_date.astype('O'), + self.arr_tdelta.astype('O')]) + + self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j self.arr_complex_nan = np.vstack([self.arr_complex, self.arr_nan_nanj]) - self.arr_nan_infj = self.arr_inf*1j + self.arr_nan_infj = self.arr_inf * 1j self.arr_complex_nan_infj = np.vstack([self.arr_complex, - self.arr_nan_infj]) + self.arr_nan_infj]) self.arr_float_2d = self.arr_float[:, :, 0] self.arr_float1_2d = self.arr_float1[:, :, 0] @@ -136,7 +131,8 @@ def _coerce_tds(targ, res): return targ, res try: - if axis != 0 and hasattr(targ, 'shape') and targ.ndim and targ.shape != res.shape: + if axis != 0 and hasattr( + targ, 'shape') and targ.ndim and targ.shape != res.shape: res = np.split(res, [targ.shape[0]], axis=0)[0] except: targ, res = _coerce_tds(targ, res) @@ -176,9 +172,9 @@ def _coerce_tds(targ, res): tm.assert_almost_equal(targ.real, res.real) tm.assert_almost_equal(targ.imag, res.imag) - def check_fun_data(self, testfunc, targfunc, - testarval, targarval, targarnanval, **kwargs): - for axis in list(range(targarval.ndim))+[None]: + def check_fun_data(self, testfunc, targfunc, testarval, targarval, + targarnanval, **kwargs): + for axis in list(range(targarval.ndim)) + [None]: for skipna in [False, True]: targartempval = targarval if skipna else targarnanval try: @@ -196,9 +192,8 @@ def check_fun_data(self, testfunc, targfunc, res = testfunc(testarval, **kwargs) self.check_results(targ, res, axis) except BaseException as exc: - exc.args += ('axis: %s of %s' % (axis, testarval.ndim-1), - 'skipna: %s' % skipna, - 'kwargs: %s' % kwargs) + exc.args += ('axis: %s of %s' % (axis, testarval.ndim - 1), + 'skipna: %s' % skipna, 'kwargs: %s' % kwargs) raise if testarval.ndim <= 1: @@ -210,13 +205,11 @@ def check_fun_data(self, testfunc, targfunc, targarnanval2 = np.take(targarnanval, 0, axis=-1) except ValueError: return - self.check_fun_data(testfunc, targfunc, - testarval2, targarval2, targarnanval2, - **kwargs) + self.check_fun_data(testfunc, targfunc, testarval2, targarval2, + targarnanval2, **kwargs) - def check_fun(self, testfunc, targfunc, - testar, targar=None, targarnan=None, - **kwargs): + def check_fun(self, testfunc, targfunc, testar, targar=None, + targarnan=None, **kwargs): if targar is None: targar = testar if targarnan is None: @@ -225,25 +218,22 @@ def check_fun(self, testfunc, targfunc, targarval = getattr(self, targar) targarnanval = getattr(self, targarnan) try: - self.check_fun_data(testfunc, targfunc, - testarval, targarval, targarnanval, **kwargs) + self.check_fun_data(testfunc, targfunc, testarval, targarval, + targarnanval, **kwargs) except BaseException as exc: - exc.args += ('testar: %s' % testar, - 'targar: %s' % targar, + exc.args += ('testar: %s' % testar, 'targar: %s' % targar, 'targarnan: %s' % targarnan) raise - def check_funs(self, testfunc, targfunc, - allow_complex=True, allow_all_nan=True, allow_str=True, - allow_date=True, allow_tdelta=True, allow_obj=True, - **kwargs): + def check_funs(self, testfunc, targfunc, allow_complex=True, + allow_all_nan=True, allow_str=True, allow_date=True, + allow_tdelta=True, allow_obj=True, **kwargs): self.check_fun(testfunc, targfunc, 'arr_float', **kwargs) self.check_fun(testfunc, targfunc, 'arr_float_nan', 'arr_float', **kwargs) self.check_fun(testfunc, targfunc, 'arr_int', **kwargs) self.check_fun(testfunc, targfunc, 'arr_bool', **kwargs) - objs = [self.arr_float.astype('O'), - self.arr_int.astype('O'), + objs = [self.arr_float.astype('O'), self.arr_int.astype('O'), self.arr_bool.astype('O')] if allow_all_nan: @@ -251,8 +241,8 @@ def check_funs(self, testfunc, targfunc, if allow_complex: self.check_fun(testfunc, targfunc, 'arr_complex', **kwargs) - self.check_fun(testfunc, targfunc, - 'arr_complex_nan', 'arr_complex', **kwargs) + self.check_fun(testfunc, targfunc, 'arr_complex_nan', + 'arr_complex', **kwargs) if allow_all_nan: self.check_fun(testfunc, targfunc, 'arr_nan_nanj', **kwargs) objs += [self.arr_complex.astype('O')] @@ -260,8 +250,7 @@ def check_funs(self, testfunc, targfunc, if allow_str: self.check_fun(testfunc, targfunc, 'arr_str', **kwargs) self.check_fun(testfunc, targfunc, 'arr_utf', **kwargs) - objs += [self.arr_str.astype('O'), - self.arr_utf.astype('O')] + objs += [self.arr_str.astype('O'), self.arr_utf.astype('O')] if allow_date: try: @@ -287,21 +276,26 @@ def check_funs(self, testfunc, targfunc, # counterparts, so the numpy functions need to be given something # else if allow_obj == 'convert': - targfunc = partial(self._badobj_wrap, - func=targfunc, allow_complex=allow_complex) + targfunc = partial(self._badobj_wrap, func=targfunc, + allow_complex=allow_complex) self.check_fun(testfunc, targfunc, 'arr_obj', **kwargs) - def check_funs_ddof(self, testfunc, targfunc, - allow_complex=True, allow_all_nan=True, allow_str=True, - allow_date=False, allow_tdelta=False, allow_obj=True,): + def check_funs_ddof(self, + testfunc, + targfunc, + allow_complex=True, + allow_all_nan=True, + allow_str=True, + allow_date=False, + allow_tdelta=False, + allow_obj=True, ): for ddof in range(3): try: - self.check_funs(testfunc, targfunc, - allow_complex, allow_all_nan, allow_str, - allow_date, allow_tdelta, allow_obj, - ddof=ddof) + self.check_funs(testfunc, targfunc, allow_complex, + allow_all_nan, allow_str, allow_date, + allow_tdelta, allow_obj, ddof=ddof) except BaseException as exc: - exc.args += ('ddof %s' % ddof,) + exc.args += ('ddof %s' % ddof, ) raise def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): @@ -313,21 +307,21 @@ def _badobj_wrap(self, value, func, allow_complex=True, **kwargs): return func(value, **kwargs) def test_nanany(self): - self.check_funs(nanops.nanany, np.any, - allow_all_nan=False, allow_str=False, allow_date=False, allow_tdelta=False) + self.check_funs(nanops.nanany, np.any, allow_all_nan=False, + allow_str=False, allow_date=False, allow_tdelta=False) def test_nanall(self): - self.check_funs(nanops.nanall, np.all, - allow_all_nan=False, allow_str=False, allow_date=False, allow_tdelta=False) + self.check_funs(nanops.nanall, np.all, allow_all_nan=False, + allow_str=False, allow_date=False, allow_tdelta=False) def test_nansum(self): - self.check_funs(nanops.nansum, np.sum, - allow_str=False, allow_date=False, allow_tdelta=True) + self.check_funs(nanops.nansum, np.sum, allow_str=False, + allow_date=False, allow_tdelta=True) def test_nanmean(self): - self.check_funs(nanops.nanmean, np.mean, - allow_complex=False, allow_obj=False, - allow_str=False, allow_date=False, allow_tdelta=True) + self.check_funs(nanops.nanmean, np.mean, allow_complex=False, + allow_obj=False, allow_str=False, allow_date=False, + allow_tdelta=True) def test_nanmean_overflow(self): # GH 10155 @@ -348,7 +342,7 @@ def test_nanmean_overflow(self): def test_returned_dtype(self): dtypes = [np.int16, np.int32, np.int64, np.float32, np.float64] - if hasattr(np,'float128'): + if hasattr(np, 'float128'): dtypes.append(np.float128) for dtype in dtypes: @@ -358,44 +352,38 @@ def test_returned_dtype(self): for method in group_a + group_b: result = getattr(s, method)() if is_integer_dtype(dtype) and method in group_a: - self.assertTrue(result.dtype == np.float64, - "return dtype expected from %s is np.float64, got %s instead" % (method, result.dtype)) + self.assertTrue( + result.dtype == np.float64, + "return dtype expected from %s is np.float64, " + "got %s instead" % (method, result.dtype)) else: - self.assertTrue(result.dtype == dtype, - "return dtype expected from %s is %s, got %s instead" % (method, dtype, result.dtype)) + self.assertTrue( + result.dtype == dtype, + "return dtype expected from %s is %s, " + "got %s instead" % (method, dtype, result.dtype)) def test_nanmedian(self): with warnings.catch_warnings(record=True): - self.check_funs(nanops.nanmedian, np.median, - allow_complex=False, allow_str=False, allow_date=False, - allow_tdelta=True, - allow_obj='convert') + self.check_funs(nanops.nanmedian, np.median, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=True, allow_obj='convert') def test_nanvar(self): - self.check_funs_ddof(nanops.nanvar, np.var, - allow_complex=False, - allow_str=False, - allow_date=False, - allow_tdelta=True, - allow_obj='convert') + self.check_funs_ddof(nanops.nanvar, np.var, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=True, allow_obj='convert') def test_nanstd(self): - self.check_funs_ddof(nanops.nanstd, np.std, - allow_complex=False, - allow_str=False, - allow_date=False, - allow_tdelta=True, - allow_obj='convert') + self.check_funs_ddof(nanops.nanstd, np.std, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=True, allow_obj='convert') def test_nansem(self): tm.skip_if_no_package('scipy.stats') from scipy.stats import sem - self.check_funs_ddof(nanops.nansem, sem, - allow_complex=False, - allow_str=False, - allow_date=False, - allow_tdelta=True, - allow_obj='convert') + self.check_funs_ddof(nanops.nansem, sem, allow_complex=False, + allow_str=False, allow_date=False, + allow_tdelta=True, allow_obj='convert') def _minmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -405,13 +393,11 @@ def _minmax_wrap(self, value, axis=None, func=None): def test_nanmin(self): func = partial(self._minmax_wrap, func=np.min) - self.check_funs(nanops.nanmin, func, - allow_str=False, allow_obj=False) + self.check_funs(nanops.nanmin, func, allow_str=False, allow_obj=False) def test_nanmax(self): func = partial(self._minmax_wrap, func=np.max) - self.check_funs(nanops.nanmax, func, - allow_str=False, allow_obj=False) + self.check_funs(nanops.nanmax, func, allow_str=False, allow_obj=False) def _argminmax_wrap(self, value, axis=None, func=None): res = func(value, axis) @@ -426,21 +412,18 @@ def _argminmax_wrap(self, value, axis=None, func=None): def test_nanargmax(self): func = partial(self._argminmax_wrap, func=np.argmax) - self.check_funs(nanops.nanargmax, func, - allow_str=False, allow_obj=False, - allow_date=True, - allow_tdelta=True) + self.check_funs(nanops.nanargmax, func, allow_str=False, + allow_obj=False, allow_date=True, allow_tdelta=True) def test_nanargmin(self): func = partial(self._argminmax_wrap, func=np.argmin) if tm.sys.version_info[0:2] == (2, 6): - self.check_funs(nanops.nanargmin, func, - allow_date=True, - allow_tdelta=True, - allow_str=False, allow_obj=False) + self.check_funs(nanops.nanargmin, func, allow_date=True, + allow_tdelta=True, allow_str=False, + allow_obj=False) else: - self.check_funs(nanops.nanargmin, func, - allow_str=False, allow_obj=False) + self.check_funs(nanops.nanargmin, func, allow_str=False, + allow_obj=False) def _skew_kurt_wrap(self, values, axis=None, func=None): if not isinstance(values.dtype.type, np.floating): @@ -458,53 +441,45 @@ def test_nanskew(self): tm.skip_if_no_package('scipy.stats') from scipy.stats import skew func = partial(self._skew_kurt_wrap, func=skew) - self.check_funs(nanops.nanskew, func, - allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False) + self.check_funs(nanops.nanskew, func, allow_complex=False, + allow_str=False, allow_date=False, allow_tdelta=False) def test_nankurt(self): tm.skip_if_no_package('scipy.stats') from scipy.stats import kurtosis func1 = partial(kurtosis, fisher=True) func = partial(self._skew_kurt_wrap, func=func1) - self.check_funs(nanops.nankurt, func, - allow_complex=False, allow_str=False, allow_date=False, allow_tdelta=False) + self.check_funs(nanops.nankurt, func, allow_complex=False, + allow_str=False, allow_date=False, allow_tdelta=False) def test_nanprod(self): - self.check_funs(nanops.nanprod, np.prod, - allow_str=False, allow_date=False, allow_tdelta=False) + self.check_funs(nanops.nanprod, np.prod, allow_str=False, + allow_date=False, allow_tdelta=False) def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): - res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, - **kwargs) + res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs) res01 = checkfun(self.arr_float_2d, self.arr_float1_2d, - min_periods=len(self.arr_float_2d)-1, - **kwargs) + min_periods=len(self.arr_float_2d) - 1, **kwargs) tm.assert_almost_equal(targ0, res00) tm.assert_almost_equal(targ0, res01) res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, **kwargs) res11 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d, - min_periods=len(self.arr_float_2d)-1, - **kwargs) + min_periods=len(self.arr_float_2d) - 1, **kwargs) tm.assert_almost_equal(targ1, res10) tm.assert_almost_equal(targ1, res11) targ2 = np.nan - res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, - **kwargs) - res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, - **kwargs) - res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, - **kwargs) + res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs) + res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs) + res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs) res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, **kwargs) res24 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d, - min_periods=len(self.arr_float_2d)-1, - **kwargs) + min_periods=len(self.arr_float_2d) - 1, **kwargs) res25 = checkfun(self.arr_float_2d, self.arr_float1_2d, - min_periods=len(self.arr_float_2d)+1, - **kwargs) + min_periods=len(self.arr_float_2d) + 1, **kwargs) tm.assert_almost_equal(targ2, res20) tm.assert_almost_equal(targ2, res21) tm.assert_almost_equal(targ2, res22) @@ -513,42 +488,29 @@ def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs): tm.assert_almost_equal(targ2, res25) def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs): - res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, - **kwargs) + res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs) res01 = checkfun(self.arr_float_1d, self.arr_float1_1d, - min_periods=len(self.arr_float_1d)-1, - **kwargs) + min_periods=len(self.arr_float_1d) - 1, **kwargs) tm.assert_almost_equal(targ0, res00) tm.assert_almost_equal(targ0, res01) - res10 = checkfun(self.arr_float_nan_1d, - self.arr_float1_nan_1d, - **kwargs) - res11 = checkfun(self.arr_float_nan_1d, - self.arr_float1_nan_1d, - min_periods=len(self.arr_float_1d)-1, + res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, **kwargs) + res11 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d, + min_periods=len(self.arr_float_1d) - 1, **kwargs) tm.assert_almost_equal(targ1, res10) tm.assert_almost_equal(targ1, res11) targ2 = np.nan - res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, - **kwargs) - res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, - **kwargs) - res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, - **kwargs) - res23 = checkfun(self.arr_float_nan_1d, - self.arr_nan_float1_1d, - **kwargs) - res24 = checkfun(self.arr_float_nan_1d, - self.arr_nan_float1_1d, - min_periods=len(self.arr_float_1d)-1, - **kwargs) - res25 = checkfun(self.arr_float_1d, - self.arr_float1_1d, - min_periods=len(self.arr_float_1d)+1, + res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs) + res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs) + res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs) + res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, **kwargs) + res24 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d, + min_periods=len(self.arr_float_1d) - 1, **kwargs) + res25 = checkfun(self.arr_float_1d, self.arr_float1_1d, + min_periods=len(self.arr_float_1d) + 1, **kwargs) tm.assert_almost_equal(targ2, res20) tm.assert_almost_equal(targ2, res21) tm.assert_almost_equal(targ2, res22) @@ -636,7 +598,7 @@ def check_nancomp(self, checkfun, targ0): res2 = checkfun(arr_float_nan, arr_nan_float1) tm.assert_almost_equal(targ2, res2) except Exception as exc: - exc.args += ('ndim: %s' % arr_float.ndim,) + exc.args += ('ndim: %s' % arr_float.ndim, ) raise try: @@ -684,7 +646,7 @@ def check_bool(self, func, value, correct, *args, **kwargs): else: self.assertFalse(res0) except BaseException as exc: - exc.args += ('dim: %s' % getattr(value, 'ndim', value),) + exc.args += ('dim: %s' % getattr(value, 'ndim', value), ) raise if not hasattr(value, 'ndim'): break @@ -694,26 +656,15 @@ def check_bool(self, func, value, correct, *args, **kwargs): break def test__has_infs(self): - pairs = [('arr_complex', False), - ('arr_int', False), - ('arr_bool', False), - ('arr_str', False), - ('arr_utf', False), - ('arr_complex', False), - ('arr_complex_nan', False), - - ('arr_nan_nanj', False), - ('arr_nan_infj', True), + pairs = [('arr_complex', False), ('arr_int', False), + ('arr_bool', False), ('arr_str', False), ('arr_utf', False), + ('arr_complex', False), ('arr_complex_nan', False), + ('arr_nan_nanj', False), ('arr_nan_infj', True), ('arr_complex_nan_infj', True)] - pairs_float = [('arr_float', False), - ('arr_nan', False), - ('arr_float_nan', False), - ('arr_nan_nan', False), - - ('arr_float_inf', True), - ('arr_inf', True), - ('arr_nan_inf', True), - ('arr_float_nan_inf', True), + pairs_float = [('arr_float', False), ('arr_nan', False), + ('arr_float_nan', False), ('arr_nan_nan', False), + ('arr_float_inf', True), ('arr_inf', True), + ('arr_nan_inf', True), ('arr_float_nan_inf', True), ('arr_nan_nan_inf', True)] for arr, correct in pairs: @@ -721,7 +672,7 @@ def test__has_infs(self): try: self.check_bool(nanops._has_infs, val, correct) except BaseException as exc: - exc.args += (arr,) + exc.args += (arr, ) raise for arr, correct in pairs_float: @@ -731,40 +682,32 @@ def test__has_infs(self): self.check_bool(nanops._has_infs, val.astype('f4'), correct) self.check_bool(nanops._has_infs, val.astype('f2'), correct) except BaseException as exc: - exc.args += (arr,) + exc.args += (arr, ) raise def test__isfinite(self): - pairs = [('arr_complex', False), - ('arr_int', False), - ('arr_bool', False), - ('arr_str', False), - ('arr_utf', False), - ('arr_complex', False), - ('arr_complex_nan', True), - - ('arr_nan_nanj', True), - ('arr_nan_infj', True), + pairs = [('arr_complex', False), ('arr_int', False), + ('arr_bool', False), ('arr_str', False), ('arr_utf', False), + ('arr_complex', False), ('arr_complex_nan', True), + ('arr_nan_nanj', True), ('arr_nan_infj', True), ('arr_complex_nan_infj', True)] - pairs_float = [('arr_float', False), - ('arr_nan', True), - ('arr_float_nan', True), - ('arr_nan_nan', True), - - ('arr_float_inf', True), - ('arr_inf', True), - ('arr_nan_inf', True), - ('arr_float_nan_inf', True), + pairs_float = [('arr_float', False), ('arr_nan', True), + ('arr_float_nan', True), ('arr_nan_nan', True), + ('arr_float_inf', True), ('arr_inf', True), + ('arr_nan_inf', True), ('arr_float_nan_inf', True), ('arr_nan_nan_inf', True)] func1 = lambda x: np.any(nanops._isfinite(x).ravel()) - func2 = lambda x: np.any(nanops._isfinite(x).values.ravel()) + + # TODO: unused? + # func2 = lambda x: np.any(nanops._isfinite(x).values.ravel()) + for arr, correct in pairs: val = getattr(self, arr) try: self.check_bool(func1, val, correct) except BaseException as exc: - exc.args += (arr,) + exc.args += (arr, ) raise for arr, correct in pairs_float: @@ -774,7 +717,7 @@ def test__isfinite(self): self.check_bool(func1, val.astype('f4'), correct) self.check_bool(func1, val.astype('f2'), correct) except BaseException as exc: - exc.args += (arr,) + exc.args += (arr, ) raise def test__bn_ok_dtype(self): @@ -790,6 +733,7 @@ def test__bn_ok_dtype(self): class TestEnsureNumeric(tm.TestCase): + def test_numeric_values(self): # Test integer self.assertEqual(nanops._ensure_numeric(1), 1, 'Failed for int') @@ -817,8 +761,7 @@ def test_ndarray(self): # Test non-convertible string ndarray s_values = np.array(['foo', 'bar', 'baz'], dtype=object) - self.assertRaises(ValueError, - lambda: nanops._ensure_numeric(s_values)) + self.assertRaises(ValueError, lambda: nanops._ensure_numeric(s_values)) def test_convertable_values(self): self.assertTrue(np.allclose(nanops._ensure_numeric('1'), 1.0), @@ -829,12 +772,9 @@ def test_convertable_values(self): 'Failed for convertible complex string') def test_non_convertable_values(self): - self.assertRaises(TypeError, - lambda: nanops._ensure_numeric('foo')) - self.assertRaises(TypeError, - lambda: nanops._ensure_numeric({})) - self.assertRaises(TypeError, - lambda: nanops._ensure_numeric([])) + self.assertRaises(TypeError, lambda: nanops._ensure_numeric('foo')) + self.assertRaises(TypeError, lambda: nanops._ensure_numeric({})) + self.assertRaises(TypeError, lambda: nanops._ensure_numeric([])) class TestNanvarFixedValues(tm.TestCase): @@ -849,32 +789,30 @@ def setUp(self): def test_nanvar_all_finite(self): samples = self.samples actual_variance = nanops.nanvar(samples) - np.testing.assert_almost_equal( - actual_variance, self.variance, decimal=2) + np.testing.assert_almost_equal(actual_variance, self.variance, + decimal=2) def test_nanvar_nans(self): samples = np.nan * np.ones(2 * self.samples.shape[0]) samples[::2] = self.samples actual_variance = nanops.nanvar(samples, skipna=True) - np.testing.assert_almost_equal( - actual_variance, self.variance, decimal=2) + np.testing.assert_almost_equal(actual_variance, self.variance, + decimal=2) actual_variance = nanops.nanvar(samples, skipna=False) - np.testing.assert_almost_equal( - actual_variance, np.nan, decimal=2) + np.testing.assert_almost_equal(actual_variance, np.nan, decimal=2) def test_nanstd_nans(self): samples = np.nan * np.ones(2 * self.samples.shape[0]) samples[::2] = self.samples actual_std = nanops.nanstd(samples, skipna=True) - np.testing.assert_almost_equal( - actual_std, self.variance ** 0.5, decimal=2) + np.testing.assert_almost_equal(actual_std, self.variance ** 0.5, + decimal=2) actual_std = nanops.nanvar(samples, skipna=False) - np.testing.assert_almost_equal( - actual_std, np.nan, decimal=2) + np.testing.assert_almost_equal(actual_std, np.nan, decimal=2) def test_nanvar_axis(self): # Generate some sample data. @@ -883,12 +821,12 @@ def test_nanvar_axis(self): samples = np.vstack([samples_norm, samples_unif]) actual_variance = nanops.nanvar(samples, axis=1) - np.testing.assert_array_almost_equal( - actual_variance, np.array([self.variance, 1.0 / 12]), decimal=2) + np.testing.assert_array_almost_equal(actual_variance, np.array( + [self.variance, 1.0 / 12]), decimal=2) def test_nanvar_ddof(self): n = 5 - samples = self.prng.uniform(size=(10000, n+1)) + samples = self.prng.uniform(size=(10000, n + 1)) samples[:, -1] = np.nan # Force use of our own algorithm. variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean() @@ -899,37 +837,34 @@ def test_nanvar_ddof(self): var = 1.0 / 12 np.testing.assert_almost_equal(variance_1, var, decimal=2) # The underestimated variance. - np.testing.assert_almost_equal( - variance_0, (n - 1.0) / n * var, decimal=2) + np.testing.assert_almost_equal(variance_0, (n - 1.0) / n * var, + decimal=2) # The overestimated variance. - np.testing.assert_almost_equal( - variance_2, (n - 1.0) / (n - 2.0) * var, decimal=2) + np.testing.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var, + decimal=2) def test_ground_truth(self): # Test against values that were precomputed with Numpy. samples = np.empty((4, 4)) - samples[:3, :3] = np.array([[0.97303362, 0.21869576, 0.55560287], - [0.72980153, 0.03109364, 0.99155171], + samples[:3, :3] = np.array([[0.97303362, 0.21869576, 0.55560287 + ], [0.72980153, 0.03109364, 0.99155171], [0.09317602, 0.60078248, 0.15871292]]) samples[3] = samples[:, 3] = np.nan # Actual variances along axis=0, 1 for ddof=0, 1, 2 - variance = np.array( - [[[0.13762259, 0.05619224, 0.11568816], - [0.20643388, 0.08428837, 0.17353224], - [0.41286776, 0.16857673, 0.34706449]], - [[0.09519783, 0.16435395, 0.05082054], - [0.14279674, 0.24653093, 0.07623082], - [0.28559348, 0.49306186, 0.15246163]]] - ) + variance = np.array([[[0.13762259, 0.05619224, 0.11568816 + ], [0.20643388, 0.08428837, 0.17353224], + [0.41286776, 0.16857673, 0.34706449]], + [[0.09519783, 0.16435395, 0.05082054 + ], [0.14279674, 0.24653093, 0.07623082], + [0.28559348, 0.49306186, 0.15246163]]]) # Test nanvar. for axis in range(2): for ddof in range(3): var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof) - np.testing.assert_array_almost_equal( - var[:3], variance[axis, ddof] - ) + np.testing.assert_array_almost_equal(var[:3], + variance[axis, ddof]) np.testing.assert_equal(var[3], np.nan) # Test nanstd. @@ -937,8 +872,7 @@ def test_ground_truth(self): for ddof in range(3): std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof) np.testing.assert_array_almost_equal( - std[:3], variance[axis, ddof] ** 0.5 - ) + std[:3], variance[axis, ddof] ** 0.5) np.testing.assert_equal(std[3], np.nan) def test_nanstd_roundoff(self): @@ -956,5 +890,5 @@ def prng(self): if __name__ == '__main__': import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', - '-s'], exit=False) + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure', '-s' + ], exit=False) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index f12d851a6772d..a1f2b3edf892f 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -20,36 +20,35 @@ from pandas.compat import range, lrange, StringIO, OrderedDict from pandas import SparsePanel -from pandas.util.testing import (assert_panel_equal, - assert_frame_equal, - assert_series_equal, - assert_almost_equal, - assert_produces_warning, - ensure_clean, - assertRaisesRegexp, - makeCustomDataframe as mkdf, - makeMixedDataFrame - ) +from pandas.util.testing import (assert_panel_equal, assert_frame_equal, + assert_series_equal, assert_almost_equal, + assert_produces_warning, ensure_clean, + assertRaisesRegexp, makeCustomDataframe as + mkdf, makeMixedDataFrame) import pandas.core.panel as panelm import pandas.util.testing as tm + def ignore_sparse_panel_future_warning(func): """ decorator to ignore FutureWarning if we have a SparsePanel can be removed when SparsePanel is fully removed """ + @wraps(func) def wrapper(self, *args, **kwargs): if isinstance(self.panel, SparsePanel): - with assert_produces_warning(FutureWarning, check_stacklevel=False): + with assert_produces_warning(FutureWarning, + check_stacklevel=False): return func(self, *args, **kwargs) else: return func(self, *args, **kwargs) return wrapper + class PanelTests(object): panel = None @@ -72,7 +71,7 @@ class SafeForLongAndSparse(object): _multiprocess_can_split_ = True def test_repr(self): - foo = repr(self.panel) + repr(self.panel) @ignore_sparse_panel_future_warning def test_copy_names(self): @@ -122,6 +121,7 @@ def this_skew(x): if len(x) < 3: return np.nan return skew(x, bias=False) + self._check_stat_op('skew', this_skew) # def test_mad(self): @@ -133,6 +133,7 @@ def alt(x): if len(x) < 2: return np.nan return np.var(x, ddof=1) + self._check_stat_op('var', alt) def test_std(self): @@ -140,13 +141,15 @@ def alt(x): if len(x) < 2: return np.nan return np.std(x, ddof=1) + self._check_stat_op('std', alt) def test_sem(self): def alt(x): if len(x) < 2: return np.nan - return np.std(x, ddof=1)/np.sqrt(len(x)) + return np.std(x, ddof=1) / np.sqrt(len(x)) + self._check_stat_op('sem', alt) # def test_skew(self): @@ -170,6 +173,7 @@ def _check_stat_op(self, name, alternative, obj=None, has_skipna=True): f = getattr(obj, name) if has_skipna: + def skipna_wrapper(x): nona = remove_na(x) if len(nona) == 0: @@ -207,9 +211,9 @@ def assert_panel_equal(cls, x, y): assert_panel_equal(x, y) def test_get_axis(self): - assert(self.panel._get_axis(0) is self.panel.items) - assert(self.panel._get_axis(1) is self.panel.major_axis) - assert(self.panel._get_axis(2) is self.panel.minor_axis) + assert (self.panel._get_axis(0) is self.panel.items) + assert (self.panel._get_axis(1) is self.panel.major_axis) + assert (self.panel._get_axis(2) is self.panel.minor_axis) def test_set_axis(self): new_items = Index(np.arange(len(self.panel.items))) @@ -224,12 +228,16 @@ def test_set_axis(self): self.assertNotIn('ItemA', self.panel._item_cache) self.assertIs(self.panel.items, new_items) - item = self.panel[0] + # TODO: unused? + item = self.panel[0] # noqa + self.panel.major_axis = new_major self.assertIs(self.panel[0].index, new_major) self.assertIs(self.panel.major_axis, new_major) - item = self.panel[0] + # TODO: unused? + item = self.panel[0] # noqa + self.panel.minor_axis = new_minor self.assertIs(self.panel[0].columns, new_minor) self.assertIs(self.panel.minor_axis, new_minor) @@ -366,7 +374,7 @@ def check_op(op, name): try: check_op(operator.truediv, 'div') except: - com.pprint_thing("Failing operation: %r" % name) + com.pprint_thing("Failing operation: %r" % 'div') raise @ignore_sparse_panel_future_warning @@ -380,13 +388,15 @@ def test_neg(self): # issue 7692 def test_raise_when_not_implemented(self): - p = Panel(np.arange(3*4*5).reshape(3,4,5), items=['ItemA','ItemB','ItemC'], - major_axis=pd.date_range('20130101',periods=4),minor_axis=list('ABCDE')) + p = Panel(np.arange(3 * 4 * 5).reshape(3, 4, 5), + items=['ItemA', 'ItemB', 'ItemC'], + major_axis=pd.date_range('20130101', periods=4), + minor_axis=list('ABCDE')) d = p.sum(axis=1).ix[0] ops = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'div', 'mod', 'pow'] for op in ops: with self.assertRaises(NotImplementedError): - getattr(p,op)(d, axis=0) + getattr(p, op)(d, axis=0) @ignore_sparse_panel_future_warning def test_select(self): @@ -409,7 +419,7 @@ def test_select(self): self.assert_panel_equal(result, expected) # corner case, empty thing - result = p.select(lambda x: x in ('foo',), axis='items') + result = p.select(lambda x: x in ('foo', ), axis='items') self.assert_panel_equal(result, p.reindex(items=[])) def test_get_value(self): @@ -500,8 +510,7 @@ def test_setitem(self): df2 = self.panel['ItemF'] - assert_frame_equal(df, df2.reindex(index=df.index, - columns=df.columns)) + assert_frame_equal(df, df2.reindex(index=df.index, columns=df.columns)) # scalar self.panel['ItemG'] = 1 @@ -547,15 +556,17 @@ def test_set_minor_major(self): # GH 11014 df1 = DataFrame(['a', 'a', 'a', np.nan, 'a', np.nan]) df2 = DataFrame([1.0, np.nan, 1.0, np.nan, 1.0, 1.0]) - panel = Panel({'Item1' : df1, 'Item2': df2}) + panel = Panel({'Item1': df1, 'Item2': df2}) newminor = notnull(panel.iloc[:, :, 0]) panel.loc[:, :, 'NewMinor'] = newminor - assert_frame_equal(panel.loc[:, :, 'NewMinor'], newminor.astype(object)) + assert_frame_equal(panel.loc[:, :, 'NewMinor'], + newminor.astype(object)) newmajor = notnull(panel.iloc[:, 0, :]) panel.loc[:, 'NewMajor', :] = newmajor - assert_frame_equal(panel.loc[:, 'NewMajor', :], newmajor.astype(object)) + assert_frame_equal(panel.loc[:, 'NewMajor', :], + newmajor.astype(object)) def test_major_xs(self): ref = self.panel['ItemA'] @@ -632,14 +643,11 @@ def test_getitem_fancy_labels(self): p.reindex(items=items, major=dates)) # only 1 - assert_panel_equal(p.ix[items, :, :], - p.reindex(items=items)) + assert_panel_equal(p.ix[items, :, :], p.reindex(items=items)) - assert_panel_equal(p.ix[:, dates, :], - p.reindex(major=dates)) + assert_panel_equal(p.ix[:, dates, :], p.reindex(major=dates)) - assert_panel_equal(p.ix[:, :, cols], - p.reindex(minor=cols)) + assert_panel_equal(p.ix[:, :, cols], p.reindex(minor=cols)) def test_getitem_fancy_slice(self): pass @@ -681,7 +689,6 @@ def test_getitem_fancy_xs(self): def test_getitem_fancy_xs_check_view(self): item = 'ItemB' date = self.panel.major_axis[5] - col = 'C' # make sure it's always a view NS = slice(None, None) @@ -731,10 +738,9 @@ def test_ix_align(self): assert_series_equal(df.ix[0, 0, :].reindex(b.index), b) def test_ix_frame_align(self): - from pandas import DataFrame p_orig = tm.makePanel() df = p_orig.ix[0].copy() - assert_frame_equal(p_orig['ItemA'],df) + assert_frame_equal(p_orig['ItemA'], df) p = p_orig.copy() p.ix[0, :, :] = df @@ -767,12 +773,13 @@ def test_ix_frame_align(self): p = p_orig.copy() p.ix[0, [0, 1, 3, 5], -2:] = df out = p.ix[0, [0, 1, 3, 5], -2:] - assert_frame_equal(out, df.iloc[[0,1,3,5],[2,3]]) + assert_frame_equal(out, df.iloc[[0, 1, 3, 5], [2, 3]]) # GH3830, panel assignent by values/frame - for dtype in ['float64','int64']: + for dtype in ['float64', 'int64']: - panel = Panel(np.arange(40).reshape((2,4,5)), items=['a1','a2'], dtype=dtype) + panel = Panel(np.arange(40).reshape((2, 4, 5)), + items=['a1', 'a2'], dtype=dtype) df1 = panel.iloc[0] df2 = panel.iloc[1] @@ -802,8 +809,8 @@ def _check_view(self, indexer, comp): comp(cp.ix[indexer].reindex_like(obj), obj) def test_logical_with_nas(self): - d = Panel({'ItemA': {'a': [np.nan, False]}, 'ItemB': { - 'a': [True, True]}}) + d = Panel({'ItemA': {'a': [np.nan, False]}, + 'ItemB': {'a': [True, True]}}) result = d['ItemA'] | d['ItemB'] expected = DataFrame({'a': [np.nan, True]}) @@ -884,12 +891,12 @@ def test_set_value(self): " plus the value provided"): self.panel.set_value('a') + _panel = tm.makePanel() tm.add_nans(_panel) -class TestPanel(tm.TestCase, PanelTests, CheckIndexing, - SafeForLongAndSparse, +class TestPanel(tm.TestCase, PanelTests, CheckIndexing, SafeForLongAndSparse, SafeForSparse): _multiprocess_can_split_ = True @@ -927,8 +934,7 @@ def test_constructor(self): assert_panel_equal(wp, self.panel) # strings handled prop - wp = Panel([[['foo', 'foo', 'foo', ], - ['foo', 'foo', 'foo']]]) + wp = Panel([[['foo', 'foo', 'foo', ], ['foo', 'foo', 'foo']]]) self.assertEqual(wp.values.dtype, np.object_) vals = self.panel.values @@ -943,15 +949,18 @@ def test_constructor(self): # GH #8285, test when scalar data is used to construct a Panel # if dtype is not passed, it should be inferred - value_and_dtype = [(1, 'int64'), (3.14, 'float64'), ('foo', np.object_)] + value_and_dtype = [(1, 'int64'), (3.14, 'float64'), + ('foo', np.object_)] for (val, dtype) in value_and_dtype: - wp = Panel(val, items=range(2), major_axis=range(3), minor_axis=range(4)) + wp = Panel(val, items=range(2), major_axis=range(3), + minor_axis=range(4)) vals = np.empty((2, 3, 4), dtype=dtype) vals.fill(val) assert_panel_equal(wp, Panel(vals, dtype=dtype)) # test the case when dtype is passed - wp = Panel(1, items=range(2), major_axis=range(3), minor_axis=range(4), dtype='float32') + wp = Panel(1, items=range(2), major_axis=range(3), minor_axis=range(4), + dtype='float32') vals = np.empty((2, 3, 4), dtype='float32') vals.fill(1) assert_panel_equal(wp, Panel(vals, dtype='float32')) @@ -997,25 +1006,35 @@ def _check_dtype(panel, dtype): self.assertEqual(panel[i].values.dtype.name, dtype) # only nan holding types allowed here - for dtype in ['float64','float32','object']: - panel = Panel(items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) - _check_dtype(panel,dtype) + for dtype in ['float64', 'float32', 'object']: + panel = Panel(items=lrange(2), major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) - for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) - _check_dtype(panel,dtype) + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype=dtype), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) - for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) - _check_dtype(panel,dtype) + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.array(np.random.randn(2, 10, 5), dtype='O'), + items=lrange(2), + major_axis=lrange(10), + minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) - for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.random.randn(2,10,5),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) - _check_dtype(panel,dtype) + for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: + panel = Panel(np.random.randn(2, 10, 5), items=lrange( + 2), major_axis=lrange(10), minor_axis=lrange(5), dtype=dtype) + _check_dtype(panel, dtype) for dtype in ['float64', 'float32', 'int64', 'int32', 'object']: - df1 = DataFrame(np.random.randn(2, 5), index=lrange(2), columns=lrange(5)) - df2 = DataFrame(np.random.randn(2, 5), index=lrange(2), columns=lrange(5)) + df1 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) + df2 = DataFrame(np.random.randn(2, 5), + index=lrange(2), columns=lrange(5)) panel = Panel.from_dict({'a': df1, 'b': df2}, dtype=dtype) _check_dtype(panel, dtype) @@ -1045,7 +1064,10 @@ def test_ctor_dict(self): wp = Panel.from_dict(d) wp2 = Panel.from_dict(d2) # nested Dict - wp3 = Panel.from_dict(d3) + + # TODO: unused? + wp3 = Panel.from_dict(d3) # noqa + self.assertTrue(wp.major_axis.equals(self.panel.major_axis)) assert_panel_equal(wp, wp2) @@ -1060,7 +1082,10 @@ def test_ctor_dict(self): # a pathological case d4 = {'A': None, 'B': None} - wp4 = Panel.from_dict(d4) + + # TODO: unused? + wp4 = Panel.from_dict(d4) # noqa + assert_panel_equal(Panel(d4), Panel(items=['A', 'B'])) # cast @@ -1099,8 +1124,9 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel, data) def test_ctor_orderedDict(self): - keys = list(set(np.random.randint(0,5000,100)))[:50] # unique random int keys - d = OrderedDict([(k,mkdf(10,5)) for k in keys]) + keys = list(set(np.random.randint(0, 5000, 100)))[ + :50] # unique random int keys + d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) p = Panel(d) self.assertTrue(list(p.items) == keys) @@ -1113,8 +1139,7 @@ def test_constructor_resize(self): major = self.panel.major_axis[:-1] minor = self.panel.minor_axis[:-1] - result = Panel(data, items=items, major_axis=major, - minor_axis=minor) + result = Panel(data, items=items, major_axis=major, minor_axis=minor) expected = self.panel.reindex(items=items, major=major, minor=minor) assert_panel_equal(result, expected) @@ -1134,8 +1159,7 @@ def test_from_dict_mixed_orient(self): df = tm.makeDataFrame() df['foo'] = 'bar' - data = {'k1': df, - 'k2': df} + data = {'k1': df, 'k2': df} panel = Panel.from_dict(data, orient='minor') @@ -1143,147 +1167,177 @@ def test_from_dict_mixed_orient(self): self.assertEqual(panel['A'].values.dtype, np.float64) def test_constructor_error_msgs(self): - def testit(): - Panel(np.random.randn(3,4,5), lrange(4), lrange(5), lrange(5)) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(4, 5, 5\)", testit) + Panel(np.random.randn(3, 4, 5), lrange(4), lrange(5), lrange(5)) + + assertRaisesRegexp(ValueError, + "Shape of passed values is \(3, 4, 5\), " + "indices imply \(4, 5, 5\)", + testit) def testit(): - Panel(np.random.randn(3,4,5), lrange(5), lrange(4), lrange(5)) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 4, 5\)", testit) + Panel(np.random.randn(3, 4, 5), lrange(5), lrange(4), lrange(5)) + + assertRaisesRegexp(ValueError, + "Shape of passed values is \(3, 4, 5\), " + "indices imply \(5, 4, 5\)", + testit) def testit(): - Panel(np.random.randn(3,4,5), lrange(5), lrange(5), lrange(4)) - assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 5, 4\)", testit) + Panel(np.random.randn(3, 4, 5), lrange(5), lrange(5), lrange(4)) + + assertRaisesRegexp(ValueError, + "Shape of passed values is \(3, 4, 5\), " + "indices imply \(5, 5, 4\)", + testit) def test_conform(self): df = self.panel['ItemA'][:-5].filter(items=['A', 'B']) conformed = self.panel.conform(df) - assert(conformed.index.equals(self.panel.major_axis)) - assert(conformed.columns.equals(self.panel.minor_axis)) + assert (conformed.index.equals(self.panel.major_axis)) + assert (conformed.columns.equals(self.panel.minor_axis)) def test_convert_objects(self): # GH 4937 - p = Panel(dict(A = dict(a = ['1','1.0']))) - expected = Panel(dict(A = dict(a = [1,1.0]))) + p = Panel(dict(A=dict(a=['1', '1.0']))) + expected = Panel(dict(A=dict(a=[1, 1.0]))) result = p._convert(numeric=True, coerce=True) assert_panel_equal(result, expected) def test_dtypes(self): result = self.panel.dtypes - expected = Series(np.dtype('float64'),index=self.panel.items) + expected = Series(np.dtype('float64'), index=self.panel.items) assert_series_equal(result, expected) def test_apply(self): # GH1148 - from pandas import Series,DataFrame - # ufunc applied = self.panel.apply(np.sqrt) - self.assertTrue(assert_almost_equal(applied.values, - np.sqrt(self.panel.values))) + self.assertTrue(assert_almost_equal(applied.values, np.sqrt( + self.panel.values))) # ufunc same shape - result = self.panel.apply(lambda x: x*2, axis='items') - expected = self.panel*2 + result = self.panel.apply(lambda x: x * 2, axis='items') + expected = self.panel * 2 assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x*2, axis='major_axis') - expected = self.panel*2 + result = self.panel.apply(lambda x: x * 2, axis='major_axis') + expected = self.panel * 2 assert_panel_equal(result, expected) - result = self.panel.apply(lambda x: x*2, axis='minor_axis') - expected = self.panel*2 + result = self.panel.apply(lambda x: x * 2, axis='minor_axis') + expected = self.panel * 2 assert_panel_equal(result, expected) # reduction to DataFrame result = self.panel.apply(lambda x: x.dtype, axis='items') - expected = DataFrame(np.dtype('float64'),index=self.panel.major_axis,columns=self.panel.minor_axis) - assert_frame_equal(result,expected) + expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, + columns=self.panel.minor_axis) + assert_frame_equal(result, expected) result = self.panel.apply(lambda x: x.dtype, axis='major_axis') - expected = DataFrame(np.dtype('float64'),index=self.panel.minor_axis,columns=self.panel.items) - assert_frame_equal(result,expected) + expected = DataFrame(np.dtype('float64'), index=self.panel.minor_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) result = self.panel.apply(lambda x: x.dtype, axis='minor_axis') - expected = DataFrame(np.dtype('float64'),index=self.panel.major_axis,columns=self.panel.items) - assert_frame_equal(result,expected) + expected = DataFrame(np.dtype('float64'), index=self.panel.major_axis, + columns=self.panel.items) + assert_frame_equal(result, expected) # reductions via other dims expected = self.panel.sum(0) result = self.panel.apply(lambda x: x.sum(), axis='items') - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) expected = self.panel.sum(1) result = self.panel.apply(lambda x: x.sum(), axis='major_axis') - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) expected = self.panel.sum(2) result = self.panel.apply(lambda x: x.sum(), axis='minor_axis') - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # pass kwargs result = self.panel.apply(lambda x, y: x.sum() + y, axis='items', y=5) expected = self.panel.sum(0) + 5 - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_apply_slabs(self): # same shape as original - result = self.panel.apply(lambda x: x*2, axis = ['items','major_axis']) - expected = (self.panel*2).transpose('minor_axis','major_axis','items') - assert_panel_equal(result,expected) - result = self.panel.apply(lambda x: x*2, axis = ['major_axis','items']) - assert_panel_equal(result,expected) - - result = self.panel.apply(lambda x: x*2, axis = ['items','minor_axis']) - expected = (self.panel*2).transpose('major_axis','minor_axis','items') - assert_panel_equal(result,expected) - result = self.panel.apply(lambda x: x*2, axis = ['minor_axis','items']) - assert_panel_equal(result,expected) - - result = self.panel.apply(lambda x: x*2, axis = ['major_axis','minor_axis']) - expected = self.panel*2 - assert_panel_equal(result,expected) - result = self.panel.apply(lambda x: x*2, axis = ['minor_axis','major_axis']) - assert_panel_equal(result,expected) + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'major_axis']) + expected = (self.panel * 2).transpose('minor_axis', 'major_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'items']) + assert_panel_equal(result, expected) + + result = self.panel.apply(lambda x: x * 2, + axis=['items', 'minor_axis']) + expected = (self.panel * 2).transpose('major_axis', 'minor_axis', + 'items') + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'items']) + assert_panel_equal(result, expected) + + result = self.panel.apply(lambda x: x * 2, + axis=['major_axis', 'minor_axis']) + expected = self.panel * 2 + assert_panel_equal(result, expected) + result = self.panel.apply(lambda x: x * 2, + axis=['minor_axis', 'major_axis']) + assert_panel_equal(result, expected) # reductions - result = self.panel.apply(lambda x: x.sum(0), axis = ['items','major_axis']) + result = self.panel.apply(lambda x: x.sum(0), axis=[ + 'items', 'major_axis' + ]) expected = self.panel.sum(1).T - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) - result = self.panel.apply(lambda x: x.sum(1), axis = ['items','major_axis']) + result = self.panel.apply(lambda x: x.sum(1), axis=[ + 'items', 'major_axis' + ]) expected = self.panel.sum(0) - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) # transforms - f = lambda x: ((x.T-x.mean(1))/x.std(1)).T + f = lambda x: ((x.T - x.mean(1)) / x.std(1)).T # make sure that we don't trigger any warnings with tm.assert_produces_warning(False): - result = self.panel.apply(f, axis = ['items','major_axis']) - expected = Panel(dict([ (ax,f(self.panel.loc[:,:,ax])) for ax in self.panel.minor_axis ])) - assert_panel_equal(result,expected) - - result = self.panel.apply(f, axis = ['major_axis','minor_axis']) - expected = Panel(dict([ (ax,f(self.panel.loc[ax])) for ax in self.panel.items ])) - assert_panel_equal(result,expected) + result = self.panel.apply(f, axis=['items', 'major_axis']) + expected = Panel(dict([(ax, f(self.panel.loc[:, :, ax])) + for ax in self.panel.minor_axis])) + assert_panel_equal(result, expected) + + result = self.panel.apply(f, axis=['major_axis', 'minor_axis']) + expected = Panel(dict([(ax, f(self.panel.loc[ax])) + for ax in self.panel.items])) + assert_panel_equal(result, expected) - result = self.panel.apply(f, axis = ['minor_axis','items']) - expected = Panel(dict([ (ax,f(self.panel.loc[:,ax])) for ax in self.panel.major_axis ])) - assert_panel_equal(result,expected) + result = self.panel.apply(f, axis=['minor_axis', 'items']) + expected = Panel(dict([(ax, f(self.panel.loc[:, ax])) + for ax in self.panel.major_axis])) + assert_panel_equal(result, expected) # with multi-indexes # GH7469 - index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ('two', 'a'), ('two', 'b')]) - dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape(4,3), columns=list("ABC"), index=index) - dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape(4,3), columns=list("ABC"), index=index) - p = Panel({'f':dfa, 'g':dfb}) + index = MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), ( + 'two', 'a'), ('two', 'b')]) + dfa = DataFrame(np.array(np.arange(12, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + dfb = DataFrame(np.array(np.arange(10, 22, dtype='int64')).reshape( + 4, 3), columns=list("ABC"), index=index) + p = Panel({'f': dfa, 'g': dfb}) result = p.apply(lambda x: x.sum(), axis=0) # on windows this will be in32 result = result.astype('int64') expected = p.sum(0) - assert_frame_equal(result,expected) + assert_frame_equal(result, expected) def test_apply_no_or_zero_ndim(self): # GH10332 @@ -1303,7 +1357,6 @@ def test_apply_no_or_zero_ndim(self): assert_series_equal(result_float, expected_float) assert_series_equal(result_float64, expected_float64) - def test_reindex(self): ref = self.panel['ItemB'] @@ -1317,8 +1370,8 @@ def test_reindex(self): assert_frame_equal(result['ItemB'], ref.reindex(index=new_major)) # raise exception put both major and major_axis - self.assertRaises(Exception, self.panel.reindex, - major_axis=new_major, major=new_major) + self.assertRaises(Exception, self.panel.reindex, major_axis=new_major, + major=new_major) # minor new_minor = list(self.panel.minor_axis[:2]) @@ -1327,22 +1380,21 @@ def test_reindex(self): # this ok result = self.panel.reindex() - assert_panel_equal(result,self.panel) + assert_panel_equal(result, self.panel) self.assertFalse(result is self.panel) # with filling smaller_major = self.panel.major_axis[::5] smaller = self.panel.reindex(major=smaller_major) - larger = smaller.reindex(major=self.panel.major_axis, - method='pad') + larger = smaller.reindex(major=self.panel.major_axis, method='pad') assert_frame_equal(larger.major_xs(self.panel.major_axis[1]), smaller.major_xs(smaller_major[0])) # don't necessarily copy result = self.panel.reindex(major=self.panel.major_axis, copy=False) - assert_panel_equal(result,self.panel) + assert_panel_equal(result, self.panel) self.assertTrue(result is self.panel) def test_reindex_multi(self): @@ -1350,8 +1402,7 @@ def test_reindex_multi(self): # with and without copy full reindexing result = self.panel.reindex(items=self.panel.items, major=self.panel.major_axis, - minor=self.panel.minor_axis, - copy = False) + minor=self.panel.minor_axis, copy=False) self.assertIs(result.items, self.panel.items) self.assertIs(result.major_axis, self.panel.major_axis) @@ -1359,31 +1410,36 @@ def test_reindex_multi(self): result = self.panel.reindex(items=self.panel.items, major=self.panel.major_axis, - minor=self.panel.minor_axis, - copy = False) - assert_panel_equal(result,self.panel) + minor=self.panel.minor_axis, copy=False) + assert_panel_equal(result, self.panel) # multi-axis indexing consistency # GH 5900 - df = DataFrame(np.random.randn(4,3)) - p = Panel({ 'Item1' : df }) - expected = Panel({ 'Item1' : df }) + df = DataFrame(np.random.randn(4, 3)) + p = Panel({'Item1': df}) + expected = Panel({'Item1': df}) expected['Item2'] = np.nan - items = ['Item1','Item2'] + items = ['Item1', 'Item2'] major_axis = np.arange(4) minor_axis = np.arange(3) results = [] - results.append(p.reindex(items=items, major_axis=major_axis, copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, copy=False)) - results.append(p.reindex(items=items, minor_axis=minor_axis, copy=True)) - results.append(p.reindex(items=items, minor_axis=minor_axis, copy=False)) - results.append(p.reindex(items=items, major_axis=major_axis, minor_axis=minor_axis, copy=True)) - results.append(p.reindex(items=items, major_axis=major_axis, minor_axis=minor_axis, copy=False)) + results.append(p.reindex(items=items, major_axis=major_axis, + copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + copy=False)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=True)) + results.append(p.reindex(items=items, minor_axis=minor_axis, + copy=False)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=True)) + results.append(p.reindex(items=items, major_axis=major_axis, + minor_axis=minor_axis, copy=False)) for i, r in enumerate(results): - assert_panel_equal(expected,r) + assert_panel_equal(expected, r) def test_reindex_like(self): # reindex_like @@ -1465,9 +1521,9 @@ def test_fillna(self): self.assertRaises(TypeError, self.panel.fillna, (1, 2)) # limit not implemented when only value is specified - p = Panel(np.random.randn(3,4,5)) - p.iloc[0:2,0:2,0:2] = np.nan - self.assertRaises(NotImplementedError, lambda : p.fillna(999,limit=1)) + p = Panel(np.random.randn(3, 4, 5)) + p.iloc[0:2, 0:2, 0:2] = np.nan + self.assertRaises(NotImplementedError, lambda: p.fillna(999, limit=1)) def test_ffill_bfill(self): assert_panel_equal(self.panel.ffill(), @@ -1504,7 +1560,7 @@ def test_swapaxes(self): # this works, but return a copy result = self.panel.swapaxes('items', 'items') - assert_panel_equal(self.panel,result) + assert_panel_equal(self.panel, result) self.assertNotEqual(id(self.panel), id(result)) def test_transpose(self): @@ -1528,11 +1584,13 @@ def test_transpose(self): assert_panel_equal(result, expected) # duplicate axes - with tm.assertRaisesRegexp(TypeError, 'not enough/duplicate arguments'): + with tm.assertRaisesRegexp(TypeError, + 'not enough/duplicate arguments'): self.panel.transpose('minor', maj='major', minor='items') with tm.assertRaisesRegexp(ValueError, 'repeated axis in transpose'): - self.panel.transpose('minor', 'major', major='minor', minor='items') + self.panel.transpose('minor', 'major', major='minor', + minor='items') result = self.panel.transpose(2, 1, 0) assert_panel_equal(result, expected) @@ -1597,7 +1655,8 @@ def test_to_frame_mixed(self): lp = panel.to_frame() wp = lp.to_panel() self.assertEqual(wp['bool'].values.dtype, np.bool_) - # Previously, this was mutating the underlying index and changing its name + # Previously, this was mutating the underlying index and changing its + # name assert_frame_equal(wp['bool'], panel['bool'], check_names=False) # GH 8704 @@ -1610,23 +1669,28 @@ def test_to_frame_mixed(self): p = df.to_panel() expected = panel.copy() expected['category'] = 'foo' - assert_panel_equal(p,expected) + assert_panel_equal(p, expected) def test_to_frame_multi_major(self): - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), - (2, 'two')]) + idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two')]) df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], columns=['A', 'B', 'C'], index=idx) wp = Panel({'i1': df, 'i2': df}) - expected_idx = MultiIndex.from_tuples([(1, 'one', 'A'), (1, 'one', 'B'), - (1, 'one', 'C'), (1, 'two', 'A'), - (1, 'two', 'B'), (1, 'two', 'C'), - (2, 'one', 'A'), (2, 'one', 'B'), - (2, 'one', 'C'), (2, 'two', 'A'), - (2, 'two', 'B'), (2, 'two', 'C')], - names=[None, None, 'minor']) - expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1], - 'i2': [1, 'a', 1, 2, 'b', 1, 3, 'c', 1, 4, 'd', 1]}, + expected_idx = MultiIndex.from_tuples( + [ + (1, 'one', 'A'), (1, 'one', 'B'), + (1, 'one', 'C'), (1, 'two', 'A'), + (1, 'two', 'B'), (1, 'two', 'C'), + (2, 'one', 'A'), (2, 'one', 'B'), + (2, 'one', 'C'), (2, 'two', 'A'), + (2, 'two', 'B'), (2, 'two', 'C') + ], + names=[None, None, 'minor']) + expected = DataFrame({'i1': [1, 'a', 1, 2, 'b', 1, 3, + 'c', 1, 4, 'd', 1], + 'i2': [1, 'a', 1, 2, 'b', + 1, 3, 'c', 1, 4, 'd', 1]}, index=expected_idx) result = wp.to_frame() assert_frame_equal(result, expected) @@ -1635,17 +1699,23 @@ def test_to_frame_multi_major(self): result = wp.to_frame() assert_frame_equal(result, expected[1:]) - idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), - (np.nan, 'two')]) + idx = MultiIndex.from_tuples([(1, 'two'), (1, 'one'), (2, 'one'), ( + np.nan, 'two')]) df = DataFrame([[1, 'a', 1], [2, 'b', 1], [3, 'c', 1], [4, 'd', 1]], columns=['A', 'B', 'C'], index=idx) wp = Panel({'i1': df, 'i2': df}) - ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), (1, 'two', 'C'), - (1, 'one', 'A'), (1, 'one', 'B'), (1, 'one', 'C'), - (2, 'one', 'A'), (2, 'one', 'B'), (2, 'one', 'C'), - (np.nan, 'two', 'A'), (np.nan, 'two', 'B'), + ex_idx = MultiIndex.from_tuples([(1, 'two', 'A'), (1, 'two', 'B'), + (1, 'two', 'C'), + (1, 'one', 'A'), + (1, 'one', 'B'), + (1, 'one', 'C'), + (2, 'one', 'A'), + (2, 'one', 'B'), + (2, 'one', 'C'), + (np.nan, 'two', 'A'), + (np.nan, 'two', 'B'), (np.nan, 'two', 'C')], - names=[None, None, 'minor']) + names=[None, None, 'minor']) expected.index = ex_idx result = wp.to_frame() assert_frame_equal(result, expected) @@ -1653,31 +1723,33 @@ def test_to_frame_multi_major(self): def test_to_frame_multi_major_minor(self): cols = MultiIndex(levels=[['C_A', 'C_B'], ['C_1', 'C_2']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) - idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), - (2, 'two'), (3, 'three'), (4, 'four')]) - df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], ['a', 'b', 'w', 'x'], - ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], [-5, -6, -7, -8] - ], columns=cols, index=idx) + idx = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), ( + 2, 'two'), (3, 'three'), (4, 'four')]) + df = DataFrame([[1, 2, 11, 12], [3, 4, 13, 14], + ['a', 'b', 'w', 'x'], + ['c', 'd', 'y', 'z'], [-1, -2, -3, -4], + [-5, -6, -7, -8]], columns=cols, index=idx) wp = Panel({'i1': df, 'i2': df}) - exp_idx = MultiIndex.from_tuples([(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), - (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), - (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), - (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), - (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), - (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), - (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), - (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), - (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), - (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), - (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), - (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], - names=[None, None, None, None]) - exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], [3, 3], [4, 4], [13, 13], - [14, 14], ['a', 'a'], ['b', 'b'], ['w', 'w'], ['x', 'x'], - ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], [-1, -1], - [-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], [-7, -7], - [-8, -8]] + exp_idx = MultiIndex.from_tuples( + [(1, 'one', 'C_A', 'C_1'), (1, 'one', 'C_A', 'C_2'), + (1, 'one', 'C_B', 'C_1'), (1, 'one', 'C_B', 'C_2'), + (1, 'two', 'C_A', 'C_1'), (1, 'two', 'C_A', 'C_2'), + (1, 'two', 'C_B', 'C_1'), (1, 'two', 'C_B', 'C_2'), + (2, 'one', 'C_A', 'C_1'), (2, 'one', 'C_A', 'C_2'), + (2, 'one', 'C_B', 'C_1'), (2, 'one', 'C_B', 'C_2'), + (2, 'two', 'C_A', 'C_1'), (2, 'two', 'C_A', 'C_2'), + (2, 'two', 'C_B', 'C_1'), (2, 'two', 'C_B', 'C_2'), + (3, 'three', 'C_A', 'C_1'), (3, 'three', 'C_A', 'C_2'), + (3, 'three', 'C_B', 'C_1'), (3, 'three', 'C_B', 'C_2'), + (4, 'four', 'C_A', 'C_1'), (4, 'four', 'C_A', 'C_2'), + (4, 'four', 'C_B', 'C_1'), (4, 'four', 'C_B', 'C_2')], + names=[None, None, None, None]) + exp_val = [[1, 1], [2, 2], [11, 11], [12, 12], [3, 3], [4, 4], + [13, 13], [14, 14], ['a', 'a'], ['b', 'b'], ['w', 'w'], + ['x', 'x'], ['c', 'c'], ['d', 'd'], ['y', 'y'], ['z', 'z'], + [-1, -1], [-2, -2], [-3, -3], [-4, -4], [-5, -5], [-6, -6], + [-7, -7], [-8, -8]] result = wp.to_frame() expected = DataFrame(exp_val, columns=['i1', 'i2'], index=exp_idx) assert_frame_equal(result, expected) @@ -1724,8 +1796,8 @@ def test_panel_dups(self): result = panel.loc['E'] assert_frame_equal(result, expected) - expected = no_dup_panel.loc[['A','B']] - expected.items = ['A','A'] + expected = no_dup_panel.loc[['A', 'B']] + expected.items = ['A', 'A'] result = panel.loc['A'] assert_panel_equal(result, expected) @@ -1734,17 +1806,17 @@ def test_panel_dups(self): no_dup_panel = Panel(data, major_axis=list("ABCDE")) panel = Panel(data, major_axis=list("AACDE")) - expected = no_dup_panel.loc[:,'A'] - result = panel.iloc[:,0] + expected = no_dup_panel.loc[:, 'A'] + result = panel.iloc[:, 0] assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:,'E'] - result = panel.loc[:,'E'] + expected = no_dup_panel.loc[:, 'E'] + result = panel.loc[:, 'E'] assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:,['A','B']] - expected.major_axis = ['A','A'] - result = panel.loc[:,'A'] + expected = no_dup_panel.loc[:, ['A', 'B']] + expected.major_axis = ['A', 'A'] + result = panel.loc[:, 'A'] assert_panel_equal(result, expected) # minor @@ -1752,17 +1824,17 @@ def test_panel_dups(self): no_dup_panel = Panel(data, minor_axis=list("ABCDE")) panel = Panel(data, minor_axis=list("AACDE")) - expected = no_dup_panel.loc[:,:,'A'] - result = panel.iloc[:,:,0] + expected = no_dup_panel.loc[:, :, 'A'] + result = panel.iloc[:, :, 0] assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:,:,'E'] - result = panel.loc[:,:,'E'] + expected = no_dup_panel.loc[:, :, 'E'] + result = panel.loc[:, :, 'E'] assert_frame_equal(result, expected) - expected = no_dup_panel.loc[:,:,['A','B']] - expected.minor_axis = ['A','A'] - result = panel.loc[:,:,'A'] + expected = no_dup_panel.loc[:, :, ['A', 'B']] + expected.minor_axis = ['A', 'A'] + result = panel.loc[:, :, 'A'] assert_panel_equal(result, expected) def test_filter(self): @@ -1780,22 +1852,19 @@ def test_shift(self): idx = self.panel.major_axis[0] idx_lag = self.panel.major_axis[1] shifted = self.panel.shift(1) - assert_frame_equal(self.panel.major_xs(idx), - shifted.major_xs(idx_lag)) + assert_frame_equal(self.panel.major_xs(idx), shifted.major_xs(idx_lag)) # minor idx = self.panel.minor_axis[0] idx_lag = self.panel.minor_axis[1] shifted = self.panel.shift(1, axis='minor') - assert_frame_equal(self.panel.minor_xs(idx), - shifted.minor_xs(idx_lag)) + assert_frame_equal(self.panel.minor_xs(idx), shifted.minor_xs(idx_lag)) # items idx = self.panel.items[0] idx_lag = self.panel.items[1] shifted = self.panel.shift(1, axis='items') - assert_frame_equal(self.panel[idx], - shifted[idx_lag]) + assert_frame_equal(self.panel[idx], shifted[idx_lag]) # negative numbers, #2164 result = self.panel.shift(-1) @@ -1804,7 +1873,7 @@ def test_shift(self): assert_panel_equal(result, expected) # mixed dtypes #6959 - data = [('item '+ch, makeMixedDataFrame()) for ch in list('abcde')] + data = [('item ' + ch, makeMixedDataFrame()) for ch in list('abcde')] data = dict(data) mixed_panel = Panel.from_dict(data, orient='minor') shifted = mixed_panel.shift(1) @@ -1836,10 +1905,9 @@ def test_tshift(self): shifted2 = panel.tshift(freq=panel.major_axis.freq) assert_panel_equal(shifted, shifted2) - inferred_ts = Panel(panel.values, - items=panel.items, - major_axis=Index(np.asarray(panel.major_axis)), - minor_axis=panel.minor_axis) + inferred_ts = Panel(panel.values, items=panel.items, + major_axis=Index(np.asarray(panel.major_axis)), + minor_axis=panel.minor_axis) shifted = inferred_ts.tshift(1) unshifted = shifted.tshift(-1) assert_panel_equal(shifted, panel.tshift(1)) @@ -1886,9 +1954,9 @@ def test_pct_change(self): expected = Panel({'i1': DataFrame({'c1': [np.nan, np.nan, np.nan], 'c2': [np.nan, np.nan, np.nan]}), 'i2': DataFrame({'c1': [1, 0.5, .2], - 'c2': [1./3, 0.25, 1./6]}), - 'i3': DataFrame({'c1': [.5, 1./3, 1./6], - 'c2': [.25, .2, 1./7]})}) + 'c2': [1. / 3, 0.25, 1. / 6]}), + 'i3': DataFrame({'c1': [.5, 1. / 3, 1. / 6], + 'c2': [.25, .2, 1. / 7]})}) assert_panel_equal(result, expected) result = wp.pct_change(axis=0) assert_panel_equal(result, expected) @@ -1899,25 +1967,25 @@ def test_pct_change(self): 'i2': DataFrame({'c1': [np.nan, np.nan, np.nan], 'c2': [np.nan, np.nan, np.nan]}), 'i3': DataFrame({'c1': [2, 1, .4], - 'c2': [2./3, .5, 1./3]})}) + 'c2': [2. / 3, .5, 1. / 3]})}) assert_panel_equal(result, expected) - + def test_round(self): - values = [[[-3.2,2.2],[0,-4.8213],[3.123,123.12], - [-1566.213,88.88],[-12,94.5]], - [[-5.82,3.5],[6.21,-73.272], [-9.087,23.12], - [272.212,-99.99],[23,-76.5]]] - evalues = [[[float(np.around(i)) for i in j] for j in k] for k in values] + values = [[[-3.2, 2.2], [0, -4.8213], [3.123, 123.12], + [-1566.213, 88.88], [-12, 94.5]], + [[-5.82, 3.5], [6.21, -73.272], [-9.087, 23.12], + [272.212, -99.99], [23, -76.5]]] + evalues = [[[float(np.around(i)) for i in j] for j in k] + for k in values] p = Panel(values, items=['Item1', 'Item2'], major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A','B']) + minor_axis=['A', 'B']) expected = Panel(evalues, items=['Item1', 'Item2'], major_axis=pd.date_range('1/1/2000', periods=5), - minor_axis=['A','B']) + minor_axis=['A', 'B']) result = p.round() self.assert_panel_equal(expected, result) - def test_multiindex_get(self): ind = MultiIndex.from_tuples([('a', 1), ('a', 2), ('b', 1), ('b', 2)], names=['first', 'second']) @@ -1951,11 +2019,7 @@ def test_repr_empty(self): repr(empty) def test_rename(self): - mapper = { - 'ItemA': 'foo', - 'ItemB': 'bar', - 'ItemC': 'baz' - } + mapper = {'ItemA': 'foo', 'ItemB': 'bar', 'ItemC': 'baz'} renamed = self.panel.rename_axis(mapper, axis=0) exp = Index(['foo', 'bar', 'baz']) @@ -1979,21 +2043,19 @@ def test_get_attr(self): self.panel['i'] = self.panel['ItemA'] assert_frame_equal(self.panel['i'], self.panel.i) - def test_from_frame_level1_unsorted(self): - tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), - ('AAPL', 1), ('MSFT', 1)] + tuples = [('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), + ('MSFT', 1)] midx = MultiIndex.from_tuples(tuples) df = DataFrame(np.random.rand(5, 4), index=midx) p = df.to_panel() assert_frame_equal(p.minor_xs(2), df.xs(2, level=1).sort_index()) def test_to_excel(self): - import os try: - import xlwt - import xlrd - import openpyxl + import xlwt # noqa + import xlrd # noqa + import openpyxl # noqa from pandas.io.excel import ExcelFile except ImportError: raise nose.SkipTest("need xlwt xlrd openpyxl") @@ -2013,8 +2075,8 @@ def test_to_excel(self): def test_to_excel_xlsxwriter(self): try: - import xlrd - import xlsxwriter + import xlrd # noqa + import xlsxwriter # noqa from pandas.io.excel import ExcelFile except ImportError: raise nose.SkipTest("Requires xlrd and xlsxwriter. Skipping test.") @@ -2112,135 +2174,96 @@ def check_drop(drop_val, axis_number, aliases, expected): check_drop("B", 2, ['minor_axis', 'minor'], expected) def test_update(self): - pan = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - other = Panel([[[3.6, 2., np.nan], - [np.nan, np.nan, 7]]], items=[1]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) pan.update(other) - expected = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[3.6, 2., 3], - [1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[3.6, 2., 3], [1.5, np.nan, 7], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) assert_panel_equal(pan, expected) def test_update_from_dict(self): - pan = Panel({'one': DataFrame([[1.5, np.nan, 3], - [1.5, np.nan, 3], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]), - 'two': DataFrame([[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]])}) - - other = {'two': DataFrame([[3.6, 2., np.nan], - [np.nan, np.nan, 7]])} + pan = Panel({'one': DataFrame([[1.5, np.nan, 3], [1.5, np.nan, 3], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), + 'two': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) + + other = {'two': DataFrame([[3.6, 2., np.nan], [np.nan, np.nan, 7]])} pan.update(other) - expected = Panel({'two': DataFrame([[3.6, 2., 3], - [1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]), - 'one': DataFrame([[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]])}) + expected = Panel( + {'two': DataFrame([[3.6, 2., 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]]), + 'one': DataFrame([[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]])}) assert_panel_equal(pan, expected) def test_update_nooverwrite(self): - pan = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - other = Panel([[[3.6, 2., np.nan], - [np.nan, np.nan, 7]]], items=[1]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) pan.update(other, overwrite=False) - expected = Panel([[[1.5, np.nan, 3], - [1.5, np.nan, 3], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, 2., 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3], [1.5, np.nan, 3], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, 2., 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) assert_panel_equal(pan, expected) def test_update_filtered(self): - pan = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - other = Panel([[[3.6, 2., np.nan], - [np.nan, np.nan, 7]]], items=[1]) + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + other = Panel([[[3.6, 2., np.nan], [np.nan, np.nan, 7]]], items=[1]) pan.update(other, filter_func=lambda x: x > 2) - expected = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3], - [1.5, np.nan, 7], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) + expected = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]], + [[1.5, np.nan, 3], [1.5, np.nan, 7], + [1.5, np.nan, 3.], [1.5, np.nan, 3.]]]) assert_panel_equal(pan, expected) def test_update_raise(self): - pan = Panel([[[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]], - [[1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.], - [1.5, np.nan, 3.]]]) - - np.testing.assert_raises(Exception, pan.update, *(pan,), + pan = Panel([[[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]], + [[1.5, np.nan, 3.], [1.5, np.nan, 3.], [1.5, np.nan, 3.], + [1.5, np.nan, 3.]]]) + + np.testing.assert_raises(Exception, pan.update, *(pan, ), **{'raise_conflict': True}) def test_all_any(self): - self.assertTrue((self.panel.all(axis=0).values == - nanall(self.panel, axis=0)).all()) - self.assertTrue((self.panel.all(axis=1).values == - nanall(self.panel, axis=1).T).all()) - self.assertTrue((self.panel.all(axis=2).values == - nanall(self.panel, axis=2).T).all()) - self.assertTrue((self.panel.any(axis=0).values == - nanany(self.panel, axis=0)).all()) - self.assertTrue((self.panel.any(axis=1).values == - nanany(self.panel, axis=1).T).all()) - self.assertTrue((self.panel.any(axis=2).values == - nanany(self.panel, axis=2).T).all()) + self.assertTrue((self.panel.all(axis=0).values == nanall( + self.panel, axis=0)).all()) + self.assertTrue((self.panel.all(axis=1).values == nanall( + self.panel, axis=1).T).all()) + self.assertTrue((self.panel.all(axis=2).values == nanall( + self.panel, axis=2).T).all()) + self.assertTrue((self.panel.any(axis=0).values == nanany( + self.panel, axis=0)).all()) + self.assertTrue((self.panel.any(axis=1).values == nanany( + self.panel, axis=1).T).all()) + self.assertTrue((self.panel.any(axis=2).values == nanany( + self.panel, axis=2).T).all()) def test_all_any_unhandled(self): self.assertRaises(NotImplementedError, self.panel.all, bool_only=True) @@ -2274,8 +2297,8 @@ def test_ops_differently_indexed(self): # careful, mutation self.panel['foo'] = lp2['ItemA'] - assert_series_equal(self.panel['foo'].reindex(lp2.index), - lp2['ItemA'], check_names=False) + assert_series_equal(self.panel['foo'].reindex(lp2.index), lp2['ItemA'], + check_names=False) def test_ops_scalar(self): result = self.panel.mul(2) @@ -2325,7 +2348,7 @@ def test_arith_flex_panel(self): aliases = {'div': 'truediv'} self.panel = self.panel.to_panel() - for n in [ np.random.randint(-50, -1), np.random.randint(1, 50), 0]: + for n in [np.random.randint(-50, -1), np.random.randint(1, 50), 0]: for op in ops: alias = aliases.get(op, op) f = getattr(operator, alias) @@ -2361,17 +2384,20 @@ def test_truncate(self): trunced = self.panel.truncate(start, end).to_panel() expected = self.panel.to_panel()['ItemA'].truncate(start, end) - assert_frame_equal(trunced['ItemA'], expected, check_names=False) # TODO trucate drops index.names + # TODO trucate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) trunced = self.panel.truncate(before=start).to_panel() expected = self.panel.to_panel()['ItemA'].truncate(before=start) - assert_frame_equal(trunced['ItemA'], expected, check_names=False) # TODO trucate drops index.names + # TODO trucate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) trunced = self.panel.truncate(after=end).to_panel() expected = self.panel.to_panel()['ItemA'].truncate(after=end) - assert_frame_equal(trunced['ItemA'], expected, check_names=False) # TODO trucate drops index.names + # TODO trucate drops index.names + assert_frame_equal(trunced['ItemA'], expected, check_names=False) # truncate on dates that aren't in there wp = self.panel.to_panel() @@ -2401,10 +2427,7 @@ def test_axis_dummies(self): self.assertEqual(len(major_dummies.columns), len(self.panel.index.levels[0])) - mapping = {'A': 'one', - 'B': 'one', - 'C': 'two', - 'D': 'two'} + mapping = {'A': 'one', 'B': 'one', 'C': 'two', 'D': 'two'} transformed = make_axis_dummies(self.panel, 'minor', transform=mapping.get) @@ -2502,9 +2525,10 @@ def _monotonic(arr): def test_panel_index(): index = panelm.panel_index([1, 2, 3, 4], [1, 2, 3]) - expected = MultiIndex.from_arrays([np.tile([1, 2, 3, 4], 3), - np.repeat([1, 2, 3], 4)]) - assert(index.equals(expected)) + expected = MultiIndex.from_arrays([np.tile( + [1, 2, 3, 4], 3), np.repeat( + [1, 2, 3], 4)]) + assert (index.equals(expected)) def test_import_warnings(): @@ -2513,7 +2537,7 @@ def test_import_warnings(): with assert_produces_warning(): panel.major_xs(1, copy=False) + if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 3772d4b9c272b..6238f13864552 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,19 +1,17 @@ # -*- coding: utf-8 -*- from datetime import datetime from pandas.compat import range, lrange -import os import operator import nose import numpy as np -from pandas import Series, DataFrame, Index, isnull, notnull, pivot, MultiIndex +from pandas import Series, Index, isnull, notnull from pandas.core.datetools import bday from pandas.core.panel import Panel from pandas.core.panel4d import Panel4D from pandas.core.series import remove_na import pandas.core.common as com -import pandas.core.panel as panelmod from pandas import compat from pandas.util.testing import (assert_panel_equal, @@ -22,7 +20,6 @@ assert_series_equal, assert_almost_equal) import pandas.util.testing as tm -import pandas.compat as compat def add_nans(panel4d): @@ -36,7 +33,7 @@ class SafeForLongAndSparse(object): _multiprocess_can_split_ = True def test_repr(self): - foo = repr(self.panel4d) + repr(self.panel4d) def test_iter(self): tm.equalContents(list(self.panel4d), self.panel4d.labels) @@ -102,7 +99,7 @@ def test_sem(self): def alt(x): if len(x) < 2: return np.nan - return np.std(x, ddof=1)/np.sqrt(len(x)) + return np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) # def test_skew(self): @@ -170,12 +167,18 @@ def test_get_axis(self): def test_set_axis(self): new_labels = Index(np.arange(len(self.panel4d.labels))) - new_items = Index(np.arange(len(self.panel4d.items))) + + # TODO: unused? + # new_items = Index(np.arange(len(self.panel4d.items))) + new_major = Index(np.arange(len(self.panel4d.major_axis))) new_minor = Index(np.arange(len(self.panel4d.minor_axis))) # ensure propagate to potentially prior-cached items too - label = self.panel4d['l1'] + + # TODO: unused? + # label = self.panel4d['l1'] + self.panel4d.labels = new_labels if hasattr(self.panel4d, '_item_cache'): @@ -343,7 +346,7 @@ def test_delitem_and_pop(self): assert_panel_equal(panel4dc[0], panel4d[0]) def test_setitem(self): - ## LongPanel with one item + # LongPanel with one item # lp = self.panel.filter(['ItemA', 'ItemB']).to_frame() # self.assertRaises(Exception, self.panel.__setitem__, # 'ItemE', lp) @@ -379,23 +382,24 @@ def test_setitem_by_indexer(self): # Panel panel4dc = self.panel4d.copy() p = panel4dc.iloc[0] + def func(): self.panel4d.iloc[0] = p self.assertRaises(NotImplementedError, func) # DataFrame panel4dc = self.panel4d.copy() - df = panel4dc.iloc[0,0] + df = panel4dc.iloc[0, 0] df.iloc[:] = 1 - panel4dc.iloc[0,0] = df - self.assertTrue((panel4dc.iloc[0,0].values == 1).all()) + panel4dc.iloc[0, 0] = df + self.assertTrue((panel4dc.iloc[0, 0].values == 1).all()) # Series panel4dc = self.panel4d.copy() - s = panel4dc.iloc[0,0,:,0] + s = panel4dc.iloc[0, 0, :, 0] s.iloc[:] = 1 - panel4dc.iloc[0,0,:,0] = s - self.assertTrue((panel4dc.iloc[0,0,:,0].values == 1).all()) + panel4dc.iloc[0, 0, :, 0] = s + self.assertTrue((panel4dc.iloc[0, 0, :, 0].values == 1).all()) # scalar panel4dc = self.panel4d.copy() @@ -419,8 +423,6 @@ def test_setitem_by_indexer_mixed_type(self): self.assertTrue(panel4dc.iloc[1].values.all()) self.assertTrue((panel4dc.iloc[2].values == 'foo').all()) - - def test_comparisons(self): p1 = tm.makePanel4D() p2 = tm.makePanel4D() @@ -459,7 +461,8 @@ def test_setitem_ndarray(self): # offset=datetools.MonthEnd()) # lons_coarse = np.linspace(-177.5, 177.5, 72) # lats_coarse = np.linspace(-87.5, 87.5, 36) - # P = Panel(items=timeidx, major_axis=lons_coarse, minor_axis=lats_coarse) + # P = Panel(items=timeidx, major_axis=lons_coarse, + # minor_axis=lats_coarse) # data = np.random.randn(72*36).reshape((72,36)) # key = datetime(2009,2,28) # P[key] = data# @@ -472,7 +475,8 @@ def test_major_xs(self): idx = self.panel4d.major_axis[5] xs = self.panel4d.major_xs(idx) - assert_series_equal(xs['l1'].T['ItemA'], ref.xs(idx), check_names=False) + assert_series_equal(xs['l1'].T['ItemA'], + ref.xs(idx), check_names=False) # not contained idx = self.panel4d.major_axis[0] - bday @@ -527,11 +531,13 @@ def test_getitem_fancy_labels(self): # all 4 specified assert_panel4d_equal(panel4d.ix[labels, items, dates, cols], - panel4d.reindex(labels=labels, items=items, major=dates, minor=cols)) + panel4d.reindex(labels=labels, items=items, + major=dates, minor=cols)) # 3 specified assert_panel4d_equal(panel4d.ix[:, items, dates, cols], - panel4d.reindex(items=items, major=dates, minor=cols)) + panel4d.reindex(items=items, major=dates, + minor=cols)) # 2 specified assert_panel4d_equal(panel4d.ix[:, :, dates, cols], @@ -632,15 +638,18 @@ def test_constructor(self): # GH #8285, test when scalar data is used to construct a Panel4D # if dtype is not passed, it should be inferred - value_and_dtype = [(1, 'int64'), (3.14, 'float64'), ('foo', np.object_)] + value_and_dtype = [(1, 'int64'), (3.14, 'float64'), + ('foo', np.object_)] for (val, dtype) in value_and_dtype: - panel4d = Panel4D(val, labels=range(2), items=range(3), major_axis=range(4), minor_axis=range(5)) + panel4d = Panel4D(val, labels=range(2), items=range( + 3), major_axis=range(4), minor_axis=range(5)) vals = np.empty((2, 3, 4, 5), dtype=dtype) vals.fill(val) assert_panel4d_equal(panel4d, Panel4D(vals, dtype=dtype)) # test the case when dtype is passed - panel4d = Panel4D(1, labels=range(2), items=range(3), major_axis=range(4), minor_axis=range(5), dtype='float32') + panel4d = Panel4D(1, labels=range(2), items=range( + 3), major_axis=range(4), minor_axis=range(5), dtype='float32') vals = np.empty((2, 3, 4, 5), dtype='float32') vals.fill(1) assert_panel4d_equal(panel4d, Panel4D(vals, dtype='float32')) @@ -829,7 +838,7 @@ def test_reindex(self): # don't necessarily copy result = self.panel4d.reindex() - assert_panel4d_equal(result,self.panel4d) + assert_panel4d_equal(result, self.panel4d) self.assertFalse(result is self.panel4d) # with filling @@ -845,7 +854,7 @@ def test_reindex(self): # don't necessarily copy result = self.panel4d.reindex( major=self.panel4d.major_axis, copy=False) - assert_panel4d_equal(result,self.panel4d) + assert_panel4d_equal(result, self.panel4d) self.assertTrue(result is self.panel4d) def test_not_hashable(self): @@ -913,7 +922,8 @@ def test_fillna(self): filled = self.panel4d.fillna(0) self.assertTrue(np.isfinite(filled.values).all()) - self.assertRaises(NotImplementedError, self.panel4d.fillna, method='pad') + self.assertRaises(NotImplementedError, + self.panel4d.fillna, method='pad') def test_swapaxes(self): result = self.panel4d.swapaxes('labels', 'items') @@ -937,7 +947,7 @@ def test_swapaxes(self): # this works, but return a copy result = self.panel4d.swapaxes('items', 'items') - assert_panel4d_equal(self.panel4d,result) + assert_panel4d_equal(self.panel4d, result) self.assertNotEqual(id(self.panel4d), id(result)) def test_to_frame(self): @@ -1001,7 +1011,7 @@ def test_apply(self): def test_dtypes(self): result = self.panel4d.dtypes - expected = Series(np.dtype('float64'),index=self.panel4d.labels) + expected = Series(np.dtype('float64'), index=self.panel4d.labels) assert_series_equal(result, expected) def test_compound(self): @@ -1090,7 +1100,6 @@ def test_rename(self): def test_get_attr(self): assert_panel_equal(self.panel4d['l1'], self.panel4d.l1) - def test_from_frame_level1_unsorted(self): raise nose.SkipTest("skipping for now") @@ -1099,6 +1108,5 @@ def test_to_excel(self): if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 67d015b940885..ac497bc580585 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -1,21 +1,10 @@ # -*- coding: utf-8 -*- -from datetime import datetime -import os -import operator import nose -import numpy as np - from pandas.core import panelnd from pandas.core.panel import Panel -import pandas.core.common as com -from pandas import compat - -from pandas.util.testing import (assert_panel_equal, - assert_panel4d_equal, - assert_frame_equal, - assert_series_equal, - assert_almost_equal) + +from pandas.util.testing import assert_panel_equal import pandas.util.testing as tm @@ -36,7 +25,7 @@ def test_4d_construction(self): aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) - p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) + p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) # noqa def test_4d_construction_alt(self): @@ -50,7 +39,7 @@ def test_4d_construction_alt(self): aliases={'major': 'major_axis', 'minor': 'minor_axis'}, stat_axis=2) - p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) + p4d = Panel4D(dict(L1=tm.makePanel(), L2=tm.makePanel())) # noqa def test_4d_construction_error(self): @@ -106,6 +95,5 @@ def test_5d_construction(self): # expected = if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 2961301366188..6de589f87cfd8 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -1,10 +1,5 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=W0612,E1101 -from copy import deepcopy -from datetime import datetime, timedelta -import operator -import os - import nose from pandas import DataFrame, Series @@ -16,10 +11,9 @@ from pandas.util.testing import assert_frame_equal -from pandas.core.reshape import (melt, lreshape, get_dummies, - wide_to_long) +from pandas.core.reshape import (melt, lreshape, get_dummies, wide_to_long) import pandas.util.testing as tm -from pandas.compat import StringIO, cPickle, range, u +from pandas.compat import range, u _multiprocess_can_split_ = True @@ -34,9 +28,9 @@ def setUp(self): self.var_name = 'var' self.value_name = 'val' - self.df1 = pd.DataFrame([[ 1.067683, -1.110463, 0.20867 ], - [-1.321405, 0.368915, -1.055342], - [-0.807333, 0.08298 , -0.873361]]) + self.df1 = pd.DataFrame([[1.067683, -1.110463, 0.20867 + ], [-1.321405, 0.368915, -1.055342], + [-0.807333, 0.08298, -0.873361]]) self.df1.columns = [list('ABC'), list('abc')] self.df1.columns.names = ['CAP', 'low'] @@ -45,10 +39,12 @@ def test_default_col_names(self): self.assertEqual(result.columns.tolist(), ['variable', 'value']) result1 = melt(self.df, id_vars=['id1']) - self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value']) + self.assertEqual(result1.columns.tolist(), ['id1', 'variable', 'value' + ]) result2 = melt(self.df, id_vars=['id1', 'id2']) - self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', 'value']) + self.assertEqual(result2.columns.tolist(), ['id1', 'id2', 'variable', + 'value']) def test_value_vars(self): result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A') @@ -57,8 +53,9 @@ def test_value_vars(self): result4 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) expected4 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + 'variable': ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) @@ -70,18 +67,21 @@ def test_custom_var_name(self): self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value']) result7 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name) - self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', 'value']) + self.assertEqual(result7.columns.tolist(), ['id1', 'id2', 'var', + 'value']) - result8 = melt(self.df, id_vars=['id1', 'id2'], - value_vars='A', var_name=self.var_name) - self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', 'value']) + result8 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name) + self.assertEqual(result8.columns.tolist(), ['id1', 'id2', 'var', + 'value']) - result9 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=self.var_name) + result9 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name) expected9 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, - self.var_name: ['A']*10 + ['B']*10, - 'value': self.df['A'].tolist() + self.df['B'].tolist()}, + self.var_name: ['A'] * 10 + ['B'] * 10, + 'value': (self.df['A'].tolist() + + self.df['B'].tolist())}, columns=['id1', 'id2', self.var_name, 'value']) tm.assert_frame_equal(result9, expected9) @@ -92,45 +92,56 @@ def test_custom_value_name(self): result11 = melt(self.df, id_vars=['id1'], value_name=self.value_name) self.assertEqual(result11.columns.tolist(), ['id1', 'variable', 'val']) - result12 = melt(self.df, id_vars=['id1', 'id2'], value_name=self.value_name) - self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + result12 = melt(self.df, id_vars=['id1', 'id2'], + value_name=self.value_name) + self.assertEqual(result12.columns.tolist(), ['id1', 'id2', 'variable', + 'val']) - result13 = melt(self.df, id_vars=['id1', 'id2'], - value_vars='A', value_name=self.value_name) - self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) + result13 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', + value_name=self.value_name) + self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', + 'val']) - result14 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], value_name=self.value_name) + result14 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], + value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, - 'variable': ['A']*10 + ['B']*10, - self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', 'variable', self.value_name]) + 'variable': ['A'] * 10 + ['B'] * 10, + self.value_name: (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', 'variable', + self.value_name]) tm.assert_frame_equal(result14, expected14) def test_custom_var_and_value_name(self): - result15 = melt(self.df, var_name=self.var_name, value_name=self.value_name) + result15 = melt(self.df, var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result15.columns.tolist(), ['var', 'val']) - result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, value_name=self.value_name) + result16 = melt(self.df, id_vars=['id1'], var_name=self.var_name, + value_name=self.value_name) self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val']) result17 = melt(self.df, id_vars=['id1', 'id2'], var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val']) + self.assertEqual(result17.columns.tolist(), ['id1', 'id2', 'var', 'val' + ]) - result18 = melt(self.df, id_vars=['id1', 'id2'], - value_vars='A', var_name=self.var_name, value_name=self.value_name) - self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) + result18 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A', + var_name=self.var_name, value_name=self.value_name) + self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val' + ]) - result19 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) + result19 = melt(self.df, id_vars=['id1', 'id2'], value_vars=['A', 'B'], + var_name=self.var_name, value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, - self.var_name: ['A']*10 + ['B']*10, - self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', self.var_name, self.value_name]) + self.var_name: ['A'] * 10 + ['B'] * 10, + self.value_name: (self.df['A'].tolist() + + self.df['B'].tolist())}, + columns=['id1', 'id2', self.var_name, + self.value_name]) tm.assert_frame_equal(result19, expected19) df20 = self.df.copy() @@ -142,7 +153,7 @@ def test_col_level(self): res1 = melt(self.df1, col_level=0) res2 = melt(self.df1, col_level='CAP') self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) - self.assertEqual(res1.columns.tolist(), ['CAP', 'value']) + self.assertEqual(res2.columns.tolist(), ['CAP', 'value']) def test_multiindex(self): res = pd.melt(self.df1) @@ -154,7 +165,8 @@ class TestGetDummies(tm.TestCase): sparse = False def setUp(self): - self.df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], + self.df = DataFrame({'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) def test_basic(self): @@ -162,14 +174,21 @@ def test_basic(self): s_series = Series(s_list) s_series_index = Series(s_list, list('ABC')) - expected = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0}, - 'b': {0: 0.0, 1: 1.0, 2: 0.0}, - 'c': {0: 0.0, 1: 0.0, 2: 1.0}}) + expected = DataFrame({'a': {0: 1.0, + 1: 0.0, + 2: 0.0}, + 'b': {0: 0.0, + 1: 1.0, + 2: 0.0}, + 'c': {0: 0.0, + 1: 0.0, + 2: 1.0}}) assert_frame_equal(get_dummies(s_list, sparse=self.sparse), expected) assert_frame_equal(get_dummies(s_series, sparse=self.sparse), expected) expected.index = list('ABC') - assert_frame_equal(get_dummies(s_series_index, sparse=self.sparse), expected) + assert_frame_equal( + get_dummies(s_series_index, sparse=self.sparse), expected) def test_basic_types(self): # GH 10531 @@ -180,14 +199,16 @@ def test_basic_types(self): 'c': [2, 3, 3, 3, 2]}) if not self.sparse: - exp_df_type = DataFrame + exp_df_type = DataFrame exp_blk_type = pd.core.internals.FloatBlock else: exp_df_type = SparseDataFrame exp_blk_type = pd.core.internals.SparseBlock - self.assertEqual(type(get_dummies(s_list, sparse=self.sparse)), exp_df_type) - self.assertEqual(type(get_dummies(s_series, sparse=self.sparse)), exp_df_type) + self.assertEqual( + type(get_dummies(s_list, sparse=self.sparse)), exp_df_type) + self.assertEqual( + type(get_dummies(s_series, sparse=self.sparse)), exp_df_type) r = get_dummies(s_df, sparse=self.sparse, columns=s_df.columns) self.assertEqual(type(r), exp_df_type) @@ -197,15 +218,15 @@ def test_basic_types(self): self.assertEqual(type(r[['a_1']]._data.blocks[0]), exp_blk_type) self.assertEqual(type(r[['a_2']]._data.blocks[0]), exp_blk_type) - def test_just_na(self): just_na_list = [np.nan] just_na_series = Series(just_na_list) - just_na_series_index = Series(just_na_list, index = ['A']) + just_na_series_index = Series(just_na_list, index=['A']) res_list = get_dummies(just_na_list, sparse=self.sparse) res_series = get_dummies(just_na_series, sparse=self.sparse) - res_series_index = get_dummies(just_na_series_index, sparse=self.sparse) + res_series_index = get_dummies(just_na_series_index, + sparse=self.sparse) self.assertEqual(res_list.empty, True) self.assertEqual(res_series.empty, True) @@ -218,56 +239,79 @@ def test_just_na(self): def test_include_na(self): s = ['a', 'b', np.nan] res = get_dummies(s, sparse=self.sparse) - exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0}, - 'b': {0: 0.0, 1: 1.0, 2: 0.0}}) + exp = DataFrame({'a': {0: 1.0, + 1: 0.0, + 2: 0.0}, + 'b': {0: 0.0, + 1: 1.0, + 2: 0.0}}) assert_frame_equal(res, exp) # Sparse dataframes do not allow nan labelled columns, see #GH8822 res_na = get_dummies(s, dummy_na=True, sparse=self.sparse) - exp_na = DataFrame({nan: {0: 0.0, 1: 0.0, 2: 1.0}, - 'a': {0: 1.0, 1: 0.0, 2: 0.0}, - 'b': {0: 0.0, 1: 1.0, 2: 0.0}}).reindex_axis(['a', 'b', nan], 1) + exp_na = DataFrame({nan: {0: 0.0, + 1: 0.0, + 2: 1.0}, + 'a': {0: 1.0, + 1: 0.0, + 2: 0.0}, + 'b': {0: 0.0, + 1: 1.0, + 2: 0.0}}).reindex_axis( + ['a', 'b', nan], 1) # hack (NaN handling in assert_index_equal) exp_na.columns = res_na.columns assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse) - exp_just_na = DataFrame(Series(1.0,index=[0]),columns=[nan]) + exp_just_na = DataFrame(Series(1.0, index=[0]), columns=[nan]) tm.assert_numpy_array_equal(res_just_na.values, exp_just_na.values) - def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values + def test_unicode(self + ): # See GH 6885 - get_dummies chokes on unicode values import unicodedata e = 'e' eacute = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE') s = [e, eacute, eacute] res = get_dummies(s, prefix='letter', sparse=self.sparse) - exp = DataFrame({'letter_e': {0: 1.0, 1: 0.0, 2: 0.0}, - u('letter_%s') % eacute: {0: 0.0, 1: 1.0, 2: 1.0}}) + exp = DataFrame({'letter_e': {0: 1.0, + 1: 0.0, + 2: 0.0}, + u('letter_%s') % eacute: {0: 0.0, + 1: 1.0, + 2: 1.0}}) assert_frame_equal(res, exp) def test_dataframe_dummies_all_obj(self): df = self.df[['A', 'B']] result = get_dummies(df, sparse=self.sparse) - expected = DataFrame({'A_a': [1., 0, 1], 'A_b': [0., 1, 0], - 'B_b': [1., 1, 0], 'B_c': [0., 0, 1]}) + expected = DataFrame({'A_a': [1., 0, 1], + 'A_b': [0., 1, 0], + 'B_b': [1., 1, 0], + 'B_c': [0., 0, 1]}) assert_frame_equal(result, expected) def test_dataframe_dummies_mix_default(self): df = self.df result = get_dummies(df, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], 'A_a': [1., 0, 1], - 'A_b': [0., 1, 0], 'B_b': [1., 1, 0], + expected = DataFrame({'C': [1, 2, 3], + 'A_a': [1., 0, 1], + 'A_b': [0., 1, 0], + 'B_b': [1., 1, 0], 'B_c': [0., 0, 1]}) expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']] assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_list(self): prefixes = ['from_A', 'from_B'] - df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], + df = DataFrame({'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) result = get_dummies(df, prefix=prefixes, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], 'from_A_a': [1., 0, 1], - 'from_A_b': [0., 1, 0], 'from_B_b': [1., 1, 0], + expected = DataFrame({'C': [1, 2, 3], + 'from_A_a': [1., 0, 1], + 'from_A_b': [0., 1, 0], + 'from_B_b': [1., 1, 0], 'from_B_c': [0., 0, 1]}) expected = expected[['C', 'from_A_a', 'from_A_b', 'from_B_b', 'from_B_c']] @@ -285,17 +329,21 @@ def test_dataframe_dummies_prefix_str(self): def test_dataframe_dummies_subset(self): df = self.df - result = get_dummies(df, prefix=['from_A'], - columns=['A'], sparse=self.sparse) - expected = DataFrame({'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0], - 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) + result = get_dummies(df, prefix=['from_A'], columns=['A'], + sparse=self.sparse) + expected = DataFrame({'from_A_a': [1., 0, 1], + 'from_A_b': [0., 1, 0], + 'B': ['b', 'b', 'c'], + 'C': [1, 2, 3]}) assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_sep(self): df = self.df result = get_dummies(df, prefix_sep='..', sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], 'A..a': [1., 0, 1], - 'A..b': [0., 1, 0], 'B..b': [1., 1, 0], + expected = DataFrame({'C': [1, 2, 3], + 'A..a': [1., 0, 1], + 'A..b': [0., 1, 0], + 'B..b': [1., 1, 0], 'B..c': [0., 0, 1]}) expected = expected[['C', 'A..a', 'A..b', 'B..b', 'B..c']] assert_frame_equal(result, expected) @@ -304,7 +352,8 @@ def test_dataframe_dummies_prefix_sep(self): expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'}) assert_frame_equal(result, expected) - result = get_dummies(df, prefix_sep={'A': '..', 'B': '__'}, sparse=self.sparse) + result = get_dummies(df, prefix_sep={'A': '..', + 'B': '__'}, sparse=self.sparse) assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_bad_length(self): @@ -317,11 +366,14 @@ def test_dataframe_dummies_prefix_sep_bad_length(self): def test_dataframe_dummies_prefix_dict(self): prefixes = {'A': 'from_A', 'B': 'from_B'} - df = DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'b', 'c'], + df = DataFrame({'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c'], 'C': [1, 2, 3]}) result = get_dummies(df, prefix=prefixes, sparse=self.sparse) - expected = DataFrame({'from_A_a': [1., 0, 1], 'from_A_b': [0., 1, 0], - 'from_B_b': [1., 1, 0], 'from_B_c': [0., 0, 1], + expected = DataFrame({'from_A_a': [1., 0, 1], + 'from_A_b': [0., 1, 0], + 'from_B_b': [1., 1, 0], + 'from_B_c': [0., 0, 1], 'C': [1, 2, 3]}) assert_frame_equal(result, expected) @@ -329,11 +381,15 @@ def test_dataframe_dummies_with_na(self): df = self.df df.loc[3, :] = [np.nan, np.nan, np.nan] result = get_dummies(df, dummy_na=True, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3, np.nan], 'A_a': [1., 0, 1, 0], - 'A_b': [0., 1, 0, 0], 'A_nan': [0., 0, 0, 1], 'B_b': [1., 1, 0, 0], - 'B_c': [0., 0, 1, 0], 'B_nan': [0., 0, 0, 1]}) - expected = expected[['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', - 'B_nan']] + expected = DataFrame({'C': [1, 2, 3, np.nan], + 'A_a': [1., 0, 1, 0], + 'A_b': [0., 1, 0, 0], + 'A_nan': [0., 0, 0, 1], + 'B_b': [1., 1, 0, 0], + 'B_c': [0., 0, 1, 0], + 'B_nan': [0., 0, 0, 1]}) + expected = expected[['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan' + ]] assert_frame_equal(result, expected) result = get_dummies(df, dummy_na=False, sparse=self.sparse) @@ -344,12 +400,15 @@ def test_dataframe_dummies_with_categorical(self): df = self.df df['cat'] = pd.Categorical(['x', 'y', 'y']) result = get_dummies(df, sparse=self.sparse) - expected = DataFrame({'C': [1, 2, 3], 'A_a': [1., 0, 1], - 'A_b': [0., 1, 0], 'B_b': [1., 1, 0], - 'B_c': [0., 0, 1], 'cat_x': [1., 0, 0], + expected = DataFrame({'C': [1, 2, 3], + 'A_a': [1., 0, 1], + 'A_b': [0., 1, 0], + 'B_b': [1., 1, 0], + 'B_c': [0., 0, 1], + 'cat_x': [1., 0, 0], 'cat_y': [0., 1, 1]}) - expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', - 'cat_x', 'cat_y']] + expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c', 'cat_x', 'cat_y' + ]] assert_frame_equal(result, expected) @@ -360,14 +419,15 @@ class TestGetDummiesSparse(TestGetDummies): class TestLreshape(tm.TestCase): def test_pairs(self): - data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009'], + data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009'], 'birthwt': [1766, 3301, 1454, 3139, 4133], 'id': [101, 102, 103, 104, 105], 'sex': ['Male', 'Female', 'Female', 'Female', 'Female'], 'visitdt1': ['11jan2009', '22dec2008', '04jan2009', '29dec2008', '20jan2009'], - 'visitdt2': ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'], + 'visitdt2': + ['21jan2009', nan, '22jan2009', '31dec2008', '03feb2009'], 'visitdt3': ['05feb2009', nan, nan, '02jan2009', '15feb2009'], 'wt1': [1823, 3338, 1549, 3298, 4306], 'wt2': [2011.0, nan, 1892.0, 3338.0, 4575.0], @@ -379,49 +439,47 @@ def test_pairs(self): 'wt': ['wt%d' % i for i in range(1, 4)]} result = lreshape(df, spec) - exp_data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009', '08jan2009', - '30dec2008', '21dec2008', '11jan2009', - '08jan2009', '21dec2008', '11jan2009'], - 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, - 1454, 3139, 4133, 1766, 3139, 4133], - 'id': [101, 102, 103, 104, 105, 101, - 103, 104, 105, 101, 104, 105], + exp_data = {'birthdt': + ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '21dec2008', '11jan2009'], + 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 1454, 3139, + 4133, 1766, 3139, 4133], + 'id': [101, 102, 103, 104, 105, 101, 103, 104, 105, 101, + 104, 105], 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Male', 'Female', 'Female'], - 'visitdt': ['11jan2009', '22dec2008', '04jan2009', '29dec2008', - '20jan2009', '21jan2009', '22jan2009', '31dec2008', - '03feb2009', '05feb2009', '02jan2009', '15feb2009'], + 'visitdt': ['11jan2009', '22dec2008', '04jan2009', + '29dec2008', '20jan2009', '21jan2009', + '22jan2009', '31dec2008', '03feb2009', + '05feb2009', '02jan2009', '15feb2009'], 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, 1892.0, 3338.0, 4575.0, 2293.0, 3377.0, 4805.0]} exp = DataFrame(exp_data, columns=result.columns) tm.assert_frame_equal(result, exp) result = lreshape(df, spec, dropna=False) - exp_data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009', - '08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009', - '08jan2009', '20dec2008', '30dec2008', - '21dec2008', '11jan2009'], - 'birthwt': [1766, 3301, 1454, 3139, 4133, - 1766, 3301, 1454, 3139, 4133, - 1766, 3301, 1454, 3139, 4133], - 'id': [101, 102, 103, 104, 105, - 101, 102, 103, 104, 105, + exp_data = {'birthdt': + ['08jan2009', '20dec2008', '30dec2008', '21dec2008', + '11jan2009', '08jan2009', '20dec2008', '30dec2008', + '21dec2008', '11jan2009', '08jan2009', '20dec2008', + '30dec2008', '21dec2008', '11jan2009'], + 'birthwt': [1766, 3301, 1454, 3139, 4133, 1766, 3301, 1454, + 3139, 4133, 1766, 3301, 1454, 3139, 4133], + 'id': [101, 102, 103, 104, 105, 101, 102, 103, 104, 105, 101, 102, 103, 104, 105], 'sex': ['Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Female', 'Male', 'Female', 'Female', 'Female', 'Female'], 'visitdt': ['11jan2009', '22dec2008', '04jan2009', - '29dec2008', '20jan2009', - '21jan2009', nan, '22jan2009', - '31dec2008', '03feb2009', - '05feb2009', nan, nan, '02jan2009', '15feb2009'], - 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, - nan, 1892.0, 3338.0, 4575.0, 2293.0, nan, nan, - 3377.0, 4805.0]} + '29dec2008', '20jan2009', '21jan2009', nan, + '22jan2009', '31dec2008', '03feb2009', + '05feb2009', nan, nan, '02jan2009', + '15feb2009'], + 'wt': [1823.0, 3338.0, 1549.0, 3298.0, 4306.0, 2011.0, nan, + 1892.0, 3338.0, 4575.0, 2293.0, nan, nan, 3377.0, + 4805.0]} exp = DataFrame(exp_data, columns=result.columns) tm.assert_frame_equal(result, exp) @@ -429,22 +487,32 @@ def test_pairs(self): 'wt': ['wt%d' % i for i in range(1, 4)]} self.assertRaises(ValueError, lreshape, df, spec) + class TestWideToLong(tm.TestCase): + def test_simple(self): np.random.seed(123) x = np.random.randn(3) - df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, - "A1980" : {0 : "d", 1 : "e", 2 : "f"}, - "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, - "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, - "X" : dict(zip(range(3), x)) - }) + df = pd.DataFrame({"A1970": {0: "a", + 1: "b", + 2: "c"}, + "A1980": {0: "d", + 1: "e", + 2: "f"}, + "B1970": {0: 2.5, + 1: 1.2, + 2: .7}, + "B1980": {0: 3.2, + 1: 1.3, + 2: .1}, + "X": dict(zip( + range(3), x))}) df["id"] = df.index - exp_data = {"X" : x.tolist() + x.tolist(), - "A" : ['a', 'b', 'c', 'd', 'e', 'f'], - "B" : [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], - "year" : [1970, 1970, 1970, 1980, 1980, 1980], - "id" : [0, 1, 2, 0, 1, 2]} + exp_data = {"X": x.tolist() + x.tolist(), + "A": ['a', 'b', 'c', 'd', 'e', 'f'], + "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1], + "year": [1970, 1970, 1970, 1980, 1980, 1980], + "id": [0, 1, 2, 0, 1, 2]} exp_frame = DataFrame(exp_data) exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]] long_frame = wide_to_long(df, ["A", "B"], i="id", j="year") @@ -452,12 +520,15 @@ def test_simple(self): def test_stubs(self): # GH9204 - df = pd.DataFrame([[0,1,2,3,8],[4,5,6,7,9]]) + df = pd.DataFrame([[0, 1, 2, 3, 8], [4, 5, 6, 7, 9]]) df.columns = ['id', 'inc1', 'inc2', 'edu1', 'edu2'] stubs = ['inc', 'edu'] - df_long = pd.wide_to_long(df, stubs, i='id', j='age') - self.assertEqual(stubs,['inc', 'edu']) + # TODO: unused? + df_long = pd.wide_to_long(df, stubs, i='id', j='age') # noqa + + self.assertEqual(stubs, ['inc', 'edu']) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 4342417db193b..6be6c53cbb201 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -3,11 +3,11 @@ import pandas.util.testing as tm from pandas import read_csv import os -import nose with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): import pandas.tools.rplot as rplot + def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) return pth @@ -37,6 +37,7 @@ class TestUtilityFunctions(tm.TestCase): """ Tests for RPlot utility functions. """ + def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') self.data = read_csv(path, sep=',') @@ -62,8 +63,8 @@ def test_make_aes2(self): alpha=rplot.ScaleShape('test')) def test_dictionary_union(self): - dict1 = {1 : 1, 2 : 2, 3 : 3} - dict2 = {1 : 1, 2 : 2, 4 : 4} + dict1 = {1: 1, 2: 2, 3: 3} + dict2 = {1: 1, 2: 2, 4: 4} union = rplot.dictionary_union(dict1, dict2) self.assertEqual(len(union), 4) keys = list(union.keys()) @@ -103,6 +104,7 @@ def test_sequence_layers(self): @tm.mplskip class TestTrellis(tm.TestCase): + def setUp(self): path = os.path.join(curpath(), 'data/tips.csv') self.data = read_csv(path, sep=',') @@ -151,6 +153,7 @@ def test_trellis_cols_rows(self): @tm.mplskip class TestScaleGradient(tm.TestCase): + def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') self.data = read_csv(path, sep=',') @@ -160,7 +163,7 @@ def setUp(self): def test_gradient(self): for index in range(len(self.data)): - row = self.data.iloc[index] + # row = self.data.iloc[index] r, g, b = self.gradient(self.data, index) r1, g1, b1 = self.gradient.colour1 r2, g2, b2 = self.gradient.colour2 @@ -171,10 +174,12 @@ def test_gradient(self): @tm.mplskip class TestScaleGradient2(tm.TestCase): + def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') self.data = read_csv(path, sep=',') - self.gradient = rplot.ScaleGradient2("SepalLength", colour1=(0.2, 0.3, 0.4), colour2=(0.8, 0.7, 0.6), colour3=(0.5, 0.5, 0.5)) + self.gradient = rplot.ScaleGradient2("SepalLength", colour1=( + 0.2, 0.3, 0.4), colour2=(0.8, 0.7, 0.6), colour3=(0.5, 0.5, 0.5)) def test_gradient2(self): for index in range(len(self.data)): @@ -199,6 +204,7 @@ def test_gradient2(self): @tm.mplskip class TestScaleRandomColour(tm.TestCase): + def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') self.data = read_csv(path, sep=',') @@ -219,6 +225,7 @@ def test_random_colour(self): @tm.mplskip class TestScaleConstant(tm.TestCase): + def test_scale_constant(self): scale = rplot.ScaleConstant(1.0) self.assertEqual(scale(None, None), 1.0) @@ -227,6 +234,7 @@ def test_scale_constant(self): class TestScaleSize(tm.TestCase): + def setUp(self): path = os.path.join(curpath(), 'data/iris.csv') self.data = read_csv(path, sep=',') @@ -236,7 +244,8 @@ def setUp(self): def test_scale_size(self): for index in range(len(self.data)): marker = self.scale1(self.data, index) - self.assertTrue(marker in ['o', '+', 's', '*', '^', '<', '>', 'v', '|', 'x']) + self.assertTrue( + marker in ['o', '+', 's', '*', '^', '<', '>', 'v', '|', 'x']) def test_scale_overflow(self): def f(): @@ -248,6 +257,7 @@ def f(): @tm.mplskip class TestRPlot(tm.TestCase): + def test_rplot1(self): import matplotlib.pyplot as plt path = os.path.join(curpath(), 'data/tips.csv') @@ -255,7 +265,8 @@ def test_rplot1(self): self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot.add(rplot.TrellisGrid(['sex', 'smoker'])) - self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'))) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour( + 'day'), shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -266,7 +277,8 @@ def test_rplot2(self): self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot.add(rplot.TrellisGrid(['.', 'smoker'])) - self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'))) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour( + 'day'), shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -277,7 +289,8 @@ def test_rplot3(self): self.data = read_csv(path, sep=',') self.plot = rplot.RPlot(self.data, x='tip', y='total_bill') self.plot.add(rplot.TrellisGrid(['sex', '.'])) - self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour('day'), shape=rplot.ScaleShape('size'))) + self.plot.add(rplot.GeomPoint(colour=rplot.ScaleRandomColour( + 'day'), shape=rplot.ScaleShape('size'))) self.fig = plt.gcf() self.plot.render(self.fig) @@ -287,8 +300,12 @@ def test_rplot_iris(self): plt.figure() self.data = read_csv(path, sep=',') plot = rplot.RPlot(self.data, x='SepalLength', y='SepalWidth') - plot.add(rplot.GeomPoint(colour=rplot.ScaleGradient('PetalLength', colour1=(0.0, 1.0, 0.5), colour2=(1.0, 0.0, 0.5)), - size=rplot.ScaleSize('PetalWidth', min_size=10.0, max_size=200.0), + plot.add(rplot.GeomPoint( + colour=rplot.ScaleGradient('PetalLength', + colour1=(0.0, 1.0, 0.5), + colour2=(1.0, 0.0, 0.5)), + size=rplot.ScaleSize('PetalWidth', min_size=10.0, + max_size=200.0), shape=rplot.ScaleShape('Name'))) self.fig = plt.gcf() plot.render(self.fig) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index a2b1a84e78f22..4045825578aff 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1,7 +1,6 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import re import sys from datetime import datetime, timedelta import operator @@ -9,7 +8,6 @@ from inspect import getargspec from itertools import product, starmap from distutils.version import LooseVersion -import warnings import random import nose @@ -36,15 +34,12 @@ from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict, long from pandas import compat -from pandas.util.testing import (assert_series_equal, - assert_almost_equal, - assert_frame_equal, - assert_index_equal, +from pandas.util.testing import (assert_series_equal, assert_almost_equal, + assert_frame_equal, assert_index_equal, ensure_clean) import pandas.util.testing as tm - -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Series test cases JOIN_TYPES = ['inner', 'outer', 'left', 'right'] @@ -108,20 +103,21 @@ def get_expected(s, name): result = result.astype('int64') elif not com.is_list_like(result): return result - return Series(result,index=s.index) + return Series(result, index=s.index) def compare(s, name): - a = getattr(s.dt,prop) - b = get_expected(s,prop) + a = getattr(s.dt, prop) + b = get_expected(s, prop) if not (com.is_list_like(a) and com.is_list_like(b)): - self.assertEqual(a,b) + self.assertEqual(a, b) else: - tm.assert_series_equal(a,b) + tm.assert_series_equal(a, b) # datetimeindex - for s in [Series(date_range('20130101',periods=5)), - Series(date_range('20130101',periods=5,freq='s')), - Series(date_range('20130101 00:00:00',periods=5,freq='ms'))]: + for s in [Series(date_range('20130101', periods=5)), + Series(date_range('20130101', periods=5, freq='s')), + Series(date_range('20130101 00:00:00', periods=5, freq='ms')) + ]: for prop in ok_for_dt: # we test freq below if prop != 'freq': @@ -131,64 +127,65 @@ def compare(s, name): getattr(s.dt, prop) result = s.dt.to_pydatetime() - self.assertIsInstance(result,np.ndarray) + self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_localize('US/Eastern') - expected = Series(DatetimeIndex(s.values).tz_localize('US/Eastern'),index=s.index) + expected = Series( + DatetimeIndex(s.values).tz_localize('US/Eastern'), + index=s.index) tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'US/Eastern') freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) + self.assertEqual(freq_result, DatetimeIndex(s.values, + freq='infer').freq) # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') - expected = Series(DatetimeIndex(s.values).tz_localize('UTC').tz_convert('US/Eastern'),index=s.index) + expected = Series( + DatetimeIndex(s.values).tz_localize('UTC').tz_convert( + 'US/Eastern'), index=s.index) tm.assert_series_equal(result, expected) # round - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00'])) + s = Series(pd.to_datetime( + ['2012-01-01 13:00:00', '2012-01-01 12:01:00', + '2012-01-01 08:00:00'])) result = s.dt.round('D') - expected = Series(pd.to_datetime(['2012-01-02', - '2012-01-02', + expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', '2012-01-01'])) tm.assert_series_equal(result, expected) # round with tz - result = s.dt.tz_localize('UTC').dt.tz_convert( - 'US/Eastern').dt.round('D') - expected = Series(pd.to_datetime(['2012-01-01', - '2012-01-01', - '2012-01-01'] - ).tz_localize('US/Eastern')) + result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round( + 'D') + expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01', + '2012-01-01']).tz_localize( + 'US/Eastern')) tm.assert_series_equal(result, expected) # floor - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00'])) + s = Series(pd.to_datetime( + ['2012-01-01 13:00:00', '2012-01-01 12:01:00', + '2012-01-01 08:00:00'])) result = s.dt.floor('D') - expected = Series(pd.to_datetime(['2012-01-01', - '2012-01-01', + expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01', '2012-01-01'])) tm.assert_series_equal(result, expected) # ceil - s = Series(pd.to_datetime(['2012-01-01 13:00:00', - '2012-01-01 12:01:00', - '2012-01-01 08:00:00'])) + s = Series(pd.to_datetime( + ['2012-01-01 13:00:00', '2012-01-01 12:01:00', + '2012-01-01 08:00:00'])) result = s.dt.ceil('D') - expected = Series(pd.to_datetime(['2012-01-02', - '2012-01-02', + expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02', '2012-01-02'])) tm.assert_series_equal(result, expected) # datetimeindex with tz - s = Series(date_range('20130101',periods=5,tz='US/Eastern')) + s = Series(date_range('20130101', periods=5, tz='US/Eastern')) for prop in ok_for_dt: # we test freq below @@ -196,25 +193,28 @@ def compare(s, name): compare(s, prop) for prop in ok_for_dt_methods: - getattr(s.dt,prop) + getattr(s.dt, prop) result = s.dt.to_pydatetime() - self.assertIsInstance(result,np.ndarray) + self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_convert('CET') - expected = Series(s._values.tz_convert('CET'),index=s.index) + expected = Series(s._values.tz_convert('CET'), index=s.index) tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'CET') freq_result = s.dt.freq - self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) + self.assertEqual(freq_result, DatetimeIndex(s.values, + freq='infer').freq) # timedeltaindex - for s in [Series(timedelta_range('1 day',periods=5),index=list('abcde')), - Series(timedelta_range('1 day 01:23:45',periods=5,freq='s')), - Series(timedelta_range('2 days 01:23:45.012345',periods=5,freq='ms'))]: + for s in [Series( + timedelta_range('1 day', periods=5), index=list('abcde')), + Series(timedelta_range('1 day 01:23:45', periods=5, freq='s')), + Series(timedelta_range('2 days 01:23:45.012345', periods=5, + freq='ms'))]: for prop in ok_for_td: # we test freq below if prop != 'freq': @@ -224,30 +224,38 @@ def compare(s, name): getattr(s.dt, prop) result = s.dt.components - self.assertIsInstance(result,DataFrame) - tm.assert_index_equal(result.index,s.index) + self.assertIsInstance(result, DataFrame) + tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() - self.assertIsInstance(result,np.ndarray) + self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.total_seconds() - self.assertIsInstance(result,pd.Series) + self.assertIsInstance(result, pd.Series) self.assertTrue(result.dtype == 'float64') freq_result = s.dt.freq - self.assertEqual(freq_result, TimedeltaIndex(s.values, freq='infer').freq) + self.assertEqual(freq_result, TimedeltaIndex(s.values, + freq='infer').freq) # both - index = date_range('20130101',periods=3,freq='D') - s = Series(date_range('20140204',periods=3,freq='s'),index=index) - tm.assert_series_equal(s.dt.year,Series(np.array([2014,2014,2014],dtype='int64'),index=index)) - tm.assert_series_equal(s.dt.month,Series(np.array([2,2,2],dtype='int64'),index=index)) - tm.assert_series_equal(s.dt.second,Series(np.array([0,1,2],dtype='int64'),index=index)) - tm.assert_series_equal(s.dt.normalize(), pd.Series([s[0]] * 3, index=index)) + index = date_range('20130101', periods=3, freq='D') + s = Series(date_range('20140204', periods=3, freq='s'), index=index) + tm.assert_series_equal(s.dt.year, Series( + np.array( + [2014, 2014, 2014], dtype='int64'), index=index)) + tm.assert_series_equal(s.dt.month, Series( + np.array( + [2, 2, 2], dtype='int64'), index=index)) + tm.assert_series_equal(s.dt.second, Series( + np.array( + [0, 1, 2], dtype='int64'), index=index)) + tm.assert_series_equal(s.dt.normalize(), pd.Series( + [s[0]] * 3, index=index)) # periodindex - for s in [Series(period_range('20130101',periods=5,freq='D'))]: + for s in [Series(period_range('20130101', periods=5, freq='D'))]: for prop in ok_for_period: # we test freq below if prop != 'freq': @@ -261,87 +269,103 @@ def compare(s, name): # test limited display api def get_dir(s): - results = [ r for r in s.dt.__dir__() if not r.startswith('_') ] + results = [r for r in s.dt.__dir__() if not r.startswith('_')] return list(sorted(set(results))) - s = Series(date_range('20130101',periods=5,freq='D')) + s = Series(date_range('20130101', periods=5, freq='D')) results = get_dir(s) - tm.assert_almost_equal(results,list(sorted(set(ok_for_dt + ok_for_dt_methods)))) + tm.assert_almost_equal( + results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) - s = Series(period_range('20130101',periods=5,freq='D').asobject) + s = Series(period_range('20130101', periods=5, freq='D').asobject) results = get_dir(s) - tm.assert_almost_equal(results, list(sorted(set(ok_for_period + ok_for_period_methods)))) + tm.assert_almost_equal( + results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T')) s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago') results = get_dir(s) - tm.assert_almost_equal(results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) - expected = Series(pd.date_range('2015-01-01', - '2016-01-01', - freq='T', - tz='UTC').tz_convert('America/Chicago')) + tm.assert_almost_equal( + results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) + expected = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T', + tz='UTC').tz_convert( + 'America/Chicago')) tm.assert_series_equal(s, expected) # no setting allowed - s = Series(date_range('20130101',periods=5,freq='D')) + s = Series(date_range('20130101', periods=5, freq='D')) with tm.assertRaisesRegexp(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy - with pd.option_context('chained_assignment','raise'): + with pd.option_context('chained_assignment', 'raise'): + def f(): s.dt.hour[0] = 5 + self.assertRaises(com.SettingWithCopyError, f) def test_dt_accessor_no_new_attributes(self): # https://github.com/pydata/pandas/issues/10673 - s = Series(date_range('20130101',periods=5,freq='D')) - with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"): + s = Series(date_range('20130101', periods=5, freq='D')) + with tm.assertRaisesRegexp(AttributeError, + "You cannot add any new attribute"): s.dt.xlabel = "a" def test_strftime(self): # GH 10086 s = Series(date_range('20130101', periods=5)) result = s.dt.strftime('%Y/%m/%d') - expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', + '2013/01/04', '2013/01/05']) tm.assert_series_equal(result, expected) s = Series(date_range('2015-02-03 11:22:33.4567', periods=5)) result = s.dt.strftime('%Y/%m/%d %H-%M-%S') - expected = Series(['2015/02/03 11-22-33', '2015/02/04 11-22-33', '2015/02/05 11-22-33', - '2015/02/06 11-22-33', '2015/02/07 11-22-33']) + expected = Series(['2015/02/03 11-22-33', '2015/02/04 11-22-33', + '2015/02/05 11-22-33', '2015/02/06 11-22-33', + '2015/02/07 11-22-33']) tm.assert_series_equal(result, expected) s = Series(period_range('20130101', periods=5)) result = s.dt.strftime('%Y/%m/%d') - expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + expected = Series(['2013/01/01', '2013/01/02', '2013/01/03', + '2013/01/04', '2013/01/05']) tm.assert_series_equal(result, expected) - s = Series(period_range('2015-02-03 11:22:33.4567', periods=5, freq='s')) + s = Series(period_range( + '2015-02-03 11:22:33.4567', periods=5, freq='s')) result = s.dt.strftime('%Y/%m/%d %H-%M-%S') - expected = Series(['2015/02/03 11-22-33', '2015/02/03 11-22-34', '2015/02/03 11-22-35', - '2015/02/03 11-22-36', '2015/02/03 11-22-37']) + expected = Series(['2015/02/03 11-22-33', '2015/02/03 11-22-34', + '2015/02/03 11-22-35', '2015/02/03 11-22-36', + '2015/02/03 11-22-37']) tm.assert_series_equal(result, expected) s = Series(date_range('20130101', periods=5)) s.iloc[0] = pd.NaT result = s.dt.strftime('%Y/%m/%d') - expected = Series(['NaT', '2013/01/02', '2013/01/03', '2013/01/04', '2013/01/05']) + expected = Series(['NaT', '2013/01/02', '2013/01/03', '2013/01/04', + '2013/01/05']) tm.assert_series_equal(result, expected) datetime_index = date_range('20150301', periods=5) result = datetime_index.strftime("%Y/%m/%d") - expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype=object) + expected = np.array( + ['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', + '2015/03/05'], dtype=object) self.assert_numpy_array_equal(result, expected) period_index = period_range('20150301', periods=5) result = period_index.strftime("%Y/%m/%d") - expected = np.array(['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', '2015/03/05'], dtype=object) + expected = np.array( + ['2015/03/01', '2015/03/02', '2015/03/03', '2015/03/04', + '2015/03/05'], dtype=object) self.assert_numpy_array_equal(result, expected) - s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) + s = Series([datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, + 32, 1)]) result = s.dt.strftime('%Y-%m-%d %H:%M:%S') expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"]) tm.assert_series_equal(result, expected) @@ -353,8 +377,9 @@ def test_strftime(self): s = Series(period_range('20130101', periods=4, freq='L')) result = s.dt.strftime('%Y/%m/%d %H:%M:%S.%l') - expected = Series(["2013/01/01 00:00:00.000", "2013/01/01 00:00:00.001", - "2013/01/01 00:00:00.002", "2013/01/01 00:00:00.003"]) + expected = Series( + ["2013/01/01 00:00:00.000", "2013/01/01 00:00:00.001", + "2013/01/01 00:00:00.002", "2013/01/01 00:00:00.003"]) tm.assert_series_equal(result, expected) def test_valid_dt_with_missing_values(self): @@ -362,21 +387,25 @@ def test_valid_dt_with_missing_values(self): from datetime import date, time # GH 8689 - s = Series(date_range('20130101',periods=5,freq='D')) + s = Series(date_range('20130101', periods=5, freq='D')) s.iloc[2] = pd.NaT - for attr in ['microsecond','nanosecond','second','minute','hour','day']: - expected = getattr(s.dt,attr).copy() + for attr in ['microsecond', 'nanosecond', 'second', 'minute', 'hour', + 'day']: + expected = getattr(s.dt, attr).copy() expected.iloc[2] = np.nan - result = getattr(s.dt,attr) + result = getattr(s.dt, attr) tm.assert_series_equal(result, expected) result = s.dt.date - expected = Series([date(2013,1,1),date(2013,1,2),np.nan,date(2013,1,4),date(2013,1,5)],dtype='object') + expected = Series( + [date(2013, 1, 1), date(2013, 1, 2), np.nan, date(2013, 1, 4), + date(2013, 1, 5)], dtype='object') tm.assert_series_equal(result, expected) result = s.dt.time - expected = Series([time(0),time(0),np.nan,time(0),time(0)],dtype='object') + expected = Series( + [time(0), time(0), np.nan, time(0), time(0)], dtype='object') tm.assert_series_equal(result, expected) def test_dt_accessor_api(self): @@ -388,8 +417,7 @@ def test_dt_accessor_api(self): s = Series(date_range('2000-01-01', periods=3)) self.assertIsInstance(s.dt, DatetimeProperties) - for s in [Series(np.arange(5)), - Series(list('abcde')), + for s in [Series(np.arange(5)), Series(list('abcde')), Series(np.random.randn(5))]: with tm.assertRaisesRegexp(AttributeError, "only use .dt accessor"): @@ -410,19 +438,18 @@ def test_tab_completion(self): self.assertTrue('str' not in dir(s)) self.assertTrue('cat' not in dir(s)) - # similiarly for .cat, but with the twist that str and dt should be there - # if the categories are of that type - # first cat and str + # similiarly for .cat, but with the twist that str and dt should be + # there if the categories are of that type first cat and str s = Series(list('abbcd'), dtype="category") self.assertTrue('cat' in dir(s)) - self.assertTrue('str' in dir(s)) # as it is a string categorical + self.assertTrue('str' in dir(s)) # as it is a string categorical self.assertTrue('dt' not in dir(s)) # similar to cat and str s = Series(date_range('1/1/2015', periods=5)).astype("category") self.assertTrue('cat' in dir(s)) self.assertTrue('str' not in dir(s)) - self.assertTrue('dt' in dir(s)) # as it is a datetime categorical + self.assertTrue('dt' in dir(s)) # as it is a datetime categorical def test_binop_maybe_preserve_name(self): # names match, preserve @@ -477,38 +504,39 @@ def test_combine_first_dt64(self): def test_get(self): # GH 6383 - s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, - 45, 51, 39, 55, 43, 54, 52, 51, 54])) + s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, + 51, 39, 55, 43, 54, 52, 51, 54])) result = s.get(25, 0) expected = 0 - self.assertEqual(result,expected) + self.assertEqual(result, expected) s = Series(np.array([43, 48, 60, 48, 50, 51, 50, 45, 57, 48, 56, 45, 51, 39, 55, 43, 54, 52, 51, 54]), - index=pd.Float64Index([25.0, 36.0, 49.0, 64.0, 81.0, 100.0, - 121.0, 144.0, 169.0, 196.0, 1225.0, - 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, - 1681.0, 1764.0, 1849.0, 1936.0], - dtype='object')) + index=pd.Float64Index( + [25.0, 36.0, 49.0, 64.0, 81.0, 100.0, + 121.0, 144.0, 169.0, 196.0, 1225.0, + 1296.0, 1369.0, 1444.0, 1521.0, 1600.0, + 1681.0, 1764.0, 1849.0, 1936.0], + dtype='object')) result = s.get(25, 0) expected = 43 - self.assertEqual(result,expected) + self.assertEqual(result, expected) # GH 7407 # with a boolean accessor - df = pd.DataFrame({'i':[0]*3, 'b':[False]*3}) + df = pd.DataFrame({'i': [0] * 3, 'b': [False] * 3}) vc = df.i.value_counts() - result = vc.get(99,default='Missing') - self.assertEqual(result,'Missing') + result = vc.get(99, default='Missing') + self.assertEqual(result, 'Missing') vc = df.b.value_counts() - result = vc.get(False,default='Missing') - self.assertEqual(result,3) + result = vc.get(False, default='Missing') + self.assertEqual(result, 3) - result = vc.get(True,default='Missing') - self.assertEqual(result,'Missing') + result = vc.get(True, default='Missing') + self.assertEqual(result, 'Missing') def test_delitem(self): @@ -517,36 +545,42 @@ def test_delitem(self): s = Series(lrange(5)) del s[0] - expected = Series(lrange(1,5),index=lrange(1,5)) + expected = Series(lrange(1, 5), index=lrange(1, 5)) assert_series_equal(s, expected) del s[1] - expected = Series(lrange(2,5),index=lrange(2,5)) + expected = Series(lrange(2, 5), index=lrange(2, 5)) assert_series_equal(s, expected) # empty s = Series() + def f(): del s[0] + self.assertRaises(KeyError, f) # only 1 left, del, add, del s = Series(1) del s[0] - assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='int64'))) + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='int64'))) s[0] = 1 assert_series_equal(s, Series(1)) del s[0] - assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='int64'))) + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='int64'))) # Index(dtype=object) s = Series(1, index=['a']) del s['a'] - assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='object'))) + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='object'))) s['a'] = 1 assert_series_equal(s, Series(1, index=['a'])) del s['a'] - assert_series_equal(s, Series(dtype='int64', index=Index([], dtype='object'))) + assert_series_equal(s, Series(dtype='int64', index=Index( + [], dtype='object'))) def test_getitem_preserve_name(self): result = self.ts[self.ts > 0] @@ -576,30 +610,24 @@ def test_getitem_negative_out_of_bounds(self): self.assertRaises(IndexError, s.__setitem__, -11, 'foo') def test_multilevel_name_print(self): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) s = Series(lrange(0, len(index)), index=index, name='sth') - expected = ["first second", - "foo one 0", - " two 1", - " three 2", - "bar one 3", - " two 4", - "baz two 5", - " three 6", - "qux one 7", - " two 8", - " three 9", - "Name: sth, dtype: int64"] + expected = ["first second", "foo one 0", + " two 1", " three 2", + "bar one 3", " two 4", + "baz two 5", " three 6", + "qux one 7", " two 8", + " three 9", "Name: sth, dtype: int64"] expected = "\n".join(expected) self.assertEqual(repr(s), expected) def test_multilevel_preserve_name(self): - index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], - ['one', 'two', 'three']], + index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', + 'three']], labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) @@ -695,25 +723,25 @@ def test_nansum_buglet(self): def test_overflow(self): # GH 6915 # overflowing on the smaller int dtypes - for dtype in ['int32','int64']: - v = np.arange(5000000,dtype=dtype) + for dtype in ['int32', 'int64']: + v = np.arange(5000000, dtype=dtype) s = Series(v) # no bottleneck result = s.sum(skipna=False) - self.assertEqual(int(result),v.sum(dtype='int64')) + self.assertEqual(int(result), v.sum(dtype='int64')) result = s.min(skipna=False) - self.assertEqual(int(result),0) + self.assertEqual(int(result), 0) result = s.max(skipna=False) - self.assertEqual(int(result),v[-1]) + self.assertEqual(int(result), v[-1]) # use bottleneck if available result = s.sum() - self.assertEqual(int(result),v.sum(dtype='int64')) + self.assertEqual(int(result), v.sum(dtype='int64')) result = s.min() - self.assertEqual(int(result),0) + self.assertEqual(int(result), 0) result = s.max() - self.assertEqual(int(result),v[-1]) + self.assertEqual(int(result), v[-1]) for dtype in ['float32', 'float64']: v = np.arange(5000000, dtype=dtype) @@ -735,18 +763,19 @@ def test_overflow(self): result = s.max() self.assertTrue(np.allclose(float(result), v[-1])) + class SafeForSparse(object): pass + _ts = tm.makeTimeSeries() + class TestSeries(tm.TestCase, CheckNameIntegration): _multiprocess_can_split_ = True def setUp(self): - import warnings - self.ts = _ts.copy() self.ts.name = 'ts' @@ -769,11 +798,10 @@ def test_scalar_conversion(self): self.assertEqual(int(Series([1.])), 1) self.assertEqual(long(Series([1.])), 1) - def test_astype(self): - s = Series(np.random.randn(5),name='foo') + s = Series(np.random.randn(5), name='foo') - for dtype in ['float32','float64','int64','int32']: + for dtype in ['float32', 'float64', 'int64', 'int32']: astyped = s.astype(dtype) self.assertEqual(astyped.dtype, dtype) self.assertEqual(astyped.name, s.name) @@ -782,7 +810,7 @@ def test_TimeSeries_deprecation(self): # deprecation TimeSeries, #10890 with tm.assert_produces_warning(FutureWarning): - pd.TimeSeries(1,index=date_range('20130101',periods=3)) + pd.TimeSeries(1, index=date_range('20130101', periods=3)) def test_constructor(self): # Recognize TimeSeries @@ -845,8 +873,8 @@ def test_constructor_series(self): def test_constructor_iterator(self): - expected = Series(list(range(10)),dtype='int64') - result = Series(range(10),dtype='int64') + expected = Series(list(range(10)), dtype='int64') + result = Series(range(10), dtype='int64') assert_series_equal(result, expected) def test_constructor_generator(self): @@ -875,12 +903,13 @@ def test_constructor_map(self): assert_series_equal(result, exp) def test_constructor_categorical(self): - cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'], fastpath=True) + cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'], + fastpath=True) res = Series(cat) self.assertTrue(res.values.equals(cat)) def test_constructor_maskedarray(self): - data = ma.masked_all((3,), dtype=float) + data = ma.masked_all((3, ), dtype=float) result = Series(data) expected = Series([nan, nan, nan]) assert_series_equal(result, expected) @@ -897,7 +926,7 @@ def test_constructor_maskedarray(self): expected = Series([0.0, 1.0, 2.0], index=index) assert_series_equal(result, expected) - data = ma.masked_all((3,), dtype=int) + data = ma.masked_all((3, ), dtype=int) result = Series(data) expected = Series([nan, nan, nan], dtype=float) assert_series_equal(result, expected) @@ -914,7 +943,7 @@ def test_constructor_maskedarray(self): expected = Series([0, 1, 2], index=index, dtype=int) assert_series_equal(result, expected) - data = ma.masked_all((3,), dtype=bool) + data = ma.masked_all((3, ), dtype=bool) result = Series(data) expected = Series([nan, nan, nan], dtype=object) assert_series_equal(result, expected) @@ -932,7 +961,7 @@ def test_constructor_maskedarray(self): assert_series_equal(result, expected) from pandas import tslib - data = ma.masked_all((3,), dtype='M8[ns]') + data = ma.masked_all((3, ), dtype='M8[ns]') result = Series(data) expected = Series([tslib.iNaT, tslib.iNaT, tslib.iNaT], dtype='M8[ns]') assert_series_equal(result, expected) @@ -979,7 +1008,7 @@ def test_constructor_pass_none(self): # inference on the index s = Series(index=np.array([None])) expected = Series(index=Index([None])) - assert_series_equal(s,expected) + assert_series_equal(s, expected) def test_constructor_cast(self): self.assertRaises(ValueError, Series, ['a', 'b', 'c'], dtype=float) @@ -996,19 +1025,23 @@ def test_constructor_dtype_nocast(self): def test_constructor_datelike_coercion(self): # GH 9477 - # incorrectly infering on dateimelike looking when object dtype is specified - s = Series([Timestamp('20130101'),'NOV'],dtype=object) - self.assertEqual(s.iloc[0],Timestamp('20130101')) - self.assertEqual(s.iloc[1],'NOV') + # incorrectly infering on dateimelike looking when object dtype is + # specified + s = Series([Timestamp('20130101'), 'NOV'], dtype=object) + self.assertEqual(s.iloc[0], Timestamp('20130101')) + self.assertEqual(s.iloc[1], 'NOV') self.assertTrue(s.dtype == object) - # the dtype was being reset on the slicing and re-inferred to datetime even - # thought the blocks are mixed + # the dtype was being reset on the slicing and re-inferred to datetime + # even thought the blocks are mixed belly = '216 3T19'.split() wing1 = '2T15 4H19'.split() wing2 = '416 4T20'.split() mat = pd.to_datetime('2016-01-22 2019-09-07'.split()) - df = pd.DataFrame({'wing1':wing1, 'wing2':wing2, 'mat':mat}, index=belly) + df = pd.DataFrame( + {'wing1': wing1, + 'wing2': wing2, + 'mat': mat}, index=belly) result = df.loc['3T19'] self.assertTrue(result.dtype == object) @@ -1042,7 +1075,7 @@ def test_constructor_dtype_datetime64(self): np.datetime64(datetime(2013, 1, 1)), np.datetime64(datetime(2013, 1, 2)), np.datetime64(datetime(2013, 1, 3)), - ] + ] s = Series(dates) self.assertEqual(s.dtype, 'M8[ns]') @@ -1057,18 +1090,18 @@ def test_constructor_dtype_datetime64(self): # GH3414 related self.assertRaises(TypeError, lambda x: Series( Series(dates).astype('int') / 1000000, dtype='M8[ms]')) - self.assertRaises( - TypeError, lambda x: Series(dates, dtype='datetime64')) + self.assertRaises(TypeError, + lambda x: Series(dates, dtype='datetime64')) # invalid dates can be help as object - result = Series([datetime(2,1,1)]) - self.assertEqual(result[0], datetime(2,1,1,0,0)) + result = Series([datetime(2, 1, 1)]) + self.assertEqual(result[0], datetime(2, 1, 1, 0, 0)) - result = Series([datetime(3000,1,1)]) - self.assertEqual(result[0], datetime(3000,1,1,0,0)) + result = Series([datetime(3000, 1, 1)]) + self.assertEqual(result[0], datetime(3000, 1, 1, 0, 0)) # don't mix types - result = Series([ Timestamp('20130101'), 1],index=['a','b']) + result = Series([Timestamp('20130101'), 1], index=['a', 'b']) self.assertEqual(result['a'], Timestamp('20130101')) self.assertEqual(result['b'], 1) @@ -1081,32 +1114,32 @@ def test_constructor_dtype_datetime64(self): for dtype in ['s', 'D', 'ms', 'us', 'ns']: values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) result = Series(values1, dates) - assert_series_equal(result,expected) + assert_series_equal(result, expected) # leave datetime.date alone dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) series1 = Series(dates2, dates) - self.assert_numpy_array_equal(series1.values,dates2) - self.assertEqual(series1.dtype,object) + self.assert_numpy_array_equal(series1.values, dates2) + self.assertEqual(series1.dtype, object) # these will correctly infer a datetime s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype,'datetime64[ns]') + self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype,'datetime64[ns]') + self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype,'datetime64[ns]') + self.assertEqual(s.dtype, 'datetime64[ns]') s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001']) - self.assertEqual(s.dtype,'datetime64[ns]') + self.assertEqual(s.dtype, 'datetime64[ns]') # tz-aware (UTC and other tz's) # GH 8411 - dr = date_range('20130101',periods=3) + dr = date_range('20130101', periods=3) self.assertTrue(Series(dr).iloc[0].tz is None) - dr = date_range('20130101',periods=3,tz='UTC') + dr = date_range('20130101', periods=3, tz='UTC') self.assertTrue(str(Series(dr).iloc[0].tz) == 'UTC') - dr = date_range('20130101',periods=3,tz='US/Eastern') + dr = date_range('20130101', periods=3, tz='US/Eastern') self.assertTrue(str(Series(dr).iloc[0].tz) == 'US/Eastern') # non-convertible @@ -1132,7 +1165,7 @@ def test_constructor_with_datetime_tz(self): # 8260 # support datetime64 with tz - dr = date_range('20130101',periods=3,tz='US/Eastern') + dr = date_range('20130101', periods=3, tz='US/Eastern') s = Series(dr) self.assertTrue(s.dtype.name == 'datetime64[ns, US/Eastern]') self.assertTrue(s.dtype == 'datetime64[ns, US/Eastern]') @@ -1143,23 +1176,26 @@ def test_constructor_with_datetime_tz(self): result = s.values self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == 'datetime64[ns]') - self.assertTrue(dr.equals(pd.DatetimeIndex(result).tz_localize('UTC').tz_convert(tz=s.dt.tz))) + self.assertTrue(dr.equals(pd.DatetimeIndex(result).tz_localize( + 'UTC').tz_convert(tz=s.dt.tz))) # indexing result = s.iloc[0] - self.assertEqual(result,Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', offset='D')) + self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', offset='D')) result = s[0] - self.assertEqual(result,Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', offset='D')) + self.assertEqual(result, Timestamp('2013-01-01 00:00:00-0500', + tz='US/Eastern', offset='D')) - result = s[Series([True,True,False],index=s.index)] - assert_series_equal(result,s[0:2]) + result = s[Series([True, True, False], index=s.index)] + assert_series_equal(result, s[0:2]) result = s.iloc[0:1] - assert_series_equal(result,Series(dr[0:1])) + assert_series_equal(result, Series(dr[0:1])) # concat - result = pd.concat([s.iloc[0:1],s.iloc[1:]]) - assert_series_equal(result,s) + result = pd.concat([s.iloc[0:1], s.iloc[1:]]) + assert_series_equal(result, s) # astype result = s.astype(object) @@ -1177,7 +1213,7 @@ def test_constructor_with_datetime_tz(self): assert_series_equal(result, s) result = s.astype('datetime64[ns, CET]') - expected = Series(date_range('20130101 06:00:00',periods=3,tz='CET')) + expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET')) assert_series_equal(result, expected) # short str @@ -1189,18 +1225,20 @@ def test_constructor_with_datetime_tz(self): self.assertTrue('NaT' in str(result)) # long str - t = Series(date_range('20130101',periods=1000,tz='US/Eastern')) + t = Series(date_range('20130101', periods=1000, tz='US/Eastern')) self.assertTrue('datetime64[ns, US/Eastern]' in str(t)) - result = pd.DatetimeIndex(s,freq='infer') + result = pd.DatetimeIndex(s, freq='infer') tm.assert_index_equal(result, dr) # inference - s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]) + s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific')]) self.assertTrue(s.dtype == 'datetime64[ns, US/Pacific]') self.assertTrue(lib.infer_dtype(s) == 'datetime64') - s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'),pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')]) + s = Series([pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), + pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern')]) self.assertTrue(s.dtype == 'object') self.assertTrue(lib.infer_dtype(s) == 'datetime') @@ -1208,7 +1246,7 @@ def test_constructor_periodindex(self): # GH7932 # converting a PeriodIndex when put in a Series - pi = period_range('20130101',periods=5,freq='D') + pi = period_range('20130101', periods=5, freq='D') s = Series(pi) expected = Series(pi.asobject) assert_series_equal(s, expected) @@ -1229,8 +1267,7 @@ def test_constructor_dict(self): def test_constructor_dict_multiindex(self): check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, - check_series_type=True) + result, expected, check_dtype=True, check_series_type=True) d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} _d = sorted(d.items()) ser = Series(d) @@ -1241,9 +1278,8 @@ def test_constructor_dict_multiindex(self): d['z'] = 111. _d.insert(0, ('z', d['z'])) ser = Series(d) - expected = Series( - [x[1] for x in _d], - index=Index([x[0] for x in _d], tupleize_cols=False)) + expected = Series([x[1] for x in _d], index=Index( + [x[0] for x in _d], tupleize_cols=False)) ser = ser.reindex(index=expected.index) check(ser, expected) @@ -1291,6 +1327,7 @@ def test_orderedDict_subclass_ctor(self): class A(OrderedDict): pass + data = A([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) self.assertTrue(all(s.values == list(data.values()))) @@ -1349,11 +1386,7 @@ def test_array_finalize(self): def test_pop(self): # GH 6600 - df = DataFrame({ - 'A': 0, - 'B': np.arange(5,dtype='int64'), - 'C': 0, - }) + df = DataFrame({'A': 0, 'B': np.arange(5, dtype='int64'), 'C': 0, }) k = df.iloc[4] result = k.pop('B') @@ -1517,7 +1550,7 @@ def test_getitem_boolean_empty(self): # GH5877 # indexing with empty series s = Series(['A', 'B']) - expected = Series(np.nan,index=['C'],dtype=object) + expected = Series(np.nan, index=['C'], dtype=object) result = s[Series(['C'], dtype=object)] assert_series_equal(result, expected) @@ -1530,10 +1563,12 @@ def test_getitem_boolean_empty(self): # that's empty or not-aligned def f(): s[Series([], dtype=bool)] + self.assertRaises(IndexingError, f) def f(): s[Series([True], dtype=bool)] + self.assertRaises(IndexingError, f) def test_getitem_generator(self): @@ -1557,7 +1592,7 @@ def test_getitem_boolean_object(self): assert_series_equal(result, expected) # setitem - s2 = s.copy() + s2 = s.copy() cop = s.copy() cop[omask] = 5 s2[mask] = 5 @@ -1576,13 +1611,13 @@ def test_getitem_setitem_boolean_corner(self): self.assertRaises(Exception, ts.__getitem__, mask_shifted) self.assertRaises(Exception, ts.__setitem__, mask_shifted, 1) - #ts[mask_shifted] - #ts[mask_shifted] = 1 + # ts[mask_shifted] + # ts[mask_shifted] = 1 self.assertRaises(Exception, ts.ix.__getitem__, mask_shifted) self.assertRaises(Exception, ts.ix.__setitem__, mask_shifted, 1) - #ts.ix[mask_shifted] - #ts.ix[mask_shifted] = 2 + # ts.ix[mask_shifted] + # ts.ix[mask_shifted] = 2 def test_getitem_setitem_slice_integers(self): s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) @@ -1635,16 +1670,16 @@ def test_getitem_dups_with_missing(self): assert_series_equal(result, expected) def test_getitem_dups(self): - s = Series(range(5),index=['A','A','B','C','C'],dtype=np.int64) - expected = Series([3,4],index=['C','C'],dtype=np.int64) + s = Series(range(5), index=['A', 'A', 'B', 'C', 'C'], dtype=np.int64) + expected = Series([3, 4], index=['C', 'C'], dtype=np.int64) result = s['C'] assert_series_equal(result, expected) def test_getitem_dataframe(self): rng = list(range(10)) - s = pd.Series(10, index=rng) - df = pd.DataFrame(rng, index=rng) - self.assertRaises(TypeError, s.__getitem__, df>5) + s = pd.Series(10, index=rng) + df = pd.DataFrame(rng, index=rng) + self.assertRaises(TypeError, s.__getitem__, df > 5) def test_setitem_ambiguous_keyerror(self): s = Series(lrange(10), index=lrange(0, 20, 2)) @@ -1652,13 +1687,13 @@ def test_setitem_ambiguous_keyerror(self): # equivalent of an append s2 = s.copy() s2[1] = 5 - expected = s.append(Series([5],index=[1])) - assert_series_equal(s2,expected) + expected = s.append(Series([5], index=[1])) + assert_series_equal(s2, expected) s2 = s.copy() s2.ix[1] = 5 - expected = s.append(Series([5],index=[1])) - assert_series_equal(s2,expected) + expected = s.append(Series([5], index=[1])) + assert_series_equal(s2, expected) def test_setitem_float_labels(self): # note labels are floats @@ -1684,8 +1719,8 @@ def test_slice(self): self.assertEqual(numSlice.index[1], self.series.index[11]) - self.assertTrue(tm.equalContents(numSliceEnd, - np.array(self.series)[-10:])) + self.assertTrue(tm.equalContents(numSliceEnd, np.array(self.series)[ + -10:])) # test return view sl = self.series[10:20] @@ -1694,13 +1729,15 @@ def test_slice(self): def test_slice_can_reorder_not_uniquely_indexed(self): s = Series(1, index=['a', 'a', 'b', 'b', 'c']) - result = s[::-1] # it works! + s[::-1] # it works! def test_slice_float_get_set(self): - self.assertRaises(TypeError, lambda : self.ts[4.0:10.0]) + self.assertRaises(TypeError, lambda: self.ts[4.0:10.0]) + def f(): self.ts[4.0:10.0] = 0 + self.assertRaises(TypeError, f) self.assertRaises(TypeError, self.ts.__getitem__, slice(4.5, 10.0)) @@ -1783,27 +1820,27 @@ def test_setitem_dtypes(self): # change dtypes # GH 4463 - expected = Series([np.nan,2,3]) + expected = Series([np.nan, 2, 3]) - s = Series([1,2,3]) + s = Series([1, 2, 3]) s.iloc[0] = np.nan - assert_series_equal(s,expected) + assert_series_equal(s, expected) - s = Series([1,2,3]) + s = Series([1, 2, 3]) s.loc[0] = np.nan - assert_series_equal(s,expected) + assert_series_equal(s, expected) - s = Series([1,2,3]) + s = Series([1, 2, 3]) s[0] = np.nan - assert_series_equal(s,expected) + assert_series_equal(s, expected) s = Series([False]) s.loc[0] = np.nan - assert_series_equal(s,Series([np.nan])) + assert_series_equal(s, Series([np.nan])) - s = Series([False,True]) + s = Series([False, True]) s.loc[0] = np.nan - assert_series_equal(s,Series([np.nan,1.0])) + assert_series_equal(s, Series([np.nan, 1.0])) def test_set_value(self): idx = self.ts.index[10] @@ -1849,7 +1886,7 @@ def test_basic_getitem_setitem_corner(self): def test_reshape_non_2d(self): # GH 4554 x = Series(np.random.random(201), name='x') - self.assertTrue(x.reshape(x.shape,) is x) + self.assertTrue(x.reshape(x.shape, ) is x) # GH 2719 a = Series([1, 2, 3, 4]) @@ -1999,11 +2036,11 @@ def test_where(self): assert_series_equal(rs, s.abs()) rs = s.where(cond) - assert(s.shape == rs.shape) - assert(rs is not s) + assert (s.shape == rs.shape) + assert (rs is not s) # test alignment - cond = Series([True,False,False,True,False],index=s.index) + cond = Series([True, False, False, True, False], index=s.index) s2 = -(s.abs()) expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) @@ -2025,13 +2062,14 @@ def test_where(self): assert_series_equal(s, expected) # failures - self.assertRaises( - ValueError, s.__setitem__, tuple([[[True, False]]]), [0, 2, 3]) - self.assertRaises( - ValueError, s.__setitem__, tuple([[[True, False]]]), []) + self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), + [0, 2, 3]) + self.assertRaises(ValueError, s.__setitem__, tuple([[[True, False]]]), + []) # unsafe dtype changes - for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]: + for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, + np.float32, np.float64]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 s[mask] = lrange(2, 7) @@ -2081,17 +2119,21 @@ def test_where(self): s = Series(np.arange(10)) mask = s > 5 + def f(): - s[mask] = [5,4,3,2,1] + s[mask] = [5, 4, 3, 2, 1] + self.assertRaises(ValueError, f) + def f(): s[mask] = [0] * 5 + self.assertRaises(ValueError, f) # dtype changes - s = Series([1,2,3,4]) - result = s.where(s>2,np.nan) - expected = Series([np.nan,np.nan,3,4]) + s = Series([1, 2, 3, 4]) + result = s.where(s > 2, np.nan) + expected = Series([np.nan, np.nan, 3, 4]) assert_series_equal(result, expected) # GH 4667 @@ -2104,7 +2146,7 @@ def f(): s = Series(range(10)).astype(float) s[s > 8] = None result = s[isnull(s)] - expected = Series(np.nan,index=[9]) + expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) def test_where_setitem_invalid(self): @@ -2114,72 +2156,87 @@ def test_where_setitem_invalid(self): # slice s = Series(list('abc')) + def f(): s[0:3] = list(range(27)) + self.assertRaises(ValueError, f) s[0:3] = list(range(3)) - expected = Series([0,1,2]) + expected = Series([0, 1, 2]) assert_series_equal(s.astype(np.int64), expected, ) # slice with step s = Series(list('abcdef')) + def f(): s[0:4:2] = list(range(27)) + self.assertRaises(ValueError, f) s = Series(list('abcdef')) s[0:4:2] = list(range(2)) - expected = Series([0,'b',1,'d','e','f']) + expected = Series([0, 'b', 1, 'd', 'e', 'f']) assert_series_equal(s, expected) # neg slices s = Series(list('abcdef')) + def f(): s[:-1] = list(range(27)) + self.assertRaises(ValueError, f) s[-3:-1] = list(range(2)) - expected = Series(['a','b','c',0,1,'f']) + expected = Series(['a', 'b', 'c', 0, 1, 'f']) assert_series_equal(s, expected) # list s = Series(list('abc')) + def f(): - s[[0,1,2]] = list(range(27)) + s[[0, 1, 2]] = list(range(27)) + self.assertRaises(ValueError, f) s = Series(list('abc')) + def f(): - s[[0,1,2]] = list(range(2)) + s[[0, 1, 2]] = list(range(2)) + self.assertRaises(ValueError, f) # scalar s = Series(list('abc')) s[0] = list(range(10)) - expected = Series([list(range(10)),'b','c']) + expected = Series([list(range(10)), 'b', 'c']) assert_series_equal(s, expected) def test_where_broadcast(self): # Test a variety of differently sized series for size in range(2, 6): # Test a variety of boolean indices - for selection in [np.resize([True, False, False, False, False], size), # First element should be set - # Set alternating elements] - np.resize([True, False], size), - np.resize([False], size)]: # No element should be set + for selection in [ + # First element should be set + np.resize([True, False, False, False, False], size), + # Set alternating elements] + np.resize([True, False], size), + # No element should be set + np.resize([False], size)]: + # Test a variety of different numbers as content - for item in [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min]: + for item in [2.0, np.nan, np.finfo(np.float).max, + np.finfo(np.float).min]: # Test numpy arrays, lists and tuples as the input to be # broadcast - for arr in [np.array([item]), [item], (item,)]: + for arr in [np.array([item]), [item], (item, )]: data = np.arange(size, dtype=float) s = Series(data) s[selection] = arr # Construct the expected series by taking the source # data or item based on the selection - expected = Series([item if use_item else data[i] - for i, use_item in enumerate(selection)]) + expected = Series([item if use_item else data[ + i] for i, use_item in enumerate(selection)]) assert_series_equal(s, expected) s = Series(data) @@ -2205,19 +2262,20 @@ def test_where_dups(self): # where crashes with dups in index s1 = Series(list(range(3))) s2 = Series(list(range(3))) - comb = pd.concat([s1,s2]) + comb = pd.concat([s1, s2]) result = comb.where(comb < 2) - expected = Series([0,1,np.nan,0,1,np.nan],index=[0,1,2,0,1,2]) + expected = Series([0, 1, np.nan, 0, 1, np.nan], + index=[0, 1, 2, 0, 1, 2]) assert_series_equal(result, expected) # GH 4548 # inplace updating not working with dups - comb[comb<1] = 5 - expected = Series([5,1,2,5,1,2],index=[0,1,2,0,1,2]) + comb[comb < 1] = 5 + expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) - comb[comb<2] += 10 - expected = Series([5,11,2,5,11,2],index=[0,1,2,0,1,2]) + comb[comb < 2] += 10 + expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) def test_where_datetime(self): @@ -2292,8 +2350,8 @@ def test_mask(self): self.assertRaises(ValueError, s.mask, cond[:3].values, -s) # dtype changes - s = Series([1,2,3,4]) - result = s.mask(s>2, np.nan) + s = Series([1, 2, 3, 4]) + result = s.mask(s > 2, np.nan) expected = Series([1, 2, np.nan, np.nan]) assert_series_equal(result, expected) @@ -2301,17 +2359,21 @@ def test_mask_broadcast(self): # GH 8801 # copied from test_where_broadcast for size in range(2, 6): - for selection in [np.resize([True, False, False, False, False], size), # First element should be set - # Set alternating elements] - np.resize([True, False], size), - np.resize([False], size)]: # No element should be set - for item in [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min]: - for arr in [np.array([item]), [item], (item,)]: + for selection in [ + # First element should be set + np.resize([True, False, False, False, False], size), + # Set alternating elements] + np.resize([True, False], size), + # No element should be set + np.resize([False], size)]: + for item in [2.0, np.nan, np.finfo(np.float).max, + np.finfo(np.float).min]: + for arr in [np.array([item]), [item], (item, )]: data = np.arange(size, dtype=float) s = Series(data) result = s.mask(selection, arr) - expected = Series([item if use_item else data[i] - for i, use_item in enumerate(selection)]) + expected = Series([item if use_item else data[ + i] for i, use_item in enumerate(selection)]) assert_series_equal(result, expected) def test_mask_inplace(self): @@ -2330,35 +2392,35 @@ def test_mask_inplace(self): def test_drop(self): # unique - s = Series([1,2],index=['one','two']) - expected = Series([1],index=['one']) + s = Series([1, 2], index=['one', 'two']) + expected = Series([1], index=['one']) result = s.drop(['two']) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s.drop('two', axis='rows') - assert_series_equal(result,expected) + assert_series_equal(result, expected) # non-unique # GH 5248 - s = Series([1,1,2],index=['one','two','one']) - expected = Series([1,2],index=['one','one']) + s = Series([1, 1, 2], index=['one', 'two', 'one']) + expected = Series([1, 2], index=['one', 'one']) result = s.drop(['two'], axis=0) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s.drop('two') - assert_series_equal(result,expected) + assert_series_equal(result, expected) - expected = Series([1],index=['two']) + expected = Series([1], index=['two']) result = s.drop(['one']) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s.drop('one') - assert_series_equal(result,expected) + assert_series_equal(result, expected) # single string/tuple-like - s = Series(range(3),index=list('abc')) + s = Series(range(3), index=list('abc')) self.assertRaises(ValueError, s.drop, 'bc') - self.assertRaises(ValueError, s.drop, ('a',)) + self.assertRaises(ValueError, s.drop, ('a', )) # errors='ignore' - s = Series(range(3),index=list('abc')) + s = Series(range(3), index=list('abc')) result = s.drop('bc', errors='ignore') assert_series_equal(result, s) result = s.drop(['a', 'd'], errors='ignore') @@ -2369,11 +2431,11 @@ def test_drop(self): self.assertRaises(ValueError, s.drop, 'one', axis='columns') # GH 8522 - s = Series([2,3], index=[True, False]) + s = Series([2, 3], index=[True, False]) self.assertTrue(s.index.is_object()) result = s.drop(True) - expected = Series([3],index=[False]) - assert_series_equal(result,expected) + expected = Series([3], index=[False]) + assert_series_equal(result, expected) def test_ix_setitem(self): inds = self.series.index[[3, 4, 7]] @@ -2404,7 +2466,7 @@ def test_ix_setitem(self): def test_where_numeric_with_string(self): # GH 9280 s = pd.Series([1, 2, 3]) - w = s.where(s>1, 'X') + w = s.where(s > 1, 'X') self.assertFalse(com.is_integer(w[0])) self.assertTrue(com.is_integer(w[1])) @@ -2412,14 +2474,14 @@ def test_where_numeric_with_string(self): self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') - w = s.where(s>1, ['X', 'Y', 'Z']) + w = s.where(s > 1, ['X', 'Y', 'Z']) self.assertFalse(com.is_integer(w[0])) self.assertTrue(com.is_integer(w[1])) self.assertTrue(com.is_integer(w[2])) self.assertTrue(isinstance(w[0], str)) self.assertTrue(w.dtype == 'object') - w = s.where(s>1, np.array(['X', 'Y', 'Z'])) + w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) self.assertFalse(com.is_integer(w[0])) self.assertTrue(com.is_integer(w[1])) self.assertTrue(com.is_integer(w[2])) @@ -2498,9 +2560,7 @@ def test_repr(self): # various names for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'), 'loooooooooooooooooooooooooooooooooooooooooooooooooooong', - ('foo', 'bar', 'baz'), - (1, 2), - ('foo', 1, 2.3), + ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3), (u('\u03B1'), u('\u03B2'), u('\u03B3')), (u('\u03B1'), 'bar')]: self.series.name = name @@ -2535,7 +2595,7 @@ def test_repr(self): def test_tidy_repr(self): a = Series([u("\u05d0")] * 1000) a.name = 'title1' - repr(a) # should not raise exception + repr(a) # should not raise exception def test_repr_bool_fails(self): s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)]) @@ -2546,7 +2606,7 @@ def test_repr_bool_fails(self): tmp = sys.stderr sys.stderr = buf try: - # it works (with no Cython exception barf)! + # it works (with no Cython exception barf)! repr(s) finally: sys.stderr = tmp @@ -2558,7 +2618,7 @@ def test_repr_name_iterable_indexable(self): # it works! repr(s) - s.name = (u("\u05d0"),) * 2 + s.name = (u("\u05d0"), ) * 2 repr(s) def test_repr_should_return_str(self): @@ -2576,7 +2636,7 @@ def test_repr_should_return_str(self): def test_repr_max_rows(self): # GH 6863 with pd.option_context('max_rows', None): - str(Series(range(1001))) # should not raise exception + str(Series(range(1001))) # should not raise exception def test_unicode_string_with_unicode(self): df = Series([u("\u05d0")], name=u("\u05d1")) @@ -2676,7 +2736,9 @@ def test_mode(self): exp = Series([11, 12]) assert_series_equal(s.mode(), exp) - assert_series_equal(Series([1, 2, 3]).mode(), Series([], dtype='int64')) + assert_series_equal( + Series([1, 2, 3]).mode(), Series( + [], dtype='int64')) lst = [5] * 20 + [1] * 10 + [6] * 25 np.random.shuffle(lst) @@ -2733,11 +2795,12 @@ def test_var_std(self): self.assertTrue(isnull(result)) def test_sem(self): - alt = lambda x: np.std(x, ddof=1)/np.sqrt(len(x)) + alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) self._check_stat_op('sem', alt) result = self.ts.sem(ddof=4) - expected = np.std(self.ts.values, ddof=4)/np.sqrt(len(self.ts.values)) + expected = np.std(self.ts.values, + ddof=4) / np.sqrt(len(self.ts.values)) assert_almost_equal(result, expected) # 1 - element series with ddof=1 @@ -2752,7 +2815,8 @@ def test_skew(self): alt = lambda x: skew(x, bias=False) self._check_stat_op('skew', alt) - # test corner cases, skew() returns NaN unless there's at least 3 values + # test corner cases, skew() returns NaN unless there's at least 3 + # values min_N = 3 for i in range(1, min_N + 1): s = Series(np.ones(i)) @@ -2772,13 +2836,13 @@ def test_kurt(self): self._check_stat_op('kurt', alt) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], + labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) self.assertAlmostEqual(s.kurt(), s.kurt(level=0)['bar']) - # test corner cases, kurt() returns NaN unless there's at least 4 values + # test corner cases, kurt() returns NaN unless there's at least 4 + # values min_N = 4 for i in range(1, min_N + 1): s = Series(np.ones(i)) @@ -2824,8 +2888,7 @@ def test_argsort_stable(self): def test_reorder_levels(self): index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], + labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]], names=['L0', 'L1', 'L2']) s = Series(np.arange(6), index=index) @@ -2841,8 +2904,7 @@ def test_reorder_levels(self): # rotate, position result = s.reorder_levels([1, 2, 0]) e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']], - labels=[[0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1], + labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1], [0, 0, 0, 0, 0, 0]], names=['L1', 'L2', 'L0']) expected = Series(np.arange(6), index=e_idx) @@ -2850,8 +2912,7 @@ def test_reorder_levels(self): result = s.reorder_levels([0, 0, 0]) e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']], - labels=[[0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0], + labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], names=['L0', 'L0', 'L0']) expected = Series(range(6), index=e_idx) @@ -2887,58 +2948,84 @@ def test_cummax(self): self.assert_numpy_array_equal(result, expected) def test_cummin_datetime64(self): - s = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-3'])) + s = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-1', + 'NaT', '2000-1-3'])) - expected = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-1'])) + expected = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', + '2000-1-1', 'NaT', '2000-1-1'])) result = s.cummin(skipna=True) self.assert_series_equal(expected, result) expected = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', '2000-1-2', '2000-1-1', '2000-1-1', '2000-1-1'])) + ['NaT', '2000-1-2', '2000-1-2', '2000-1-1', '2000-1-1', '2000-1-1' + ])) result = s.cummin(skipna=False) self.assert_series_equal(expected, result) def test_cummax_datetime64(self): - s = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', 'NaT', '2000-1-1', 'NaT', '2000-1-3'])) + s = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', '2000-1-1', + 'NaT', '2000-1-3'])) - expected = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', 'NaT', '2000-1-2', 'NaT', '2000-1-3'])) + expected = pd.Series(pd.to_datetime(['NaT', '2000-1-2', 'NaT', + '2000-1-2', 'NaT', '2000-1-3'])) result = s.cummax(skipna=True) self.assert_series_equal(expected, result) expected = pd.Series(pd.to_datetime( - ['NaT', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-3'])) + ['NaT', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-2', '2000-1-3' + ])) result = s.cummax(skipna=False) self.assert_series_equal(expected, result) def test_cummin_timedelta64(self): - s = pd.Series(pd.to_timedelta( - ['NaT', '2 min', 'NaT', '1 min', 'NaT', '3 min', ])) - - expected = pd.Series(pd.to_timedelta( - ['NaT', '2 min', 'NaT', '1 min', 'NaT', '1 min', ])) + s = pd.Series(pd.to_timedelta(['NaT', + '2 min', + 'NaT', + '1 min', + 'NaT', + '3 min', ])) + + expected = pd.Series(pd.to_timedelta(['NaT', + '2 min', + 'NaT', + '1 min', + 'NaT', + '1 min', ])) result = s.cummin(skipna=True) self.assert_series_equal(expected, result) - expected = pd.Series(pd.to_timedelta( - ['NaT', '2 min', '2 min', '1 min', '1 min', '1 min', ])) + expected = pd.Series(pd.to_timedelta(['NaT', + '2 min', + '2 min', + '1 min', + '1 min', + '1 min', ])) result = s.cummin(skipna=False) self.assert_series_equal(expected, result) def test_cummax_timedelta64(self): - s = pd.Series(pd.to_timedelta( - ['NaT', '2 min', 'NaT', '1 min', 'NaT', '3 min', ])) - - expected = pd.Series(pd.to_timedelta( - ['NaT', '2 min', 'NaT', '2 min', 'NaT', '3 min', ])) + s = pd.Series(pd.to_timedelta(['NaT', + '2 min', + 'NaT', + '1 min', + 'NaT', + '3 min', ])) + + expected = pd.Series(pd.to_timedelta(['NaT', + '2 min', + 'NaT', + '2 min', + 'NaT', + '3 min', ])) result = s.cummax(skipna=True) self.assert_series_equal(expected, result) - expected = pd.Series(pd.to_timedelta( - ['NaT', '2 min', '2 min', '2 min', '2 min', '3 min', ])) + expected = pd.Series(pd.to_timedelta(['NaT', + '2 min', + '2 min', + '2 min', + '2 min', + '3 min', ])) result = s.cummax(skipna=False) self.assert_series_equal(expected, result) @@ -2952,7 +3039,8 @@ def test_npdiff(self): r = np.diff(s) assert_series_equal(Series([nan, 0, 0, 0, nan]), r) - def _check_stat_op(self, name, alternate, check_objects=False, check_allna=False): + def _check_stat_op(self, name, alternate, check_objects=False, + check_allna=False): import pandas.core.nanops as nanops def testit(): @@ -2962,7 +3050,7 @@ def testit(): self.series[5:15] = np.NaN # idxmax, idxmin, min, and max are valid for dates - if name not in ['max','min']: + if name not in ['max', 'min']: ds = Series(date_range('1/1/2001', periods=10)) self.assertRaises(TypeError, f, ds) @@ -2982,9 +3070,9 @@ def testit(): # bottleneck >= 1.0 give 0.0 for an allna Series sum try: self.assertTrue(nanops._USE_BOTTLENECK) - import bottleneck as bn + import bottleneck as bn # noqa self.assertTrue(bn.__version__ >= LooseVersion('1.0')) - self.assertEqual(f(allna),0.0) + self.assertEqual(f(allna), 0.0) except: self.assertTrue(np.isnan(f(allna))) @@ -2994,7 +3082,7 @@ def testit(): # 2888 l = [0] - l.extend(lrange(2 ** 40, 2 ** 40+1000)) + l.extend(lrange(2 ** 40, 2 ** 40 + 1000)) s = Series(l, dtype='int64') assert_almost_equal(float(f(s)), float(alternate(s.values))) @@ -3006,7 +3094,7 @@ def testit(): self.assertEqual(res, exp) # check on string data - if name not in ['sum','min','max']: + if name not in ['sum', 'min', 'max']: self.assertRaises(TypeError, f, Series(list('abc'))) # Invalid axis. @@ -3020,7 +3108,7 @@ def testit(): testit() try: - import bottleneck as bn + import bottleneck as bn # noqa nanops._USE_BOTTLENECK = False testit() nanops._USE_BOTTLENECK = True @@ -3052,7 +3140,8 @@ def test_round(self): def test_built_in_round(self): if not compat.PY3: - raise nose.SkipTest('build in round cannot be overriden prior to Python 3') + raise nose.SkipTest( + 'build in round cannot be overriden prior to Python 3') s = Series([1.123, 2.123, 3.123], index=lrange(3)) result = round(s) @@ -3064,7 +3153,6 @@ def test_built_in_round(self): result = round(s, decimals) self.assert_series_equal(result, expected_rounded) - def test_prod_numpy16_bug(self): s = Series([1., 1., 1.], index=lrange(3)) result = s.prod() @@ -3080,7 +3168,7 @@ def test_quantile(self): self.assertEqual(q, percentile(self.ts.valid(), 90)) # object dtype - q = Series(self.ts,dtype=object).quantile(0.9) + q = Series(self.ts, dtype=object).quantile(0.9) self.assertEqual(q, percentile(self.ts.valid(), 90)) # datetime64[ns] dtype @@ -3121,8 +3209,8 @@ def test_quantile_multi(self): assert_series_equal(result, expected) result = self.ts.quantile([]) - expected = pd.Series([], name=self.ts.name, - index=Index([], dtype=float)) + expected = pd.Series([], name=self.ts.name, index=Index( + [], dtype=float)) assert_series_equal(result, expected) def test_quantile_interpolation(self): @@ -3255,8 +3343,8 @@ def test_modulo(self): # GH3590, modulo as ints p = DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]}) result = p['first'] % p['second'] - expected = Series(p['first'].values % - p['second'].values, dtype='float64') + expected = Series(p['first'].values % p['second'].values, + dtype='float64') expected.iloc[0:3] = np.nan assert_series_equal(result, expected) @@ -3306,20 +3394,21 @@ def test_div(self): p = DataFrame({'first': [3, 4, 5, 8], 'second': [1, 1, 1, 1]}) result = p['first'] / p['second'] - assert_series_equal(result, p['first'].astype('float64'), check_names=False) + assert_series_equal(result, p['first'].astype('float64'), + check_names=False) self.assertTrue(result.name is None) self.assertFalse(np.array_equal(result, p['second'] / p['first'])) # inf signing - s = Series([np.nan,1.,-1.]) + s = Series([np.nan, 1., -1.]) result = s / 0 - expected = Series([np.nan,np.inf,-np.inf]) + expected = Series([np.nan, np.inf, -np.inf]) assert_series_equal(result, expected) # float/integer issue # GH 7785 - p = DataFrame({'first': (1,0), 'second': (-0.01,-0.02)}) - expected = Series([-0.01,-np.inf]) + p = DataFrame({'first': (1, 0), 'second': (-0.01, -0.02)}) + expected = Series([-0.01, -np.inf]) result = p['second'].div(p['first']) assert_series_equal(result, expected, check_names=False) @@ -3343,7 +3432,6 @@ def test_div(self): assert_series_equal(result, expected) def test_operators(self): - def _check_op(series, other, op, pos_only=False): left = np.abs(series) if pos_only else series right = np.abs(other) if pos_only else other @@ -3398,18 +3486,19 @@ def test_constructor_dtype_timedelta64(self): td = Series([timedelta(days=1)]) self.assertEqual(td.dtype, 'timedelta64[ns]') - td = Series([timedelta(days=1),timedelta(days=2),np.timedelta64(1,'s')]) + td = Series([timedelta(days=1), timedelta(days=2), np.timedelta64( + 1, 's')]) self.assertEqual(td.dtype, 'timedelta64[ns]') # mixed with NaT from pandas import tslib - td = Series([timedelta(days=1),tslib.NaT ], dtype='m8[ns]' ) + td = Series([timedelta(days=1), tslib.NaT], dtype='m8[ns]') self.assertEqual(td.dtype, 'timedelta64[ns]') - td = Series([timedelta(days=1),np.nan ], dtype='m8[ns]' ) + td = Series([timedelta(days=1), np.nan], dtype='m8[ns]') self.assertEqual(td.dtype, 'timedelta64[ns]') - td = Series([np.timedelta64(300000000), pd.NaT],dtype='m8[ns]') + td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]') self.assertEqual(td.dtype, 'timedelta64[ns]') # improved inference @@ -3426,11 +3515,11 @@ def test_constructor_dtype_timedelta64(self): td = Series([pd.NaT, np.timedelta64(300000000)]) self.assertEqual(td.dtype, 'timedelta64[ns]') - td = Series([np.timedelta64(1,'s')]) + td = Series([np.timedelta64(1, 's')]) self.assertEqual(td.dtype, 'timedelta64[ns]') # these are frequency conversion astypes - #for t in ['s', 'D', 'us', 'ms']: + # for t in ['s', 'D', 'us', 'ms']: # self.assertRaises(TypeError, td.astype, 'm8[%s]' % t) # valid astype @@ -3441,7 +3530,8 @@ def test_constructor_dtype_timedelta64(self): # this is an invalid casting def f(): - Series([timedelta(days=1), 'foo'],dtype='m8[ns]') + Series([timedelta(days=1), 'foo'], dtype='m8[ns]') + self.assertRaises(Exception, f) # leave as object here @@ -3450,30 +3540,30 @@ def f(): # these will correctly infer a timedelta s = Series([None, pd.NaT, '1 Day']) - self.assertEqual(s.dtype,'timedelta64[ns]') + self.assertEqual(s.dtype, 'timedelta64[ns]') s = Series([np.nan, pd.NaT, '1 Day']) - self.assertEqual(s.dtype,'timedelta64[ns]') + self.assertEqual(s.dtype, 'timedelta64[ns]') s = Series([pd.NaT, None, '1 Day']) - self.assertEqual(s.dtype,'timedelta64[ns]') + self.assertEqual(s.dtype, 'timedelta64[ns]') s = Series([pd.NaT, np.nan, '1 Day']) - self.assertEqual(s.dtype,'timedelta64[ns]') + self.assertEqual(s.dtype, 'timedelta64[ns]') def test_operators_timedelta64(self): # invalid ops self.assertRaises(Exception, self.objSeries.__add__, 1) - self.assertRaises( - Exception, self.objSeries.__add__, np.array(1, dtype=np.int64)) + self.assertRaises(Exception, self.objSeries.__add__, + np.array(1, dtype=np.int64)) self.assertRaises(Exception, self.objSeries.__sub__, 1) - self.assertRaises( - Exception, self.objSeries.__sub__, np.array(1, dtype=np.int64)) + self.assertRaises(Exception, self.objSeries.__sub__, + np.array(1, dtype=np.int64)) # seriese ops v1 = date_range('2012-1-1', periods=3, freq='D') v2 = date_range('2012-1-2', periods=3, freq='D') rs = Series(v2) - Series(v1) - xp = Series(1e9 * 3600 * 24, rs.index).astype( - 'int64').astype('timedelta64[ns]') + xp = Series(1e9 * 3600 * 24, + rs.index).astype('int64').astype('timedelta64[ns]') assert_series_equal(rs, xp) self.assertEqual(rs.dtype, 'timedelta64[ns]') @@ -3497,13 +3587,15 @@ def test_operators_timedelta64(self): # timestamp on lhs result = resultb + df['A'] - values = [Timestamp('20111230'), Timestamp('20120101'), Timestamp('20120103')] + values = [Timestamp('20111230'), Timestamp('20120101'), + Timestamp('20120103')] expected = Series(values, name='A') assert_series_equal(result, expected) # datetimes on rhs result = df['A'] - datetime(2001, 1, 1) - expected = Series([timedelta(days=4017 + i) for i in range(3)], name='A') + expected = Series( + [timedelta(days=4017 + i) for i in range(3)], name='A') assert_series_equal(result, expected) self.assertEqual(result.dtype, 'm8[ns]') @@ -3530,8 +3622,8 @@ def test_operators_timedelta64(self): self.assertEqual(resultb.dtype, 'M8[ns]') # inplace - value = rs[2] + np.timedelta64(timedelta(minutes=5,seconds=1)) - rs[2] += np.timedelta64(timedelta(minutes=5,seconds=1)) + value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1)) + rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1)) self.assertEqual(rs[2], value) def test_timedeltas_with_DateOffset(self): @@ -3542,54 +3634,54 @@ def test_timedeltas_with_DateOffset(self): result = s + pd.offsets.Second(5) result2 = pd.offsets.Second(5) + s - expected = Series( - [Timestamp('20130101 9:01:05'), Timestamp('20130101 9:02:05')]) + expected = Series([Timestamp('20130101 9:01:05'), Timestamp( + '20130101 9:02:05')]) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = s - pd.offsets.Second(5) result2 = -pd.offsets.Second(5) + s - expected = Series( - [Timestamp('20130101 9:00:55'), Timestamp('20130101 9:01:55')]) + expected = Series([Timestamp('20130101 9:00:55'), Timestamp( + '20130101 9:01:55')]) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = s + pd.offsets.Milli(5) result2 = pd.offsets.Milli(5) + s - expected = Series( - [Timestamp('20130101 9:01:00.005'), Timestamp('20130101 9:02:00.005')]) + expected = Series([Timestamp('20130101 9:01:00.005'), Timestamp( + '20130101 9:02:00.005')]) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5) - expected = Series( - [Timestamp('20130101 9:06:00.005'), Timestamp('20130101 9:07:00.005')]) + expected = Series([Timestamp('20130101 9:06:00.005'), Timestamp( + '20130101 9:07:00.005')]) assert_series_equal(result, expected) # operate with np.timedelta64 correctly result = s + np.timedelta64(1, 's') result2 = np.timedelta64(1, 's') + s - expected = Series( - [Timestamp('20130101 9:01:01'), Timestamp('20130101 9:02:01')]) + expected = Series([Timestamp('20130101 9:01:01'), Timestamp( + '20130101 9:02:01')]) assert_series_equal(result, expected) assert_series_equal(result2, expected) result = s + np.timedelta64(5, 'ms') result2 = np.timedelta64(5, 'ms') + s - expected = Series( - [Timestamp('20130101 9:01:00.005'), Timestamp('20130101 9:02:00.005')]) + expected = Series([Timestamp('20130101 9:01:00.005'), Timestamp( + '20130101 9:02:00.005')]) assert_series_equal(result, expected) assert_series_equal(result2, expected) # valid DateOffsets - for do in [ 'Hour', 'Minute', 'Second', 'Day', 'Micro', - 'Milli', 'Nano' ]: - op = getattr(pd.offsets,do) + for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', + 'Nano']: + op = getattr(pd.offsets, do) s + op(5) op(5) + s def test_timedelta_series_ops(self): - #GH11925 + # GH11925 s = Series(timedelta_range('1 day', periods=3)) ts = Timestamp('2012-01-01') @@ -3601,7 +3693,6 @@ def test_timedelta_series_ops(self): assert_series_equal(ts - s, expected2) assert_series_equal(ts + (-s), expected2) - def test_timedelta64_operations_with_DateOffset(self): # GH 10699 td = Series([timedelta(minutes=5, seconds=3)] * 3) @@ -3615,9 +3706,8 @@ def test_timedelta64_operations_with_DateOffset(self): result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3), pd.offsets.Hour(2)]) - expected = Series([timedelta(minutes=6, seconds=3), - timedelta(minutes=5, seconds=6), - timedelta(hours=2, minutes=5, seconds=3)]) + expected = Series([timedelta(minutes=6, seconds=3), timedelta( + minutes=5, seconds=6), timedelta(hours=2, minutes=5, seconds=3)]) assert_series_equal(result, expected) result = td + pd.offsets.Minute(1) + pd.offsets.Second(12) @@ -3625,9 +3715,9 @@ def test_timedelta64_operations_with_DateOffset(self): assert_series_equal(result, expected) # valid DateOffsets - for do in [ 'Hour', 'Minute', 'Second', 'Day', 'Micro', - 'Milli', 'Nano' ]: - op = getattr(pd.offsets,do) + for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli', + 'Nano']: + op = getattr(pd.offsets, do) td + op(5) op(5) + td td - op(5) @@ -3639,36 +3729,36 @@ def test_timedelta64_operations_with_timedeltas(self): td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td2 = timedelta(minutes=5, seconds=4) result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) -Series( - [timedelta(seconds=1)] * 3) + expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta( + seconds=1)] * 3) self.assertEqual(result.dtype, 'm8[ns]') assert_series_equal(result, expected) result2 = td2 - td1 - expected = (Series([timedelta(seconds=1)] * 3) - - Series([timedelta(seconds=0)] * 3)) + expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta( + seconds=0)] * 3)) assert_series_equal(result2, expected) # roundtrip - assert_series_equal(result + td2,td1) + assert_series_equal(result + td2, td1) # Now again, using pd.to_timedelta, which should build # a Series or a scalar, depending on input. td1 = Series(pd.to_timedelta(['00:05:03'] * 3)) td2 = pd.to_timedelta('00:05:04') result = td1 - td2 - expected = Series([timedelta(seconds=0)] * 3) -Series( - [timedelta(seconds=1)] * 3) + expected = Series([timedelta(seconds=0)] * 3) - Series([timedelta( + seconds=1)] * 3) self.assertEqual(result.dtype, 'm8[ns]') assert_series_equal(result, expected) result2 = td2 - td1 - expected = (Series([timedelta(seconds=1)] * 3) - - Series([timedelta(seconds=0)] * 3)) + expected = (Series([timedelta(seconds=1)] * 3) - Series([timedelta( + seconds=0)] * 3)) assert_series_equal(result2, expected) # roundtrip - assert_series_equal(result + td2,td1) + assert_series_equal(result + td2, td1) def test_timedelta64_operations_with_integers(self): @@ -3683,54 +3773,55 @@ def test_timedelta64_operations_with_integers(self): expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') expected[2] = np.nan result = s1 / s2 - assert_series_equal(result,expected) + assert_series_equal(result, expected) s2 = Series([20, 30, 40]) expected = Series(s1.values.astype(np.int64) / s2, dtype='m8[ns]') expected[2] = np.nan result = s1 / s2 - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s1 / 2 expected = Series(s1.values.astype(np.int64) / 2, dtype='m8[ns]') expected[2] = np.nan - assert_series_equal(result,expected) + assert_series_equal(result, expected) s2 = Series([20, 30, 40]) expected = Series(s1.values.astype(np.int64) * s2, dtype='m8[ns]') expected[2] = np.nan result = s1 * s2 - assert_series_equal(result,expected) + assert_series_equal(result, expected) - for dtype in ['int32','int16','uint32','uint64','uint32','uint16','uint8']: - s2 = Series([20, 30, 40],dtype=dtype) - expected = Series(s1.values.astype(np.int64) * s2.astype(np.int64), dtype='m8[ns]') + for dtype in ['int32', 'int16', 'uint32', 'uint64', 'uint32', 'uint16', + 'uint8']: + s2 = Series([20, 30, 40], dtype=dtype) + expected = Series( + s1.values.astype(np.int64) * s2.astype(np.int64), + dtype='m8[ns]') expected[2] = np.nan result = s1 * s2 - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s1 * 2 expected = Series(s1.values.astype(np.int64) * 2, dtype='m8[ns]') expected[2] = np.nan - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s1 * -1 expected = Series(s1.values.astype(np.int64) * -1, dtype='m8[ns]') expected[2] = np.nan - assert_series_equal(result,expected) + assert_series_equal(result, expected) # invalid ops assert_series_equal(s1 / s2.astype(float), - Series([Timedelta('2 days 22:48:00'), - Timedelta('1 days 23:12:00'), - Timedelta('NaT')])) + Series([Timedelta('2 days 22:48:00'), Timedelta( + '1 days 23:12:00'), Timedelta('NaT')])) assert_series_equal(s1 / 2.0, - Series([Timedelta('29 days 12:00:00'), - Timedelta('29 days 12:00:00'), - Timedelta('NaT')])) + Series([Timedelta('29 days 12:00:00'), Timedelta( + '29 days 12:00:00'), Timedelta('NaT')])) - for op in ['__add__','__sub__']: - sop = getattr(s1,op,None) + for op in ['__add__', '__sub__']: + sop = getattr(s1, op, None) if sop is not None: self.assertRaises(TypeError, sop, 1) self.assertRaises(TypeError, sop, s2.values) @@ -3743,11 +3834,11 @@ def test_timedelta64_conversions(self): s1[2] = np.nan for m in [1, 3, 10]: - for unit in ['D','h','m','s','ms','us','ns']: + for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']: # op - expected = s1.apply(lambda x: x / np.timedelta64(m,unit)) - result = s1 / np.timedelta64(m,unit) + expected = s1.apply(lambda x: x / np.timedelta64(m, unit)) + result = s1 / np.timedelta64(m, unit) assert_series_equal(result, expected) if m == 1 and unit != 'ns': @@ -3757,27 +3848,33 @@ def test_timedelta64_conversions(self): assert_series_equal(result, expected) # reverse op - expected = s1.apply(lambda x: Timedelta(np.timedelta64(m,unit)) / x) - result = np.timedelta64(m,unit) / s1 + expected = s1.apply( + lambda x: Timedelta(np.timedelta64(m, unit)) / x) + result = np.timedelta64(m, unit) / s1 # astype - s = Series(date_range('20130101',periods=3)) + s = Series(date_range('20130101', periods=3)) result = s.astype(object) - self.assertIsInstance(result.iloc[0],datetime) + self.assertIsInstance(result.iloc[0], datetime) self.assertTrue(result.dtype == np.object_) result = s1.astype(object) - self.assertIsInstance(result.iloc[0],timedelta) + self.assertIsInstance(result.iloc[0], timedelta) self.assertTrue(result.dtype == np.object_) def test_timedelta64_equal_timedelta_supported_ops(self): ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'), - Timestamp('20130228 22:00:00'), - Timestamp('20130228 21:00:00')]) + Timestamp('20130228 22:00:00'), Timestamp( + '20130228 21:00:00')]) intervals = 'D', 'h', 'm', 's', 'us' - npy16_mappings = {'D': 24 * 60 * 60 * 1000000, 'h': 60 * 60 * 1000000, - 'm': 60 * 1000000, 's': 1000000, 'us': 1} + + # TODO: unused + # npy16_mappings = {'D': 24 * 60 * 60 * 1000000, + # 'h': 60 * 60 * 1000000, + # 'm': 60 * 1000000, + # 's': 1000000, + # 'us': 1} def timedelta64(*args): return sum(starmap(np.timedelta64, zip(args, intervals))) @@ -3794,41 +3891,44 @@ def timedelta64(*args): assert_series_equal(lhs, rhs) except: raise AssertionError( - "invalid comparsion [op->{0},d->{1},h->{2},m->{3},s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, us, lhs, rhs)) + "invalid comparsion [op->{0},d->{1},h->{2},m->{3}," + "s->{4},us->{5}]\n{6}\n{7}\n".format(op, d, h, m, s, + us, lhs, rhs)) def test_timedelta_assignment(self): # GH 8209 s = Series([]) s.loc['B'] = timedelta(1) - tm.assert_series_equal(s,Series(Timedelta('1 days'),index=['B'])) + tm.assert_series_equal(s, Series(Timedelta('1 days'), index=['B'])) s = s.reindex(s.index.insert(0, 'A')) - tm.assert_series_equal(s,Series([np.nan,Timedelta('1 days')],index=['A','B'])) + tm.assert_series_equal(s, Series( + [np.nan, Timedelta('1 days')], index=['A', 'B'])) result = s.fillna(timedelta(1)) - expected = Series(Timedelta('1 days'),index=['A','B']) + expected = Series(Timedelta('1 days'), index=['A', 'B']) tm.assert_series_equal(result, expected) s.loc['A'] = timedelta(1) tm.assert_series_equal(s, expected) def test_operators_datetimelike(self): - def run_ops(ops, get_ser, test_ser): # check that we are getting a TypeError - # with 'operate' (from core/ops.py) for the ops that are not defined + # with 'operate' (from core/ops.py) for the ops that are not + # defined for op_str in ops: op = getattr(get_ser, op_str, None) with tm.assertRaisesRegexp(TypeError, 'operate'): op(test_ser) - ### timedelta64 ### - td1 = Series([timedelta(minutes=5,seconds=3)]*3) + # ## timedelta64 ### + td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan - td2 = timedelta(minutes=5,seconds=4) - ops = ['__mul__','__floordiv__','__pow__', - '__rmul__','__rfloordiv__','__rpow__'] + td2 = timedelta(minutes=5, seconds=4) + ops = ['__mul__', '__floordiv__', '__pow__', '__rmul__', + '__rfloordiv__', '__rpow__'] run_ops(ops, td1, td2) td1 + td2 td2 + td1 @@ -3837,12 +3937,12 @@ def run_ops(ops, get_ser, test_ser): td1 / td2 td2 / td1 - ### datetime64 ### - dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), - Timestamp('20120103')]) + # ## datetime64 ### + dt1 = Series([Timestamp('20111230'), Timestamp('20120101'), Timestamp( + '20120103')]) dt1.iloc[2] = np.nan - dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), - Timestamp('20120104')]) + dt2 = Series([Timestamp('20111231'), Timestamp('20120102'), Timestamp( + '20120104')]) ops = ['__add__', '__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__radd__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] @@ -3850,7 +3950,7 @@ def run_ops(ops, get_ser, test_ser): dt1 - dt2 dt2 - dt1 - ### datetime64 with timetimedelta ### + # ## datetime64 with timetimedelta ### ops = ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] @@ -3861,10 +3961,10 @@ def run_ops(ops, get_ser, test_ser): # TODO: Decide if this ought to work. # td1 - dt1 - ### timetimedelta with datetime64 ### + # ## timetimedelta with datetime64 ### ops = ['__sub__', '__mul__', '__floordiv__', '__truediv__', '__div__', - '__pow__', '__rmul__', '__rfloordiv__', - '__rtruediv__', '__rdiv__', '__rpow__'] + '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', + '__rdiv__', '__rpow__'] run_ops(ops, td1, dt1) td1 + dt1 dt1 + td1 @@ -3874,56 +3974,68 @@ def run_ops(ops, get_ser, test_ser): ops = ['__mul__', '__floordiv__', '__truediv__', '__div__', '__pow__', '__rmul__', '__rfloordiv__', '__rtruediv__', '__rdiv__', '__rpow__'] - dt1 = Series(date_range('2000-01-01 09:00:00',periods=5,tz='US/Eastern'),name='foo') + dt1 = Series( + date_range('2000-01-01 09:00:00', periods=5, + tz='US/Eastern'), name='foo') dt2 = dt1.copy() dt2.iloc[2] = np.nan - td1 = Series(timedelta_range('1 days 1 min',periods=5, freq='H')) + td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H')) td2 = td1.copy() td2.iloc[1] = np.nan run_ops(ops, dt1, td1) result = dt1 + td1[0] - expected = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') + expected = ( + dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = dt2 + td2[0] - expected = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') + expected = ( + dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) # odd numpy behavior with scalar timedeltas if not _np_version_under1p8: result = td1[0] + dt1 - expected = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') + expected = ( + dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = td2[0] + dt2 - expected = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') + expected = ( + dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = dt1 - td1[0] - expected = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize('US/Eastern') + expected = ( + dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) self.assertRaises(TypeError, lambda: td1[0] - dt1) result = dt2 - td2[0] - expected = (dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize('US/Eastern') + expected = ( + dt2.dt.tz_localize(None) - td2[0]).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) self.assertRaises(TypeError, lambda: td2[0] - dt2) result = dt1 + td1 - expected = (dt1.dt.tz_localize(None) + td1).dt.tz_localize('US/Eastern') + expected = ( + dt1.dt.tz_localize(None) + td1).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = dt2 + td2 - expected = (dt2.dt.tz_localize(None) + td2).dt.tz_localize('US/Eastern') + expected = ( + dt2.dt.tz_localize(None) + td2).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = dt1 - td1 - expected = (dt1.dt.tz_localize(None) - td1).dt.tz_localize('US/Eastern') + expected = ( + dt1.dt.tz_localize(None) - td1).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) result = dt2 - td2 - expected = (dt2.dt.tz_localize(None) - td2).dt.tz_localize('US/Eastern') + expected = ( + dt2.dt.tz_localize(None) - td2).dt.tz_localize('US/Eastern') assert_series_equal(result, expected) self.assertRaises(TypeError, lambda: td1 - dt1) @@ -3933,14 +4045,16 @@ def test_ops_nat(self): # GH 11349 timedelta_series = Series([NaT, Timedelta('1s')]) datetime_series = Series([NaT, Timestamp('19900315')]) - nat_series_dtype_timedelta = Series([NaT, NaT], dtype='timedelta64[ns]') + nat_series_dtype_timedelta = Series( + [NaT, NaT], dtype='timedelta64[ns]') nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]') single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]') single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]') # subtraction assert_series_equal(timedelta_series - NaT, nat_series_dtype_timedelta) - assert_series_equal(-NaT + timedelta_series, nat_series_dtype_timedelta) + assert_series_equal(-NaT + timedelta_series, + nat_series_dtype_timedelta) assert_series_equal(timedelta_series - single_nat_dtype_timedelta, nat_series_dtype_timedelta) @@ -3957,7 +4071,7 @@ def test_ops_nat(self): assert_series_equal(datetime_series - single_nat_dtype_timedelta, nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + datetime_series , + assert_series_equal(-single_nat_dtype_timedelta + datetime_series, nat_series_dtype_timestamp) # without a Series wrapping the NaT, it is ambiguous @@ -3968,14 +4082,17 @@ def test_ops_nat(self): assert_series_equal(-NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - assert_series_equal(nat_series_dtype_timestamp - single_nat_dtype_datetime, + assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_datetime, nat_series_dtype_timedelta) with tm.assertRaises(TypeError): -single_nat_dtype_datetime + nat_series_dtype_timestamp - assert_series_equal(nat_series_dtype_timestamp - single_nat_dtype_timedelta, + assert_series_equal(nat_series_dtype_timestamp - + single_nat_dtype_timedelta, nat_series_dtype_timestamp) - assert_series_equal(-single_nat_dtype_timedelta + nat_series_dtype_timestamp, + assert_series_equal(-single_nat_dtype_timedelta + + nat_series_dtype_timestamp, nat_series_dtype_timestamp) with tm.assertRaises(TypeError): @@ -3987,9 +4104,11 @@ def test_ops_nat(self): assert_series_equal(NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - assert_series_equal(nat_series_dtype_timestamp + single_nat_dtype_timedelta, + assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timestamp, + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, nat_series_dtype_timestamp) assert_series_equal(nat_series_dtype_timedelta + NaT, @@ -3997,9 +4116,11 @@ def test_ops_nat(self): assert_series_equal(NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta) - assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_timedelta, + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timedelta, + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, nat_series_dtype_timedelta) assert_series_equal(timedelta_series + NaT, nat_series_dtype_timedelta) @@ -4015,9 +4136,11 @@ def test_ops_nat(self): assert_series_equal(NaT + nat_series_dtype_timestamp, nat_series_dtype_timestamp) - assert_series_equal(nat_series_dtype_timestamp + single_nat_dtype_timedelta, + assert_series_equal(nat_series_dtype_timestamp + + single_nat_dtype_timedelta, nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timestamp, + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timestamp, nat_series_dtype_timestamp) assert_series_equal(nat_series_dtype_timedelta + NaT, @@ -4025,14 +4148,18 @@ def test_ops_nat(self): assert_series_equal(NaT + nat_series_dtype_timedelta, nat_series_dtype_timedelta) - assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_timedelta, + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_timedelta, nat_series_dtype_timedelta) - assert_series_equal(single_nat_dtype_timedelta + nat_series_dtype_timedelta, + assert_series_equal(single_nat_dtype_timedelta + + nat_series_dtype_timedelta, nat_series_dtype_timedelta) - assert_series_equal(nat_series_dtype_timedelta + single_nat_dtype_datetime, + assert_series_equal(nat_series_dtype_timedelta + + single_nat_dtype_datetime, nat_series_dtype_timestamp) - assert_series_equal(single_nat_dtype_datetime + nat_series_dtype_timedelta, + assert_series_equal(single_nat_dtype_datetime + + nat_series_dtype_timedelta, nat_series_dtype_timestamp) # multiplication @@ -4066,14 +4193,12 @@ def test_ops_nat(self): Series([NaT, Timedelta('0.5s')])) assert_series_equal(timedelta_series / 2.0, Series([NaT, Timedelta('0.5s')])) - assert_series_equal(timedelta_series / nan, - nat_series_dtype_timedelta) + assert_series_equal(timedelta_series / nan, nat_series_dtype_timedelta) with tm.assertRaises(TypeError): nat_series_dtype_timestamp / 1.0 with tm.assertRaises(TypeError): nat_series_dtype_timestamp / 1 - def test_ops_datetimelike_align(self): # GH 7500 # datetimelike ops need to align @@ -4091,13 +4216,11 @@ def test_ops_datetimelike_align(self): assert_series_equal(result, expected) def test_timedelta64_functions(self): - - from datetime import timedelta from pandas import date_range # index min/max td = Series(date_range('2012-1-1', periods=3, freq='D')) - \ - Timestamp('20120101') + Timestamp('20120101') result = td.idxmin() self.assertEqual(result, 0) @@ -4121,7 +4244,7 @@ def test_timedelta64_functions(self): expected = Series(s2 - s1) # this fails as numpy returns timedelta64[us] - #result = np.abs(s1-s2) + # result = np.abs(s1-s2) # assert_frame_equal(result,expected) result = (s1 - s2).abs() @@ -4143,7 +4266,7 @@ def test_ops_consistency_on_empty(self): # float result = Series(dtype=float).sum() - self.assertEqual(result,0) + self.assertEqual(result, 0) result = Series(dtype=float).mean() self.assertTrue(isnull(result)) @@ -4162,37 +4285,38 @@ def test_ops_consistency_on_empty(self): self.assertTrue(result is pd.NaT) def test_timedelta_fillna(self): - #GH 3371 - s = Series([Timestamp('20130101'), Timestamp('20130101'), - Timestamp('20130102'), Timestamp('20130103 9:01:01')]) + # GH 3371 + s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( + '20130102'), Timestamp('20130103 9:01:01')]) td = s.diff() # reg fillna result = td.fillna(0) - expected = Series([timedelta(0), timedelta(0), timedelta(1), - timedelta(days=1, seconds=9*3600+60+1)]) + expected = Series([timedelta(0), timedelta(0), timedelta(1), timedelta( + days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) # interprested as seconds result = td.fillna(1) - expected = Series([timedelta(seconds=1), timedelta(0), - timedelta(1), timedelta(days=1, seconds=9*3600+60+1)]) + expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1), + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) result = td.fillna(timedelta(days=1, seconds=1)) - expected = Series([timedelta(days=1, seconds=1), timedelta(0), - timedelta(1), timedelta(days=1, seconds=9*3600+60+1)]) + expected = Series([timedelta(days=1, seconds=1), timedelta( + 0), timedelta(1), timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) result = td.fillna(np.timedelta64(int(1e9))) expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1), - timedelta(days=1, seconds=9*3600+60+1)]) + timedelta(days=1, seconds=9 * 3600 + 60 + 1)]) assert_series_equal(result, expected) from pandas import tslib result = td.fillna(tslib.NaT) expected = Series([tslib.NaT, timedelta(0), timedelta(1), - timedelta(days=1, seconds=9*3600+60+1)], dtype='m8[ns]') + timedelta(days=1, seconds=9 * 3600 + 60 + 1)], + dtype='m8[ns]') assert_series_equal(result, expected) # ffill @@ -4206,19 +4330,19 @@ def test_timedelta_fillna(self): td[2] = np.nan result = td.bfill() expected = td.fillna(0) - expected[2] = timedelta(days=1, seconds=9*3600+60+1) + expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1) assert_series_equal(result, expected) def test_datetime64_fillna(self): - s = Series([Timestamp('20130101'), Timestamp('20130101'), - Timestamp('20130102'), Timestamp('20130103 9:01:01')]) + s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp( + '20130102'), Timestamp('20130103 9:01:01')]) s[2] = np.nan # reg fillna result = s.fillna(Timestamp('20130104')) - expected = Series([Timestamp('20130101'), Timestamp('20130101'), - Timestamp('20130104'), Timestamp('20130103 9:01:01')]) + expected = Series([Timestamp('20130101'), Timestamp( + '20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')]) assert_series_equal(result, expected) from pandas import tslib @@ -4228,59 +4352,62 @@ def test_datetime64_fillna(self): # ffill result = s.ffill() - expected = Series([Timestamp('20130101'), Timestamp('20130101'), - Timestamp('20130101'), Timestamp('20130103 9:01:01')]) + expected = Series([Timestamp('20130101'), Timestamp( + '20130101'), Timestamp('20130101'), Timestamp('20130103 9:01:01')]) assert_series_equal(result, expected) # bfill result = s.bfill() expected = Series([Timestamp('20130101'), Timestamp('20130101'), - Timestamp('20130103 9:01:01'), - Timestamp('20130103 9:01:01')]) + Timestamp('20130103 9:01:01'), Timestamp( + '20130103 9:01:01')]) assert_series_equal(result, expected) # GH 6587 # make sure that we are treating as integer when filling # this also tests inference of a datetime-like with NaT's s = Series([pd.NaT, pd.NaT, '2013-08-05 15:30:00.000001']) - expected = Series(['2013-08-05 15:30:00.000001', '2013-08-05 15:30:00.000001', '2013-08-05 15:30:00.000001'], dtype='M8[ns]') + expected = Series( + ['2013-08-05 15:30:00.000001', '2013-08-05 15:30:00.000001', + '2013-08-05 15:30:00.000001'], dtype='M8[ns]') result = s.fillna(method='backfill') assert_series_equal(result, expected) def test_datetime64_tz_fillna(self): for tz in ['US/Eastern', 'Asia/Tokyo']: # DatetimeBlock - s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, - Timestamp('2011-01-03 10:00'), pd.NaT]) + s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp( + '2011-01-03 10:00'), pd.NaT]) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'), - Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( + '2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp( + '2011-01-02 10:00')]) self.assert_series_equal(expected, result) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) - expected = Series([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz=tz), - Timestamp('2011-01-03 10:00'), - Timestamp('2011-01-02 10:00', tz=tz)]) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( + '2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-02 10:00', tz=tz)]) self.assert_series_equal(expected, result) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', - Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) + Timestamp('2011-01-03 10:00'), 'AAA'], + dtype=object) self.assert_series_equal(expected, result) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00'), - Timestamp('2011-01-02 10:00', tz=tz), - Timestamp('2011-01-03 10:00'), - Timestamp('2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( + '2011-01-02 10:00', tz=tz), Timestamp('2011-01-03 10:00'), + Timestamp('2011-01-04 10:00')]) self.assert_series_equal(expected, result) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00'), - Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00'), Timestamp( + '2011-01-02 10:00'), Timestamp('2011-01-03 10:00'), Timestamp( + '2011-01-04 10:00')]) self.assert_series_equal(expected, result) # DatetimeBlockTZ @@ -4288,10 +4415,9 @@ def test_datetime64_tz_fillna(self): '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), - Timestamp('2011-01-02 10:00'), - Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2011-01-02 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( + '2011-01-02 10:00'), Timestamp('2011-01-03 10:00', tz=tz), + Timestamp('2011-01-02 10:00')]) self.assert_series_equal(expected, result) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) @@ -4301,7 +4427,8 @@ def test_datetime64_tz_fillna(self): expected = Series(idx) self.assert_series_equal(expected, result) - result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime()) + result = s.fillna(pd.Timestamp( + '2011-01-02 10:00', tz=tz).to_pydatetime()) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', '2011-01-03 10:00', '2011-01-02 10:00'], tz=tz) @@ -4316,33 +4443,31 @@ def test_datetime64_tz_fillna(self): result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), - Timestamp('2011-01-02 10:00', tz=tz), - Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2011-01-04 10:00')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( + '2011-01-02 10:00', tz=tz), Timestamp( + '2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')]) self.assert_series_equal(expected, result) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), - Timestamp('2011-01-02 10:00', tz=tz), - Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2011-01-04 10:00', tz=tz)]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( + '2011-01-02 10:00', tz=tz), Timestamp( + '2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00', + tz=tz)]) self.assert_series_equal(expected, result) # filling with a naive/other zone, coerce to object result = s.fillna(Timestamp('20130101')) - expected = Series([Timestamp('2011-01-01 10:00', tz=tz), - Timestamp('2013-01-01'), - Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2013-01-01')]) + expected = Series([Timestamp('2011-01-01 10:00', tz=tz), Timestamp( + '2013-01-01'), Timestamp('2011-01-03 10:00', tz=tz), Timestamp( + '2013-01-01')]) self.assert_series_equal(expected, result) - result = s.fillna(Timestamp('20130101',tz='US/Pacific')) + result = s.fillna(Timestamp('20130101', tz='US/Pacific')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), - Timestamp('2013-01-01',tz='US/Pacific'), + Timestamp('2013-01-01', tz='US/Pacific'), Timestamp('2011-01-03 10:00', tz=tz), - Timestamp('2013-01-01',tz='US/Pacific')]) + Timestamp('2013-01-01', tz='US/Pacific')]) self.assert_series_equal(expected, result) def test_fillna_int(self): @@ -4370,7 +4495,6 @@ def test_isnull_for_inf(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) - # TimeSeries-specific def test_fillna(self): @@ -4395,39 +4519,39 @@ def test_fillna(self): s2 = Series([1]) result = s1.fillna(s2) expected = Series([1.]) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = s1.fillna({}) - assert_series_equal(result,s1) + assert_series_equal(result, s1) result = s1.fillna(Series(())) - assert_series_equal(result,s1) + assert_series_equal(result, s1) result = s2.fillna(s1) - assert_series_equal(result,s2) - result = s1.fillna({ 0 : 1}) - assert_series_equal(result,expected) - result = s1.fillna({ 1 : 1}) - assert_series_equal(result,Series([np.nan])) - result = s1.fillna({ 0 : 1, 1 : 1}) - assert_series_equal(result,expected) - result = s1.fillna(Series({ 0 : 1, 1 : 1})) - assert_series_equal(result,expected) - result = s1.fillna(Series({ 0 : 1, 1 : 1},index=[4,5])) - assert_series_equal(result,s1) + assert_series_equal(result, s2) + result = s1.fillna({0: 1}) + assert_series_equal(result, expected) + result = s1.fillna({1: 1}) + assert_series_equal(result, Series([np.nan])) + result = s1.fillna({0: 1, 1: 1}) + assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1})) + assert_series_equal(result, expected) + result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5])) + assert_series_equal(result, s1) s1 = Series([0, 1, 2], list('abc')) s2 = Series([0, np.nan, 2], list('bac')) result = s2.fillna(s1) - expected = Series([0,0,2.], list('bac')) - assert_series_equal(result,expected) + expected = Series([0, 0, 2.], list('bac')) + assert_series_equal(result, expected) # limit - s = Series(np.nan,index=[0,1,2]) - result = s.fillna(999,limit=1) - expected = Series([999,np.nan,np.nan],index=[0,1,2]) - assert_series_equal(result,expected) + s = Series(np.nan, index=[0, 1, 2]) + result = s.fillna(999, limit=1) + expected = Series([999, np.nan, np.nan], index=[0, 1, 2]) + assert_series_equal(result, expected) - result = s.fillna(999,limit=2) - expected = Series([999,999,np.nan],index=[0,1,2]) - assert_series_equal(result,expected) + result = s.fillna(999, limit=2) + expected = Series([999, 999, np.nan], index=[0, 1, 2]) + assert_series_equal(result, expected) # GH 9043 # make sure a string representation of int/float values can be filled @@ -4502,8 +4626,8 @@ def test_datetime64_with_index(self): result = s - s.index.to_period() assert_series_equal(result, expected) - df = DataFrame(np.random.randn(5,2), - index=date_range('20130101', periods=5)) + df = DataFrame(np.random.randn(5, 2), + index=date_range('20130101', periods=5)) df['date'] = Timestamp('20130102') df['expected'] = df['date'] - df.index.to_series() df['result'] = df['date'] - df.index @@ -4536,16 +4660,17 @@ def test_timedelta64_nan(self): # boolean setting # this doesn't work, not sure numpy even supports it - #result = td[(td>np.timedelta64(timedelta(days=3))) & (tdnp.timedelta64(timedelta(days=3))) & + # td= -0.5) & (self.ts <= 0.5) - # assert_series_equal(selector, expected) + # def test_logical_range_select(self): + # np.random.seed(12345) + # selector = -0.5 <= self.ts <= 0.5 + # expected = (self.ts >= -0.5) & (self.ts <= 0.5) + # assert_series_equal(selector, expected) def test_operators_na_handling(self): from decimal import Decimal @@ -4585,35 +4710,35 @@ def test_object_comparisons(self): def test_comparison_tuples(self): # GH11339 # comparisons vs tuple - s = Series([(1,1),(1,2)]) + s = Series([(1, 1), (1, 2)]) - result = s == (1,2) - expected = Series([False,True]) + result = s == (1, 2) + expected = Series([False, True]) assert_series_equal(result, expected) - result = s != (1,2) + result = s != (1, 2) expected = Series([True, False]) assert_series_equal(result, expected) - result = s == (0,0) + result = s == (0, 0) expected = Series([False, False]) assert_series_equal(result, expected) - result = s != (0,0) + result = s != (0, 0) expected = Series([True, True]) assert_series_equal(result, expected) - s = Series([(1,1),(1,1)]) + s = Series([(1, 1), (1, 1)]) - result = s == (1,1) + result = s == (1, 1) expected = Series([True, True]) assert_series_equal(result, expected) - result = s != (1,1) + result = s != (1, 1) expected = Series([False, False]) assert_series_equal(result, expected) - s = Series([frozenset([1]),frozenset([1,2])]) + s = Series([frozenset([1]), frozenset([1, 2])]) result = s == frozenset([1]) expected = Series([True, False]) @@ -4645,7 +4770,7 @@ def test_comparison_operators_with_nas(self): # expected = f(val, s.dropna()).reindex(s.index) # assert_series_equal(result, expected) - # boolean &, |, ^ should work with object arrays and propagate NAs + # boolean &, |, ^ should work with object arrays and propagate NAs ops = ['and_', 'or_', 'xor'] mask = s.isnull() @@ -4679,13 +4804,13 @@ def test_comparison_invalid(self): s = Series(range(5)) s2 = Series(date_range('20010101', periods=5)) - for (x, y) in [(s,s2),(s2,s)]: - self.assertRaises(TypeError, lambda : x == y) - self.assertRaises(TypeError, lambda : x != y) - self.assertRaises(TypeError, lambda : x >= y) - self.assertRaises(TypeError, lambda : x > y) - self.assertRaises(TypeError, lambda : x < y) - self.assertRaises(TypeError, lambda : x <= y) + for (x, y) in [(s, s2), (s2, s)]: + self.assertRaises(TypeError, lambda: x == y) + self.assertRaises(TypeError, lambda: x != y) + self.assertRaises(TypeError, lambda: x >= y) + self.assertRaises(TypeError, lambda: x > y) + self.assertRaises(TypeError, lambda: x < y) + self.assertRaises(TypeError, lambda: x <= y) def test_more_na_comparisons(self): left = Series(['a', np.nan, 'c']) @@ -4726,15 +4851,15 @@ def test_comparison_label_based(self): expected = Series([True, False, False], list('bca')) result = a & b - assert_series_equal(result,expected) + assert_series_equal(result, expected) expected = Series([True, False, True], list('bca')) result = a | b - assert_series_equal(result,expected) + assert_series_equal(result, expected) expected = Series([False, False, True], list('bca')) result = a ^ b - assert_series_equal(result,expected) + assert_series_equal(result, expected) # rhs is bigger a = Series([True, False, True], list('bca')) @@ -4742,66 +4867,67 @@ def test_comparison_label_based(self): expected = Series([True, False, False], list('bca')) result = a & b - assert_series_equal(result,expected) + assert_series_equal(result, expected) expected = Series([True, False, True], list('bca')) result = a | b - assert_series_equal(result,expected) + assert_series_equal(result, expected) # filling # vs empty result = a & Series([]) expected = Series([False, False, False], list('bca')) - assert_series_equal(result,expected) + assert_series_equal(result, expected) result = a | Series([]) expected = Series([True, False, True], list('bca')) - assert_series_equal(result,expected) + assert_series_equal(result, expected) # vs non-matching - result = a & Series([1],['z']) + result = a & Series([1], ['z']) expected = Series([False, False, False], list('bca')) - assert_series_equal(result,expected) + assert_series_equal(result, expected) - result = a | Series([1],['z']) + result = a | Series([1], ['z']) expected = Series([True, False, True], list('bca')) - assert_series_equal(result,expected) + assert_series_equal(result, expected) # identity # we would like s[s|e] == s to hold for any e, whether empty or not - for e in [Series([]),Series([1],['z']),Series(['z']),Series(np.nan,b.index),Series(np.nan,a.index)]: + for e in [Series([]), Series([1], ['z']), Series(['z']), + Series(np.nan, b.index), Series(np.nan, a.index)]: result = a[a | e] - assert_series_equal(result,a[a]) + assert_series_equal(result, a[a]) # vs scalars index = list('bca') - t = Series([True,False,True]) - - for v in [True,1,2]: - result = Series([True,False,True],index=index) | v - expected = Series([True,True,True],index=index) - assert_series_equal(result,expected) - - for v in [np.nan,'foo']: - self.assertRaises(TypeError, lambda : t | v) - - for v in [False,0]: - result = Series([True,False,True],index=index) | v - expected = Series([True,False,True],index=index) - assert_series_equal(result,expected) - - for v in [True,1]: - result = Series([True,False,True],index=index) & v - expected = Series([True,False,True],index=index) - assert_series_equal(result,expected) - - for v in [False,0]: - result = Series([True,False,True],index=index) & v - expected = Series([False,False,False],index=index) - assert_series_equal(result,expected) + t = Series([True, False, True]) + + for v in [True, 1, 2]: + result = Series([True, False, True], index=index) | v + expected = Series([True, True, True], index=index) + assert_series_equal(result, expected) + + for v in [np.nan, 'foo']: + self.assertRaises(TypeError, lambda: t | v) + + for v in [False, 0]: + result = Series([True, False, True], index=index) | v + expected = Series([True, False, True], index=index) + assert_series_equal(result, expected) + + for v in [True, 1]: + result = Series([True, False, True], index=index) & v + expected = Series([True, False, True], index=index) + assert_series_equal(result, expected) + + for v in [False, 0]: + result = Series([True, False, True], index=index) & v + expected = Series([False, False, False], index=index) + assert_series_equal(result, expected) for v in [np.nan]: - self.assertRaises(TypeError, lambda : t & v) + self.assertRaises(TypeError, lambda: t & v) def test_operators_bitwise(self): # GH 9016: support bitwise op for integer types @@ -4811,8 +4937,11 @@ def test_operators_bitwise(self): s_fff = Series([False, False, False], index=index) s_tff = Series([True, False, False], index=index) s_empty = Series([]) - s_0101 = Series([0,1,0,1]) - s_0123 = Series(range(4),dtype='int64') + + # TODO: unused + # s_0101 = Series([0, 1, 0, 1]) + + s_0123 = Series(range(4), dtype='int64') s_3333 = Series([3] * 4) s_4444 = Series([4] * 4) @@ -4825,11 +4954,11 @@ def test_operators_bitwise(self): assert_series_equal(res, expected) res = s_0123 & s_3333 - expected = Series(range(4),dtype='int64') + expected = Series(range(4), dtype='int64') assert_series_equal(res, expected) res = s_0123 | s_4444 - expected = Series(range(4, 8),dtype='int64') + expected = Series(range(4, 8), dtype='int64') assert_series_equal(res, expected) s_a0b1c0 = Series([1], list('b')) @@ -4860,7 +4989,7 @@ def test_operators_bitwise(self): expected = Series([0, 1, 0, 1]) assert_series_equal(res, expected) - s_1111 = Series([1]*4, dtype='int8') + s_1111 = Series([1] * 4, dtype='int8') res = s_0123 & s_1111 expected = Series([0, 1, 0, 1], dtype='int64') assert_series_equal(res, expected) @@ -4870,7 +4999,7 @@ def test_operators_bitwise(self): assert_series_equal(res, expected) self.assertRaises(TypeError, lambda: s_1111 & 'a') - self.assertRaises(TypeError, lambda: s_1111 & ['a','b','c','d']) + self.assertRaises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) self.assertRaises(TypeError, lambda: s_0123 & np.NaN) self.assertRaises(TypeError, lambda: s_0123 & 3.14) self.assertRaises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) @@ -4883,12 +5012,13 @@ def test_operators_bitwise(self): assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) assert_series_equal(s_0123 & [False], Series([False] * 4)) assert_series_equal(s_0123 & (False), Series([False] * 4)) - assert_series_equal(s_0123 & Series([False, np.NaN, False, False]), Series([False] * 4)) + assert_series_equal(s_0123 & Series([False, np.NaN, False, False]), + Series([False] * 4)) s_ftft = Series([False, True, False, True]) assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft) - s_abNd = Series(['a','b',np.NaN,'d']) + s_abNd = Series(['a', 'b', np.NaN, 'd']) res = s_0123 & s_abNd expected = s_ftft assert_series_equal(res, expected) @@ -4918,7 +5048,8 @@ def test_setitem_na(self): s[::2] = np.nan assert_series_equal(s, expected) - expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, 9]) + expected = Series([np.nan, np.nan, np.nan, np.nan, np.nan, 5, 6, 7, 8, + 9]) s = Series(np.arange(10)) s[:5] = np.nan assert_series_equal(s, expected) @@ -4934,7 +5065,7 @@ def tester(a, b): s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) s[::2] = np.nan - expected = Series(True,index=s.index) + expected = Series(True, index=s.index) expected[::2] = False assert_series_equal(tester(s, list(s)), expected) @@ -5007,7 +5138,7 @@ def test_idxmax(self): # Float64Index # GH 5914 - s = pd.Series([1,2,3],[1.1,2.1,3.1]) + s = pd.Series([1, 2, 3], [1.1, 2.1, 3.1]) result = s.idxmax() self.assertEqual(result, 3.1) result = s.idxmin() @@ -5027,9 +5158,10 @@ def test_ndarray_compat(self): def f(x): return x[x.argmax()] + result = tsdf.apply(f) expected = tsdf.max() - assert_series_equal(result,expected) + assert_series_equal(result, expected) # .item() s = Series([1]) @@ -5040,12 +5172,12 @@ def f(x): # using an ndarray like function s = Series(np.random.randn(10)) result = np.ones_like(s) - expected = Series(1,index=range(10),dtype='float64') - #assert_series_equal(result,expected) + expected = Series(1, index=range(10), dtype='float64') + # assert_series_equal(result,expected) # ravel s = Series(np.random.randn(10)) - tm.assert_almost_equal(s.ravel(order='F'),s.values.ravel(order='F')) + tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F')) # compress # GH 6658 @@ -5072,47 +5204,53 @@ def test_complexx(self): # GH4819 # complex access for ndarray compat a = np.arange(5) - b = Series(a + 4j*a) - tm.assert_almost_equal(a,b.real) - tm.assert_almost_equal(4*a,b.imag) + b = Series(a + 4j * a) + tm.assert_almost_equal(a, b.real) + tm.assert_almost_equal(4 * a, b.imag) - b.real = np.arange(5)+5 - tm.assert_almost_equal(a+5,b.real) - tm.assert_almost_equal(4*a,b.imag) + b.real = np.arange(5) + 5 + tm.assert_almost_equal(a + 5, b.real) + tm.assert_almost_equal(4 * a, b.imag) def test_underlying_data_conversion(self): # GH 4080 - df = DataFrame(dict((c, [1,2,3]) for c in ['a', 'b', 'c'])) + df = DataFrame(dict((c, [1, 2, 3]) for c in ['a', 'b', 'c'])) df.set_index(['a', 'b', 'c'], inplace=True) - s = Series([1], index=[(2,2,2)]) + s = Series([1], index=[(2, 2, 2)]) df['val'] = 0 df df['val'].update(s) - expected = DataFrame(dict(a = [1,2,3], b = [1,2,3], c = [1,2,3], val = [0,1,0])) + expected = DataFrame( + dict(a=[1, 2, 3], b=[1, 2, 3], c=[1, 2, 3], val=[0, 1, 0])) expected.set_index(['a', 'b', 'c'], inplace=True) - tm.assert_frame_equal(df,expected) + tm.assert_frame_equal(df, expected) # GH 3970 # these are chained assignments as well - pd.set_option('chained_assignment',None) - df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) + pd.set_option('chained_assignment', None) + df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) df["cc"] = 0.0 - ck = [True]*len(df) + + ck = [True] * len(df) + df["bb"].iloc[0] = .13 - df_tmp = df.iloc[ck] + + # TODO: unused + df_tmp = df.iloc[ck] # noqa + df["bb"].iloc[0] = .15 self.assertEqual(df['bb'].iloc[0], 0.15) - pd.set_option('chained_assignment','raise') + pd.set_option('chained_assignment', 'raise') # GH 3217 - df = DataFrame(dict(a = [1,3], b = [np.nan, 2])) + df = DataFrame(dict(a=[1, 3], b=[np.nan, 2])) df['c'] = np.nan - df['c'].update(pd.Series(['foo'],index=[0])) + df['c'].update(pd.Series(['foo'], index=[0])) - expected = DataFrame(dict(a = [1,3], b = [np.nan, 2], c = ['foo',np.nan])) - tm.assert_frame_equal(df,expected) + expected = DataFrame(dict(a=[1, 3], b=[np.nan, 2], c=['foo', np.nan])) + tm.assert_frame_equal(df, expected) def test_operators_corner(self): series = self.ts @@ -5139,8 +5277,7 @@ def test_operators_corner(self): def test_operators_reverse_object(self): # GH 56 - arr = Series(np.random.randn(10), index=np.arange(10), - dtype=object) + arr = Series(np.random.randn(10), index=np.arange(10), dtype=object) def _check_op(arr, op): result = op(1., arr) @@ -5223,7 +5360,8 @@ def _check_fill(meth, op, a, b, fill_value=0): if compat.PY3: pairings.append((Series.div, operator.truediv, 1)) - pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x), 1)) + pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x), + 1)) else: pairings.append((Series.div, operator.div, 1)) pairings.append((Series.rdiv, lambda x, y: operator.div(y, x), 1)) @@ -5340,12 +5478,12 @@ def test_corr_rank(self): "{0}".format(scipy.__version__)) # results from R - A = Series([-0.89926396, 0.94209606, -1.03289164, -0.95445587, - 0.76910310, -0.06430576, -2.09704447, 0.40660407, - -0.89926396, 0.94209606]) - B = Series([-1.01270225, -0.62210117, -1.56895827, 0.59592943, - -0.01680292, 1.17258718, -1.06009347, -0.10222060, - -0.89076239, 0.89372375]) + A = Series( + [-0.89926396, 0.94209606, -1.03289164, -0.95445587, 0.76910310, - + 0.06430576, -2.09704447, 0.40660407, -0.89926396, 0.94209606]) + B = Series( + [-1.01270225, -0.62210117, -1.56895827, 0.59592943, -0.01680292, + 1.17258718, -1.06009347, -0.10222060, -0.89076239, 0.89372375]) kexp = 0.4319297 sexp = 0.5853767 self.assertAlmostEqual(A.corr(B, method='kendall'), kexp) @@ -5356,8 +5494,8 @@ def test_cov(self): self.assertAlmostEqual(self.ts.cov(self.ts), self.ts.std() ** 2) # partial overlap - self.assertAlmostEqual( - self.ts[:15].cov(self.ts[5:]), self.ts[5:15].std() ** 2) + self.assertAlmostEqual(self.ts[:15].cov(self.ts[5:]), + self.ts[5:15].std() ** 2) # No overlap self.assertTrue(np.isnan(self.ts[::2].cov(self.ts[1::2]))) @@ -5377,7 +5515,7 @@ def test_cov(self): def test_copy(self): for deep in [None, False, True]: - s = Series(np.arange(10),dtype='float64') + s = Series(np.arange(10), dtype='float64') # default deep is True if deep is None: @@ -5443,8 +5581,9 @@ def test_dtype(self): self.assertEqual(self.ts.dtypes, np.dtype('float64')) self.assertEqual(self.ts.ftype, 'float64:dense') self.assertEqual(self.ts.ftypes, 'float64:dense') - assert_series_equal(self.ts.get_dtype_counts(),Series(1,['float64'])) - assert_series_equal(self.ts.get_ftype_counts(),Series(1,['float64:dense'])) + assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64'])) + assert_series_equal(self.ts.get_ftype_counts(), Series( + 1, ['float64:dense'])) def test_dot(self): a = Series(np.random.randn(4), index=['p', 'q', 'r', 's']) @@ -5452,8 +5591,7 @@ def test_dot(self): columns=['p', 'q', 'r', 's']).T result = a.dot(b) - expected = Series(np.dot(a.values, b.values), - index=['1', '2', '3']) + expected = Series(np.dot(a.values, b.values), index=['1', '2', '3']) assert_series_equal(result, expected) # Check index alignment @@ -5478,7 +5616,7 @@ def test_value_counts_nunique(self): # basics.rst doc example series = Series(np.random.randn(500)) series[20:500] = np.nan - series[10:20] = 5000 + series[10:20] = 5000 result = series.nunique() self.assertEqual(result, 11) @@ -5518,8 +5656,8 @@ def test_dropna_empty(self): def test_datetime64_tz_dropna(self): # DatetimeBlock - s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, - Timestamp('2011-01-03 10:00'), pd.NaT]) + s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp( + '2011-01-03 10:00'), pd.NaT]) result = s.dropna() expected = Series([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-03 10:00')], index=[0, 2]) @@ -5539,8 +5677,8 @@ def test_datetime64_tz_dropna(self): self.assert_series_equal(result, expected) def test_dropna_no_nan(self): - for s in [Series([1, 2, 3], name='x'), - Series([False, True, False], name='x')]: + for s in [Series([1, 2, 3], name='x'), Series( + [False, True, False], name='x')]: result = s.dropna() self.assert_series_equal(result, s) @@ -5578,7 +5716,8 @@ def test_drop_duplicates(self): with tm.assert_produces_warning(FutureWarning): assert_series_equal(s.duplicated(take_last=True), expected) with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.drop_duplicates(take_last=True), s[~expected]) + assert_series_equal( + s.drop_duplicates(take_last=True), s[~expected]) sc = s.copy() with tm.assert_produces_warning(FutureWarning): sc.drop_duplicates(take_last=True, inplace=True) @@ -5611,7 +5750,8 @@ def test_drop_duplicates(self): with tm.assert_produces_warning(FutureWarning): assert_series_equal(s.duplicated(take_last=True), expected) with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.drop_duplicates(take_last=True), s[~expected]) + assert_series_equal( + s.drop_duplicates(take_last=True), s[~expected]) sc = s.copy() with tm.assert_produces_warning(FutureWarning): sc.drop_duplicates(take_last=True, inplace=True) @@ -5637,15 +5777,17 @@ def test_sort_values(self): ts.sort_values(ascending=False, inplace=True) self.assert_numpy_array_equal(ts, self.ts.sort_values(ascending=False)) - self.assert_numpy_array_equal(ts.index, - self.ts.sort_values(ascending=False).index) + self.assert_numpy_array_equal(ts.index, self.ts.sort_values( + ascending=False).index) # GH 5856/5853 # Series.sort_values operating on a view - df = DataFrame(np.random.randn(10,4)) - s = df.iloc[:,0] + df = DataFrame(np.random.randn(10, 4)) + s = df.iloc[:, 0] + def f(): s.sort_values(inplace=True) + self.assertRaises(ValueError, f) # test order/sort inplace @@ -5654,13 +5796,13 @@ def f(): ts1.sort_values(ascending=False, inplace=True) ts2 = self.ts.copy() ts2.sort_values(ascending=False, inplace=True) - assert_series_equal(ts1,ts2) + assert_series_equal(ts1, ts2) ts1 = self.ts.copy() ts1 = ts1.sort_values(ascending=False, inplace=False) ts2 = self.ts.copy() ts2 = ts.sort_values(ascending=False) - assert_series_equal(ts1,ts2) + assert_series_equal(ts1, ts2) def test_sort_index(self): rindex = list(self.ts.index) @@ -5686,8 +5828,7 @@ def test_sort_index_inplace(self): result = random_order.sort_index(ascending=False, inplace=True) self.assertIs(result, None, msg='sort_index() inplace should return None') - assert_series_equal(random_order, - self.ts.reindex(self.ts.index[::-1])) + assert_series_equal(random_order, self.ts.reindex(self.ts.index[::-1])) # ascending random_order = self.ts.reindex(rindex) @@ -5720,12 +5861,13 @@ def test_sort_API(self): sorted_series = random_order.sort_index(axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, lambda : random_order.sort_values(axis=1)) + self.assertRaises(ValueError, lambda: random_order.sort_values(axis=1)) sorted_series = random_order.sort_index(level=0, axis=0) assert_series_equal(sorted_series, self.ts) - self.assertRaises(ValueError, lambda : random_order.sort_index(level=0, axis=1)) + self.assertRaises(ValueError, + lambda: random_order.sort_index(level=0, axis=1)) def test_order(self): @@ -5801,13 +5943,15 @@ def test_nsmallest_nlargest(self): assert_series_equal(s.nsmallest(2, keep='last'), s.iloc[[2, 3]]) with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) + assert_series_equal( + s.nsmallest(2, take_last=True), s.iloc[[2, 3]]) assert_series_equal(s.nlargest(3), s.iloc[[4, 0, 1]]) assert_series_equal(s.nlargest(3, keep='last'), s.iloc[[4, 0, 3]]) with tm.assert_produces_warning(FutureWarning): - assert_series_equal(s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) + assert_series_equal( + s.nlargest(3, take_last=True), s.iloc[[4, 0, 3]]) empty = s.iloc[0:0] assert_series_equal(s.nsmallest(0), empty) @@ -5847,7 +5991,7 @@ def test_rank(self): filled = self.ts.fillna(np.inf) # rankdata returns a ndarray - exp = Series(rankdata(filled),index=filled.index) + exp = Series(rankdata(filled), index=filled.index) exp[mask] = np.nan assert_almost_equal(ranks, exp) @@ -5870,7 +6014,7 @@ def test_rank(self): iseries[1] = np.nan exp = Series(np.repeat(50.0 / 99.0, 100)) - exp[1] = np.nan + exp[1] = np.nan iranks = iseries.rank(pct=True) assert_series_equal(iranks, exp) @@ -5898,12 +6042,14 @@ def test_rank(self): iranks = iseries.rank(pct=True) assert_series_equal(iranks, exp) - iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20+1e-30, 1e-1]) + iseries = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1]) exp = Series([2, 1, 3, 5, 4, 6.0]) iranks = iseries.rank() assert_series_equal(iranks, exp) - values = np.array([-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40], dtype='float64') + values = np.array( + [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40 + ], dtype='float64') random_order = np.random.permutation(len(values)) iseries = Series(values[random_order]) exp = Series(random_order + 1.0, dtype='float64') @@ -5911,16 +6057,18 @@ def test_rank(self): assert_series_equal(iranks, exp) def test_rank_inf(self): - raise nose.SkipTest('DataFrame.rank does not currently rank np.inf and -np.inf properly') + raise nose.SkipTest('DataFrame.rank does not currently rank ' + 'np.inf and -np.inf properly') - values = np.array([-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40, np.inf], dtype='float64') + values = np.array( + [-np.inf, -50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, + 2, 40, np.inf], dtype='float64') random_order = np.random.permutation(len(values)) iseries = Series(values[random_order]) exp = Series(random_order + 1.0, dtype='float64') iranks = iseries.rank() assert_series_equal(iranks, exp) - def test_from_csv(self): with ensure_clean() as path: @@ -5951,8 +6099,8 @@ def test_from_csv(self): outfile.write('1998-01-01|1.0\n1999-01-01|2.0') outfile.close() series = Series.from_csv(path, sep='|') - checkseries = Series( - {datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}) + checkseries = Series({datetime(1998, 1, 1): 1.0, + datetime(1999, 1, 1): 2.0}) assert_series_equal(checkseries, series) series = Series.from_csv(path, sep='|', parse_dates=False) @@ -5966,7 +6114,7 @@ def test_to_csv(self): self.ts.to_csv(path) lines = io.open(path, newline=None).readlines() - assert(lines[1] != '\n') + assert (lines[1] != '\n') self.ts.to_csv(path, index=False) arr = np.loadtxt(path) @@ -6005,7 +6153,8 @@ def test_to_frame(self): assert_frame_equal(rs, xp) rs = self.ts.to_frame(name='testdifferent') - xp = pd.DataFrame(dict(testdifferent=self.ts.values), index=self.ts.index) + xp = pd.DataFrame( + dict(testdifferent=self.ts.values), index=self.ts.index) assert_frame_equal(rs, xp) def test_to_dict(self): @@ -6066,9 +6215,9 @@ def test_clip(self): def test_clip_types_and_nulls(self): - sers = [Series([np.nan, 1.0, 2.0, 3.0]), - Series([None, 'a', 'b', 'c']), - Series(pd.to_datetime([np.nan, 1, 2, 3], unit='D'))] + sers = [Series([np.nan, 1.0, 2.0, 3.0]), Series([None, 'a', 'b', 'c']), + Series(pd.to_datetime( + [np.nan, 1, 2, 3], unit='D'))] for s in sers: thresh = s[2] @@ -6093,22 +6242,25 @@ def test_clip_against_series(self): assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5])) assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5])) - def test_clip_with_datetimes(self): # GH 11838 # naive and tz-aware datetimes t = Timestamp('2015-12-01 09:30:30') - s = Series([ Timestamp('2015-12-01 09:30:00'), Timestamp('2015-12-01 09:31:00') ]) + s = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( + '2015-12-01 09:31:00')]) result = s.clip(upper=t) - expected = Series([ Timestamp('2015-12-01 09:30:00'), Timestamp('2015-12-01 09:30:30') ]) + expected = Series([Timestamp('2015-12-01 09:30:00'), Timestamp( + '2015-12-01 09:30:30')]) assert_series_equal(result, expected) t = Timestamp('2015-12-01 09:30:30', tz='US/Eastern') - s = Series([ Timestamp('2015-12-01 09:30:00', tz='US/Eastern'), Timestamp('2015-12-01 09:31:00', tz='US/Eastern') ]) + s = Series([Timestamp('2015-12-01 09:30:00', tz='US/Eastern'), + Timestamp('2015-12-01 09:31:00', tz='US/Eastern')]) result = s.clip(upper=t) - expected = Series([ Timestamp('2015-12-01 09:30:00', tz='US/Eastern'), Timestamp('2015-12-01 09:30:30', tz='US/Eastern') ]) + expected = Series([Timestamp('2015-12-01 09:30:00', tz='US/Eastern'), + Timestamp('2015-12-01 09:30:30', tz='US/Eastern')]) assert_series_equal(result, expected) def test_valid(self): @@ -6122,15 +6274,15 @@ def test_valid(self): def test_isnull(self): ser = Series([0, 5.4, 3, nan, -0.001]) - np.array_equal( - ser.isnull(), Series([False, False, False, True, False]).values) + np.array_equal(ser.isnull(), + Series([False, False, False, True, False]).values) ser = Series(["hi", "", nan]) np.array_equal(ser.isnull(), Series([False, False, True]).values) def test_notnull(self): ser = Series([0, 5.4, 3, nan, -0.001]) - np.array_equal( - ser.notnull(), Series([True, True, True, False, True]).values) + np.array_equal(ser.notnull(), + Series([True, True, True, False, True]).values) ser = Series(["hi", "", nan]) np.array_equal(ser.notnull(), Series([True, True, False]).values) @@ -6180,23 +6332,27 @@ def test_shift(self): # 32-bit taking # GH 8129 - index=date_range('2000-01-01',periods=5) - for dtype in ['int32','int64']: - s1 = Series(np.arange(5,dtype=dtype),index=index) + index = date_range('2000-01-01', periods=5) + for dtype in ['int32', 'int64']: + s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) - expected = Series([np.nan,0,1,2,3],index=index) - assert_series_equal(result,expected) + expected = Series([np.nan, 0, 1, 2, 3], index=index) + assert_series_equal(result, expected) # xref 8260 # with tz - s = Series(date_range('2000-01-01 09:00:00',periods=5,tz='US/Eastern'),name='foo') - result = s-s.shift() - assert_series_equal(result,Series(TimedeltaIndex(['NaT'] + ['1 days']*4),name='foo')) + s = Series( + date_range('2000-01-01 09:00:00', periods=5, + tz='US/Eastern'), name='foo') + result = s - s.shift() + assert_series_equal(result, Series( + TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')) # incompat tz - s2 = Series(date_range('2000-01-01 09:00:00',periods=5,tz='CET'),name='foo') - self.assertRaises(ValueError, lambda : s-s2) + s2 = Series( + date_range('2000-01-01 09:00:00', periods=5, tz='CET'), name='foo') + self.assertRaises(ValueError, lambda: s - s2) def test_tshift(self): # PeriodIndex @@ -6299,10 +6455,10 @@ def test_truncate(self): # corner case, empty series returned truncated = ts.truncate(after=self.ts.index[0] - offset) - assert(len(truncated) == 0) + assert (len(truncated) == 0) truncated = ts.truncate(before=self.ts.index[-1] + offset) - assert(len(truncated) == 0) + assert (len(truncated) == 0) self.assertRaises(ValueError, ts.truncate, before=self.ts.index[-1] + offset, @@ -6319,7 +6475,7 @@ def test_ptp(self): self.assertEqual(s.ptp(), 13) self.assertTrue(pd.isnull(s.ptp(skipna=False))) - mi = pd.MultiIndex.from_product([['a','b'], [1,2,3]]) + mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) s = pd.Series([1, np.nan, 7, 3, 5, np.nan], index=mi) expected = pd.Series([6, 2], index=['a', 'b'], dtype=np.float64) @@ -6338,7 +6494,6 @@ def test_ptp(self): with self.assertRaises(NotImplementedError): s.ptp(numeric_only=True) - def test_asof(self): # array or list or dates N = 50 @@ -6498,13 +6653,13 @@ def test_getitem_setitem_datetime_tz_pytz(self): result[date] = ts[4] assert_series_equal(result, ts) - def test_getitem_setitem_datetime_tz_dateutil(self): tm._skip_if_no_dateutil() from dateutil.tz import tzutc from pandas.tslib import _dateutil_gettz as gettz - tz = lambda x: tzutc() if x == 'UTC' else gettz(x) # handle special case for utc in dateutil + tz = lambda x: tzutc() if x == 'UTC' else gettz( + x) # handle special case for utc in dateutil from pandas import date_range N = 50 @@ -6653,23 +6808,23 @@ def test_cast_on_putmask(self): def test_type_promote_putmask(self): # GH8387: test that changing types does not break alignment - ts = Series(np.random.randn(100), index=np.arange(100,0,-1)).round(5) + ts = Series(np.random.randn(100), index=np.arange(100, 0, -1)).round(5) left, mask = ts.copy(), ts > 0 right = ts[mask].copy().map(str) left[mask] = right assert_series_equal(left, ts.map(lambda t: str(t) if t > 0 else t)) - s = Series([0, 1, 2, 0 ]) + s = Series([0, 1, 2, 0]) mask = s > 0 - s2 = s[ mask ].map( str ) + s2 = s[mask].map(str) s[mask] = s2 assert_series_equal(s, Series([0, '1', '2', 0])) - s = Series([0, 'foo', 'bar', 0 ]) + s = Series([0, 'foo', 'bar', 0]) mask = Series([False, True, True, False]) - s2 = s[ mask ] + s2 = s[mask] s[mask] = s2 - assert_series_equal(s, Series([0, 'foo','bar', 0])) + assert_series_equal(s, Series([0, 'foo', 'bar', 0])) def test_astype_cast_nan_int(self): df = Series([1.0, 2.0, 3.0, np.nan]) @@ -6706,8 +6861,7 @@ def test_astype_datetimes(self): def test_astype_str(self): # GH4405 digits = string.digits - s1 = Series([digits * 10, tm.rands(63), tm.rands(64), - tm.rands(1000)]) + s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]) s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) types = (compat.text_type, np.str_) for typ in types: @@ -6747,23 +6901,22 @@ def test_astype_unicode(self): former_encoding = None if not compat.PY3: - # in python we can force the default encoding - # for this test + # in python we can force the default encoding for this test former_encoding = sys.getdefaultencoding() - reload(sys) + reload(sys) # noqa sys.setdefaultencoding("utf-8") if sys.getdefaultencoding() == "utf-8": - test_series.append(Series([u('野菜食べないとやばい').encode("utf-8")])) + test_series.append(Series([u('野菜食べないとやばい') + .encode("utf-8")])) for s in test_series: res = s.astype("unicode") expec = s.map(compat.text_type) assert_series_equal(res, expec) # restore the former encoding if former_encoding is not None and former_encoding != "utf-8": - reload(sys) + reload(sys) # noqa sys.setdefaultencoding(former_encoding) - def test_map(self): index, data = tm.getMixedTypeDict() @@ -6796,7 +6949,8 @@ def test_map(self): self.assert_series_equal(a.map(c), exp) a = Series(['a', 'b', 'c', 'd']) - b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(['b', 'c', 'd', 'e'])) + b = Series([1, 2, 3, 4], + index=pd.CategoricalIndex(['b', 'c', 'd', 'e'])) c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e'])) exp = Series([np.nan, 1, 2, 3]) @@ -6816,10 +6970,10 @@ def test_map(self): def test_map_compat(self): # related GH 8024 - s = Series([True,True,False],index=[1,2,3]) - result = s.map({ True : 'foo', False : 'bar' }) - expected = Series(['foo','foo','bar'],index=[1,2,3]) - assert_series_equal(result,expected) + s = Series([True, True, False], index=[1, 2, 3]) + result = s.map({True: 'foo', False: 'bar'}) + expected = Series(['foo', 'foo', 'bar'], index=[1, 2, 3]) + assert_series_equal(result, expected) def test_map_int(self): left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4}) @@ -6844,13 +6998,13 @@ def test_divide_decimal(self): expected = Series([Decimal(5)]) - s = Series([Decimal(10)]) - s = s/Decimal(2) + s = Series([Decimal(10)]) + s = s / Decimal(2) tm.assert_series_equal(expected, s) - s = Series([Decimal(10)]) - s = s//Decimal(2) + s = Series([Decimal(10)]) + s = s // Decimal(2) tm.assert_series_equal(expected, s) @@ -6875,17 +7029,13 @@ def test_map_dict_with_tuple_keys(self): converted to a multi-index, preventing tuple values from being mapped properly. ''' - df = pd.DataFrame({'a': [(1,), (2,), (3, 4), (5, 6)]}) - label_mappings = { - (1,): 'A', - (2,): 'B', - (3, 4): 'A', - (5, 6): 'B' - } + df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]}) + label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'} df['labels'] = df['a'].map(label_mappings) df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index) # All labels should be filled now - tm.assert_series_equal(df['labels'], df['expected_labels'], check_names=False) + tm.assert_series_equal(df['labels'], df['expected_labels'], + check_names=False) def test_apply(self): assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts)) @@ -6895,8 +7045,8 @@ def test_apply(self): assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts)) # how to handle Series result, #2316 - result = self.ts.apply(lambda x: Series([x, x ** 2], - index=['x', 'x^2'])) + result = self.ts.apply(lambda x: Series( + [x, x ** 2], index=['x', 'x^2'])) expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2}) tm.assert_frame_equal(result, expected) @@ -6939,20 +7089,23 @@ def test_convert_objects(self): s = Series([1., 2, 3], index=['a', 'b', 'c']) with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=False, convert_numeric=True) + result = s.convert_objects(convert_dates=False, + convert_numeric=True) assert_series_equal(result, s) # force numeric conversion r = s.copy().astype('O') r['a'] = '1' with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, convert_numeric=True) + result = r.convert_objects(convert_dates=False, + convert_numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') r['a'] = '1.' with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, convert_numeric=True) + result = r.convert_objects(convert_dates=False, + convert_numeric=True) assert_series_equal(result, s) r = s.copy().astype('O') @@ -6960,7 +7113,8 @@ def test_convert_objects(self): expected = s.copy() expected['a'] = np.nan with tm.assert_produces_warning(FutureWarning): - result = r.convert_objects(convert_dates=False, convert_numeric=True) + result = r.convert_objects(convert_dates=False, + convert_numeric=True) assert_series_equal(result, expected) # GH 4119, not converting a mixed type (e.g.floats and object) @@ -6977,14 +7131,17 @@ def test_convert_objects(self): assert_series_equal(result, expected) # dates - s = Series( - [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0)]) - s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime( - 2001, 1, 3, 0, 0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'], dtype='O') + s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0)]) + s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, + Timestamp('20010104'), '20010105'], + dtype='O') with tm.assert_produces_warning(FutureWarning): - result = s.convert_objects(convert_dates=True, convert_numeric=False) - expected = Series( - [Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103')], dtype='M8[ns]') + result = s.convert_objects(convert_dates=True, + convert_numeric=False) + expected = Series([Timestamp('20010101'), Timestamp('20010102'), + Timestamp('20010103')], dtype='M8[ns]') assert_series_equal(result, expected) with tm.assert_produces_warning(FutureWarning): @@ -6995,10 +7152,10 @@ def test_convert_objects(self): convert_numeric=True) assert_series_equal(result, expected) - expected = Series( - [Timestamp( - '20010101'), Timestamp('20010102'), Timestamp('20010103'), - lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') + expected = Series([Timestamp('20010101'), Timestamp('20010102'), + Timestamp('20010103'), + lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), + Timestamp('20010105')], dtype='M8[ns]') with tm.assert_produces_warning(FutureWarning): result = s2.convert_objects(convert_dates='coerce', convert_numeric=False) @@ -7022,10 +7179,10 @@ def test_convert_objects(self): convert_numeric=False) assert_series_equal(result, s) - #r = s.copy() - #r[0] = np.nan - #result = r.convert_objects(convert_dates=True,convert_numeric=False) - #self.assertEqual(result.dtype, 'M8[ns]') + # r = s.copy() + # r[0] = np.nan + # result = r.convert_objects(convert_dates=True,convert_numeric=False) + # self.assertEqual(result.dtype, 'M8[ns]') # dateutil parses some single letters into today's value as a date for x in 'abcdefghijklmnopqrstuvwxyz': @@ -7097,19 +7254,20 @@ def test_convert(self): assert_series_equal(results, s) # test pass-through and non-conversion when other types selected - s = Series(['1.0','2.0','3.0']) + s = Series(['1.0', '2.0', '3.0']) results = s._convert(datetime=True, numeric=True, timedelta=True) - expected = Series([1.0,2.0,3.0]) + expected = Series([1.0, 2.0, 3.0]) assert_series_equal(results, expected) - results = s._convert(True,False,True) + results = s._convert(True, False, True) assert_series_equal(results, s) - s = Series([datetime(2001, 1, 1, 0, 0),datetime(2001, 1, 1, 0, 0)], + s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype='O') results = s._convert(datetime=True, numeric=True, timedelta=True) - expected = Series([datetime(2001, 1, 1, 0, 0),datetime(2001, 1, 1, 0, 0)]) + expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, + 0)]) assert_series_equal(results, expected) - results = s._convert(datetime=False,numeric=True,timedelta=True) + results = s._convert(datetime=False, numeric=True, timedelta=True) assert_series_equal(results, s) td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0) @@ -7117,10 +7275,9 @@ def test_convert(self): results = s._convert(datetime=True, numeric=True, timedelta=True) expected = Series([td, td]) assert_series_equal(results, expected) - results = s._convert(True,True,False) + results = s._convert(True, True, False) assert_series_equal(results, s) - s = Series([1., 2, 3], index=['a', 'b', 'c']) result = s._convert(numeric=True) assert_series_equal(result, s) @@ -7154,34 +7311,33 @@ def test_convert(self): assert_series_equal(result, expected) # dates - s = Series( - [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0)]) - s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime( - 2001, 1, 3, 0, 0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'], dtype='O') + s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0)]) + s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), + datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1, + Timestamp('20010104'), '20010105'], dtype='O') result = s._convert(datetime=True) - expected = Series( - [Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103')], dtype='M8[ns]') + expected = Series([Timestamp('20010101'), Timestamp('20010102'), + Timestamp('20010103')], dtype='M8[ns]') assert_series_equal(result, expected) result = s._convert(datetime=True, coerce=True) assert_series_equal(result, expected) - expected = Series( - [Timestamp( - '20010101'), Timestamp('20010102'), Timestamp('20010103'), - lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]') - result = s2._convert(datetime=True, - numeric=False, - timedelta=False, - coerce=True) + expected = Series([Timestamp('20010101'), Timestamp('20010102'), + Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT, + Timestamp('20010104'), Timestamp('20010105')], + dtype='M8[ns]') + result = s2._convert(datetime=True, numeric=False, timedelta=False, + coerce=True) assert_series_equal(result, expected) result = s2._convert(datetime=True, coerce=True) assert_series_equal(result, expected) s = Series(['foo', 'bar', 1, 1.0], dtype='O') result = s._convert(datetime=True, coerce=True) - expected = Series([lib.NaT]*4) + expected = Series([lib.NaT] * 4) assert_series_equal(result, expected) # preserver if non-object @@ -7189,10 +7345,10 @@ def test_convert(self): result = s._convert(datetime=True, coerce=True) assert_series_equal(result, s) - #r = s.copy() - #r[0] = np.nan - #result = r._convert(convert_dates=True,convert_numeric=False) - #self.assertEqual(result.dtype, 'M8[ns]') + # r = s.copy() + # r[0] = np.nan + # result = r._convert(convert_dates=True,convert_numeric=False) + # self.assertEqual(result.dtype, 'M8[ns]') # dateutil parses some single letters into today's value as a date expected = Series([lib.NaT]) @@ -7205,7 +7361,7 @@ def test_convert(self): assert_series_equal(result, expected) def test_convert_no_arg_error(self): - s = Series(['1.0','2']) + s = Series(['1.0', '2']) self.assertRaises(ValueError, s._convert) def test_convert_preserve_bool(self): @@ -7223,7 +7379,7 @@ def test_convert_preserve_all_bool(self): def test_apply_args(self): s = Series(['foo,bar']) - result = s.apply(str.split, args=(',',)) + result = s.apply(str.split, args=(',', )) self.assertEqual(result[0], ['foo', 'bar']) tm.assertIsInstance(result[0], list) @@ -7287,8 +7443,8 @@ def _check_align(a, b, how='left', method='pad', limit=None): for kind in JOIN_TYPES: for meth in ['pad', 'bfill']: _check_align(self.ts[2:], self.ts[:-5], how=kind, method=meth) - _check_align(self.ts[2:], self.ts[:-5], how=kind, - method=meth, limit=1) + _check_align(self.ts[2:], self.ts[:-5], how=kind, method=meth, + limit=1) # empty left _check_align(self.ts[:0], self.ts[:-5], how=kind, method=meth) @@ -7347,10 +7503,10 @@ def test_align_multiindex(self): # GH 10665 midx = pd.MultiIndex.from_product([range(2), range(3), range(2)], - names=('a', 'b', 'c')) + names=('a', 'b', 'c')) idx = pd.Index(range(2), name='b') - s1 = pd.Series(np.arange(12,dtype='int64'), index=midx) - s2 = pd.Series(np.arange(2,dtype='int64'), index=idx) + s1 = pd.Series(np.arange(12, dtype='int64'), index=midx) + s2 = pd.Series(np.arange(2, dtype='int64'), index=idx) # these must be the same results (but flipped) res1l, res1r = s1.align(s2, join='left') @@ -7382,7 +7538,8 @@ def test_reindex(self): # __array_interface__ is not defined for older numpies # and on some pythons try: - self.assertTrue(np.may_share_memory(self.series.index, identity.index)) + self.assertTrue(np.may_share_memory(self.series.index, + identity.index)) except (AttributeError): pass @@ -7427,7 +7584,7 @@ def test_reindex_nan(self): def test_reindex_corner(self): # (don't forget to fix this) I think it's fixed - reindexed_dep = self.empty.reindex(self.ts.index, method='pad') + self.empty.reindex(self.ts.index, method='pad') # it works # corner case: pad empty series reindexed = self.empty.reindex(self.ts.index, method='pad') @@ -7442,7 +7599,7 @@ def test_reindex_corner(self): def test_reindex_pad(self): - s = Series(np.arange(10),dtype='int64') + s = Series(np.arange(10), dtype='int64') s2 = s[::2] reindexed = s2.reindex(s.index, method='pad') @@ -7453,9 +7610,9 @@ def test_reindex_pad(self): assert_series_equal(reindexed, expected) # GH4604 - s = Series([1,2,3,4,5], index=['a', 'b', 'c', 'd', 'e']) - new_index = ['a','g','c','f'] - expected = Series([1,1,3,3],index=new_index) + s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e']) + new_index = ['a', 'g', 'c', 'f'] + expected = Series([1, 1, 3, 3], index=new_index) # this changes dtype because the ffill happens after result = s.reindex(new_index).ffill() @@ -7469,16 +7626,16 @@ def test_reindex_pad(self): assert_series_equal(result, expected) # inferrence of new dtype - s = Series([True,False,False,True],index=list('abcd')) - new_index='agc' + s = Series([True, False, False, True], index=list('abcd')) + new_index = 'agc' result = s.reindex(list(new_index)).ffill() - expected = Series([True,True,False],index=list(new_index)) + expected = Series([True, True, False], index=list(new_index)) assert_series_equal(result, expected) # GH4618 shifted series downcasting - s = Series(False,index=lrange(0,5)) + s = Series(False, index=lrange(0, 5)) result = s.shift(1).fillna(method='bfill') - expected = Series(False,index=lrange(0,5)) + expected = Series(False, index=lrange(0, 5)) assert_series_equal(result, expected) def test_reindex_nearest(self): @@ -7544,11 +7701,11 @@ def test_reindex_like(self): self.ts.reindex_like(other)) # GH 7179 - day1 = datetime(2013,3,5) - day2 = datetime(2013,5,5) - day3 = datetime(2014,3,5) + day1 = datetime(2013, 3, 5) + day2 = datetime(2013, 5, 5) + day3 = datetime(2014, 3, 5) - series1 = Series([5, None, None],[day1, day2, day3]) + series1 = Series([5, None, None], [day1, day2, day3]) series2 = Series([None, None], [day1, day3]) result = series1.reindex_like(series2, method='pad') @@ -7556,7 +7713,7 @@ def test_reindex_like(self): assert_series_equal(result, expected) def test_reindex_fill_value(self): - #------------------------------------------------------------ + # ----------------------------------------------------------- # floats floats = Series([1., 2., 3.]) result = floats.reindex([1, 2, 3]) @@ -7567,7 +7724,7 @@ def test_reindex_fill_value(self): expected = Series([2., 3., 0], index=[1, 2, 3]) assert_series_equal(result, expected) - #------------------------------------------------------------ + # ----------------------------------------------------------- # ints ints = Series([1, 2, 3]) @@ -7581,7 +7738,7 @@ def test_reindex_fill_value(self): self.assertTrue(issubclass(result.dtype.type, np.integer)) assert_series_equal(result, expected) - #------------------------------------------------------------ + # ----------------------------------------------------------- # objects objects = Series([1, 2, 3], dtype=object) @@ -7593,7 +7750,7 @@ def test_reindex_fill_value(self): expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object) assert_series_equal(result, expected) - #------------------------------------------------------------ + # ------------------------------------------------------------ # bools bools = Series([True, False, True]) @@ -7621,8 +7778,9 @@ def test_rename(self): self.assert_numpy_array_equal(renamed.index, ['a', 'foo', 'c', 'bar']) # index with name - renamer = Series( - np.arange(4), index=Index(['a', 'b', 'c', 'd'], name='name'), dtype='int64') + renamer = Series(np.arange(4), + index=Index(['a', 'b', 'c', 'd'], name='name'), + dtype='int64') renamed = renamer.rename({}) self.assertEqual(renamed.index.name, renamer.index.name) @@ -7645,8 +7803,8 @@ def test_ne(self): self.assertTrue(tm.equalContents(~(ts.index == 5), expected)) def test_pad_nan(self): - x = Series([np.nan, 1., np.nan, 3., np.nan], - ['z', 'a', 'b', 'c', 'd'], dtype=float) + x = Series([np.nan, 1., np.nan, 3., np.nan], ['z', 'a', 'b', 'c', 'd'], + dtype=float) x.fillna(method='pad', inplace=True) @@ -7675,20 +7833,18 @@ def test_unstack(self): assert_frame_equal(unstacked, expected.T) index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], + labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) exp_index = MultiIndex(levels=[['one', 'two', 'three'], [0, 1]], - labels=[[0, 1, 2, 0, 1, 2], - [0, 1, 0, 1, 0, 1]]) + labels=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) expected = DataFrame({'bar': s.values}, index=exp_index).sortlevel(0) unstacked = s.unstack(0) assert_frame_equal(unstacked, expected) # GH5873 idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]]) - ts = pd.Series([1,2], index=idx) + ts = pd.Series([1, 2], index=idx) left = ts.unstack() right = DataFrame([[nan, 1], [2, nan]], index=[101, 102], columns=[nan, 3.5]) @@ -7696,8 +7852,9 @@ def test_unstack(self): print(right) assert_frame_equal(left, right) - idx = pd.MultiIndex.from_arrays([['cat', 'cat', 'cat', 'dog', 'dog'], - ['a', 'a', 'b', 'a', 'b'], [1, 2, 1, 1, np.nan]]) + idx = pd.MultiIndex.from_arrays([['cat', 'cat', 'cat', 'dog', 'dog' + ], ['a', 'a', 'b', 'a', 'b'], + [1, 2, 1, 1, np.nan]]) ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx) right = DataFrame([[1.0, 1.3], [1.1, nan], [nan, 1.4], [1.2, nan]], columns=['cat', 'dog']) @@ -7727,6 +7884,7 @@ def test_head_tail(self): assert_series_equal(self.series.head(0), self.series[0:0]) assert_series_equal(self.series.tail(), self.series[-5:]) assert_series_equal(self.series.tail(0), self.series[0:0]) + def test_isin(self): s = Series(['A', 'B', 'C', 'a', 'B', 'B', 'A', 'C']) @@ -7747,11 +7905,11 @@ def test_isin_with_string_scalar(self): def test_isin_with_i8(self): # GH 5021 - expected = Series([True,True,False,False,False]) - expected2 = Series([False,True,False,False,False]) + expected = Series([True, True, False, False, False]) + expected2 = Series([False, True, False, False, False]) # datetime64[ns] - s = Series(date_range('jan-01-2013','jan-05-2013')) + s = Series(date_range('jan-01-2013', 'jan-05-2013')) result = s.isin(s[0:2]) assert_series_equal(result, expected) @@ -7770,12 +7928,13 @@ def test_isin_with_i8(self): assert_series_equal(result, expected2) # timedelta64[ns] - s = Series(pd.to_timedelta(lrange(5),unit='d')) + s = Series(pd.to_timedelta(lrange(5), unit='d')) result = s.isin(s[0:2]) assert_series_equal(result, expected) -#------------------------------------------------------------------------------ -# TimeSeries-specific +# ----------------------------------------------------------------------------- +# timeseries-specific + def test_cummethods_bool(self): # GH 6270 # looks like a buggy np.maximum.accumulate for numpy 1.6.1, py 3.2 @@ -7789,8 +7948,10 @@ def cummax(x): b = ~a c = pd.Series([False] * len(b)) d = ~c - methods = {'cumsum': np.cumsum, 'cumprod': np.cumprod, - 'cummin': cummin, 'cummax': cummax} + methods = {'cumsum': np.cumsum, + 'cumprod': np.cumprod, + 'cummin': cummin, + 'cummax': cummax} args = product((a, b, c, d), methods) for s, method in args: expected = Series(methods[method](s.values)) @@ -7802,7 +7963,9 @@ def cummax(x): cpe = pd.Series([False, 0, nan, 0]) cmin = pd.Series([False, False, nan, False]) cmax = pd.Series([False, True, nan, True]) - expecteds = {'cumsum': cse, 'cumprod': cpe, 'cummin': cmin, + expecteds = {'cumsum': cse, + 'cumprod': cpe, + 'cummin': cmin, 'cummax': cmax} for method in methods: @@ -7893,33 +8056,32 @@ def test_replace(self): expected = ser.ffill() result = ser.replace(np.nan) assert_series_equal(result, expected) - #GH 5797 + # GH 5797 ser = Series(date_range('20130101', periods=5)) expected = ser.copy() expected.loc[2] = Timestamp('20120101') - result = ser.replace({Timestamp('20130103'): - Timestamp('20120101')}) + result = ser.replace({Timestamp('20130103'): Timestamp('20120101')}) assert_series_equal(result, expected) result = ser.replace(Timestamp('20130103'), Timestamp('20120101')) assert_series_equal(result, expected) def test_replace_with_single_list(self): ser = Series([0, 1, 2, 3, 4]) - result = ser.replace([1,2,3]) - assert_series_equal(result, Series([0,0,0,0,4])) + result = ser.replace([1, 2, 3]) + assert_series_equal(result, Series([0, 0, 0, 0, 4])) s = ser.copy() - s.replace([1,2,3],inplace=True) - assert_series_equal(s, Series([0,0,0,0,4])) + s.replace([1, 2, 3], inplace=True) + assert_series_equal(s, Series([0, 0, 0, 0, 4])) # make sure things don't get corrupted when fillna call fails s = ser.copy() with tm.assertRaises(ValueError): - s.replace([1,2,3],inplace=True,method='crash_cymbal') + s.replace([1, 2, 3], inplace=True, method='crash_cymbal') assert_series_equal(s, ser) def test_replace_mixed_types(self): - s = Series(np.arange(5),dtype='int64') + s = Series(np.arange(5), dtype='int64') def check_replace(to_rep, val, expected): sc = s.copy() @@ -7929,35 +8091,36 @@ def check_replace(to_rep, val, expected): assert_series_equal(expected, sc) # should NOT upcast to float - e = Series([0,1,2,3,4]) + e = Series([0, 1, 2, 3, 4]) tr, v = [3], [3.0] check_replace(tr, v, e) # MUST upcast to float - e = Series([0,1,2,3.5,4]) + e = Series([0, 1, 2, 3.5, 4]) tr, v = [3], [3.5] check_replace(tr, v, e) # casts to object - e = Series([0,1,2,3.5,'a']) - tr, v = [3,4], [3.5,'a'] + e = Series([0, 1, 2, 3.5, 'a']) + tr, v = [3, 4], [3.5, 'a'] check_replace(tr, v, e) # again casts to object - e = Series([0,1,2,3.5,Timestamp('20130101')]) - tr, v = [3,4],[3.5,Timestamp('20130101')] + e = Series([0, 1, 2, 3.5, Timestamp('20130101')]) + tr, v = [3, 4], [3.5, Timestamp('20130101')] check_replace(tr, v, e) # casts to float - e = Series([0,1,2,3.5,1]) - tr, v = [3,4],[3.5,True] + e = Series([0, 1, 2, 3.5, 1]) + tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) # test an object with dates + floats + integers + strings dr = date_range('1/1/2001', '1/10/2001', freq='D').to_series().reset_index(drop=True) - result = dr.astype(object).replace([dr[0],dr[1],dr[2]], [1.0,2,'a']) - expected = Series([1.0,2,'a'] + dr[3:].tolist(),dtype=object) + result = dr.astype(object).replace( + [dr[0], dr[1], dr[2]], [1.0, 2, 'a']) + expected = Series([1.0, 2, 'a'] + dr[3:].tolist(), dtype=object) assert_series_equal(result, expected) def test_replace_bool_with_string_no_op(self): @@ -7984,9 +8147,8 @@ def test_replace_with_dict_with_bool_keys(self): s.replace({'asdf': 'asdb', True: 'yes'}) def test_asfreq(self): - ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), - datetime(2009, 11, 30), - datetime(2009, 12, 31)]) + ts = Series([0., 1., 2.], index=[datetime(2009, 10, 30), datetime( + 2009, 11, 30), datetime(2009, 12, 31)]) daily_ts = ts.asfreq('B') monthly_ts = daily_ts.asfreq('BM') @@ -8038,9 +8200,12 @@ def test_diff(self): assert_series_equal(nrs, nxp) # with tz - s = Series(date_range('2000-01-01 09:00:00',periods=5,tz='US/Eastern'), name='foo') + s = Series( + date_range('2000-01-01 09:00:00', periods=5, + tz='US/Eastern'), name='foo') result = s.diff() - assert_series_equal(result,Series(TimedeltaIndex(['NaT'] + ['1 days']*4),name='foo')) + assert_series_equal(result, Series( + TimedeltaIndex(['NaT'] + ['1 days'] * 4), name='foo')) def test_pct_change(self): rs = self.ts.pct_change(fill_method=None) @@ -8116,7 +8281,7 @@ def test_mpl_compat_hack(self): expected = self.ts.values[:, np.newaxis] assert_almost_equal(result, expected) -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # GroupBy def test_select(self): @@ -8129,7 +8294,7 @@ def test_select(self): expected = self.ts[self.ts.index.weekday == 2] assert_series_equal(result, expected) -#------------------------------------------------------------------------------ +# ----------------------------------------------------------------------------- # Misc not safe for sparse def test_dropna_preserve_name(self): @@ -8143,12 +8308,13 @@ def test_dropna_preserve_name(self): def test_numpy_unique(self): # it works! - result = np.unique(self.ts) + np.unique(self.ts) def test_concat_empty_series_dtypes_roundtrips(self): # round-tripping with self & like self - dtypes = map(np.dtype,['float64','int8','uint8','bool','m8[ns]','M8[ns]']) + dtypes = map(np.dtype, ['float64', 'int8', 'uint8', 'bool', 'm8[ns]', + 'M8[ns]']) for dtype in dtypes: self.assertEqual(pd.concat([Series(dtype=dtype)]).dtype, dtype) @@ -8156,16 +8322,19 @@ def test_concat_empty_series_dtypes_roundtrips(self): Series(dtype=dtype)]).dtype, dtype) def int_result_type(dtype, dtype2): - typs = set([dtype.kind,dtype2.kind]) - if not len(typs-set(['i','u','b'])) and (dtype.kind == 'i' or dtype2.kind == 'i'): + typs = set([dtype.kind, dtype2.kind]) + if not len(typs - set(['i', 'u', 'b'])) and (dtype.kind == 'i' or + dtype2.kind == 'i'): return 'i' - elif not len(typs-set(['u','b'])) and (dtype.kind == 'u' or dtype2.kind == 'u'): - return 'u' + elif not len(typs - set(['u', 'b'])) and (dtype.kind == 'u' or + dtype2.kind == 'u'): + return 'u' return None def float_result_type(dtype, dtype2): - typs = set([dtype.kind,dtype2.kind]) - if not len(typs-set(['f','i','u'])) and (dtype.kind == 'f' or dtype2.kind == 'f'): + typs = set([dtype.kind, dtype2.kind]) + if not len(typs - set(['f', 'i', 'u'])) and (dtype.kind == 'f' or + dtype2.kind == 'f'): return 'f' return None @@ -8184,8 +8353,8 @@ def get_result_type(dtype, dtype2): continue expected = get_result_type(dtype, dtype2) - result = pd.concat([Series(dtype=dtype), - Series(dtype=dtype2)]).dtype + result = pd.concat([Series(dtype=dtype), Series(dtype=dtype2) + ]).dtype self.assertEqual(result.kind, expected) def test_concat_empty_series_dtypes(self): @@ -8194,7 +8363,8 @@ def test_concat_empty_series_dtypes(self): self.assertEqual(pd.concat([Series(dtype=np.bool_), Series(dtype=np.int32)]).dtype, np.int32) self.assertEqual(pd.concat([Series(dtype=np.bool_), - Series(dtype=np.float32)]).dtype, np.object_) + Series(dtype=np.float32)]).dtype, + np.object_) # datetimelike self.assertEqual(pd.concat([Series(dtype='m8[ns]'), @@ -8211,27 +8381,29 @@ def test_concat_empty_series_dtypes(self): # categorical self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='category')]).dtype, 'category') + Series(dtype='category')]).dtype, + 'category') self.assertEqual(pd.concat([Series(dtype='category'), - Series(dtype='float64')]).dtype, np.object_) + Series(dtype='float64')]).dtype, + np.object_) self.assertEqual(pd.concat([Series(dtype='category'), Series(dtype='object')]).dtype, 'category') # sparse - result = pd.concat([Series(dtype='float64').to_sparse(), - Series(dtype='float64').to_sparse()]) - self.assertEqual(result.dtype,np.float64) - self.assertEqual(result.ftype,'float64:sparse') - - result = pd.concat([Series(dtype='float64').to_sparse(), - Series(dtype='float64')]) - self.assertEqual(result.dtype,np.float64) - self.assertEqual(result.ftype,'float64:sparse') - - result = pd.concat([Series(dtype='float64').to_sparse(), - Series(dtype='object')]) - self.assertEqual(result.dtype,np.object_) - self.assertEqual(result.ftype,'object:dense') + result = pd.concat([Series(dtype='float64').to_sparse(), Series( + dtype='float64').to_sparse()]) + self.assertEqual(result.dtype, np.float64) + self.assertEqual(result.ftype, 'float64:sparse') + + result = pd.concat([Series(dtype='float64').to_sparse(), Series( + dtype='float64')]) + self.assertEqual(result.dtype, np.float64) + self.assertEqual(result.ftype, 'float64:sparse') + + result = pd.concat([Series(dtype='float64').to_sparse(), Series( + dtype='object')]) + self.assertEqual(result.dtype, np.object_) + self.assertEqual(result.ftype, 'object:dense') def test_searchsorted_numeric_dtypes_scalar(self): s = Series([1, 2, 90, 1000, 3e9]) @@ -8274,6 +8446,7 @@ def test_to_frame_expanddim(self): # GH 9762 class SubclassedSeries(Series): + @property def _constructor_expanddim(self): return SubclassedFrame @@ -8315,7 +8488,6 @@ def test_basic_indexing(self): self.assertRaises(IndexError, s.__getitem__, 5) self.assertRaises(IndexError, s.__setitem__, 5, 0) - def test_int_indexing(self): s = Series(np.random.randn(6), index=[0, 0, 1, 1, 2, 2]) @@ -8371,8 +8543,7 @@ def test_reset_index(self): # level index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]], - labels=[[0, 0, 0, 0, 0, 0], - [0, 1, 2, 0, 1, 2], + labels=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]]) s = Series(np.random.randn(6), index=index) rs = s.reset_index(level=1) @@ -8389,8 +8560,8 @@ def test_set_index_makes_timeseries(self): s.index = idx with tm.assert_produces_warning(FutureWarning): - self.assertTrue(s.is_time_series == True) - self.assertTrue(s.index.is_all_dates == True) + self.assertTrue(s.is_time_series) + self.assertTrue(s.index.is_all_dates) def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) @@ -8453,8 +8624,8 @@ def test_unique_data_ownership(self): def test_datetime_timedelta_quantiles(self): # covers #9694 - self.assertTrue(pd.isnull(Series([],dtype='M8[ns]').quantile(.5))) - self.assertTrue(pd.isnull(Series([],dtype='m8[ns]').quantile(.5))) + self.assertTrue(pd.isnull(Series([], dtype='M8[ns]').quantile(.5))) + self.assertTrue(pd.isnull(Series([], dtype='m8[ns]').quantile(.5))) def test_empty_timeseries_redections_return_nat(self): # covers #11245 @@ -8462,6 +8633,7 @@ def test_empty_timeseries_redections_return_nat(self): self.assertIs(Series([], dtype=dtype).min(), pd.NaT) self.assertIs(Series([], dtype=dtype).max(), pd.NaT) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 9acd7c2233b7b..ef1bd734de776 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -13,6 +13,7 @@ assert_almost_equal) import pandas.util.testing as tm + class TestRank(tm.TestCase): _multiprocess_can_split_ = True s = Series([1, 3, 4, 2, nan, 2, 1, 5, nan, 3]) @@ -49,7 +50,7 @@ def test_rank_methods_series(self): from scipy.stats import rankdata xs = np.random.randn(9) - xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates + xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates np.random.shuffle(xs) index = [chr(ord('a') + i) for i in range(len(xs))] @@ -76,8 +77,9 @@ def test_rank_methods_frame(self): for ax in [0, 1]: for m in ['average', 'min', 'max', 'first', 'dense']: result = df.rank(axis=ax, method=m) - sprank = np.apply_along_axis(rankdata, ax, vals, - m if m != 'first' else 'ordinal') + sprank = np.apply_along_axis( + rankdata, ax, vals, + m if m != 'first' else 'ordinal') expected = DataFrame(sprank, columns=cols) tm.assert_frame_equal(result, expected) @@ -86,11 +88,11 @@ def test_rank_dense_method(self): in_out = [([1], [1]), ([2], [1]), ([0], [1]), - ([2,2], [1,1]), - ([1,2,3], [1,2,3]), - ([4,2,1], [3,2,1],), - ([1,1,5,5,3], [1,1,3,3,2]), - ([-5,-4,-3,-2,-1], [1,2,3,4,5])] + ([2, 2], [1, 1]), + ([1, 2, 3], [1, 2, 3]), + ([4, 2, 1], [3, 2, 1],), + ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]), + ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5])] for ser, exp in in_out: for dtype in dtypes: @@ -137,7 +139,6 @@ def test_rank_descending(self): assert_frame_equal(res3, expected) def test_rank_2d_tie_methods(self): - s = self.s df = self.df def _check2d(df, expected, method='average', axis=0): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 269d272525ce6..f8255c4b4a410 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1,11 +1,8 @@ # -*- coding: utf-8 -*- # pylint: disable-msg=E1101,W0612 -from datetime import datetime, timedelta, date -import os -import operator +from datetime import datetime, timedelta import re -import warnings import nose @@ -13,13 +10,12 @@ import numpy as np from numpy.random import randint -from pandas.compat import range, lrange, u, unichr +from pandas.compat import range, u import pandas.compat as compat -from pandas import (Index, Series, DataFrame, isnull, notnull, - bdate_range, date_range, MultiIndex) +from pandas import (Index, Series, DataFrame, isnull, MultiIndex) import pandas.core.common as com -from pandas.util.testing import assert_series_equal, assert_almost_equal +from pandas.util.testing import assert_series_equal import pandas.util.testing as tm import pandas.core.strings as strings @@ -56,7 +52,8 @@ def test_iter(self): for el in s: # each element of the series is either a basestring/str or nan - self.assertTrue(isinstance(el, compat.string_types) or isnull(el)) + self.assertTrue(isinstance(el, compat.string_types) or isnull( + el)) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in @@ -86,8 +83,8 @@ def test_iter_single_element(self): assert_series_equal(ds, s) def test_iter_object_try_string(self): - ds = Series([slice(None, randint(10), randint(10, 20)) - for _ in range(4)]) + ds = Series([slice(None, randint(10), randint(10, 20)) for _ in range( + 4)]) i, s = 100, 'h' @@ -216,9 +213,9 @@ def test_contains(self): tm.assert_almost_equal(result, expected) # na - values = Series(['om', 'foo',np.nan]) + values = Series(['om', 'foo', np.nan]) res = values.str.contains('foo', na="foo") - self.assertEqual (res.ix[2], "foo") + self.assertEqual(res.ix[2], "foo") def test_startswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) @@ -284,8 +281,8 @@ def test_title(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), - "blah", None, 1, 2.]) + mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None, + 1, 2.]) mixed = mixed.str.title() exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) @@ -309,8 +306,8 @@ def test_lower_upper(self): tm.assert_series_equal(result, values) # mixed - mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, - 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, + 2.]) mixed = mixed.str.upper() rs = Series(mixed).str.lower() xp = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA] @@ -334,8 +331,8 @@ def test_capitalize(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), - "blah", None, 1, 2.]) + mixed = Series(["FOO", NA, "bar", True, datetime.today(), "blah", None, + 1, 2.]) mixed = mixed.str.capitalize() exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) @@ -353,8 +350,8 @@ def test_swapcase(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(["FOO", NA, "bar", True, datetime.today(), - "Blah", None, 1, 2.]) + mixed = Series(["FOO", NA, "bar", True, datetime.today(), "Blah", None, + 1, 2.]) mixed = mixed.str.swapcase() exp = Series(["foo", NA, "BAR", NA, NA, "bLAH", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) @@ -371,8 +368,10 @@ def test_casemethods(self): self.assertEqual(s.str.lower().tolist(), [v.lower() for v in values]) self.assertEqual(s.str.upper().tolist(), [v.upper() for v in values]) self.assertEqual(s.str.title().tolist(), [v.title() for v in values]) - self.assertEqual(s.str.capitalize().tolist(), [v.capitalize() for v in values]) - self.assertEqual(s.str.swapcase().tolist(), [v.swapcase() for v in values]) + self.assertEqual(s.str.capitalize().tolist(), [ + v.capitalize() for v in values]) + self.assertEqual(s.str.swapcase().tolist(), [ + v.swapcase() for v in values]) def test_replace(self): values = Series(['fooBAD__barBAD', NA]) @@ -405,7 +404,7 @@ def test_replace(self): exp = Series([u('foobarBAD'), NA]) tm.assert_series_equal(result, exp) - #flags + unicode + # flags + unicode values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) result = values.str.replace("(?<=\w),(?=\w)", ", ", flags=re.UNICODE) @@ -423,8 +422,8 @@ def test_repeat(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', - None, 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, + 2.]) rs = Series(mixed).str.repeat(3) xp = ['aaa', NA, 'bbb', NA, NA, 'foofoofoo', NA, NA, NA] @@ -432,17 +431,14 @@ def test_repeat(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, - u('d')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, u('d')]) result = values.str.repeat(3) - exp = Series([u('aaa'), u('bbb'), NA, u('ccc'), NA, - u('ddd')]) + exp = Series([u('aaa'), u('bbb'), NA, u('ccc'), NA, u('ddd')]) tm.assert_series_equal(result, exp) result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, - u('dddddd')]) + exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')]) tm.assert_series_equal(result, exp) def test_deprecated_match(self): @@ -527,8 +523,8 @@ def test_extract(self): 'foo', None, 1, 2.]) rs = Series(mixed).str.extract('.*(BAD[_]+).*(BAD)') - exp = DataFrame([['BAD_', 'BAD'], er, ['BAD_', 'BAD'], er, er, - er, er, er, er]) + exp = DataFrame([['BAD_', 'BAD'], er, ['BAD_', 'BAD'], er, er, er, er, + er, er]) tm.assert_frame_equal(rs, exp) # unicode @@ -590,12 +586,14 @@ def test_extract(self): # two named groups result = s.str.extract('(?P[AB])(?P[123])') - exp = DataFrame([['A', '1'], ['B', '2'], [NA, NA]], columns=['letter', 'number']) + exp = DataFrame([['A', '1'], ['B', '2'], [NA, NA]], + columns=['letter', 'number']) tm.assert_frame_equal(result, exp) # mix named and unnamed groups result = s.str.extract('([AB])(?P[123])') - exp = DataFrame([['A', '1'], ['B', '2'], [NA, NA]], columns=[0, 'number']) + exp = DataFrame([['A', '1'], ['B', '2'], [NA, NA]], + columns=[0, 'number']) tm.assert_frame_equal(result, exp) # one normal group, one non-capturing group @@ -604,18 +602,23 @@ def test_extract(self): tm.assert_series_equal(result, exp) # two normal groups, one non-capturing group - result = Series(['A11', 'B22', 'C33']).str.extract('([AB])([123])(?:[123])') + result = Series(['A11', 'B22', 'C33']).str.extract( + '([AB])([123])(?:[123])') exp = DataFrame([['A', '1'], ['B', '2'], [NA, NA]]) tm.assert_frame_equal(result, exp) # one optional group followed by one normal group - result = Series(['A1', 'B2', '3']).str.extract('(?P[AB])?(?P[123])') - exp = DataFrame([['A', '1'], ['B', '2'], [NA, '3']], columns=['letter', 'number']) + result = Series(['A1', 'B2', '3']).str.extract( + '(?P[AB])?(?P[123])') + exp = DataFrame([['A', '1'], ['B', '2'], [NA, '3']], + columns=['letter', 'number']) tm.assert_frame_equal(result, exp) # one normal group followed by one optional group - result = Series(['A1', 'B2', 'C']).str.extract('(?P[ABC])(?P[123])?') - exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number']) + result = Series(['A1', 'B2', 'C']).str.extract( + '(?P[ABC])(?P[123])?') + exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], + columns=['letter', 'number']) tm.assert_frame_equal(result, exp) # GH6348 @@ -627,12 +630,15 @@ def check_index(index): exp = Series(['1', '2', NA], index=index) tm.assert_series_equal(result, exp) - result = Series(data, index=index).str.extract('(?P\D)(?P\d)?') - exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=['letter', 'number'], index=index) + result = Series( + data, index=index).str.extract('(?P\D)(?P\d)?') + exp = DataFrame([['A', '1'], ['B', '2'], ['C', NA]], columns=[ + 'letter', 'number' + ], index=index) tm.assert_frame_equal(result, exp) - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, + tm.makeDateIndex, tm.makePeriodIndex]: check_index(index()) def test_extract_single_series_name_is_preserved(self): @@ -661,11 +667,12 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_bool, empty.str.endswith('a')) tm.assert_series_equal(empty_str, empty.str.lower()) tm.assert_series_equal(empty_str, empty.str.upper()) - tm.assert_series_equal(empty_str, empty.str.replace('a','b')) + tm.assert_series_equal(empty_str, empty.str.replace('a', 'b')) tm.assert_series_equal(empty_str, empty.str.repeat(3)) tm.assert_series_equal(empty_bool, empty.str.match('^a')) tm.assert_series_equal(empty_str, empty.str.extract('()')) - tm.assert_frame_equal(DataFrame(columns=[0,1], dtype=str), empty.str.extract('()()')) + tm.assert_frame_equal( + DataFrame(columns=[0, 1], dtype=str), empty.str.extract('()()')) tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_list.str.join('')) tm.assert_series_equal(empty_int, empty.str.len()) @@ -676,8 +683,10 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_str, empty.str.center(42)) tm.assert_series_equal(empty_list, empty.str.split('a')) tm.assert_series_equal(empty_list, empty.str.rsplit('a')) - tm.assert_series_equal(empty_list, empty.str.partition('a', expand=False)) - tm.assert_series_equal(empty_list, empty.str.rpartition('a', expand=False)) + tm.assert_series_equal(empty_list, + empty.str.partition('a', expand=False)) + tm.assert_series_equal(empty_list, + empty.str.rpartition('a', expand=False)) tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) tm.assert_series_equal(empty_str, empty.str.slice(step=1)) tm.assert_series_equal(empty_str, empty.str.strip()) @@ -708,7 +717,7 @@ def test_empty_str_methods(self): tm.assert_series_equal(empty_str, empty.str.translate(table)) def test_empty_str_methods_to_frame(self): - empty_str = empty = Series(dtype=str) + empty = Series(dtype=str) empty_df = DataFrame([]) tm.assert_frame_equal(empty_df, empty.str.partition('a')) tm.assert_frame_equal(empty_df, empty.str.rpartition('a')) @@ -716,14 +725,25 @@ def test_empty_str_methods_to_frame(self): def test_ismethods(self): values = ['A', 'b', 'Xy', '4', '3A', '', 'TT', '55', '-', ' '] str_s = Series(values) - alnum_e = [True, True, True, True, True, False, True, True, False, False] - alpha_e = [True, True, True, False, False, False, True, False, False, False] - digit_e = [False, False, False, True, False, False, False, True, False, False] - num_e = [False, False, False, True, False, False, False, True, False, False] - space_e = [False, False, False, False, False, False, False, False, False, True] - lower_e = [False, True, False, False, False, False, False, False, False, False] - upper_e = [True, False, False, False, True, False, True, False, False, False] - title_e = [True, False, True, False, True, False, False, False, False, False] + alnum_e = [True, True, True, True, True, False, True, True, False, + False] + alpha_e = [True, True, True, False, False, False, True, False, False, + False] + digit_e = [False, False, False, True, False, False, False, True, False, + False] + + # TODO: unused + num_e = [False, False, False, True, False, False, # noqa + False, True, False, False] + + space_e = [False, False, False, False, False, False, False, False, + False, True] + lower_e = [False, True, False, False, False, False, False, False, + False, False] + upper_e = [True, False, False, False, True, False, True, False, False, + False] + title_e = [True, False, True, False, True, False, False, False, False, + False] tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e)) tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e)) @@ -733,13 +753,20 @@ def test_ismethods(self): tm.assert_series_equal(str_s.str.isupper(), Series(upper_e)) tm.assert_series_equal(str_s.str.istitle(), Series(title_e)) - self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() for v in values]) - self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() for v in values]) - self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() for v in values]) - self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() for v in values]) - self.assertEqual(str_s.str.islower().tolist(), [v.islower() for v in values]) - self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() for v in values]) - self.assertEqual(str_s.str.istitle().tolist(), [v.istitle() for v in values]) + self.assertEqual(str_s.str.isalnum().tolist(), [v.isalnum() + for v in values]) + self.assertEqual(str_s.str.isalpha().tolist(), [v.isalpha() + for v in values]) + self.assertEqual(str_s.str.isdigit().tolist(), [v.isdigit() + for v in values]) + self.assertEqual(str_s.str.isspace().tolist(), [v.isspace() + for v in values]) + self.assertEqual(str_s.str.islower().tolist(), [v.islower() + for v in values]) + self.assertEqual(str_s.str.isupper().tolist(), [v.isupper() + for v in values]) + self.assertEqual(str_s.str.istitle().tolist(), [v.istitle() + for v in values]) def test_isnumeric(self): # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER @@ -754,8 +781,10 @@ def test_isnumeric(self): tm.assert_series_equal(s.str.isdecimal(), Series(decimal_e)) unicodes = [u'A', u'3', u'¼', u'★', u'፸', u'3', u'four'] - self.assertEqual(s.str.isnumeric().tolist(), [v.isnumeric() for v in unicodes]) - self.assertEqual(s.str.isdecimal().tolist(), [v.isdecimal() for v in unicodes]) + self.assertEqual(s.str.isnumeric().tolist(), [ + v.isnumeric() for v in unicodes]) + self.assertEqual(s.str.isdecimal().tolist(), [ + v.isdecimal() for v in unicodes]) values = ['A', np.nan, u'¼', u'★', np.nan, u'3', 'four'] s = Series(values) @@ -799,8 +828,7 @@ def test_join(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('a_b_c'), u('c_d_e'), np.nan, - u('f_g_h')]) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')]) result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) @@ -822,8 +850,8 @@ def test_len(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('foo'), u('fooo'), u('fooooo'), np.nan, - u('fooooooo')]) + values = Series([u('foo'), u('fooo'), u('fooooo'), np.nan, u( + 'fooooooo')]) result = values.str.len() exp = values.map(lambda x: len(x) if com.notnull(x) else NA) @@ -847,8 +875,7 @@ def test_findall(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo'), - u('BAD')]) + values = Series([u('fooBAD__barBAD'), NA, u('foo'), u('BAD')]) result = values.str.findall('BAD[_]*') exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]]) @@ -886,10 +913,12 @@ def test_find(self): expected = np.array([v.rfind('EF', 3, 6) for v in values.values]) tm.assert_numpy_array_equal(result.values, expected) - with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"): + with tm.assertRaisesRegexp(TypeError, + "expected a string object, not int"): result = values.str.find(0) - with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"): + with tm.assertRaisesRegexp(TypeError, + "expected a string object, not int"): result = values.str.rfind(0) def test_find_nan(self): @@ -949,7 +978,8 @@ def test_index(self): with tm.assertRaisesRegexp(ValueError, "substring not found"): result = s.str.index('DE') - with tm.assertRaisesRegexp(TypeError, "expected a string object, not int"): + with tm.assertRaisesRegexp(TypeError, + "expected a string object, not int"): result = s.str.index(0) # test with nan @@ -975,8 +1005,8 @@ def test_pad(self): tm.assert_almost_equal(result, exp) # mixed - mixed = Series(['a', NA, 'b', True, datetime.today(), - 'ee', None, 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'ee', None, 1, 2. + ]) rs = Series(mixed).str.pad(5, side='left') xp = Series([' a', NA, ' b', NA, NA, ' ee', NA, NA, NA]) @@ -984,8 +1014,8 @@ def test_pad(self): tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) - mixed = Series(['a', NA, 'b', True, datetime.today(), - 'ee', None, 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'ee', None, 1, 2. + ]) rs = Series(mixed).str.pad(5, side='right') xp = Series(['a ', NA, 'b ', NA, NA, 'ee ', NA, NA, NA]) @@ -993,8 +1023,8 @@ def test_pad(self): tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) - mixed = Series(['a', NA, 'b', True, datetime.today(), - 'ee', None, 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'ee', None, 1, 2. + ]) rs = Series(mixed).str.pad(5, side='both') xp = Series([' a ', NA, ' b ', NA, NA, ' ee ', NA, NA, NA]) @@ -1003,22 +1033,18 @@ def test_pad(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, - u('eeeeee')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, u('eeeeee')]) result = values.str.pad(5, side='left') - exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, - u('eeeeee')]) + exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='right') - exp = Series([u('a '), u('b '), NA, u('c '), NA, - u('eeeeee')]) + exp = Series([u('a '), u('b '), NA, u('c '), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='both') - exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, - u('eeeeee')]) + exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_pad_fillchar(self): @@ -1037,10 +1063,12 @@ def test_pad_fillchar(self): exp = Series(['XXaXX', 'XXbXX', NA, 'XXcXX', NA, 'eeeeee']) tm.assert_almost_equal(result, exp) - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not str"): result = values.str.pad(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not int"): result = values.str.pad(5, fillchar=5) def test_translate(self): @@ -1065,7 +1093,8 @@ def test_translate(self): expected = klass(['abcde', 'abcc', 'cddd', 'cde']) tm.assert_numpy_array_equal(result, expected) else: - with tm.assertRaisesRegexp(ValueError, "deletechars is not a valid argument"): + with tm.assertRaisesRegexp( + ValueError, "deletechars is not a valid argument"): result = s.str.translate(table, deletechars='fg') # Series with non-string values @@ -1090,44 +1119,40 @@ def test_center_ljust_rjust(self): tm.assert_almost_equal(result, exp) # mixed - mixed = Series(['a', NA, 'b', True, datetime.today(), - 'c', 'eee', None, 1, 2.]) + mixed = Series(['a', NA, 'b', True, datetime.today(), 'c', 'eee', None, + 1, 2.]) rs = Series(mixed).str.center(5) - xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA, - NA]) + xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA, NA + ]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.ljust(5) - xp = Series(['a ', NA, 'b ', NA, NA, 'c ', 'eee ', NA, NA, - NA]) + xp = Series(['a ', NA, 'b ', NA, NA, 'c ', 'eee ', NA, NA, NA + ]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rjust(5) - xp = Series([' a', NA, ' b', NA, NA, ' c', ' eee', NA, NA, - NA]) + xp = Series([' a', NA, ' b', NA, NA, ' c', ' eee', NA, NA, NA + ]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, - u('eeeeee')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, u('eeeeee')]) result = values.str.center(5) - exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, - u('eeeeee')]) + exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.ljust(5) - exp = Series([u('a '), u('b '), NA, u('c '), NA, - u('eeeeee')]) + exp = Series([u('a '), u('b '), NA, u('c '), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.rjust(5) - exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, - u('eeeeee')]) + exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_center_ljust_rjust_fillchar(self): @@ -1154,22 +1179,28 @@ def test_center_ljust_rjust_fillchar(self): # If fillchar is not a charatter, normal str raises TypeError # 'aaa'.ljust(5, 'XY') # TypeError: must be char, not str - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not str"): result = values.str.center(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not str"): result = values.str.ljust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not str"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not str"): result = values.str.rjust(5, fillchar='XY') - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not int"): result = values.str.center(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not int"): result = values.str.ljust(5, fillchar=1) - with tm.assertRaisesRegexp(TypeError, "fillchar must be a character, not int"): + with tm.assertRaisesRegexp(TypeError, + "fillchar must be a character, not int"): result = values.str.rjust(5, fillchar=1) def test_zfill(self): @@ -1208,11 +1239,11 @@ def test_split(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), - None, 1, 2.]) + mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), None, 1, + 2.]) result = mixed.str.split('_') - exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, - NA, NA, NA]) + exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA + ]) tm.assertIsInstance(result, Series) tm.assert_almost_equal(result, exp) @@ -1224,8 +1255,7 @@ def test_split(self): values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) result = values.str.split('_') - exp = Series([[u('a'), u('b'), u('c')], - [u('c'), u('d'), u('e')], NA, + exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, [u('f'), u('g'), u('h')]]) tm.assert_series_equal(result, exp) @@ -1235,8 +1265,7 @@ def test_split(self): # regex split values = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')]) result = values.str.split('[,_]') - exp = Series([[u('a'), u('b'), u('c')], - [u('c'), u('d'), u('e')], NA, + exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, [u('f'), u('g'), u('h')]]) tm.assert_series_equal(result, exp) @@ -1255,11 +1284,11 @@ def test_rsplit(self): tm.assert_series_equal(result, exp) # mixed - mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), - None, 1, 2.]) + mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(), None, 1, + 2.]) result = mixed.str.rsplit('_') - exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, - NA, NA, NA]) + exp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA + ]) tm.assertIsInstance(result, Series) tm.assert_almost_equal(result, exp) @@ -1270,8 +1299,7 @@ def test_rsplit(self): # unicode values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) result = values.str.rsplit('_') - exp = Series([[u('a'), u('b'), u('c')], - [u('c'), u('d'), u('e')], NA, + exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, [u('f'), u('g'), u('h')]]) tm.assert_series_equal(result, exp) @@ -1281,10 +1309,7 @@ def test_rsplit(self): # regex split is not supported by rsplit values = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')]) result = values.str.rsplit('[,_]') - exp = Series([[u('a,b_c')], - [u('c_d,e')], - NA, - [u('f,g,h')]]) + exp = Series([[u('a,b_c')], [u('c_d,e')], NA, [u('f,g,h')]]) tm.assert_series_equal(result, exp) # setting max number of splits, make sure it's from reverse @@ -1338,16 +1363,20 @@ def test_split_to_dataframe(self): s = Series(['some_equal_splits', 'with_no_nans']) with tm.assert_produces_warning(FutureWarning): result = s.str.split('_', return_type='frame') - exp = DataFrame({0: ['some', 'with'], 1: ['equal', 'no'], + exp = DataFrame({0: ['some', 'with'], + 1: ['equal', 'no'], 2: ['splits', 'nans']}) tm.assert_frame_equal(result, exp) s = Series(['some_unequal_splits', 'one_of_these_things_is_not']) with tm.assert_produces_warning(FutureWarning): result = s.str.split('_', return_type='frame') - exp = DataFrame({0: ['some', 'one'], 1: ['unequal', 'of'], - 2: ['splits', 'these'], 3: [NA, 'things'], - 4: [NA, 'is'], 5: [NA, 'not']}) + exp = DataFrame({0: ['some', 'one'], + 1: ['unequal', 'of'], + 2: ['splits', 'these'], + 3: [NA, 'things'], + 4: [NA, 'is'], + 5: [NA, 'not']}) tm.assert_frame_equal(result, exp) s = Series(['some_splits', 'with_index'], index=['preserve', 'me']) @@ -1369,15 +1398,19 @@ def test_split_to_dataframe_expand(self): s = Series(['some_equal_splits', 'with_no_nans']) result = s.str.split('_', expand=True) - exp = DataFrame({0: ['some', 'with'], 1: ['equal', 'no'], + exp = DataFrame({0: ['some', 'with'], + 1: ['equal', 'no'], 2: ['splits', 'nans']}) tm.assert_frame_equal(result, exp) s = Series(['some_unequal_splits', 'one_of_these_things_is_not']) result = s.str.split('_', expand=True) - exp = DataFrame({0: ['some', 'one'], 1: ['unequal', 'of'], - 2: ['splits', 'these'], 3: [NA, 'things'], - 4: [NA, 'is'], 5: [NA, 'not']}) + exp = DataFrame({0: ['some', 'one'], + 1: ['unequal', 'of'], + 2: ['splits', 'these'], + 3: [NA, 'things'], + 4: [NA, 'is'], + 5: [NA, 'not']}) tm.assert_frame_equal(result, exp) s = Series(['some_splits', 'with_index'], index=['preserve', 'me']) @@ -1399,15 +1432,16 @@ def test_split_to_multiindex_expand(self): idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.split('_', expand=True) - exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), - ('with', 'no', 'nans')]) + exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( + 'with', 'no', 'nans')]) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 3) idx = Index(['some_unequal_splits', 'one_of_these_things_is_not']) result = idx.str.split('_', expand=True) - exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA), - ('one', 'of', 'these', 'things', 'is', 'not')]) + exp = MultiIndex.from_tuples([('some', 'unequal', 'splits', NA, NA, NA + ), ('one', 'of', 'these', 'things', + 'is', 'not')]) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 6) @@ -1423,18 +1457,19 @@ def test_rsplit_to_dataframe_expand(self): s = Series(['some_equal_splits', 'with_no_nans']) result = s.str.rsplit('_', expand=True) - exp = DataFrame({0: ['some', 'with'], 1: ['equal', 'no'], + exp = DataFrame({0: ['some', 'with'], + 1: ['equal', 'no'], 2: ['splits', 'nans']}) tm.assert_frame_equal(result, exp) result = s.str.rsplit('_', expand=True, n=2) - exp = DataFrame({0: ['some', 'with'], 1: ['equal', 'no'], + exp = DataFrame({0: ['some', 'with'], + 1: ['equal', 'no'], 2: ['splits', 'nans']}) tm.assert_frame_equal(result, exp) result = s.str.rsplit('_', expand=True, n=1) - exp = DataFrame({0: ['some_equal', 'with_no'], - 1: ['splits', 'nans']}) + exp = DataFrame({0: ['some_equal', 'with_no'], 1: ['splits', 'nans']}) tm.assert_frame_equal(result, exp) s = Series(['some_splits', 'with_index'], index=['preserve', 'me']) @@ -1452,15 +1487,15 @@ def test_rsplit_to_multiindex_expand(self): idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True) - exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), - ('with', 'no', 'nans')]) + exp = MultiIndex.from_tuples([('some', 'equal', 'splits'), ( + 'with', 'no', 'nans')]) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 3) idx = Index(['some_equal_splits', 'with_no_nans']) result = idx.str.rsplit('_', expand=True, n=1) - exp = MultiIndex.from_tuples([('some_equal', 'splits'), - ('with_no', 'nans')]) + exp = MultiIndex.from_tuples([('some_equal', 'splits'), ('with_no', + 'nans')]) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 2) @@ -1468,31 +1503,37 @@ def test_partition_series(self): values = Series(['a_b_c', 'c_d_e', NA, 'f_g_h']) result = values.str.partition('_', expand=False) - exp = Series([['a', '_', 'b_c'], ['c', '_', 'd_e'], NA, ['f', '_', 'g_h']]) + exp = Series([['a', '_', 'b_c'], ['c', '_', 'd_e'], NA, ['f', '_', + 'g_h']]) tm.assert_series_equal(result, exp) result = values.str.rpartition('_', expand=False) - exp = Series([['a_b', '_', 'c'], ['c_d', '_', 'e'], NA, ['f_g', '_', 'h']]) + exp = Series([['a_b', '_', 'c'], ['c_d', '_', 'e'], NA, ['f_g', '_', + 'h']]) tm.assert_series_equal(result, exp) # more than one char values = Series(['a__b__c', 'c__d__e', NA, 'f__g__h']) result = values.str.partition('__', expand=False) - exp = Series([['a', '__', 'b__c'], ['c', '__', 'd__e'], NA, ['f', '__', 'g__h']]) + exp = Series([['a', '__', 'b__c'], ['c', '__', 'd__e'], NA, ['f', '__', + 'g__h']]) tm.assert_series_equal(result, exp) result = values.str.rpartition('__', expand=False) - exp = Series([['a__b', '__', 'c'], ['c__d', '__', 'e'], NA, ['f__g', '__', 'h']]) + exp = Series([['a__b', '__', 'c'], ['c__d', '__', 'e'], NA, + ['f__g', '__', 'h']]) tm.assert_series_equal(result, exp) # None values = Series(['a b c', 'c d e', NA, 'f g h']) result = values.str.partition(expand=False) - exp = Series([['a', ' ', 'b c'], ['c', ' ', 'd e'], NA, ['f', ' ', 'g h']]) + exp = Series([['a', ' ', 'b c'], ['c', ' ', 'd e'], NA, ['f', ' ', + 'g h']]) tm.assert_series_equal(result, exp) result = values.str.rpartition(expand=False) - exp = Series([['a b', ' ', 'c'], ['c d', ' ', 'e'], NA, ['f g', ' ', 'h']]) + exp = Series([['a b', ' ', 'c'], ['c d', ' ', 'e'], NA, ['f g', ' ', + 'h']]) tm.assert_series_equal(result, exp) # Not splited @@ -1529,12 +1570,14 @@ def test_partition_index(self): values = Index(['a_b_c', 'c_d_e', 'f_g_h']) result = values.str.partition('_', expand=False) - exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', 'g_h')])) + exp = Index(np.array([('a', '_', 'b_c'), ('c', '_', 'd_e'), ('f', '_', + 'g_h')])) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 1) result = values.str.rpartition('_', expand=False) - exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'), ('f_g', '_', 'h')])) + exp = Index(np.array([('a_b', '_', 'c'), ('c_d', '_', 'e'), ( + 'f_g', '_', 'h')])) tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 1) @@ -1598,12 +1641,12 @@ def test_slice(self): exp = Series(['foo', 'bar', NA, 'baz']) tm.assert_series_equal(result, exp) - for start, stop, step in [(0, 3, -1), (None, None, -1), - (3, 10, 2), (3, 0, -1)]: + for start, stop, step in [(0, 3, -1), (None, None, -1), (3, 10, 2), + (3, 0, -1)]: try: result = values.str.slice(start, stop, step) - expected = Series([s[start:stop:step] if not isnull(s) else NA for s in - values]) + expected = Series([s[start:stop:step] if not isnull(s) else NA + for s in values]) tm.assert_series_equal(result, expected) except: print('failed on %s:%s:%s' % (start, stop, step)) @@ -1614,19 +1657,16 @@ def test_slice(self): None, 1, 2.]) rs = Series(mixed).str.slice(2, 5) - xp = Series(['foo', NA, 'bar', NA, NA, - NA, NA, NA]) + xp = Series(['foo', NA, 'bar', NA, NA, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.slice(2, 5, -1) - xp = Series(['oof', NA, 'rab', NA, NA, - NA, NA, NA]) + xp = Series(['oof', NA, 'rab', NA, NA, NA, NA, NA]) # unicode - values = Series([u('aafootwo'), u('aabartwo'), NA, - u('aabazqux')]) + values = Series([u('aafootwo'), u('aabartwo'), NA, u('aabazqux')]) result = values.str.slice(2, 5) exp = Series([u('foo'), u('bar'), NA, u('baz')]) @@ -1637,7 +1677,8 @@ def test_slice(self): tm.assert_series_equal(result, exp) def test_slice_replace(self): - values = Series(['short', 'a bit longer', 'evenlongerthanthat', '', NA]) + values = Series(['short', 'a bit longer', 'evenlongerthanthat', '', NA + ]) exp = Series(['shrt', 'a it longer', 'evnlongerthanthat', '', NA]) result = values.str.slice_replace(2, 3) @@ -1647,11 +1688,13 @@ def test_slice_replace(self): result = values.str.slice_replace(2, 3, 'z') tm.assert_series_equal(result, exp) - exp = Series(['shzort', 'a zbit longer', 'evzenlongerthanthat', 'z', NA]) + exp = Series(['shzort', 'a zbit longer', 'evzenlongerthanthat', 'z', NA + ]) result = values.str.slice_replace(2, 2, 'z') tm.assert_series_equal(result, exp) - exp = Series(['shzort', 'a zbit longer', 'evzenlongerthanthat', 'z', NA]) + exp = Series(['shzort', 'a zbit longer', 'evzenlongerthanthat', 'z', NA + ]) result = values.str.slice_replace(2, 1, 'z') tm.assert_series_equal(result, exp) @@ -1688,34 +1731,30 @@ def test_strip_lstrip_rstrip(self): def test_strip_lstrip_rstrip_mixed(self): # mixed - mixed = Series([' aa ', NA, ' bb \t\n', True, datetime.today(), - None, 1, 2.]) + mixed = Series([' aa ', NA, ' bb \t\n', True, datetime.today(), None, + 1, 2.]) rs = Series(mixed).str.strip() - xp = Series(['aa', NA, 'bb', NA, NA, - NA, NA, NA]) + xp = Series(['aa', NA, 'bb', NA, NA, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.lstrip() - xp = Series(['aa ', NA, 'bb \t\n', NA, NA, - NA, NA, NA]) + xp = Series(['aa ', NA, 'bb \t\n', NA, NA, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rstrip() - xp = Series([' aa', NA, ' bb', NA, NA, - NA, NA, NA]) + xp = Series([' aa', NA, ' bb', NA, NA, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_strip_lstrip_rstrip_unicode(self): # unicode - values = Series([u(' aa '), u(' bb \n'), NA, - u('cc ')]) + values = Series([u(' aa '), u(' bb \n'), NA, u('cc ')]) result = values.str.strip() exp = Series([u('aa'), u('bb'), NA, u('cc')]) @@ -1745,8 +1784,7 @@ def test_strip_lstrip_rstrip_args(self): assert_series_equal(rs, xp) def test_strip_lstrip_rstrip_args_unicode(self): - values = Series([u('xxABCxx'), u('xx BNSD'), - u('LDFJH xx')]) + values = Series([u('xxABCxx'), u('xx BNSD'), u('LDFJH xx')]) rs = values.str.strip(u('x')) xp = Series(['ABC', ' BNSD', 'LDFJH ']) @@ -1763,26 +1801,25 @@ def test_strip_lstrip_rstrip_args_unicode(self): def test_wrap(self): # test values are: two words less than width, two words equal to width, # two words greater than width, one word less than width, one word - # equal to width, one word greater than width, multiple tokens with trailing - # whitespace equal to width - values = Series([u('hello world'), u('hello world!'), - u('hello world!!'), u('abcdefabcde'), - u('abcdefabcdef'), u('abcdefabcdefa'), - u('ab ab ab ab '), u('ab ab ab ab a'), - u('\t')]) + # equal to width, one word greater than width, multiple tokens with + # trailing whitespace equal to width + values = Series([u('hello world'), u('hello world!'), u( + 'hello world!!'), u('abcdefabcde'), u('abcdefabcdef'), u( + 'abcdefabcdefa'), u('ab ab ab ab '), u('ab ab ab ab a'), u( + '\t')]) # expected values - xp = Series([u('hello world'), u('hello world!'), - u('hello\nworld!!'), u('abcdefabcde'), - u('abcdefabcdef'), u('abcdefabcdef\na'), - u('ab ab ab ab'), u('ab ab ab ab\na'), - u('')]) + xp = Series([u('hello world'), u('hello world!'), u('hello\nworld!!'), + u('abcdefabcde'), u('abcdefabcdef'), u('abcdefabcdef\na'), + u('ab ab ab ab'), u('ab ab ab ab\na'), u('')]) rs = values.str.wrap(12, break_long_words=True) assert_series_equal(rs, xp) - # test with pre and post whitespace (non-unicode), NaN, and non-ascii Unicode - values = Series([' pre ', np.nan, u('\xac\u20ac\U00008000 abadcafe')]) + # test with pre and post whitespace (non-unicode), NaN, and non-ascii + # Unicode + values = Series([' pre ', np.nan, u('\xac\u20ac\U00008000 abadcafe') + ]) xp = Series([' pre', NA, u('\xac\u20ac\U00008000 ab\nadcafe')]) rs = values.str.wrap(6) assert_series_equal(rs, xp) @@ -1795,19 +1832,17 @@ def test_get(self): tm.assert_series_equal(result, expected) # mixed - mixed = Series(['a_b_c', NA, 'c_d_e', True, datetime.today(), - None, 1, 2.]) + mixed = Series(['a_b_c', NA, 'c_d_e', True, datetime.today(), None, 1, + 2.]) rs = Series(mixed).str.split('_').str.get(1) - xp = Series(['b', NA, 'd', NA, NA, - NA, NA, NA]) + xp = Series(['b', NA, 'd', NA, NA, NA, NA, NA]) tm.assertIsInstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u('a_b_c'), u('c_d_e'), np.nan, - u('f_g_h')]) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')]) result = values.str.split('_').str.get(1) expected = Series([u('b'), u('d'), np.nan, u('g')]) @@ -1815,8 +1850,6 @@ def test_get(self): def test_more_contains(self): # PR #1179 - import re - s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA', 'dog', 'cat']) @@ -1826,8 +1859,8 @@ def test_more_contains(self): assert_series_equal(result, expected) result = s.str.contains('a', case=False) - expected = Series([True, False, False, True, True, False, np.nan, - True, False, True]) + expected = Series([True, False, False, True, True, False, np.nan, True, + False, True]) assert_series_equal(result, expected) result = s.str.contains('Aa') @@ -1847,9 +1880,8 @@ def test_more_contains(self): def test_more_replace(self): # PR #1179 - import re - s = Series(['A', 'B', 'C', 'Aaba', 'Baca', - '', NA, 'CABA', 'dog', 'cat']) + s = Series(['A', 'B', 'C', 'Aaba', 'Baca', '', NA, 'CABA', + 'dog', 'cat']) result = s.str.replace('A', 'YYY') expected = Series(['YYY', 'B', 'C', 'YYYaba', 'Baca', '', NA, @@ -1867,8 +1899,8 @@ def test_more_replace(self): assert_series_equal(result, expected) def test_string_slice_get_syntax(self): - s = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', NA, - 'CYYYBYYY', 'dog', 'cYYYt']) + s = Series(['YYY', 'B', 'C', 'YYYYYYbYYY', 'BYYYcYYY', NA, 'CYYYBYYY', + 'dog', 'cYYYt']) result = s.str[0] expected = s.str.get(0) @@ -1883,7 +1915,7 @@ def test_string_slice_get_syntax(self): assert_series_equal(result, expected) def test_string_slice_out_of_bounds(self): - s = Series([(1, 2), (1,), (3,4,5)]) + s = Series([(1, 2), (1, ), (3, 4, 5)]) result = s.str[1] expected = Series([2, np.nan, 4]) @@ -1896,14 +1928,17 @@ def test_string_slice_out_of_bounds(self): assert_series_equal(result, expected) def test_match_findall_flags(self): - data = {'Dave': 'dave@google.com', 'Steve': 'steve@gmail.com', - 'Rob': 'rob@gmail.com', 'Wes': np.nan} + data = {'Dave': 'dave@google.com', + 'Steve': 'steve@gmail.com', + 'Rob': 'rob@gmail.com', + 'Wes': np.nan} data = Series(data) - pat = pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' + pat = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z]{2,4})' with tm.assert_produces_warning(FutureWarning): result = data.str.match(pat, flags=re.IGNORECASE) + self.assertEqual(result[0], ('dave', 'google', 'com')) result = data.str.findall(pat, flags=re.IGNORECASE) @@ -1929,8 +1964,7 @@ def test_encode_decode(self): def test_encode_decode_errors(self): encodeBase = Series([u('a'), u('b'), u('a\x9d')]) - self.assertRaises(UnicodeEncodeError, - encodeBase.str.encode, 'cp1252') + self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') f = lambda x: x.encode('cp1252', 'ignore') result = encodeBase.str.encode('cp1252', 'ignore') @@ -1939,8 +1973,7 @@ def test_encode_decode_errors(self): decodeBase = Series([b'a', b'b', b'a\x9d']) - self.assertRaises(UnicodeDecodeError, - decodeBase.str.decode, 'cp1252') + self.assertRaises(UnicodeDecodeError, decodeBase.str.decode, 'cp1252') f = lambda x: x.decode('cp1252', 'ignore') result = decodeBase.str.decode('cp1252', 'ignore') @@ -1973,8 +2006,8 @@ def test_normalize(self): tm.assert_index_equal(result, expected) def test_cat_on_filtered_index(self): - df = DataFrame(index=MultiIndex.from_product([[2011, 2012], [1,2,3]], - names=['year', 'month'])) + df = DataFrame(index=MultiIndex.from_product( + [[2011, 2012], [1, 2, 3]], names=['year', 'month'])) df = df.reset_index() df = df[df.month > 1] @@ -1989,21 +2022,18 @@ def test_cat_on_filtered_index(self): self.assertEqual(str_multiple.loc[1], '2011 2 2') - def test_index_str_accessor_visibility(self): from pandas.core.strings import StringMethods if not compat.PY3: - cases = [(['a', 'b'], 'string'), - (['a', u('b')], 'mixed'), + cases = [(['a', 'b'], 'string'), (['a', u('b')], 'mixed'), ([u('a'), u('b')], 'unicode'), (['a', 'b', 1], 'mixed-integer'), (['a', 'b', 1.3], 'mixed'), (['a', 'b', 1.3, 1], 'mixed-integer'), (['aa', datetime(2011, 1, 1)], 'mixed')] else: - cases = [(['a', 'b'], 'string'), - (['a', u('b')], 'string'), + cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'), ([u('a'), u('b')], 'string'), (['a', 'b', 1], 'mixed-integer'), (['a', 'b', 1.3], 'mixed'), @@ -2043,7 +2073,8 @@ def test_index_str_accessor_visibility(self): def test_str_accessor_no_new_attributes(self): # https://github.com/pydata/pandas/issues/10673 s = Series(list('aabbcde')) - with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"): + with tm.assertRaisesRegexp(AttributeError, + "You cannot add any new attribute"): s.str.xlabel = "a" def test_method_on_bytes(self): @@ -2053,8 +2084,8 @@ def test_method_on_bytes(self): self.assertRaises(TypeError, lhs.str.cat, rhs) else: result = lhs.str.cat(rhs) - expected = Series(np.array(['ad', 'be', 'cf'], - 'S2').astype(object)) + expected = Series(np.array( + ['ad', 'be', 'cf'], 'S2').astype(object)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_style.py b/pandas/tests/test_style.py index 486f997f9a7c8..fd8540fdf9c0a 100644 --- a/pandas/tests/test_style.py +++ b/pandas/tests/test_style.py @@ -230,39 +230,53 @@ def test_bar(self): def test_bar_0points(self): df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) result = df.style.bar()._compute().ctx - expected = {(0, 0): ['width: 10em', ' height: 80%'], - (0, 1): ['width: 10em', ' height: 80%'], - (0, 2): ['width: 10em', ' height: 80%'], - (1, 0): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (1, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (1, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (2, 0): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)'], - (2, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)'], - (2, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)']} + expected = { + (0, 0): ['width: 10em', ' height: 80%'], + (0, 1): ['width: 10em', ' height: 80%'], + (0, 2): ['width: 10em', ' height: 80%'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (1, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (1, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)'], + (2, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)'], + (2, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)']} self.assertEqual(result, expected) result = df.style.bar(axis=1)._compute().ctx - expected = {(0, 0): ['width: 10em', ' height: 80%'], - (0, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (0, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)'], - (1, 0): ['width: 10em', ' height: 80%'], - (1, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (1, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)'], - (2, 0): ['width: 10em', ' height: 80%'], - (2, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, transparent 0%)'], - (2, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, transparent 0%)']} + expected = { + (0, 0): ['width: 10em', ' height: 80%'], + (0, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (0, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)'], + (1, 0): ['width: 10em', ' height: 80%'], + (1, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (1, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)'], + (2, 0): ['width: 10em', ' height: 80%'], + (2, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%, ' + 'transparent 0%)'], + (2, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%, ' + 'transparent 0%)']} self.assertEqual(result, expected) def test_highlight_null(self, null_color='red'): diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 58c4285b8394e..7c3ba2ee8b556 100644 --- a/pandas/tests/test_testing.py +++ b/pandas/tests/test_testing.py @@ -2,22 +2,22 @@ # -*- coding: utf-8 -*- import pandas as pd import unittest -import warnings import nose import numpy as np import sys from pandas import Series, DataFrame import pandas.util.testing as tm -from pandas.util.testing import ( - assert_almost_equal, assertRaisesRegexp, raise_with_traceback, - assert_index_equal, assert_series_equal, assert_frame_equal, - assert_numpy_array_equal, assert_isinstance, RNGContext, - assertRaises, skip_if_no_package_deco -) +from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, + raise_with_traceback, assert_index_equal, + assert_series_equal, assert_frame_equal, + assert_numpy_array_equal, + RNGContext, assertRaises, + skip_if_no_package_deco) from pandas.compat import is_platform_windows # let's get meta. + class TestAssertAlmostEqual(tm.TestCase): _multiprocess_can_split_ = True @@ -50,7 +50,7 @@ def test_assert_almost_equal_numbers_with_zeros(self): def test_assert_almost_equal_numbers_with_mixed(self): self._assert_not_almost_equal_both(1, 'abc') - self._assert_not_almost_equal_both(1, [1,]) + self._assert_not_almost_equal_both(1, [1, ]) self._assert_not_almost_equal_both(1, object()) def test_assert_almost_equal_edge_case_ndarrays(self): @@ -68,12 +68,13 @@ def test_assert_almost_equal_dicts(self): ) self._assert_not_almost_equal_both({'a': 1}, 1) self._assert_not_almost_equal_both({'a': 1}, 'abc') - self._assert_not_almost_equal_both({'a': 1}, [1,]) + self._assert_not_almost_equal_both({'a': 1}, [1, ]) def test_assert_almost_equal_dict_like_object(self): class DictLikeObj(object): + def keys(self): - return ('a',) + return ('a', ) def __getitem__(self, item): if item == 'a': @@ -89,7 +90,7 @@ def test_assert_almost_equal_strings(self): self._assert_not_almost_equal_both('abc', 'abcd') self._assert_not_almost_equal_both('abc', 'abd') self._assert_not_almost_equal_both('abc', 1) - self._assert_not_almost_equal_both('abc', [1,]) + self._assert_not_almost_equal_both('abc', [1, ]) def test_assert_almost_equal_iterables(self): self._assert_almost_equal_both([1, 2, 3], [1, 2, 3]) @@ -140,13 +141,15 @@ class TestAssertNumpyArrayEqual(tm.TestCase): def test_numpy_array_equal_message(self): if is_platform_windows(): - raise nose.SkipTest("windows has incomparable line-endings and uses L on the shape") + raise nose.SkipTest("windows has incomparable line-endings " + "and uses L on the shape") expected = """numpy array are different numpy array shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5])) @@ -167,6 +170,7 @@ def test_numpy_array_equal_message(self): First object is iterable, second isn't \\[left\\]: \\[1\\] \\[right\\]: 1""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([1]), 1) with assertRaisesRegexp(AssertionError, expected): @@ -178,6 +182,7 @@ def test_numpy_array_equal_message(self): Second object is iterable, first isn't \\[left\\]: 1 \\[right\\]: \\[1\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(1, np.array([1])) with assertRaisesRegexp(AssertionError, expected): @@ -188,29 +193,34 @@ def test_numpy_array_equal_message(self): numpy array values are different \\(66\\.66667 %\\) \\[left\\]: \\[nan, 2\\.0, 3\\.0\\] \\[right\\]: \\[1\\.0, nan, 3\\.0\\]""" + with assertRaisesRegexp(AssertionError, expected): - assert_numpy_array_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + assert_numpy_array_equal( + np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) with assertRaisesRegexp(AssertionError, expected): - assert_almost_equal(np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) + assert_almost_equal( + np.array([np.nan, 2, 3]), np.array([1, np.nan, 3])) expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([1, 3])) with assertRaisesRegexp(AssertionError, expected): assert_almost_equal(np.array([1, 2]), np.array([1, 3])) - expected = """numpy array are different numpy array values are different \\(50\\.0 %\\) \\[left\\]: \\[1\\.1, 2\\.000001\\] \\[right\\]: \\[1\\.1, 2.0\\]""" + with assertRaisesRegexp(AssertionError, expected): - assert_numpy_array_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) + assert_numpy_array_equal( + np.array([1.1, 2.000001]), np.array([1.1, 2.0])) # must pass assert_almost_equal(np.array([1.1, 2.000001]), np.array([1.1, 2.0])) @@ -220,6 +230,7 @@ def test_numpy_array_equal_message(self): numpy array values are different \\(16\\.66667 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\], \\[5, 6\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\], \\[5, 6\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4], [5, 6]]), np.array([[1, 3], [3, 4], [5, 6]])) @@ -232,6 +243,7 @@ def test_numpy_array_equal_message(self): numpy array values are different \\(25\\.0 %\\) \\[left\\]: \\[\\[1, 2\\], \\[3, 4\\]\\] \\[right\\]: \\[\\[1, 3\\], \\[3, 4\\]\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([[1, 2], [3, 4]]), np.array([[1, 3], [3, 4]])) @@ -245,6 +257,7 @@ def test_numpy_array_equal_message(self): Index shapes are different \\[left\\]: \\(2,\\) \\[right\\]: \\(3,\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_numpy_array_equal(np.array([1, 2]), np.array([3, 4, 5]), obj='Index') @@ -259,6 +272,7 @@ def test_assert_almost_equal_iterable_message(self): Iterable length are different \\[left\\]: 2 \\[right\\]: 3""" + with assertRaisesRegexp(AssertionError, expected): assert_almost_equal([1, 2], [3, 4, 5]) @@ -267,6 +281,7 @@ def test_assert_almost_equal_iterable_message(self): Iterable values are different \\(50\\.0 %\\) \\[left\\]: \\[1, 2\\] \\[right\\]: \\[1, 3\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_almost_equal([1, 2], [1, 3]) @@ -282,20 +297,23 @@ def test_index_equal_message(self): \\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\], labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" + idx1 = pd.Index([1, 2, 3]) - idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), - ('B', 3), ('B', 4)]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4 + )]) with assertRaisesRegexp(AssertionError, expected): assert_index_equal(idx1, idx2, exact=False) - expected = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) \\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" - idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) - idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4 + )]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4 + )]) with assertRaisesRegexp(AssertionError, expected): assert_index_equal(idx1, idx2) with assertRaisesRegexp(AssertionError, expected): @@ -306,6 +324,7 @@ def test_index_equal_message(self): Index length are different \\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3, 4]) with assertRaisesRegexp(AssertionError, expected): @@ -318,6 +337,7 @@ def test_index_equal_message(self): Index classes are different \\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3.0]) with assertRaisesRegexp(AssertionError, expected): @@ -330,6 +350,7 @@ def test_index_equal_message(self): Index values are different \\(33\\.33333 %\\) \\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) \\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0000000001]) with assertRaisesRegexp(AssertionError, expected): @@ -343,6 +364,7 @@ def test_index_equal_message(self): Index values are different \\(33\\.33333 %\\) \\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) \\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" + idx1 = pd.Index([1, 2, 3.]) idx2 = pd.Index([1, 2, 3.0001]) with assertRaisesRegexp(AssertionError, expected): @@ -350,13 +372,15 @@ def test_index_equal_message(self): with assertRaisesRegexp(AssertionError, expected): assert_index_equal(idx1, idx2, check_exact=False) # must success - assert_index_equal(idx1, idx2, check_exact=False, check_less_precise=True) + assert_index_equal(idx1, idx2, check_exact=False, + check_less_precise=True) expected = """Index are different Index values are different \\(33\\.33333 %\\) \\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" + idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 4]) with assertRaisesRegexp(AssertionError, expected): @@ -369,8 +393,11 @@ def test_index_equal_message(self): MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) \\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) \\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" - idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4)]) - idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) + + idx1 = pd.MultiIndex.from_tuples([('A', 2), ('A', 2), ('B', 3), ('B', 4 + )]) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4 + )]) with assertRaisesRegexp(AssertionError, expected): assert_index_equal(idx1, idx2) with assertRaisesRegexp(AssertionError, expected): @@ -383,6 +410,7 @@ def test_index_equal_metadata_message(self): Attribute "names" are different \\[left\\]: \\[None\\] \\[right\\]: \\[u?'x'\\]""" + idx1 = pd.Index([1, 2, 3]) idx2 = pd.Index([1, 2, 3], name='x') with assertRaisesRegexp(AssertionError, expected): @@ -390,16 +418,16 @@ def test_index_equal_metadata_message(self): # same name, should pass assert_index_equal(pd.Index([1, 2, 3], name=np.nan), - pd.Index([1, 2, 3], name=np.nan)) + pd.Index([1, 2, 3], name=np.nan)) assert_index_equal(pd.Index([1, 2, 3], name=pd.NaT), - pd.Index([1, 2, 3], name=pd.NaT)) - + pd.Index([1, 2, 3], name=pd.NaT)) expected = """Index are different Attribute "names" are different \\[left\\]: \\[nan\\] \\[right\\]: \\[NaT\\]""" + idx1 = pd.Index([1, 2, 3], name=np.nan) idx2 = pd.Index([1, 2, 3], name=pd.NaT) with assertRaisesRegexp(AssertionError, expected): @@ -410,59 +438,65 @@ class TestAssertSeriesEqual(tm.TestCase): _multiprocess_can_split_ = True def _assert_equal(self, x, y, **kwargs): - assert_series_equal(x,y,**kwargs) - assert_series_equal(y,x,**kwargs) + assert_series_equal(x, y, **kwargs) + assert_series_equal(y, x, **kwargs) def _assert_not_equal(self, a, b, **kwargs): self.assertRaises(AssertionError, assert_series_equal, a, b, **kwargs) self.assertRaises(AssertionError, assert_series_equal, b, a, **kwargs) def test_equal(self): - self._assert_equal(Series(range(3)),Series(range(3))) - self._assert_equal(Series(list('abc')),Series(list('abc'))) + self._assert_equal(Series(range(3)), Series(range(3))) + self._assert_equal(Series(list('abc')), Series(list('abc'))) def test_not_equal(self): - self._assert_not_equal(Series(range(3)),Series(range(3))+1) - self._assert_not_equal(Series(list('abc')),Series(list('xyz'))) - self._assert_not_equal(Series(range(3)),Series(range(4))) - self._assert_not_equal(Series(range(3)),Series(range(3),dtype='float64')) - self._assert_not_equal(Series(range(3)),Series(range(3),index=[1,2,4])) + self._assert_not_equal(Series(range(3)), Series(range(3)) + 1) + self._assert_not_equal(Series(list('abc')), Series(list('xyz'))) + self._assert_not_equal(Series(range(3)), Series(range(4))) + self._assert_not_equal( + Series(range(3)), Series( + range(3), dtype='float64')) + self._assert_not_equal( + Series(range(3)), Series( + range(3), index=[1, 2, 4])) # ATM meta data is not checked in assert_series_equal # self._assert_not_equal(Series(range(3)),Series(range(3),name='foo'),check_names=True) def test_less_precise(self): - s1 = Series([0.12345],dtype='float64') - s2 = Series([0.12346],dtype='float64') + s1 = Series([0.12345], dtype='float64') + s2 = Series([0.12346], dtype='float64') self.assertRaises(AssertionError, assert_series_equal, s1, s2) - self._assert_equal(s1,s2,check_less_precise=True) + self._assert_equal(s1, s2, check_less_precise=True) - s1 = Series([0.12345],dtype='float32') - s2 = Series([0.12346],dtype='float32') + s1 = Series([0.12345], dtype='float32') + s2 = Series([0.12346], dtype='float32') self.assertRaises(AssertionError, assert_series_equal, s1, s2) - self._assert_equal(s1,s2,check_less_precise=True) + self._assert_equal(s1, s2, check_less_precise=True) # even less than less precise - s1 = Series([0.1235],dtype='float32') - s2 = Series([0.1236],dtype='float32') + s1 = Series([0.1235], dtype='float32') + s2 = Series([0.1236], dtype='float32') self.assertRaises(AssertionError, assert_series_equal, s1, s2) self.assertRaises(AssertionError, assert_series_equal, s1, s2, True) def test_index_dtype(self): df1 = DataFrame.from_records( - {'a':[1,2],'c':['l1','l2']}, index=['a']) + {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a']) df2 = DataFrame.from_records( - {'a':[1.0,2.0],'c':['l1','l2']}, index=['a']) + {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a']) self._assert_not_equal(df1.c, df2.c, check_index_type=True) def test_multiindex_dtype(self): df1 = DataFrame.from_records( - {'a':[1,2],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) + {'a': [1, 2], 'b': [2.1, 1.5], + 'c': ['l1', 'l2']}, index=['a', 'b']) df2 = DataFrame.from_records( - {'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) + {'a': [1.0, 2.0], 'b': [2.1, 1.5], + 'c': ['l1', 'l2']}, index=['a', 'b']) self._assert_not_equal(df1.c, df2.c, check_index_type=True) def test_series_equal_message(self): @@ -472,28 +506,29 @@ def test_series_equal_message(self): Series length are different \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) - expected = """Series are different Series values are different \\(33\\.33333 %\\) \\[left\\]: \\[1, 2, 3\\] \\[right\\]: \\[1, 2, 4\\]""" + with assertRaisesRegexp(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4])) with assertRaisesRegexp(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 4]), - check_less_precise=True) + check_less_precise=True) class TestAssertFrameEqual(tm.TestCase): _multiprocess_can_split_ = True def _assert_equal(self, x, y, **kwargs): - assert_frame_equal(x,y,**kwargs) - assert_frame_equal(y,x,**kwargs) + assert_frame_equal(x, y, **kwargs) + assert_frame_equal(y, x, **kwargs) def _assert_not_equal(self, a, b, **kwargs): self.assertRaises(AssertionError, assert_frame_equal, a, b, **kwargs) @@ -501,22 +536,24 @@ def _assert_not_equal(self, a, b, **kwargs): def test_index_dtype(self): df1 = DataFrame.from_records( - {'a':[1,2],'c':['l1','l2']}, index=['a']) + {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a']) df2 = DataFrame.from_records( - {'a':[1.0,2.0],'c':['l1','l2']}, index=['a']) + {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a']) self._assert_not_equal(df1, df2, check_index_type=True) def test_multiindex_dtype(self): df1 = DataFrame.from_records( - {'a':[1,2],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) + {'a': [1, 2], 'b': [2.1, 1.5], + 'c': ['l1', 'l2']}, index=['a', 'b']) df2 = DataFrame.from_records( - {'a':[1.0,2.0],'b':[2.1,1.5],'c':['l1','l2']}, index=['a','b']) + {'a': [1.0, 2.0], 'b': [2.1, 1.5], + 'c': ['l1', 'l2']}, index=['a', 'b']) self._assert_not_equal(df1, df2, check_index_type=True) def test_empty_dtypes(self): - df1=pd.DataFrame(columns=["col1","col2"]) + df1 = pd.DataFrame(columns=["col1", "col2"]) df1["col1"] = df1["col1"].astype('int64') - df2=pd.DataFrame(columns=["col1","col2"]) + df2 = pd.DataFrame(columns=["col1", "col2"]) self._assert_equal(df1, df2, check_dtype=False) self._assert_not_equal(df1, df2, check_dtype=True) @@ -527,6 +564,7 @@ def test_frame_equal_message(self): DataFrame shape \\(number of rows\\) are different \\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) \\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), pd.DataFrame({'A': [1, 2, 3, 4]})) @@ -536,6 +574,7 @@ def test_frame_equal_message(self): DataFrame shape \\(number of columns\\) are different \\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), pd.DataFrame({'A': [1, 2, 3]})) @@ -545,6 +584,7 @@ def test_frame_equal_message(self): DataFrame\\.index values are different \\(33\\.33333 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), @@ -556,6 +596,7 @@ def test_frame_equal_message(self): DataFrame\\.columns values are different \\(50\\.0 %\\) \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" + with assertRaisesRegexp(AssertionError, expected): assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=['a', 'b', 'c']), @@ -567,14 +608,15 @@ def test_frame_equal_message(self): DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\) \\[left\\]: \\[4, 5, 6\\] \\[right\\]: \\[4, 5, 7\\]""" + with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), - pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]})) + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), + pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]})) with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), - pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 7]}), - by_blocks=True) + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), + pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 7]}), + by_blocks=True) class TestRNGContext(unittest.TestCase): @@ -589,7 +631,6 @@ def test_RNGContext(self): self.assertEqual(np.random.randn(), expected0) - class TestDeprecatedTests(tm.TestCase): def test_warning(self): @@ -617,15 +658,17 @@ class TestLocale(tm.TestCase): def test_locale(self): if sys.platform == 'win32': - raise nose.SkipTest("skipping on win platforms as locale not available") + raise nose.SkipTest( + "skipping on win platforms as locale not available") - #GH9744 + # GH9744 locales = tm.get_locales() self.assertTrue(len(locales) >= 1) def test_skiptest_deco(): from nose import SkipTest + @skip_if_no_package_deco("fakepackagename") def f(): pass diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 72318f8073595..8422759192cc3 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import nose from numpy import nan import numpy as np from pandas import Index, isnull, Timestamp @@ -11,7 +10,6 @@ import pandas.algos as algos from pandas.core import common as com import datetime -from pandas import DateOffset class TestTseriesUtil(tm.TestCase): @@ -72,7 +70,7 @@ def test_left_join_indexer_unique(): result = algos.left_join_indexer_unique_int64(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) def test_left_outer_join_bug(): @@ -93,8 +91,8 @@ def test_left_outer_join_bug(): exp_ridx[left == 1] = 1 exp_ridx[left == 3] = 0 - assert(np.array_equal(lidx, exp_lidx)) - assert(np.array_equal(ridx, exp_ridx)) + assert (np.array_equal(lidx, exp_lidx)) + assert (np.array_equal(ridx, exp_ridx)) def test_inner_join_indexer(): @@ -218,22 +216,29 @@ def test_is_lexsorted(): np.array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), np.array([30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, - 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, + 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, + 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, - 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, + 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 30, 29, 28, 27, 26, 25, 24, 23, 22, - 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, + 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, + 6, 5, 4, 3, 2, 1, 0])] - assert(not algos.is_lexsorted(failure)) + assert (not algos.is_lexsorted(failure)) # def test_get_group_index(): # a = np.array([0, 1, 2, 0, 2, 1, 0, 0], dtype=np.int64) @@ -253,20 +258,20 @@ def test_groupsort_indexer(): # need to use a stable sort expected = np.argsort(a, kind='mergesort') - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) # compare with lexsort key = a * 1000 + b result = algos.groupsort_indexer(key, 1000000)[0] expected = np.lexsort((b, a)) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) def test_ensure_platform_int(): arr = np.arange(100) result = algos.ensure_platform_int(arr) - assert(result is arr) + assert (result is arr) def test_duplicated_with_nas(): @@ -274,19 +279,19 @@ def test_duplicated_with_nas(): result = lib.duplicated(keys) expected = [False, False, False, True, False, True] - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = lib.duplicated(keys, keep='first') expected = [False, False, False, True, False, True] - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = lib.duplicated(keys, keep='last') expected = [True, False, True, False, False, False] - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = lib.duplicated(keys, keep=False) expected = [True, False, True, True, False, True] - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) keys = np.empty(8, dtype=object) for i, t in enumerate(zip([0, 0, nan, nan] * 2, [0, nan, 0, nan] * 2)): @@ -296,40 +301,40 @@ def test_duplicated_with_nas(): falses = [False] * 4 trues = [True] * 4 expected = falses + trues - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = lib.duplicated(keys, keep='last') expected = trues + falses - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = lib.duplicated(keys, keep=False) expected = trues + trues - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) def test_maybe_booleans_to_slice(): arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8) result = lib.maybe_booleans_to_slice(arr) - assert(result.dtype == np.bool_) + assert (result.dtype == np.bool_) result = lib.maybe_booleans_to_slice(arr[:0]) - assert(result == slice(0, 0)) + assert (result == slice(0, 0)) def test_convert_objects(): arr = np.array(['a', 'b', nan, nan, 'd', 'e', 'f'], dtype='O') result = lib.maybe_convert_objects(arr) - assert(result.dtype == np.object_) + assert (result.dtype == np.object_) def test_convert_infs(): arr = np.array(['inf', 'inf', 'inf'], dtype='O') result = lib.maybe_convert_numeric(arr, set(), False) - assert(result.dtype == np.float64) + assert (result.dtype == np.float64) arr = np.array(['-inf', '-inf', '-inf'], dtype='O') result = lib.maybe_convert_numeric(arr, set(), False) - assert(result.dtype == np.float64) + assert (result.dtype == np.float64) def test_convert_objects_ints(): @@ -338,17 +343,17 @@ def test_convert_objects_ints(): for dtype_str in dtypes: arr = np.array(list(np.arange(20, dtype=dtype_str)), dtype='O') - assert(arr[0].dtype == np.dtype(dtype_str)) + assert (arr[0].dtype == np.dtype(dtype_str)) result = lib.maybe_convert_objects(arr) - assert(issubclass(result.dtype.type, np.integer)) + assert (issubclass(result.dtype.type, np.integer)) def test_convert_objects_complex_number(): for dtype in np.sctypes['complex']: arr = np.array(list(1j * np.arange(20, dtype=dtype)), dtype='O') - assert(arr[0].dtype == np.dtype(dtype)) + assert (arr[0].dtype == np.dtype(dtype)) result = lib.maybe_convert_objects(arr) - assert(issubclass(result.dtype.type, np.complexfloating)) + assert (issubclass(result.dtype.type, np.complexfloating)) def test_rank(): @@ -372,7 +377,7 @@ def test_get_reverse_indexer(): indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.int64) result = lib.get_reverse_indexer(indexer, 5) expected = np.array([4, 2, 3, 6, 7], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) def test_pad_backfill_object_segfault(): @@ -382,25 +387,25 @@ def test_pad_backfill_object_segfault(): result = algos.pad_object(old, new) expected = np.array([-1], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = algos.pad_object(new, old) expected = np.array([], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = algos.backfill_object(old, new) expected = np.array([-1], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) result = algos.backfill_object(new, old) expected = np.array([], dtype=np.int64) - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) def test_arrmap(): values = np.array(['foo', 'foo', 'bar', 'bar', 'baz', 'qux'], dtype='O') result = algos.arrmap_object(values, lambda x: x in ['foo', 'bar']) - assert(result.dtype == np.bool_) + assert (result.dtype == np.bool_) def test_series_grouper(): @@ -452,40 +457,40 @@ def test_generate_bins(self): for func in [lib.generate_bins_dt64, generate_bins_generic]: bins = func(values, binner, closed='left') - assert((bins == np.array([2, 5, 6])).all()) + assert ((bins == np.array([2, 5, 6])).all()) bins = func(values, binner, closed='right') - assert((bins == np.array([3, 6, 6])).all()) + assert ((bins == np.array([3, 6, 6])).all()) for func in [lib.generate_bins_dt64, generate_bins_generic]: values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64) binner = np.array([0, 3, 6], dtype=np.int64) bins = func(values, binner, closed='right') - assert((bins == np.array([3, 6])).all()) + assert ((bins == np.array([3, 6])).all()) self.assertRaises(ValueError, generate_bins_generic, values, [], 'right') self.assertRaises(ValueError, generate_bins_generic, values[:0], binner, 'right') - self.assertRaises(ValueError, generate_bins_generic, - values, [4], 'right') - self.assertRaises(ValueError, generate_bins_generic, - values, [-3, -1], 'right') + self.assertRaises(ValueError, generate_bins_generic, values, [4], + 'right') + self.assertRaises(ValueError, generate_bins_generic, values, [-3, -1], + 'right') def test_group_ohlc(): - def _check(dtype): - obj = np.array(np.random.randn(20),dtype=dtype) + obj = np.array(np.random.randn(20), dtype=dtype) bins = np.array([6, 12, 20]) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - labels = com._ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins]))) + labels = com._ensure_int64(np.repeat( + np.arange(3), np.diff(np.r_[0, bins]))) - func = getattr(algos,'group_ohlc_%s' % dtype) + func = getattr(algos, 'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], labels) def _ohlc(group): @@ -493,8 +498,8 @@ def _ohlc(group): return np.repeat(nan, 4) return [group[0], group.max(), group.min(), group[-1]] - expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), - _ohlc(obj[12:])]) + expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:]) + ]) assert_almost_equal(out, expected) assert_almost_equal(counts, [6, 6, 8]) @@ -507,6 +512,7 @@ def _ohlc(group): _check('float32') _check('float64') + def test_try_parse_dates(): from dateutil.parser import parse @@ -514,7 +520,7 @@ def test_try_parse_dates(): result = lib.try_parse_dates(arr, dayfirst=True) expected = [parse(d, dayfirst=True) for d in arr] - assert(np.array_equal(result, expected)) + assert (np.array_equal(result, expected)) class TestTypeInference(tm.TestCase): @@ -532,8 +538,7 @@ def test_integers(self): result = lib.infer_dtype(arr) self.assertEqual(result, 'integer') - arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], - dtype='O') + arr = np.array([1, 2, 3, np.int64(4), np.int32(5), 'foo'], dtype='O') result = lib.infer_dtype(arr) self.assertEqual(result, 'mixed-integer') @@ -605,7 +610,7 @@ def test_to_object_array_tuples(self): record = namedtuple('record', 'x y') r = record(5, 6) values = [r] - result = lib.to_object_array_tuples(values) + result = lib.to_object_array_tuples(values) # noqa except ImportError: pass @@ -613,7 +618,7 @@ def test_object(self): # GH 7431 # cannot infer more than this as only a single element - arr = np.array([None],dtype='O') + arr = np.array([None], dtype='O') result = lib.infer_dtype(arr) self.assertEqual(result, 'mixed') @@ -628,19 +633,19 @@ def test_categorical(self): result = lib.infer_dtype(Series(arr)) self.assertEqual(result, 'categorical') - arr = Categorical(list('abc'),categories=['cegfab'],ordered=True) + arr = Categorical(list('abc'), categories=['cegfab'], ordered=True) result = lib.infer_dtype(arr) self.assertEqual(result, 'categorical') result = lib.infer_dtype(Series(arr)) self.assertEqual(result, 'categorical') + class TestMoments(tm.TestCase): pass class TestReducer(tm.TestCase): - def test_int_index(self): from pandas.core.series import Series @@ -654,19 +659,19 @@ def test_int_index(self): assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(100)) - result = lib.reduce( - arr, np.sum, dummy=dummy, labels=Index(np.arange(4))) + result = lib.reduce(arr, np.sum, dummy=dummy, + labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0., index=np.arange(4)) - result = lib.reduce(arr, np.sum, axis=1, - dummy=dummy, labels=Index(np.arange(100))) + result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) expected = arr.sum(1) assert_almost_equal(result, expected) - result = lib.reduce(arr, np.sum, axis=1, - dummy=dummy, labels=Index(np.arange(100))) + result = lib.reduce(arr, np.sum, axis=1, dummy=dummy, + labels=Index(np.arange(100))) assert_almost_equal(result, expected) @@ -682,18 +687,18 @@ def test_max_valid(self): def test_to_datetime_bijective(self): # Ensure that converting to datetime and back only loses precision # by going from nanoseconds to microseconds. - self.assertEqual(Timestamp(Timestamp.max.to_pydatetime()).value/1000, Timestamp.max.value/1000) - self.assertEqual(Timestamp(Timestamp.min.to_pydatetime()).value/1000, Timestamp.min.value/1000) + self.assertEqual( + Timestamp(Timestamp.max.to_pydatetime()).value / 1000, + Timestamp.max.value / 1000) + self.assertEqual( + Timestamp(Timestamp.min.to_pydatetime()).value / 1000, + Timestamp.min.value / 1000) -class TestPeriodField(tm.TestCase): +class TestPeriodField(tm.TestCase): def test_get_period_field_raises_on_out_of_range(self): self.assertRaises(ValueError, period.get_period_field, -1, 0, 0) def test_get_period_field_array_raises_on_out_of_range(self): - self.assertRaises(ValueError, period.get_period_field_arr, -1, np.empty(1), 0) - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) + self.assertRaises(ValueError, period.get_period_field_arr, -1, + np.empty(1), 0) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index 427c96a839c26..e27e45a96432f 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -5,8 +5,8 @@ import pandas.util.testing as tm - class TestDecorators(tm.TestCase): + def setUp(self): @deprecate_kwarg('old', 'new') def _f1(new=False): @@ -16,7 +16,7 @@ def _f1(new=False): def _f2(new=False): return new - @deprecate_kwarg('old', 'new', lambda x: x+1) + @deprecate_kwarg('old', 'new', lambda x: x + 1) def _f3(new=0): return new @@ -48,7 +48,7 @@ def test_callable_deprecate_kwarg(self): x = 5 with tm.assert_produces_warning(FutureWarning): result = self.f3(old=x) - self.assertEqual(result, x+1) + self.assertEqual(result, x + 1) with tm.assertRaises(TypeError): self.f3(old='hello') diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 4d7f9292705ad..d3e8320fd282d 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -1,6 +1,6 @@ +from itertools import product import nose import sys -import functools import warnings from datetime import datetime @@ -10,18 +10,20 @@ from distutils.version import LooseVersion import pandas as pd -from pandas import Series, DataFrame, Panel, bdate_range, isnull, notnull, concat -from pandas.util.testing import ( - assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal, assert_index_equal -) +from pandas import (Series, DataFrame, Panel, bdate_range, isnull, + notnull, concat) +from pandas.util.testing import (assert_almost_equal, assert_series_equal, + assert_frame_equal, assert_panel_equal, + assert_index_equal) import pandas.core.datetools as datetools import pandas.stats.moments as mom import pandas.core.window as rwindow import pandas.util.testing as tm -from pandas.compat import range, zip, PY3, StringIO +from pandas.compat import range, zip, PY3 N, K = 100, 10 + class Base(tm.TestCase): _multiprocess_can_split_ = True @@ -39,6 +41,7 @@ def _create_data(self): self.frame = DataFrame(randn(N, K), index=self.rng, columns=np.arange(K)) + class TestApi(Base): def setUp(self): @@ -47,17 +50,19 @@ def setUp(self): def test_getitem(self): r = self.frame.rolling(window=5) - tm.assert_index_equal(r._selected_obj.columns,self.frame.columns) + tm.assert_index_equal(r._selected_obj.columns, self.frame.columns) r = self.frame.rolling(window=5)[1] - self.assertEqual(r._selected_obj.name,self.frame.columns[1]) + self.assertEqual(r._selected_obj.name, self.frame.columns[1]) # technically this is allowed - r = self.frame.rolling(window=5)[1,3] - tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + r = self.frame.rolling(window=5)[1, 3] + tm.assert_index_equal(r._selected_obj.columns, + self.frame.columns[[1, 3]]) - r = self.frame.rolling(window=5)[[1,3]] - tm.assert_index_equal(r._selected_obj.columns,self.frame.columns[[1,3]]) + r = self.frame.rolling(window=5)[[1, 3]] + tm.assert_index_equal(r._selected_obj.columns, + self.frame.columns[[1, 3]]) def test_select_bad_cols(self): df = DataFrame([[1, 2]], columns=['A', 'B']) @@ -74,37 +79,39 @@ def test_attribute_access(self): df = DataFrame([[1, 2]], columns=['A', 'B']) r = df.rolling(window=5) - tm.assert_series_equal(r.A.sum(),r['A'].sum()) - self.assertRaises(AttributeError, lambda : r.F) + tm.assert_series_equal(r.A.sum(), r['A'].sum()) + self.assertRaises(AttributeError, lambda: r.F) def tests_skip_nuisance(self): - df = DataFrame({'A' : range(5), 'B' : range(5,10), 'C' : 'foo'}) + df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'}) r = df.rolling(window=3) - result = r[['A','B']].sum() - expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], - 'B' : [np.nan,np.nan,18,21,24]}, + result = r[['A', 'B']].sum() + expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9], + 'B': [np.nan, np.nan, 18, 21, 24]}, columns=list('AB')) assert_frame_equal(result, expected) - expected = pd.concat([r[['A','B']].sum(),df[['C']]],axis=1) + expected = pd.concat([r[['A', 'B']].sum(), df[['C']]], axis=1) result = r.sum() assert_frame_equal(result, expected) def test_timedeltas(self): - df = DataFrame({'A' : range(5), 'B' : pd.timedelta_range('1 day',periods=5)}) + df = DataFrame({'A': range(5), + 'B': pd.timedelta_range('1 day', periods=5)}) r = df.rolling(window=3) result = r.sum() - expected = DataFrame({'A' : [np.nan,np.nan,3,6,9], - 'B' : pd.to_timedelta([pd.NaT,pd.NaT,'6 days','9 days','12 days'])}, + expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9], + 'B': pd.to_timedelta([pd.NaT, pd.NaT, + '6 days', '9 days', + '12 days'])}, columns=list('AB')) assert_frame_equal(result, expected) def test_agg(self): - df = DataFrame({'A' : range(5), - 'B' : range(0,10,2)}) + df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) r = df.rolling(window=3) a_mean = r['A'].mean() @@ -119,101 +126,105 @@ def compare(result, expected): assert_frame_equal(result.reindex_like(expected), expected) result = r.aggregate([np.mean, np.std]) - expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) - expected.columns = pd.MultiIndex.from_product([['A','B'],['mean','std']]) + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean', + 'std']]) assert_frame_equal(result, expected) - result = r.aggregate({'A': np.mean, - 'B': np.std}) - expected = pd.concat([a_mean,b_std],axis=1) + result = r.aggregate({'A': np.mean, 'B': np.std}) + expected = pd.concat([a_mean, b_std], axis=1) compare(result, expected) - result = r.aggregate({'A': ['mean','std']}) - expected = pd.concat([a_mean,a_std],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std')]) + result = r.aggregate({'A': ['mean', 'std']}) + expected = pd.concat([a_mean, a_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', + 'std')]) assert_frame_equal(result, expected) - result = r['A'].aggregate(['mean','sum']) - expected = pd.concat([a_mean,a_sum],axis=1) - expected.columns = ['mean','sum'] + result = r['A'].aggregate(['mean', 'sum']) + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = ['mean', 'sum'] assert_frame_equal(result, expected) - result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' } }) - expected = pd.concat([a_mean,a_sum],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum')]) + result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}}) + expected = pd.concat([a_mean, a_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A', + 'sum')]) compare(result, expected) - result = r.aggregate({'A': { 'mean' : 'mean', 'sum' : 'sum' }, - 'B': { 'mean2' : 'mean', 'sum2' : 'sum' }}) - expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','sum'), - ('B','mean2'),('B','sum2')]) + result = r.aggregate({'A': {'mean': 'mean', + 'sum': 'sum'}, + 'B': {'mean2': 'mean', + 'sum2': 'sum'}}) + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ( + 'A', 'sum'), ('B', 'mean2'), ('B', 'sum2')]) compare(result, expected) - result = r.aggregate({'A': ['mean','std'], - 'B': ['mean','std']}) - expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A','mean'),('A','std'), - ('B','mean'),('B','std')]) + result = r.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']}) + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ( + 'A', 'std'), ('B', 'mean'), ('B', 'std')]) compare(result, expected) - result = r.aggregate({'r1' : { 'A' : ['mean','sum'] }, - 'r2' : { 'B' : ['mean','sum'] }}) - expected = pd.concat([a_mean,a_sum,b_mean,b_sum],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('r1','A','mean'),('r1','A','sum'), - ('r2','B','mean'),('r2','B','sum')]) + result = r.aggregate({'r1': {'A': ['mean', 'sum']}, + 'r2': {'B': ['mean', 'sum']}}) + expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'), ( + 'r1', 'A', 'sum'), ('r2', 'B', 'mean'), ('r2', 'B', 'sum')]) compare(result, expected) - result = r.agg({'A' : {'ra' : ['mean','std']}, - 'B' : {'rb' : ['mean','std']}}) - expected = pd.concat([a_mean,a_std,b_mean,b_std],axis=1) - expected.columns = pd.MultiIndex.from_tuples([('A','ra','mean'),('A','ra','std'), - ('B','rb','mean'),('B','rb','std')]) + result = r.agg({'A': {'ra': ['mean', 'std']}, + 'B': {'rb': ['mean', 'std']}}) + expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) + expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), ( + 'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')]) compare(result, expected) - # passed lambda - result = r.agg({'A' : np.sum, - 'B' : lambda x: np.std(x, ddof=1)}) - rcustom = r['B'].apply(lambda x: np.std(x,ddof=1)) - expected = pd.concat([a_sum,rcustom],axis=1) + result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) + rcustom = r['B'].apply(lambda x: np.std(x, ddof=1)) + expected = pd.concat([a_sum, rcustom], axis=1) compare(result, expected) def test_agg_consistency(self): - df = DataFrame({'A' : range(5), - 'B' : range(0,10,2)}) + df = DataFrame({'A': range(5), 'B': range(0, 10, 2)}) r = df.rolling(window=3) result = r.agg([np.sum, np.mean]).columns - expected = pd.MultiIndex.from_product([list('AB'),['sum','mean']]) + expected = pd.MultiIndex.from_product([list('AB'), ['sum', 'mean']]) tm.assert_index_equal(result, expected) result = r['A'].agg([np.sum, np.mean]).columns - expected = pd.Index(['sum','mean']) + expected = pd.Index(['sum', 'mean']) tm.assert_index_equal(result, expected) - result = r.agg({'A' : [np.sum, np.mean]}).columns - expected = pd.MultiIndex.from_tuples([('A','sum'),('A','mean')]) + result = r.agg({'A': [np.sum, np.mean]}).columns + expected = pd.MultiIndex.from_tuples([('A', 'sum'), ('A', 'mean')]) tm.assert_index_equal(result, expected) def test_window_with_args(self): tm._skip_if_no_scipy() # make sure that we are aggregating window functions correctly with arg - r = Series(np.random.randn(100)).rolling(window=10,min_periods=1,win_type='gaussian') - expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) - expected.columns = ['',''] - result = r.aggregate([lambda x: x.mean(std=10), lambda x: x.mean(std=.01)]) + r = Series(np.random.randn(100)).rolling(window=10, min_periods=1, + win_type='gaussian') + expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected.columns = ['', ''] + result = r.aggregate([lambda x: x.mean(std=10), + lambda x: x.mean(std=.01)]) assert_frame_equal(result, expected) def a(x): return x.mean(std=10) + def b(x): return x.mean(std=0.01) - expected = pd.concat([r.mean(std=10),r.mean(std=.01)],axis=1) - expected.columns = ['a','b'] - result = r.aggregate([a,b]) + + expected = pd.concat([r.mean(std=10), r.mean(std=.01)], axis=1) + expected.columns = ['a', 'b'] + result = r.aggregate([a, b]) assert_frame_equal(result, expected) def test_preserve_metadata(self): @@ -229,39 +240,44 @@ def test_how_compat(self): # in prior versions, we would allow how to be used in the resample # now that its deprecated, we need to handle this in the actual # aggregation functions - s = pd.Series(np.random.randn(20), index=pd.date_range('1/1/2000', periods=20, freq='12H')) + s = pd.Series( + np.random.randn(20), + index=pd.date_range('1/1/2000', periods=20, freq='12H')) - for how in ['min','max','median']: - for op in ['mean','sum','std','var','kurt','skew']: - for t in ['rolling','expanding']: + for how in ['min', 'max', 'median']: + for op in ['mean', 'sum', 'std', 'var', 'kurt', 'skew']: + for t in ['rolling', 'expanding']: - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): - dfunc = getattr(pd,"{0}_{1}".format(t,op)) + dfunc = getattr(pd, "{0}_{1}".format(t, op)) if dfunc is None: continue if t == 'rolling': - kwargs = {'window' : 5} + kwargs = {'window': 5} else: kwargs = {} result = dfunc(s, freq='D', how=how, **kwargs) - expected = getattr(getattr(s,t)(freq='D', **kwargs),op)(how=how) + expected = getattr( + getattr(s, t)(freq='D', **kwargs), op)(how=how) assert_series_equal(result, expected) + class TestDeprecations(Base): """ test that we are catching deprecation warnings """ def setUp(self): self._create_data() - def test_deprecations(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - mom.rolling_mean(np.ones(10),3,center=True ,axis=0) - mom.rolling_mean(Series(np.ones(10)),3,center=True ,axis=0) + mom.rolling_mean(np.ones(10), 3, center=True, axis=0) + mom.rolling_mean(Series(np.ones(10)), 3, center=True, axis=0) + class TestMoments(Base): @@ -271,27 +287,30 @@ def setUp(self): def test_centered_axis_validation(self): # ok - Series(np.ones(10)).rolling(window=3,center=True ,axis=0).mean() + Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean() # bad axis - self.assertRaises(ValueError, lambda : Series(np.ones(10)).rolling(window=3,center=True ,axis=1).mean()) + with self.assertRaises(ValueError): + Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean() # ok ok - DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=0).mean() - DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=1).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, + axis=0).mean() + DataFrame(np.ones((10, 10))).rolling(window=3, center=True, + axis=1).mean() # bad axis - self.assertRaises(ValueError, lambda : DataFrame(np.ones((10,10))).rolling(window=3,center=True ,axis=2).mean()) + with self.assertRaises(ValueError): + (DataFrame(np.ones((10, 10))) + .rolling(window=3, center=True, axis=2).mean()) def test_rolling_sum(self): self._check_moment_func(mom.rolling_sum, np.sum, name='sum') def test_rolling_count(self): counter = lambda x: np.isfinite(x).astype(float).sum() - self._check_moment_func(mom.rolling_count, counter, - name='count', - has_min_periods=False, - preserve_nan=False, + self._check_moment_func(mom.rolling_count, counter, name='count', + has_min_periods=False, preserve_nan=False, fill_value=0) def test_rolling_mean(self): @@ -301,10 +320,10 @@ def test_cmov_mean(self): # GH 8238 tm._skip_if_no_scipy() - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, - 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, - 12.818, 12.952, np.nan, np.nan]) + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, + 10.63, 14.48]) + xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, + 12.952, np.nan, np.nan]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): rs = mom.rolling_mean(vals, 5, center=True) @@ -318,10 +337,10 @@ def test_cmov_window(self): # GH 8238 tm._skip_if_no_scipy() - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, - 13.49, 16.68, 9.48, 10.63, 14.48]) - xp = np.array([np.nan, np.nan, 9.962, 11.27 , 11.564, 12.516, - 12.818, 12.952, np.nan, np.nan]) + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, + 10.63, 14.48]) + xp = np.array([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516, 12.818, + 12.952, np.nan, np.nan]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): rs = mom.rolling_window(vals, 5, 'boxcar', center=True) @@ -359,46 +378,30 @@ def test_cmov_window_frame(self): # Gh 8238 tm._skip_if_no_scipy() - vals = np.array([[ 12.18, 3.64], - [ 10.18, 9.16], - [ 13.24, 14.61], - [ 4.51, 8.11], - [ 6.15, 11.44], - [ 9.14, 6.21], - [ 11.31, 10.67], - [ 2.94, 6.51], - [ 9.42, 8.39], - [ 12.44, 7.34 ]]) - - xp = np.array([[ np.nan, np.nan], - [ np.nan, np.nan], - [ 9.252, 9.392], - [ 8.644, 9.906], - [ 8.87 , 10.208], - [ 6.81 , 8.588], - [ 7.792, 8.644], - [ 9.05 , 7.824], - [ np.nan, np.nan], - [ np.nan, np.nan]]) + vals = np.array([[12.18, 3.64], [10.18, 9.16], [13.24, 14.61], + [4.51, 8.11], [6.15, 11.44], [9.14, 6.21], + [11.31, 10.67], [2.94, 6.51], [9.42, 8.39], [12.44, + 7.34]]) + + xp = np.array([[np.nan, np.nan], [np.nan, np.nan], [9.252, 9.392], + [8.644, 9.906], [8.87, 10.208], [6.81, 8.588], + [7.792, 8.644], [9.05, 7.824], [np.nan, np.nan + ], [np.nan, np.nan]]) # DataFrame rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean() assert_frame_equal(DataFrame(xp), rs) # invalid method - self.assertRaises(AttributeError, lambda : DataFrame(vals).rolling(5, win_type='boxcar', center=True).std()) + with self.assertRaises(AttributeError): + (DataFrame(vals).rolling(5, win_type='boxcar', center=True) + .std()) # sum - xp = np.array([[ np.nan, np.nan], - [ np.nan, np.nan], - [ 46.26, 46.96], - [ 43.22, 49.53], - [ 44.35, 51.04], - [ 34.05, 42.94], - [ 38.96, 43.22], - [ 45.25, 39.12], - [ np.nan, np.nan], - [ np.nan, np.nan]]) + xp = np.array([[np.nan, np.nan], [np.nan, np.nan], [46.26, 46.96], + [43.22, 49.53], [44.35, 51.04], [34.05, 42.94], + [38.96, 43.22], [45.25, 39.12], [np.nan, np.nan + ], [np.nan, np.nan]]) rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum() assert_frame_equal(DataFrame(xp), rs) @@ -412,7 +415,8 @@ def test_cmov_window_na_min_periods(self): vals[8] = np.nan xp = vals.rolling(5, min_periods=4, center=True).mean() - rs = vals.rolling(5, win_type='boxcar', min_periods=4, center=True).mean() + rs = vals.rolling(5, win_type='boxcar', min_periods=4, + center=True).mean() assert_series_equal(xp, rs) def test_cmov_window_regular(self): @@ -422,25 +426,26 @@ def test_cmov_window_regular(self): win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, - 13.49, 16.68, 9.48, 10.63, 14.48]) + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, + 10.63, 14.48]) xps = { - 'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009, - 14.03687, 13.8567, 11.81473, np.nan, np.nan], - 'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556, - 13.33889, 13.38, 12.33667, np.nan, np.nan], - 'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, - 14.3675, 14.0825, 11.5675, np.nan, np.nan], - 'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559, - 14.17267, 14.65923, 11.10401, np.nan, np.nan], + 'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009, 14.03687, + 13.8567, 11.81473, np.nan, np.nan], + 'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556, 13.33889, + 13.38, 12.33667, np.nan, np.nan], + 'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 14.3675, + 14.0825, 11.5675, np.nan, np.nan], + 'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559, 14.17267, + 14.65923, 11.10401, np.nan, np.nan], 'blackmanharris': [np.nan, np.nan, 6.97691, 9.16438, 13.05052, 14.02156, 15.10512, 10.74574, np.nan, np.nan], - 'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671, - 14.03559, 15.05657, 10.78514, np.nan, np.nan], - 'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607, - 14.20036, 14.57726, 11.16988, np.nan, np.nan], - 'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, - 14.3675, 14.0825, 11.5675, np.nan, np.nan]} + 'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671, 14.03559, + 15.05657, 10.78514, np.nan, np.nan], + 'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607, 14.20036, + 14.57726, 11.16988, np.nan, np.nan], + 'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 14.3675, + 14.0825, 11.5675, np.nan, np.nan] + } for wt in win_types: xp = Series(xps[wt]) @@ -471,27 +476,26 @@ def test_cmov_window_regular_missing_data(self): win_types = ['triang', 'blackman', 'hamming', 'bartlett', 'bohman', 'blackmanharris', 'nuttall', 'barthann'] - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, - 13.49, 16.68, np.nan, 10.63, 14.48]) + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan, + 10.63, 14.48]) xps = { - 'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, - 9.1925, 12.5575, 14.3675, 15.61667, 13.655], - 'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345, - 9.17869, 12.79607, 14.20036, 15.8706, 13.655], - 'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, - 9.1925, 12.5575, 14.3675, 15.61667, 13.655], - 'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599, - 9.1764, 12.83559, 14.17267, 15.90976, 13.655], - 'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384, - 9.56348, 12.38009, 14.20565, 15.24694, 13.69758], - 'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618, - 9.16786, 13.02671, 14.03673, 16.08759, 13.65553], - 'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667, - 10.34667, 12.00556, 13.82125, 14.49429, 13.765], + 'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 9.1925, + 12.5575, 14.3675, 15.61667, 13.655], + 'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345, 9.17869, + 12.79607, 14.20036, 15.8706, 13.655], + 'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 9.1925, + 12.5575, 14.3675, 15.61667, 13.655], + 'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599, 9.1764, + 12.83559, 14.17267, 15.90976, 13.655], + 'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384, 9.56348, + 12.38009, 14.20565, 15.24694, 13.69758], + 'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618, 9.16786, + 13.02671, 14.03673, 16.08759, 13.65553], + 'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667, 10.34667, + 12.00556, 13.82125, 14.49429, 13.765], 'blackmanharris': [np.nan, np.nan, 8.42526, 12.36824, 6.97691, - 9.16438, 13.05052, 14.02175, 16.1098, - 13.65509] - } + 9.16438, 13.05052, 14.02175, 16.1098, 13.65509] + } for wt in win_types: xp = Series(xps[wt]) @@ -503,22 +507,21 @@ def test_cmov_window_special(self): tm._skip_if_no_scipy() win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] - kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.}, - {'width': 0.5}] + kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., + 'width': 2.}, {'width': 0.5}] - vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, - 13.49, 16.68, 9.48, 10.63, 14.48]) + vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, + 10.63, 14.48]) xps = { - 'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763, - 13.89053, 13.65671, 12.01002, np.nan, np.nan], - 'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589, - 11.73161, 13.08516, 12.95111, 12.74577, - np.nan, np.nan], - 'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284, - 12.88331, 12.96079, 12.77008, np.nan, np.nan], - 'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161, - 12.75129, 12.90702, 12.83757, np.nan, np.nan] + 'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763, 13.89053, + 13.65671, 12.01002, np.nan, np.nan], + 'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589, 11.73161, + 13.08516, 12.95111, 12.74577, np.nan, np.nan], + 'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284, 12.88331, + 12.96079, 12.77008, np.nan, np.nan], + 'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161, 12.75129, + 12.90702, 12.83757, np.nan, np.nan] } for wt, k in zip(win_types, kwds): @@ -531,8 +534,8 @@ def test_cmov_window_special_linear_range(self): tm._skip_if_no_scipy() win_types = ['kaiser', 'gaussian', 'general_gaussian', 'slepian'] - kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., 'width': 2.}, - {'width': 0.5}] + kwds = [{'beta': 1.}, {'std': 1.}, {'power': 2., + 'width': 2.}, {'width': 0.5}] vals = np.array(range(10), dtype=np.float) xp = vals.copy() @@ -546,7 +549,8 @@ def test_cmov_window_special_linear_range(self): def test_rolling_median(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self._check_moment_func(mom.rolling_median, np.median, name='median') + self._check_moment_func(mom.rolling_median, np.median, + name='median') def test_rolling_min(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -557,8 +561,8 @@ def test_rolling_min(self): b = mom.rolling_min(a, window=100, min_periods=1) assert_almost_equal(b, np.ones(len(a))) - self.assertRaises(ValueError, mom.rolling_min, - np.array([1,2, 3]), window=3, min_periods=5) + self.assertRaises(ValueError, mom.rolling_min, np.array([1, 2, 3]), + window=3, min_periods=5) def test_rolling_max(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -569,7 +573,7 @@ def test_rolling_max(self): b = mom.rolling_max(a, window=100, min_periods=1) assert_almost_equal(a, b) - self.assertRaises(ValueError, mom.rolling_max, np.array([1,2, 3]), + self.assertRaises(ValueError, mom.rolling_max, np.array([1, 2, 3]), window=3, min_periods=5) def test_rolling_quantile(self): @@ -582,10 +586,11 @@ def scoreatpercentile(a, per): return values[int(idx)] for q in qs: - def f(x, window, quantile, min_periods=None, freq=None, center=False): + + def f(x, window, quantile, min_periods=None, freq=None, + center=False): return mom.rolling_quantile(x, window, quantile, - min_periods=min_periods, - freq=freq, + min_periods=min_periods, freq=freq, center=center) def alt(x): @@ -594,26 +599,29 @@ def alt(x): self._check_moment_func(f, alt, name='quantile', quantile=q) def test_rolling_apply(self): - # suppress warnings about empty slices, as we are deliberately testing with a 0-length Series + # suppress warnings about empty slices, as we are deliberately testing + # with a 0-length Series with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) + warnings.filterwarnings("ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning) ser = Series([]) assert_series_equal(ser, ser.rolling(10).apply(lambda x: x.mean())) f = lambda x: x[np.isfinite(x)].mean() - def roll_mean(x, window, min_periods=None, freq=None, center=False, **kwargs): - return mom.rolling_apply(x, - window, - func=f, - min_periods=min_periods, - freq=freq, + + def roll_mean(x, window, min_periods=None, freq=None, center=False, + **kwargs): + return mom.rolling_apply(x, window, func=f, + min_periods=min_periods, freq=freq, center=center) + self._check_moment_func(roll_mean, np.mean, name='apply', func=f) # GH 8080 s = Series([None, None, None]) - result = s.rolling(2,min_periods=0).apply(lambda x: len(x)) + result = s.rolling(2, min_periods=0).apply(lambda x: len(x)) expected = Series([1., 2., 2.]) assert_series_equal(result, expected) @@ -634,13 +642,10 @@ def test_rolling_apply_out_of_bounds(self): assert_almost_equal(result, result) def test_rolling_std(self): - self._check_moment_func(mom.rolling_std, - lambda x: np.std(x, ddof=1), + self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=1), name='std') - self._check_moment_func(mom.rolling_std, - lambda x: np.std(x, ddof=0), - name='std', - ddof=0) + self._check_moment_func(mom.rolling_std, lambda x: np.std(x, ddof=0), + name='std', ddof=0) def test_rolling_std_1obs(self): with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -665,10 +670,8 @@ def test_rolling_std_neg_sqrt(self): # Test move_nanstd for neg sqrt. - a = np.array([0.0011448196318903589, - 0.00028718669878572767, - 0.00028718669878572767, - 0.00028718669878572767, + a = np.array([0.0011448196318903589, 0.00028718669878572767, + 0.00028718669878572767, 0.00028718669878572767, 0.00028718669878572767]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): b = mom.rolling_std(a, window=3) @@ -679,14 +682,10 @@ def test_rolling_std_neg_sqrt(self): self.assertTrue(np.isfinite(b[2:]).all()) def test_rolling_var(self): - self._check_moment_func(mom.rolling_var, - lambda x: np.var(x, ddof=1), - test_stable=True, - name='var') - self._check_moment_func(mom.rolling_var, - lambda x: np.var(x, ddof=0), - name='var', - ddof=0) + self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=1), + test_stable=True, name='var') + self._check_moment_func(mom.rolling_var, lambda x: np.var(x, ddof=0), + name='var', ddof=0) def test_rolling_skew(self): try: @@ -694,8 +693,7 @@ def test_rolling_skew(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_skew, - lambda x: skew(x, bias=False), - name='skew') + lambda x: skew(x, bias=False), name='skew') def test_rolling_kurt(self): try: @@ -703,8 +701,7 @@ def test_rolling_kurt(self): except ImportError: raise nose.SkipTest('no scipy') self._check_moment_func(mom.rolling_kurt, - lambda x: kurtosis(x, bias=False), - name='kurt') + lambda x: kurtosis(x, bias=False), name='kurt') def test_fperr_robustness(self): # TODO: remove this once python 2.5 out of picture @@ -712,7 +709,7 @@ def test_fperr_robustness(self): raise nose.SkipTest("doesn't work on python 3") # #2114 - data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' + data = '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1a@\xaa\xaa\xaa\xaa\xaa\xaa\x02@8\x8e\xe38\x8e\xe3\xe8?z\t\xed%\xb4\x97\xd0?\xa2\x0c<\xdd\x9a\x1f\xb6?\x82\xbb\xfa&y\x7f\x9d?\xac\'\xa7\xc4P\xaa\x83?\x90\xdf\xde\xb0k8j?`\xea\xe9u\xf2zQ?*\xe37\x9d\x98N7?\xe2.\xf5&v\x13\x1f?\xec\xc9\xf8\x19\xa4\xb7\x04?\x90b\xf6w\x85\x9f\xeb>\xb5A\xa4\xfaXj\xd2>F\x02\xdb\xf8\xcb\x8d\xb8>.\xac<\xfb\x87^\xa0>\xe8:\xa6\xf9_\xd3\x85>\xfb?\xe2cUU\xfd?\xfc\x7fA\xed8\x8e\xe3?\xa5\xaa\xac\x91\xf6\x12\xca?n\x1cs\xb6\xf9a\xb1?\xe8%D\xf3L-\x97?5\xddZD\x11\xe7~?#>\xe7\x82\x0b\x9ad?\xd9R4Y\x0fxK?;7x;\nP2?N\xf4JO\xb8j\x18?4\xf81\x8a%G\x00?\x9a\xf5\x97\r2\xb4\xe5>\xcd\x9c\xca\xbcB\xf0\xcc>3\x13\x87(\xd7J\xb3>\x99\x19\xb4\xe0\x1e\xb9\x99>ff\xcd\x95\x14&\x81>\x88\x88\xbc\xc7p\xddf>`\x0b\xa6_\x96|N>@\xb2n\xea\x0eS4>U\x98\x938i\x19\x1b>\x8eeb\xd0\xf0\x10\x02>\xbd\xdc-k\x96\x16\xe8=(\x93\x1e\xf2\x0e\x0f\xd0=\xe0n\xd3Bii\xb5=*\xe9\x19Y\x8c\x8c\x9c=\xc6\xf0\xbb\x90]\x08\x83=]\x96\xfa\xc0|`i=>d\xfc\xd5\xfd\xeaP=R0\xfb\xc7\xa7\x8e6=\xc2\x95\xf9_\x8a\x13\x1e=\xd6c\xa6\xea\x06\r\x04=r\xda\xdd8\t\xbc\xea<\xf6\xe6\x93\xd0\xb0\xd2\xd1<\x9d\xdeok\x96\xc3\xb7<&~\xea9s\xaf\x9f\xb8\x02@\xc6\xd2&\xfd\xa8\xf5\xe8?\xd9\xe1\x19\xfe\xc5\xa3\xd0?v\x82"\xa8\xb2/\xb6?\x9dX\x835\xee\x94\x9d?h\x90W\xce\x9e\xb8\x83?\x8a\xc0th~Kj?\\\x80\xf8\x9a\xa9\x87Q?%\xab\xa0\xce\x8c_7?1\xe4\x80\x13\x11*\x1f? \x98\x00\r\xb6\xc6\x04?\x80u\xabf\x9d\xb3\xeb>UNrD\xbew\xd2>\x1c\x13C[\xa8\x9f\xb8>\x12b\xd7m-\x1fQ@\xe3\x85>\xe6\x91)l\x00/m>Da\xc6\xf2\xaatS>\x05\xd7]\xee\xe3\xf09>' # noqa arr = np.frombuffer(data, dtype='= LooseVersion('1.8.0'): base_functions += [ (np.nanmean, 1, 'mean'), - (lambda v: np.nanstd(v, ddof=1), 1 ,'std'), - (lambda v: np.nanvar(v, ddof=1), 1 ,'var'), + (lambda v: np.nanstd(v, ddof=1), 1, 'std'), + (lambda v: np.nanvar(v, ddof=1), 1, 'var'), ] if np.__version__ >= LooseVersion('1.9.0'): - base_functions += [ - (np.nanmedian, 1, 'median'), - ] + base_functions += [(np.nanmedian, 1, 'median'), ] no_nan_functions = [ (np.max, None, 'max'), (np.min, None, 'min'), (np.sum, None, 'sum'), (np.mean, None, 'mean'), - (lambda v: np.std(v, ddof=1), 1 ,'std'), - (lambda v: np.var(v, ddof=1), 1 ,'var'), + (lambda v: np.std(v, ddof=1), 1, 'std'), + (lambda v: np.var(v, ddof=1), 1, 'var'), (np.median, None, 'median'), ] @@ -1226,19 +1232,18 @@ def _create_data(self): def setUp(self): self._create_data() - def _test_moments_consistency(self, - min_periods, - count, mean, mock_mean, corr, - var_unbiased=None, std_unbiased=None, cov_unbiased=None, - var_biased=None, std_biased=None, cov_biased=None, + def _test_moments_consistency(self, min_periods, count, mean, mock_mean, + corr, var_unbiased=None, std_unbiased=None, + cov_unbiased=None, var_biased=None, + std_biased=None, cov_biased=None, var_debiasing_factors=None): - def _non_null_values(x): values = x.values.ravel() return set(values[notnull(values)].tolist()) for (x, is_constant, no_nans) in self.data: - assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + assert_equal = assert_series_equal if isinstance( + x, Series) else assert_frame_equal count_x = count(x) mean_x = mean(x) @@ -1249,7 +1254,8 @@ def _non_null_values(x): # check that correlation of a series with itself is either 1 or NaN corr_x_x = corr(x, x) - # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # restore once rolling_cov(x, x) is identically equal to var(x) + # self.assertTrue(_non_null_values(corr_x_x).issubset(set([1.]))) # + # restore once rolling_cov(x, x) is identically equal to var(x) if is_constant: exp = x.max() if isinstance(x, Series) else x.max().max() @@ -1268,10 +1274,12 @@ def _non_null_values(x): var_unbiased_x = var_unbiased(x) var_biased_x = var_biased(x) var_debiasing_factors_x = var_debiasing_factors(x) - assert_equal(var_unbiased_x, var_biased_x * var_debiasing_factors_x) + assert_equal(var_unbiased_x, var_biased_x * + var_debiasing_factors_x) for (std, var, cov) in [(std_biased, var_biased, cov_biased), - (std_unbiased, var_unbiased, cov_unbiased)]: + (std_unbiased, var_unbiased, cov_unbiased) + ]: # check that var(x), std(x), and cov(x) are all >= 0 var_x = var(x) @@ -1305,7 +1313,8 @@ def _non_null_values(x): if isinstance(x, Series): for (y, is_constant, no_nans) in self.data: if not x.isnull().equals(y.isnull()): - # can only easily test two Series with similar structure + # can only easily test two Series with similar + # structure continue # check that cor(x, y) is symmetric @@ -1319,41 +1328,45 @@ def _non_null_values(x): cov_y_x = cov(y, x) assert_equal(cov_x_y, cov_y_x) - # check that cov(x, y) == (var(x+y) - var(x) - var(y)) / 2 + # check that cov(x, y) == (var(x+y) - var(x) - + # var(y)) / 2 var_x_plus_y = var(x + y) var_y = var(y) - assert_equal(cov_x_y, 0.5 * (var_x_plus_y - var_x - var_y)) + assert_equal(cov_x_y, 0.5 * + (var_x_plus_y - var_x - var_y)) - # check that corr(x, y) == cov(x, y) / (std(x) * std(y)) + # check that corr(x, y) == cov(x, y) / (std(x) * + # std(y)) std_y = std(y) assert_equal(corr_x_y, cov_x_y / (std_x * std_y)) if cov is cov_biased: - # check that biased cov(x, y) == mean(x*y) - mean(x)*mean(y) + # check that biased cov(x, y) == mean(x*y) - + # mean(x)*mean(y) mean_y = mean(y) mean_x_times_y = mean(x * y) - assert_equal(cov_x_y, mean_x_times_y - (mean_x * mean_y)) + assert_equal(cov_x_y, mean_x_times_y - + (mean_x * mean_y)) @slow def test_ewm_consistency(self): - def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): return DataFrame(index=s.index, columns=s.columns) - w = concat([ _weights(s.iloc[:, i], - com=com, - adjust=adjust, - ignore_na=ignore_na) for i, _ in enumerate(s.columns) ], - axis=1) - w.index=s.index - w.columns=s.columns + w = concat([ + _weights(s.iloc[:, i], com=com, adjust=adjust, + ignore_na=ignore_na) + for i, _ in enumerate(s.columns)], axis=1) + w.index = s.index + w.columns = s.columns return w w = Series(np.nan, index=s.index) alpha = 1. / (1. + com) if ignore_na: - w[s.notnull()] = _weights(s[s.notnull()], com=com, adjust=adjust, ignore_na=False) + w[s.notnull()] = _weights(s[s.notnull()], com=com, + adjust=adjust, ignore_na=False) elif adjust: for i in range(len(s)): if s.iat[i] == s.iat[i]: @@ -1366,7 +1379,8 @@ def _weights(s, com, adjust, ignore_na): if prev_i == -1: w.iat[i] = 1. else: - w.iat[i] = alpha * sum_wts / pow(1. - alpha, i - prev_i) + w.iat[i] = alpha * sum_wts / pow(1. - alpha, + i - prev_i) sum_wts += w.iat[i] prev_i = i return w @@ -1382,35 +1396,66 @@ def _variance_debiasing_factors(s, com, adjust, ignore_na): def _ewma(s, com, min_periods, adjust, ignore_na): weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na) - result = s.multiply(weights).cumsum().divide(weights.cumsum()).fillna(method='ffill') - result[s.expanding().count() < (max(min_periods, 1) if min_periods else 1)] = np.nan + result = s.multiply(weights).cumsum().divide(weights.cumsum( + )).fillna(method='ffill') + result[s.expanding().count() < (max(min_periods, 1) if min_periods + else 1)] = np.nan return result com = 3. - for min_periods in [0, 1, 2, 3, 4]: - for adjust in [True, False]: - for ignore_na in [False, True]: - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=False), - std_unbiased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=False), - cov_unbiased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=False), - var_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).var(bias=True), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: x.ewm(com=com, min_periods=min_periods, adjust=adjust, ignore_na=ignore_na).cov(y, bias=True), - var_debiasing_factors=lambda x: _variance_debiasing_factors(x, com=com, adjust=adjust, ignore_na=ignore_na)) + for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], + [True, False], + [False, True]): + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) @slow def test_expanding_consistency(self): - # suppress warnings about empty slices, as we are deliberately testing with empty/0-length Series/DataFrames + # suppress warnings about empty slices, as we are deliberately testing + # with empty/0-length Series/DataFrames with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) + warnings.filterwarnings("ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning) for min_periods in [0, 1, 2, 3, 4]: @@ -1418,125 +1463,208 @@ def test_expanding_consistency(self): self._test_moments_consistency( min_periods=min_periods, count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding(min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding(min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding(min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding(min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding(min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding(min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding(min_periods=min_periods).var(ddof=0), - std_biased=lambda x: x.expanding(min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding(min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: x.expanding().count() / (x.expanding().count() - 1.).replace(0., np.nan) - ) - - # test consistency between expanding_xyz() and either (a) expanding_apply of Series.xyz(), - # or (b) expanding_apply of np.nanxyz() + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) + .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() for (x, is_constant, no_nans) in self.data: - assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal + assert_equal = assert_series_equal if isinstance( + x, Series) else assert_frame_equal functions = self.base_functions # GH 8269 if no_nans: functions = self.base_functions + self.no_nan_functions for (f, require_min_periods, name) in functions: - expanding_f = getattr(x.expanding(min_periods=min_periods),name) + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) - if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): continue if name == 'count': expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding(min_periods=0).apply(func=f) + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) else: - if name in ['cov','corr']: - expanding_f_result = expanding_f(pairwise=False) + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding(min_periods=min_periods).apply(func=f) + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, expanding_apply_f_result) + assert_equal(expanding_f_result, + expanding_apply_f_result) - if (name in ['cov','corr']) and isinstance(x, DataFrame): + if (name in ['cov', 'corr']) and isinstance(x, + DataFrame): # test pairwise=True expanding_f_result = expanding_f(x, pairwise=True) - expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) + expected = Panel(items=x.index, + major_axis=x.columns, + minor_axis=x.columns) for i, _ in enumerate(x.columns): for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = getattr(x.iloc[:, i].expanding(min_periods=min_periods),name)(x.iloc[:, j]) + expected.iloc[:, i, j] = getattr( + x.iloc[:, i].expanding( + min_periods=min_periods), + name)(x.iloc[:, j]) assert_panel_equal(expanding_f_result, expected) @slow def test_rolling_consistency(self): - # suppress warnings about empty slices, as we are deliberately testing with empty/0-length Series/DataFrames + # suppress warnings about empty slices, as we are deliberately testing + # with empty/0-length Series/DataFrames with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.rolling(window=window, center=center).count(), - mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).mean(), - mock_mean=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).sum().divide( - x.rolling(window=window, min_periods=min_periods, center=center).count()), - corr=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).corr(y), - var_unbiased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(), - std_unbiased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).std(), - cov_unbiased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y), - var_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).var(ddof=0), - std_biased=lambda x: x.rolling(window=window, min_periods=min_periods, center=center).std(ddof=0), - cov_biased=lambda x, y: x.rolling(window=window, min_periods=min_periods, center=center).cov(y, ddof=0), - var_debiasing_factors=lambda x: x.rolling(window=window, center=center).count().divide( - (x.rolling(window=window, center=center).count() - 1.).replace(0., np.nan)), - ) - - # test consistency between rolling_xyz() and either (a) rolling_apply of Series.xyz(), - # or (b) rolling_apply of np.nanxyz() - for (x, is_constant, no_nans) in self.data: - - assert_equal = assert_series_equal if isinstance(x, Series) else assert_frame_equal - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr(x.rolling(window=window, center=center, min_periods=min_periods),name) - - if require_min_periods and (min_periods is not None) and (min_periods < require_min_periods): - continue - - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling(window=window, - min_periods=0, center=center).apply(func=f) - else: - if name in ['cov','corr']: - rolling_f_result = rolling_f(pairwise=False) - else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling(window=window, - min_periods=min_periods, center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, rolling_apply_f_result) - - if (name in ['cov','corr']) and isinstance(x, DataFrame): - # test pairwise=True - rolling_f_result = rolling_f(x, pairwise=True) - expected = Panel(items=x.index, major_axis=x.columns, minor_axis=x.columns) - for i, _ in enumerate(x.columns): - for j, _ in enumerate(x.columns): - expected.iloc[:, i, j] = getattr(x.iloc[:, i].rolling( - window=window, min_periods=min_periods, center=center),name)(x.iloc[:, j]) - assert_panel_equal(rolling_f_result, expected) + warnings.filterwarnings("ignore", + message=".*(empty slice|0 for slice).*", + category=RuntimeWarning) + + def cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + for window, min_periods, center in cases(): + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), + + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) + .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + + assert_equal = (assert_series_equal + if isinstance(x, Series) else + assert_frame_equal) + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) + else: + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, + rolling_apply_f_result) + + if (name in ['cov', 'corr']) and isinstance( + x, DataFrame): + # test pairwise=True + rolling_f_result = rolling_f(x, + pairwise=True) + expected = Panel(items=x.index, + major_axis=x.columns, + minor_axis=x.columns) + for i, _ in enumerate(x.columns): + for j, _ in enumerate(x.columns): + expected.iloc[:, i, j] = ( + getattr( + x.iloc[:, i] + .rolling(window=window, + min_periods=min_periods, + center=center), + name)(x.iloc[:, j])) + assert_panel_equal(rolling_f_result, expected) # binary moments def test_rolling_cov(self): @@ -1547,7 +1675,7 @@ def test_rolling_cov(self): assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1]) def test_rolling_cov_pairwise(self): - self._check_pairwise_moment('rolling','cov', window=10, min_periods=5) + self._check_pairwise_moment('rolling', 'cov', window=10, min_periods=5) def test_rolling_corr(self): A = self.series @@ -1566,12 +1694,12 @@ def test_rolling_corr(self): assert_almost_equal(result[-1], a.corr(b)) def test_rolling_corr_pairwise(self): - self._check_pairwise_moment('rolling', 'corr', window=10, min_periods=5) + self._check_pairwise_moment('rolling', 'corr', window=10, + min_periods=5) def _check_pairwise_moment(self, dispatch, name, **kwargs): - def get_result(obj, obj2=None): - return getattr(getattr(obj,dispatch)(**kwargs),name)(obj2) + return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2) panel = get_result(self.frame) actual = panel.ix[:, 1, 5] @@ -1582,40 +1710,36 @@ def get_result(obj, obj2=None): def test_flex_binary_moment(self): # GH3155 # don't blow the stack - self.assertRaises(TypeError, rwindow._flex_binary_moment,5,6,None) + self.assertRaises(TypeError, rwindow._flex_binary_moment, 5, 6, None) def test_corr_sanity(self): - #GH 3155 - df = DataFrame( - np.array( - [[ 0.87024726, 0.18505595], - [ 0.64355431, 0.3091617 ], - [ 0.92372966, 0.50552513], - [ 0.00203756, 0.04520709], - [ 0.84780328, 0.33394331], - [ 0.78369152, 0.63919667]]) - ) - - res = df[0].rolling(5,center=True).corr(df[1]) - self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) + # GH 3155 + df = DataFrame(np.array( + [[0.87024726, 0.18505595], [0.64355431, 0.3091617], + [0.92372966, 0.50552513], [0.00203756, 0.04520709], + [0.84780328, 0.33394331], [0.78369152, 0.63919667]])) + + res = df[0].rolling(5, center=True).corr(df[1]) + self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res])) # and some fuzzing for i in range(10): - df = DataFrame(np.random.rand(30,2)) - res = df[0].rolling(5,center=True).corr(df[1]) + df = DataFrame(np.random.rand(30, 2)) + res = df[0].rolling(5, center=True).corr(df[1]) try: - self.assertTrue(all([np.abs(np.nan_to_num(x)) <=1 for x in res])) + self.assertTrue(all([np.abs(np.nan_to_num(x)) <= 1 for x in res + ])) except: print(res) - def test_flex_binary_frame(self): def _check(method): series = self.frame[1] - res = getattr(series.rolling(window=10),method)(self.frame) - res2 = getattr(self.frame.rolling(window=10),method)(series) - exp = self.frame.apply(lambda x: getattr(series.rolling(window=10),method)(x)) + res = getattr(series.rolling(window=10), method)(self.frame) + res2 = getattr(self.frame.rolling(window=10), method)(series) + exp = self.frame.apply(lambda x: getattr( + series.rolling(window=10), method)(x)) tm.assert_frame_equal(res, exp) tm.assert_frame_equal(res2, exp) @@ -1623,12 +1747,12 @@ def _check(method): frame2 = self.frame.copy() frame2.values[:] = np.random.randn(*frame2.shape) - res3 = getattr(self.frame.rolling(window=10),method)(frame2) - exp = DataFrame(dict((k, getattr(self.frame[k].rolling(window=10),method)(frame2[k])) - for k in self.frame)) + res3 = getattr(self.frame.rolling(window=10), method)(frame2) + exp = DataFrame(dict((k, getattr(self.frame[k].rolling( + window=10), method)(frame2[k])) for k in self.frame)) tm.assert_frame_equal(res3, exp) - methods = ['corr','cov'] + methods = ['corr', 'cov'] for meth in methods: _check(meth) @@ -1636,18 +1760,17 @@ def test_ewmcov(self): self._check_binary_ew('cov') def test_ewmcov_pairwise(self): - self._check_pairwise_moment('ewm','cov', span=10, min_periods=5) + self._check_pairwise_moment('ewm', 'cov', span=10, min_periods=5) def test_ewmcorr(self): self._check_binary_ew('corr') def test_ewmcorr_pairwise(self): - self._check_pairwise_moment('ewm','corr', span=10, min_periods=5) + self._check_pairwise_moment('ewm', 'corr', span=10, min_periods=5) def _check_binary_ew(self, name): - def func(A, B, com, **kwargs): - return getattr(A.ewm(com, **kwargs),name)(B) + return getattr(A.ewm(com, **kwargs), name)(B) A = Series(randn(50), index=np.arange(50)) B = A[2:] + randn(48) @@ -1662,7 +1785,8 @@ def func(A, B, com, **kwargs): # GH 7898 for min_periods in (0, 1, 2): result = func(A, B, 20, min_periods=min_periods) - # binary functions (ewmcov, ewmcorr) with bias=False require at least two values + # binary functions (ewmcov, ewmcorr) with bias=False require at + # least two values self.assertTrue(np.isnan(result.values[:11]).all()) self.assertFalse(np.isnan(result.values[11:]).any()) @@ -1671,7 +1795,8 @@ def func(A, B, com, **kwargs): assert_series_equal(result, Series([])) # check series of length 1 - result = func(Series([1.]), Series([1.]), 50, min_periods=min_periods) + result = func( + Series([1.]), Series([1.]), 50, min_periods=min_periods) assert_series_equal(result, Series([np.NaN])) self.assertRaises(Exception, func, A, randn(50), 20, min_periods=5) @@ -1681,10 +1806,9 @@ def test_expanding_apply(self): assert_series_equal(ser, ser.expanding().apply(lambda x: x.mean())) def expanding_mean(x, min_periods=1, freq=None): - return mom.expanding_apply(x, - lambda x: x.mean(), - min_periods=min_periods, - freq=freq) + return mom.expanding_apply(x, lambda x: x.mean(), + min_periods=min_periods, freq=freq) + self._check_expanding(expanding_mean, np.mean) # GH 8080 @@ -1701,32 +1825,32 @@ def mean_w_arg(x, const): expected = df.expanding().apply(np.mean) + 20. - assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20,)), - expected) + assert_frame_equal(df.expanding().apply(mean_w_arg, args=(20, )), + expected) assert_frame_equal(df.expanding().apply(mean_w_arg, - kwargs={'const' : 20}), + kwargs={'const': 20}), expected) - def test_expanding_corr(self): A = self.series.dropna() B = (A + randn(len(A)))[:-5] result = A.expanding().corr(B) - rolling_result = A.rolling(window=len(A),min_periods=1).corr(B) + rolling_result = A.rolling(window=len(A), min_periods=1).corr(B) assert_almost_equal(rolling_result, result) def test_expanding_count(self): result = self.series.expanding().count() - assert_almost_equal(result, self.series.rolling(window=len(self.series)).count()) + assert_almost_equal(result, self.series.rolling( + window=len(self.series)).count()) def test_expanding_quantile(self): result = self.series.expanding().quantile(0.5) - rolling_result = self.series.rolling( - window=len(self.series),min_periods=1).quantile(0.5) + rolling_result = self.series.rolling(window=len(self.series), + min_periods=1).quantile(0.5) assert_almost_equal(result, rolling_result) @@ -1746,7 +1870,8 @@ def test_expanding_max(self): def test_expanding_cov_pairwise(self): result = self.frame.expanding().corr() - rolling_result = self.frame.rolling(window=len(self.frame),min_periods=1).corr() + rolling_result = self.frame.rolling(window=len(self.frame), + min_periods=1).corr() for i in result.items: assert_almost_equal(result[i], rolling_result[i]) @@ -1754,7 +1879,8 @@ def test_expanding_cov_pairwise(self): def test_expanding_corr_pairwise(self): result = self.frame.expanding().corr() - rolling_result = self.frame.rolling(window=len(self.frame), min_periods=1).corr() + rolling_result = self.frame.rolling(window=len(self.frame), + min_periods=1).corr() for i in result.items: assert_almost_equal(result[i], rolling_result[i]) @@ -1823,12 +1949,15 @@ def test_rolling_functions_window_non_shrinkage(self): # GH 7764 s = Series(range(4)) s_expected = Series(np.nan, index=s.index) - df = DataFrame([[1,5], [3, 2], [3,9], [-1,0]], columns=['A','B']) + df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B']) df_expected = DataFrame(np.nan, index=df.index, columns=df.columns) - df_expected_panel = Panel(items=df.index, major_axis=df.columns, minor_axis=df.columns) + df_expected_panel = Panel(items=df.index, major_axis=df.columns, + minor_axis=df.columns) - functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), - lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + functions = [lambda x: (x.rolling(window=10, min_periods=5) + .cov(x, pairwise=False)), + lambda x: (x.rolling(window=10, min_periods=5) + .corr(x, pairwise=False)), lambda x: x.rolling(window=10, min_periods=5).max(), lambda x: x.rolling(window=10, min_periods=5).min(), lambda x: x.rolling(window=10, min_periods=5).sum(), @@ -1837,11 +1966,12 @@ def test_rolling_functions_window_non_shrinkage(self): lambda x: x.rolling(window=10, min_periods=5).var(), lambda x: x.rolling(window=10, min_periods=5).skew(), lambda x: x.rolling(window=10, min_periods=5).kurt(), - lambda x: x.rolling(window=10, min_periods=5).quantile(quantile=0.5), + lambda x: x.rolling( + window=10, min_periods=5).quantile(quantile=0.5), lambda x: x.rolling(window=10, min_periods=5).median(), lambda x: x.rolling(window=10, min_periods=5).apply(sum), - lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(), - ] + lambda x: x.rolling(win_type='boxcar', + window=10, min_periods=5).mean()] for f in functions: try: s_result = f(s) @@ -1854,9 +1984,10 @@ def test_rolling_functions_window_non_shrinkage(self): # scipy needed for rolling_window continue - functions = [lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True), - lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True), - ] + functions = [lambda x: (x.rolling(window=10, min_periods=5) + .cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5) + .corr(x, pairwise=True))] for f in functions: df_result_panel = f(df) assert_panel_equal(df_result_panel, df_expected_panel) @@ -1867,15 +1998,19 @@ def test_moment_functions_zero_length(self): s_expected = s df1 = DataFrame() df1_expected = df1 - df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, minor_axis=df1.columns) + df1_expected_panel = Panel(items=df1.index, major_axis=df1.columns, + minor_axis=df1.columns) df2 = DataFrame(columns=['a']) df2['a'] = df2['a'].astype('float64') df2_expected = df2 - df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, minor_axis=df2.columns) + df2_expected_panel = Panel(items=df2.index, major_axis=df2.columns, + minor_axis=df2.columns) functions = [lambda x: x.expanding().count(), - lambda x: x.expanding(min_periods=5).cov(x, pairwise=False), - lambda x: x.expanding(min_periods=5).corr(x, pairwise=False), + lambda x: x.expanding(min_periods=5).cov( + x, pairwise=False), + lambda x: x.expanding(min_periods=5).corr( + x, pairwise=False), lambda x: x.expanding(min_periods=5).max(), lambda x: x.expanding(min_periods=5).min(), lambda x: x.expanding(min_periods=5).sum(), @@ -1888,8 +2023,10 @@ def test_moment_functions_zero_length(self): lambda x: x.expanding(min_periods=5).median(), lambda x: x.expanding(min_periods=5).apply(sum), lambda x: x.rolling(window=10).count(), - lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=False), - lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).cov( + x, pairwise=False), + lambda x: x.rolling(window=10, min_periods=5).corr( + x, pairwise=False), lambda x: x.rolling(window=10, min_periods=5).max(), lambda x: x.rolling(window=10, min_periods=5).min(), lambda x: x.rolling(window=10, min_periods=5).sum(), @@ -1898,11 +2035,13 @@ def test_moment_functions_zero_length(self): lambda x: x.rolling(window=10, min_periods=5).var(), lambda x: x.rolling(window=10, min_periods=5).skew(), lambda x: x.rolling(window=10, min_periods=5).kurt(), - lambda x: x.rolling(window=10, min_periods=5).quantile(0.5), + lambda x: x.rolling( + window=10, min_periods=5).quantile(0.5), lambda x: x.rolling(window=10, min_periods=5).median(), lambda x: x.rolling(window=10, min_periods=5).apply(sum), - lambda x: x.rolling(win_type='boxcar', window=10, min_periods=5).mean(), - ] + lambda x: x.rolling(win_type='boxcar', + window=10, min_periods=5).mean(), + ] for f in functions: try: s_result = f(s) @@ -1918,11 +2057,15 @@ def test_moment_functions_zero_length(self): # scipy needed for rolling_window continue - functions = [lambda x: x.expanding(min_periods=5).cov(x, pairwise=True), - lambda x: x.expanding(min_periods=5).corr(x, pairwise=True), - lambda x: x.rolling(window=10, min_periods=5).cov(x, pairwise=True), - lambda x: x.rolling(window=10, min_periods=5).corr(x, pairwise=True), - ] + functions = [lambda x: (x.expanding(min_periods=5) + .cov(x, pairwise=True)), + lambda x: (x.expanding(min_periods=5) + .corr(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5) + .cov(x, pairwise=True)), + lambda x: (x.rolling(window=10, min_periods=5) + .corr(x, pairwise=True)), + ] for f in functions: df1_result_panel = f(df1) assert_panel_equal(df1_result_panel, df1_expected_panel) @@ -1932,15 +2075,16 @@ def test_moment_functions_zero_length(self): def test_expanding_cov_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1,5], [3, 2], [3,9]], columns=['A','B']) - df1a = DataFrame([[1,5], [3,9]], index=[0,2], columns=['A','B']) - df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y']) - df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y']) + df1 = DataFrame([[1, 5], [3, 2], [3, 9]], columns=['A', 'B']) + df1a = DataFrame([[1, 5], [3, 9]], index=[0, 2], columns=['A', 'B']) + df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) + df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) result1 = df1.expanding().cov(df2a, pairwise=True)[2] result2 = df1.expanding().cov(df2a, pairwise=True)[2] result3 = df1a.expanding().cov(df2, pairwise=True)[2] result4 = df1a.expanding().cov(df2a, pairwise=True)[2] - expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A','B'], columns=['X','Y']) + expected = DataFrame([[-3., -5.], [-6., -10.]], index=['A', 'B'], + columns=['X', 'Y']) assert_frame_equal(result1, expected) assert_frame_equal(result2, expected) assert_frame_equal(result3, expected) @@ -1948,15 +2092,16 @@ def test_expanding_cov_pairwise_diff_length(self): def test_expanding_corr_pairwise_diff_length(self): # GH 7512 - df1 = DataFrame([[1,2], [3, 2], [3,4]], columns=['A','B']) - df1a = DataFrame([[1,2], [3,4]], index=[0,2], columns=['A','B']) - df2 = DataFrame([[5,6], [None,None], [2,1]], columns=['X','Y']) - df2a = DataFrame([[5,6], [2,1]], index=[0,2], columns=['X','Y']) + df1 = DataFrame([[1, 2], [3, 2], [3, 4]], columns=['A', 'B']) + df1a = DataFrame([[1, 2], [3, 4]], index=[0, 2], columns=['A', 'B']) + df2 = DataFrame([[5, 6], [None, None], [2, 1]], columns=['X', 'Y']) + df2a = DataFrame([[5, 6], [2, 1]], index=[0, 2], columns=['X', 'Y']) result1 = df1.expanding().corr(df2, pairwise=True)[2] result2 = df1.expanding().corr(df2a, pairwise=True)[2] result3 = df1a.expanding().corr(df2, pairwise=True)[2] result4 = df1a.expanding().corr(df2a, pairwise=True)[2] - expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A','B'], columns=['X','Y']) + expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]], index=['A', 'B'], + columns=['X', 'Y']) assert_frame_equal(result1, expected) assert_frame_equal(result2, expected) assert_frame_equal(result3, expected) @@ -1964,28 +2109,39 @@ def test_expanding_corr_pairwise_diff_length(self): def test_pairwise_stats_column_names_order(self): # GH 7738 - df1s = [DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0,1]), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[1,0]), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[1,1]), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=['C','C']), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[1.,0]), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=[0.,1]), - DataFrame([[2,4],[1,2],[5,2],[8,1]], columns=['C',1]), - DataFrame([[2.,4.],[1.,2.],[5.,2.],[8.,1.]], columns=[1,0.]), - DataFrame([[2,4.],[1,2.],[5,2.],[8,1.]], columns=[0,1.]), - DataFrame([[2,4],[1,2],[5,2],[8,1.]], columns=[1.,'X']), - ] - df2 = DataFrame([[None,1,1],[None,1,2],[None,3,2],[None,8,1]], columns=['Y','Z','X']) - s = Series([1,1,3,8]) - - # suppress warnings about incomparable objects, as we are deliberately testing with such column labels + df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 'C']), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]), + DataFrame( + [[2., 4.], [1., 2.], [5., 2.], [8., 1.]], columns=[1, 0.]), + DataFrame( + [[2, 4.], [1, 2.], [5, 2.], [8, 1.]], columns=[0, 1.]), + DataFrame( + [[2, 4], [1, 2], [5, 2], [8, 1.]], columns=[1., 'X']), ] + df2 = DataFrame( + [[None, 1, 1], [None, 1, 2], [None, 3, 2], [None, 8, 1] + ], columns=['Y', 'Z', 'X']) + s = Series([1, 1, 3, 8]) + + # suppress warnings about incomparable objects, as we are deliberately + # testing with such column labels with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message=".*incomparable objects.*", category=RuntimeWarning) + warnings.filterwarnings("ignore", + message=".*incomparable objects.*", + category=RuntimeWarning) # DataFrame methods (which do not call _flex_binary_moment()) - for f in [lambda x: x.cov(), - lambda x: x.corr(), - ]: + for f in [lambda x: x.cov(), lambda x: x.corr(), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): assert_index_equal(result.index, df.columns) @@ -2000,8 +2156,7 @@ def test_pairwise_stats_column_names_order(self): lambda x: x.rolling(window=3).cov(pairwise=True), lambda x: x.rolling(window=3).corr(pairwise=True), lambda x: x.ewm(com=3).cov(pairwise=True), - lambda x: x.ewm(com=3).corr(pairwise=True), - ]: + lambda x: x.ewm(com=3).corr(pairwise=True), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): assert_index_equal(result.items, df.index) @@ -2017,8 +2172,7 @@ def test_pairwise_stats_column_names_order(self): lambda x: x.rolling(window=3).cov(pairwise=False), lambda x: x.rolling(window=3).corr(pairwise=False), lambda x: x.ewm(com=3).cov(pairwise=False), - lambda x: x.ewm(com=3).corr(pairwise=False), - ]: + lambda x: x.ewm(com=3).corr(pairwise=False), ]: results = [f(df) for df in df1s] for (df, result) in zip(df1s, results): assert_index_equal(result.index, df.index) @@ -2033,8 +2187,7 @@ def test_pairwise_stats_column_names_order(self): lambda x, y: x.rolling(window=3).cov(y, pairwise=True), lambda x, y: x.rolling(window=3).corr(y, pairwise=True), lambda x, y: x.ewm(com=3).cov(y, pairwise=True), - lambda x, y: x.ewm(com=3).corr(y, pairwise=True), - ]: + lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ]: results = [f(df, df2) for df in df1s] for (df, result) in zip(df1s, results): assert_index_equal(result.items, df.index) @@ -2050,9 +2203,9 @@ def test_pairwise_stats_column_names_order(self): lambda x, y: x.rolling(window=3).cov(y, pairwise=False), lambda x, y: x.rolling(window=3).corr(y, pairwise=False), lambda x, y: x.ewm(com=3).cov(y, pairwise=False), - lambda x, y: x.ewm(com=3).corr(y, pairwise=False), - ]: - results = [f(df, df2) if df.columns.is_unique else None for df in df1s] + lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ]: + results = [f(df, df2) if df.columns.is_unique else None + for df in df1s] for (df, result) in zip(df1s, results): if result is not None: expected_index = df.index.union(df2.index) @@ -2060,8 +2213,12 @@ def test_pairwise_stats_column_names_order(self): assert_index_equal(result.index, expected_index) assert_index_equal(result.columns, expected_columns) else: - tm.assertRaisesRegexp(ValueError, "'arg1' columns are not unique", f, df, df2) - tm.assertRaisesRegexp(ValueError, "'arg2' columns are not unique", f, df2, df) + tm.assertRaisesRegexp( + ValueError, "'arg1' columns are not unique", f, df, + df2) + tm.assertRaisesRegexp( + ValueError, "'arg2' columns are not unique", f, + df2, df) # DataFrame with a Series for f in [lambda x, y: x.expanding().cov(y), @@ -2069,8 +2226,7 @@ def test_pairwise_stats_column_names_order(self): lambda x, y: x.rolling(window=3).cov(y), lambda x, y: x.rolling(window=3).corr(y), lambda x, y: x.ewm(com=3).cov(y), - lambda x, y: x.ewm(com=3).corr(y), - ]: + lambda x, y: x.ewm(com=3).corr(y), ]: results = [f(df, s) for df in df1s] + [f(s, df) for df in df1s] for (df, result) in zip(df1s, results): assert_index_equal(result.index, df.index) @@ -2094,10 +2250,9 @@ def test_rolling_skew_edge_cases(self): assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 0.177994, 1.548824] - d = Series([-1.50837035, -0.1297039 , 0.19501095, - 1.73508164, 0.41941401]) - expected = Series([np.NaN, np.NaN, np.NaN, - 0.177994, 1.548824]) + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401 + ]) + expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824]) x = d.rolling(window=4).skew() assert_series_equal(expected, x) @@ -2116,10 +2271,9 @@ def test_rolling_kurt_edge_cases(self): assert_series_equal(all_nan, x) # yields [NaN, NaN, NaN, 1.224307, 2.671499] - d = Series([-1.50837035, -0.1297039 , 0.19501095, - 1.73508164, 0.41941401]) - expected = Series([np.NaN, np.NaN, np.NaN, - 1.224307, 2.671499]) + d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401 + ]) + expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499]) x = d.rolling(window=4).kurt() assert_series_equal(expected, x) @@ -2127,17 +2281,16 @@ def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, has_time_rule=True, preserve_nan=True): result = func(self.arr) - assert_almost_equal(result[10], - static_comp(self.arr[:11])) + assert_almost_equal(result[10], static_comp(self.arr[:11])) if preserve_nan: - assert(np.isnan(result[self._nan_locs]).all()) + assert (np.isnan(result[self._nan_locs]).all()) arr = randn(50) if has_min_periods: result = func(arr, min_periods=30) - assert(np.isnan(result[:29]).all()) + assert (np.isnan(result[:29]).all()) assert_almost_equal(result[-1], static_comp(arr[:50])) # min_periods is working correctly @@ -2165,8 +2318,7 @@ def _check_expanding_structures(self, func): self.assertEqual(type(frame_result), DataFrame) def _check_expanding(self, func, static_comp, has_min_periods=True, - has_time_rule=True, - preserve_nan=True): + has_time_rule=True, preserve_nan=True): with warnings.catch_warnings(record=True): self._check_expanding_ndarray(func, static_comp, has_min_periods=has_min_periods, @@ -2188,8 +2340,7 @@ def test_rolling_max_gh6297(self): series = series.sort_index() expected = Series([1.0, 2.0, 6.0, 4.0, 5.0], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) @@ -2208,30 +2359,26 @@ def test_rolling_max_how_resample(self): # Default how should be max expected = Series([0.0, 1.0, 2.0, 3.0, 20.0], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').max() assert_series_equal(expected, x) # Now specify median (10.0) expected = Series([0.0, 1.0, 2.0, 3.0, 10.0], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').max(how='median') assert_series_equal(expected, x) # Now specify mean (4+10+20)/3 - v = (4.0+10.0+20.0)/3.0 + v = (4.0 + 10.0 + 20.0) / 3.0 expected = Series([0.0, 1.0, 2.0, 3.0, v], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').max(how='mean') assert_series_equal(expected, x) - def test_rolling_min_how_resample(self): indices = [datetime(1975, 1, i) for i in range(1, 6)] @@ -2246,8 +2393,7 @@ def test_rolling_min_how_resample(self): # Default how should be min expected = Series([0.0, 1.0, 2.0, 3.0, 4.0], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): r = series.rolling(window=1, freq='D') assert_series_equal(expected, r.min()) @@ -2266,8 +2412,7 @@ def test_rolling_median_how_resample(self): # Default how should be median expected = Series([0.0, 1.0, 2.0, 3.0, 10], - index=[datetime(1975, 1, i, 0) - for i in range(1, 6)]) + index=[datetime(1975, 1, i, 0) for i in range(1, 6)]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): x = series.rolling(window=1, freq='D').median() assert_series_equal(expected, x) @@ -2277,8 +2422,3 @@ def test_rolling_median_memory_error(self): n = 20000 Series(np.random.randn(n)).rolling(window=2, center=False).median() Series(np.random.randn(n)).rolling(window=2, center=False).median() - -if __name__ == '__main__': - import nose - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False)