diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4cf7c8013aa2b..129ac6b06205c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -99,9 +99,9 @@ def max_value(group): applied = df.groupby('A').apply(max_value) result = applied.get_dtype_counts().sort_values() - expected = Series({'object': 2, - 'float64': 2, - 'int64': 1}).sort_values() + expected = Series({'float64': 2, + 'int64': 1, + 'object': 2}).sort_values() assert_series_equal(result, expected) def test_groupby_return_type(self): @@ -244,7 +244,7 @@ def func_with_no_date(batch): return pd.Series({'c': 2}) def func_with_date(batch): - return pd.Series({'c': 2, 'b': datetime(2015, 1, 1)}) + return pd.Series({'b': datetime(2015, 1, 1), 'c': 2}) dfg_no_conversion = df.groupby(by=['a']).apply(func_with_no_date) dfg_no_conversion_expected = pd.DataFrame({'c': 2}, index=[1]) @@ -1628,8 +1628,8 @@ def f(g): def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 - df = DataFrame({'foo1': ['one', 'two', 'two', 'three', 'one', 'two'], - 'foo2': np.random.randn(6)}) + df = DataFrame({'foo1': np.random.randn(6), + 'foo2': ['one', 'two', 'two', 'three', 'one', 'two']}) result = df.apply(lambda x: x, axis=1) assert_series_equal(df.get_dtype_counts(), result.get_dtype_counts()) @@ -2113,10 +2113,10 @@ def test_multifunc_sum_bug(self): def test_handle_dict_return_value(self): def f(group): - return {'min': group.min(), 'max': group.max()} + return {'max': group.max(), 'min': group.min()} def g(group): - return Series({'min': group.min(), 'max': group.max()}) + return Series({'max': group.max(), 'min': group.min()}) result = self.df.groupby('A')['C'].apply(f) expected = self.df.groupby('A')['C'].apply(g) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 4159d0f709a13..1be7dfdcc64e6 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -519,7 +519,9 @@ def test_cython_transform_frame(self, op, args, targop): 'timedelta': pd.timedelta_range(1, freq='s', periods=1000), 'string': strings * 50, - 'string_missing': strings_missing * 50}) + 'string_missing': strings_missing * 50}, + columns=['float', 'float_missing', 'int', 'datetime', + 'timedelta', 'string', 'string_missing']) df['cat'] = df['string'].astype('category') df2 = df.copy() @@ -552,7 +554,9 @@ def test_cython_transform_frame(self, op, args, targop): tm.assert_frame_equal(expected, gb.transform(op, *args).sort_index( axis=1)) - tm.assert_frame_equal(expected, getattr(gb, op)(*args)) + tm.assert_frame_equal( + expected, + getattr(gb, op)(*args).sort_index(axis=1)) # individual columns for c in df: if c not in ['float', 'int', 'float_missing' diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index 3f71e673a4ffe..c84576c984525 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -53,13 +53,15 @@ def test_ix_loc_setitem_consistency(self): # GH 8607 # ix setitem consistency - df = DataFrame({'timestamp': [1413840976, 1413842580, 1413760580], - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) - expected = DataFrame({'timestamp': pd.to_datetime( - [1413840976, 1413842580, 1413760580], unit='s'), - 'delta': [1174, 904, 161], - 'elapsed': [7673, 9277, 1470]}) + df = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': [1413840976, 1413842580, 1413760580]}) + expected = DataFrame({'delta': [1174, 904, 161], + 'elapsed': [7673, 9277, 1470], + 'timestamp': pd.to_datetime( + [1413840976, 1413842580, 1413760580], + unit='s') + }) df2 = df.copy() df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s') diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index dddba5b425c3b..03c071dbe4bc5 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -539,8 +539,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', 'ddd']) expected = (u" b あああああ\na あ 1\n" u"bb いいい 222\nc う 33333\n" @@ -647,8 +647,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # column name - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'う', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'う', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', 'ddd']) expected = (u" b あああああ\n" u"a あ 1\n" @@ -733,8 +733,8 @@ def test_east_asian_unicode_frame(self): assert _rep(df) == expected # ambiguous unicode - df = DataFrame({u'あああああ': [1, 222, 33333, 4], - 'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']}, + df = DataFrame({'b': [u'あ', u'いいい', u'¡¡', u'ええええええ'], + u'あああああ': [1, 222, 33333, 4]}, index=['a', 'bb', 'c', '¡¡¡']) expected = (u" b あああああ\n" u"a あ 1\n" diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index f266a8b3a3268..5ebf196be094e 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -115,17 +115,18 @@ def test_to_latex_empty(self): assert result == expected def test_to_latex_with_formatters(self): - df = DataFrame({'int': [1, 2, 3], + df = DataFrame({'datetime64': [datetime(2016, 1, 1), + datetime(2016, 2, 5), + datetime(2016, 3, 3)], 'float': [1.0, 2.0, 3.0], + 'int': [1, 2, 3], 'object': [(1, 2), True, False], - 'datetime64': [datetime(2016, 1, 1), - datetime(2016, 2, 5), - datetime(2016, 3, 3)]}) + }) - formatters = {'int': lambda x: '0x{x:x}'.format(x=x), + formatters = {'datetime64': lambda x: x.strftime('%Y-%m'), 'float': lambda x: '[{x: 4.1f}]'.format(x=x), + 'int': lambda x: '0x{x:x}'.format(x=x), 'object': lambda x: '-{x!s}-'.format(x=x), - 'datetime64': lambda x: x.strftime('%Y-%m'), '__index__': lambda x: 'index: {x}'.format(x=x)} result = df.to_latex(formatters=dict(formatters)) @@ -347,10 +348,10 @@ def test_to_latex_escape(self): a = 'a' b = 'b' - test_dict = {u('co^l1'): {a: "a", - b: "b"}, - u('co$e^x$'): {a: "a", - b: "b"}} + test_dict = {u('co$e^x$'): {a: "a", + b: "b"}, + u('co^l1'): {a: "a", + b: "b"}} unescaped_result = DataFrame(test_dict).to_latex(escape=False) escaped_result = DataFrame(test_dict).to_latex( diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index a72744e08fa7c..7e497c395266f 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -553,7 +553,7 @@ def __str__(self): def test_label_overflow(self): # GH14256: buffer length not checked when writing label - df = pd.DataFrame({'foo': [1337], 'bar' * 100000: [1]}) + df = pd.DataFrame({'bar' * 100000: [1], 'foo': [1337]}) assert df.to_json() == \ '{{"{bar}":{{"0":1}},"foo":{{"0":1337}}}}'.format( bar=('bar' * 100000)) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 101d34ebdb89f..5dca45c8dd8bb 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -588,18 +588,18 @@ def test_merge_on_datetime64tz(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.date_range('20151010', periods=2, - tz='US/Eastern'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.date_range('20151011', periods=2, - tz='US/Eastern'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.date_range('20151010', periods=2, + tz='US/Eastern')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.date_range('20151011', periods=2, + tz='US/Eastern')}) expected = DataFrame({ + 'key': [1, 2, 3], 'value_x': list(pd.date_range('20151010', periods=2, tz='US/Eastern')) + [pd.NaT], 'value_y': [pd.NaT] + list(pd.date_range('20151011', periods=2, - tz='US/Eastern')), - 'key': [1, 2, 3]}) + tz='US/Eastern'))}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) assert result['value_x'].dtype == 'datetime64[ns, US/Eastern]' @@ -632,18 +632,18 @@ def test_merge_on_periods(self): result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - left = pd.DataFrame({'value': pd.period_range('20151010', periods=2, - freq='D'), - 'key': [1, 2]}) - right = pd.DataFrame({'value': pd.period_range('20151011', periods=2, - freq='D'), - 'key': [2, 3]}) + left = pd.DataFrame({'key': [1, 2], + 'value': pd.period_range('20151010', periods=2, + freq='D')}) + right = pd.DataFrame({'key': [2, 3], + 'value': pd.period_range('20151011', periods=2, + freq='D')}) exp_x = pd.period_range('20151010', periods=2, freq='D') exp_y = pd.period_range('20151011', periods=2, freq='D') - expected = DataFrame({'value_x': list(exp_x) + [pd.NaT], - 'value_y': [pd.NaT] + list(exp_y), - 'key': [1, 2, 3]}) + expected = DataFrame({'key': [1, 2, 3], + 'value_x': list(exp_x) + [pd.NaT], + 'value_y': [pd.NaT] + list(exp_y)}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) assert result['value_x'].dtype == 'object' @@ -651,12 +651,13 @@ def test_merge_on_periods(self): def test_indicator(self): # PR #10054. xref #7412 and closes #8790. - df1 = DataFrame({'col1': [0, 1], 'col_left': [ - 'a', 'b'], 'col_conflict': [1, 2]}) + df1 = DataFrame({'col1': [0, 1], 'col_conflict': [1, 2], + 'col_left': ['a', 'b']}) df1_copy = df1.copy() - df2 = DataFrame({'col1': [1, 2, 3, 4, 5], 'col_right': [2, 2, 2, 2, 2], - 'col_conflict': [1, 2, 3, 4, 5]}) + df2 = DataFrame({'col1': [1, 2, 3, 4, 5], + 'col_conflict': [1, 2, 3, 4, 5], + 'col_right': [2, 2, 2, 2, 2]}) df2_copy = df2.copy() df_result = DataFrame({ diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 31c484a483d18..42d8eb7273ee1 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -83,9 +83,10 @@ def test_empty_sequence_concat(self): pd.concat([pd.DataFrame(), None]) def test_doc_example(self): - left = DataFrame({'key': ['a', 'c', 'e', 'a', 'c', 'e'], + left = DataFrame({'group': list('aaabbb'), + 'key': ['a', 'c', 'e', 'a', 'c', 'e'], 'lvalue': [1, 2, 3] * 2, - 'group': list('aaabbb')}) + }) right = DataFrame({'key': ['b', 'c', 'd'], 'rvalue': [1, 2, 3]}) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 7e126dd56775b..11c3b733422cf 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1542,10 +1542,10 @@ def test_concat_bug_2972(self): def test_concat_bug_3602(self): # GH 3602, duplicate columns - df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'stringvar': [ - 'rrr', 'rrr', 'rrr', 'rrr'], 'prc': [6, 6, 6, 6]}) - df2 = DataFrame({'misc': [1, 2, 3, 4], 'prc': [ - 6, 6, 6, 6], 'C': [9, 10, 11, 12]}) + df1 = DataFrame({'firmNo': [0, 0, 0, 0], 'prc': [6, 6, 6, 6], + 'stringvar': ['rrr', 'rrr', 'rrr', 'rrr']}) + df2 = DataFrame({'C': [9, 10, 11, 12], 'misc': [1, 2, 3, 4], + 'prc': [6, 6, 6, 6]}) expected = DataFrame([[0, 6, 'rrr', 9, 1, 6], [0, 6, 'rrr', 10, 2, 6], [0, 6, 'rrr', 11, 3, 6], diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index b7422dfd7e911..000b22d4fdd36 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -589,11 +589,11 @@ def test_nonnumeric_suffix(self): def test_mixed_type_suffix(self): df = pd.DataFrame({ - 'treatment_1': [1.0, 2.0], - 'treatment_foo': [3.0, 4.0], - 'result_foo': [5.0, 6.0], + 'A': ['X1', 'X2'], 'result_1': [0, 9], - 'A': ['X1', 'X2']}) + 'result_foo': [5.0, 6.0], + 'treatment_1': [1.0, 2.0], + 'treatment_foo': [3.0, 4.0]}) expected = pd.DataFrame({ 'A': ['X1', 'X2', 'X1', 'X2'], 'colname': ['1', '1', 'foo', 'foo'], diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index a57c3c41b3637..c4d925b83585b 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -100,8 +100,8 @@ def test_basic_types(self, sparse, dtype): expected_counts = {'int64': 1, 'object': 1} expected_counts[dtype_name] = 3 + expected_counts.get(dtype_name, 0) - expected = Series(expected_counts).sort_values() - tm.assert_series_equal(result.get_dtype_counts().sort_values(), + expected = Series(expected_counts).sort_index() + tm.assert_series_equal(result.get_dtype_counts().sort_index(), expected) def test_just_na(self, sparse): @@ -212,10 +212,10 @@ def test_dataframe_dummies_prefix_str(self, df, sparse): def test_dataframe_dummies_subset(self, df, sparse): result = get_dummies(df, prefix=['from_A'], columns=['A'], sparse=sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], - 'from_A_b': [0, 1, 0], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}, dtype=np.uint8) + expected = DataFrame({'B': ['b', 'b', 'c'], + 'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], + 'from_A_b': [0, 1, 0]}, dtype=np.uint8) expected[['C']] = df[['C']] assert_frame_equal(result, expected) @@ -249,16 +249,16 @@ def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse): def test_dataframe_dummies_prefix_dict(self, sparse): prefixes = {'A': 'from_A', 'B': 'from_B'} - df = DataFrame({'A': ['a', 'b', 'a'], - 'B': ['b', 'b', 'c'], - 'C': [1, 2, 3]}) + df = DataFrame({'C': [1, 2, 3], + 'A': ['a', 'b', 'a'], + 'B': ['b', 'b', 'c']}) result = get_dummies(df, prefix=prefixes, sparse=sparse) - expected = DataFrame({'from_A_a': [1, 0, 1], + expected = DataFrame({'C': [1, 2, 3], + 'from_A_a': [1, 0, 1], 'from_A_b': [0, 1, 0], 'from_B_b': [1, 1, 0], - 'from_B_c': [0, 0, 1], - 'C': [1, 2, 3]}) + 'from_B_c': [0, 0, 1]}) columns = ['from_A_a', 'from_A_b', 'from_B_b', 'from_B_c'] expected[columns] = expected[columns].astype(np.uint8)