From afc83606e7142633b47b4e7b848d17844a32918f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Dec 2019 13:59:10 -0800 Subject: [PATCH 1/6] TST: test for 6051 read_csv with multiindex columns --- pandas/tests/io/parser/test_header.py | 31 ++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 0ecd8be7ddc78..70a0781dee258 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -11,7 +11,7 @@ from pandas.errors import ParserError -from pandas import DataFrame, Index, MultiIndex +from pandas import DataFrame, Index, MultiIndex, read_csv import pandas.util.testing as tm @@ -540,3 +540,32 @@ def test_multi_index_unnamed(all_parsers, index_col, columns): columns = MultiIndex.from_tuples(zip(exp_columns, ["0", "1"])) expected = DataFrame([[2, 3], [4, 5]], columns=columns) tm.assert_frame_equal(result, expected) + + +def test_read_csv_multiindex_columns(): + # GH#6051 + s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81" + s2 = ( + "Male, Male, Male, Female, Female\n" + "R, R, L, R, R\n" + ".86, .67, .88, .78, .81\n" + ".86, .67, .88, .78, .82" + ) + + mi = MultiIndex.from_tuples( + [ + ("Male", "R"), + (" Male", " R"), + (" Male", " L"), + (" Female", " R"), + (" Female", " R.1"), + ] + ) + expected = DataFrame( + [[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi + ) + + df1 = read_csv(StringIO(s1), header=[0, 1]) + tm.assert_frame_equal(df1, expected.iloc[:1]) + df2 = read_csv(StringIO(s2), header=[0, 1]) + tm.assert_frame_equal(df2, expected) From db4ae907d118c36d2656086b37eeb55f491f831c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 16 Dec 2019 15:33:03 -0800 Subject: [PATCH 2/6] TST: test for #9232 --- pandas/tests/frame/test_constructors.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ad6e0c963e730..d0179e094eab7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -42,6 +42,19 @@ class TestDataFrameConstructors: + def test_series_with_name_not_matching_column(self): + # GH#9232 + x = pd.Series(range(5), name=1) + y = pd.Series(range(5), name=0) + + result = pd.DataFrame(x, columns=[0]) + expected = pd.DataFrame([], columns=[0]) + tm.assert_frame_equal(result, expected) + + result = pd.DataFrame(y, columns=[1]) + expected = pd.DataFrame([], columns=[1]) + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "constructor", [ From 74fc0cc39fcaef1c162006268d1416d38190e260 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Dec 2019 11:05:55 -0800 Subject: [PATCH 3/6] TST: tests for needs-test issues #12857, #12689 --- pandas/tests/frame/test_arithmetic.py | 7 +++++++ pandas/tests/frame/test_repr_info.py | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f6e203afb0898..f303838b35811 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -16,6 +16,13 @@ class TestFrameComparisons: # Specifically _not_ flex-comparisons + def test_frame_in_list(self): + # GH#12689 this should raise at the DataFrame level, not blocks + df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD')) + msg = "The truth value of a DataFrame is ambiguous" + with pytest.raises(ValueError, match=msg): + df in [None] + def test_comparison_invalid(self): def check(df, df2): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 318b1c6add91e..2ec0d21e6d65f 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -1,5 +1,6 @@ from datetime import datetime, timedelta from io import StringIO +import random import re import sys import textwrap @@ -27,6 +28,17 @@ class TestDataFrameReprInfoEtc: + + def test_repr_bytes_61_lines(self): + # GH#12857 + lets = 'ACDEFGHIJKLMNOP' + slen = 50 + nseqs = 1000 + words = [[random.choice(lets) for x in range(slen)] for _ in range(nseqs)] + df = pd.DataFrame(words).astype('S1') + assert (df.dtypes == 'S1').all() + repr(df) # smoke test + def test_repr_empty(self): # empty foo = repr(DataFrame()) # noqa From 0f3ad2fe66377bd10b7406735301203803365ceb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Dec 2019 11:10:30 -0800 Subject: [PATCH 4/6] black fixup --- pandas/tests/frame/test_arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index f303838b35811..5ecbe21d113b5 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -18,7 +18,7 @@ class TestFrameComparisons: def test_frame_in_list(self): # GH#12689 this should raise at the DataFrame level, not blocks - df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD')) + df = pd.DataFrame(np.random.randn(6, 4), columns=list("ABCD")) msg = "The truth value of a DataFrame is ambiguous" with pytest.raises(ValueError, match=msg): df in [None] From bb7ef54028ae70ae2a5f5bb5bbbf125f72489fe7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 18 Dec 2019 12:40:42 -0800 Subject: [PATCH 5/6] blackify --- pandas/tests/frame/test_repr_info.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 2ec0d21e6d65f..64face38ed929 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -28,15 +28,14 @@ class TestDataFrameReprInfoEtc: - def test_repr_bytes_61_lines(self): # GH#12857 - lets = 'ACDEFGHIJKLMNOP' + lets = "ACDEFGHIJKLMNOP" slen = 50 nseqs = 1000 words = [[random.choice(lets) for x in range(slen)] for _ in range(nseqs)] - df = pd.DataFrame(words).astype('S1') - assert (df.dtypes == 'S1').all() + df = pd.DataFrame(words).astype("S1") + assert (df.dtypes == "S1").all() repr(df) # smoke test def test_repr_empty(self): From 036a67280c7a44ef2e5e6d394e6ef3cbd6e34dc4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 20 Dec 2019 13:34:45 -0800 Subject: [PATCH 6/6] use all_parsers fixture --- pandas/tests/frame/test_repr_info.py | 11 ----------- pandas/tests/io/parser/test_header.py | 10 ++++++---- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 64face38ed929..318b1c6add91e 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -1,6 +1,5 @@ from datetime import datetime, timedelta from io import StringIO -import random import re import sys import textwrap @@ -28,16 +27,6 @@ class TestDataFrameReprInfoEtc: - def test_repr_bytes_61_lines(self): - # GH#12857 - lets = "ACDEFGHIJKLMNOP" - slen = 50 - nseqs = 1000 - words = [[random.choice(lets) for x in range(slen)] for _ in range(nseqs)] - df = pd.DataFrame(words).astype("S1") - assert (df.dtypes == "S1").all() - repr(df) # smoke test - def test_repr_empty(self): # empty foo = repr(DataFrame()) # noqa diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 70a0781dee258..214b93b6f0628 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -11,7 +11,7 @@ from pandas.errors import ParserError -from pandas import DataFrame, Index, MultiIndex, read_csv +from pandas import DataFrame, Index, MultiIndex import pandas.util.testing as tm @@ -542,8 +542,10 @@ def test_multi_index_unnamed(all_parsers, index_col, columns): tm.assert_frame_equal(result, expected) -def test_read_csv_multiindex_columns(): +def test_read_csv_multiindex_columns(all_parsers): # GH#6051 + parser = all_parsers + s1 = "Male, Male, Male, Female, Female\nR, R, L, R, R\n.86, .67, .88, .78, .81" s2 = ( "Male, Male, Male, Female, Female\n" @@ -565,7 +567,7 @@ def test_read_csv_multiindex_columns(): [[0.86, 0.67, 0.88, 0.78, 0.81], [0.86, 0.67, 0.88, 0.78, 0.82]], columns=mi ) - df1 = read_csv(StringIO(s1), header=[0, 1]) + df1 = parser.read_csv(StringIO(s1), header=[0, 1]) tm.assert_frame_equal(df1, expected.iloc[:1]) - df2 = read_csv(StringIO(s2), header=[0, 1]) + df2 = parser.read_csv(StringIO(s2), header=[0, 1]) tm.assert_frame_equal(df2, expected)