From aaf4e4260f603b3ed3eb3bb9774dea585ac098f0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 17 May 2020 11:41:48 +0100 Subject: [PATCH 01/15] quote string elements --- pandas/io/formats/format.py | 2 ++ pandas/tests/arrays/categorical/test_repr.py | 30 +++++++++++++------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 02339f4344d4d..68eeda431b9b4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1237,6 +1237,8 @@ def _format(x): fmt_values.append(f" {_format(v)}") elif is_float_type[i]: fmt_values.append(float_format(v)) + elif isinstance(v, str): + fmt_values.append(f"'{v}'") else: if leading_space is False: # False specifically, so that the default is diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index d08c4b47dd3cb..3164b54653efd 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -14,7 +14,10 @@ class TestCategoricalReprWithFactor(TestCategorical): def test_print(self): - expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + "Categories (3, object): ['a' < 'b' < 'c']", + ] expected = "\n".join(expected) actual = repr(self.factor) assert actual == expected @@ -24,9 +27,9 @@ class TestCategoricalRepr: def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) expected = [ - "[a, b, c, a, b, ..., b, c, a, b, c]", + "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", "Length: 600", - "Categories (3, object): [a, b, c]", + "Categories (3, object): ['a', 'b', 'c']", ] expected = "\n".join(expected) @@ -36,13 +39,13 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) - expected = "[], Categories (3, object): [a, b, c]" + expected = "[], Categories 
(3, object): ['a', 'b', 'c']" actual = repr(factor) assert actual == expected assert expected == actual factor = Categorical([], ["a", "b", "c"], ordered=True) - expected = "[], Categories (3, object): [a < b < c]" + expected = "[], Categories (3, object): ['a' < 'b' < 'c']" actual = repr(factor) assert expected == actual @@ -64,17 +67,17 @@ def test_print_none_width(self): def test_unicode_print(self): c = Categorical(["aaaaa", "bb", "cccc"] * 20) expected = """\ -[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] Length: 60 -Categories (3, object): [aaaaa, bb, cccc]""" +Categories (3, object): ['aaaaa', 'bb', 'cccc']""" assert repr(c) == expected c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) expected = """\ -[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected @@ -83,9 +86,9 @@ def test_unicode_print(self): with option_context("display.unicode.east_asian_width", True): c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) - expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected @@ -523,3 +526,8 @@ def test_categorical_index_repr_timedelta_ordered(self): categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa assert repr(i) == exp + + def 
test_categorical_str_repr(self): + result = repr(Categorical([1, "2", 3, 4])) + expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" + assert result == expected From 1f90a62964ca4dd3326aa5917c07efe7e7aaf275 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Sun, 17 May 2020 12:37:43 +0100 Subject: [PATCH 02/15] wip --- failures | 1584 +++++++++++++++++ pandas/tests/arrays/string_/test_string.py | 4 +- pandas/tests/frame/test_repr_info.py | 14 +- pandas/tests/util/test_assert_series_equal.py | 8 +- 4 files changed, 1597 insertions(+), 13 deletions(-) create mode 100644 failures diff --git a/failures b/failures new file mode 100644 index 0000000000000..b30ae124e6201 --- /dev/null +++ b/failures @@ -0,0 +1,1584 @@ +_ TestIntervalIndexRendering.test_repr_missing[Series-(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c\ndtype: object] _ +[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +constructor = +expected = '(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c\ndtype: object' + + @pytest.mark.parametrize( + "constructor,expected", + [ + ( + Series, + ( + "(0.0, 1.0] a\n" + "NaN b\n" + "(2.0, 3.0] c\n" + "dtype: object" + ), + ), + (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")), + ], + ) + def test_repr_missing(self, constructor, expected): + # GH 25984 + index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) + obj = constructor(list("abc"), index=index) + result = repr(obj) +> assert result == expected +E AssertionError: assert '(0.0, 1.0] ...dtype: object' == '(0.0, 1.0] ...dtype: object' +E - (0.0, 1.0] a +E ? ^ +E + (0.0, 1.0] 'a' +E ? ^ + +E - NaN b +E ? ^ +E + NaN 'b'... 
+E +E ...Full output truncated (7 lines hidden), use '-vv' to show + +pandas/tests/indexes/interval/test_formats.py:38: AssertionError +_ TestIntervalIndexRendering.test_repr_missing[DataFrame- 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c] _ +[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +constructor = +expected = ' 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c' + + @pytest.mark.parametrize( + "constructor,expected", + [ + ( + Series, + ( + "(0.0, 1.0] a\n" + "NaN b\n" + "(2.0, 3.0] c\n" + "dtype: object" + ), + ), + (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")), + ], + ) + def test_repr_missing(self, constructor, expected): + # GH 25984 + index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) + obj = constructor(list("abc"), index=index) + result = repr(obj) +> assert result == expected +E assert " ...2.0, 3.0] 'c'" == ' ...(2.0, 3.0] c' +E - 0 +E + 0 +E ? + +E - (0.0, 1.0] a +E ? ^ +E + (0.0, 1.0] 'a' +E ? ^ +... +E +E ...Full output truncated (9 lines hidden), use '-vv' to show + +pandas/tests/indexes/interval/test_formats.py:38: AssertionError +_________________ TestReadHtml.test_multiple_header_rows[bs4] __________________ +[gw2] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame( + data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] + ) + expected_df.columns = [ + ["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], + ] + html = expected_df.to_html(index=False) + html_df = self.read_html(html)[0] +> tm.assert_frame_equal(expected_df, html_df) + +pandas/tests/io/test_html.py:1079: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal + cpdef assert_almost_equal(a, b, +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +> 
raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) +E AssertionError: DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") are different +E +E DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") values are different (100.0 %) +E [index]: [0, 1, 2] +E [left]: [Hillary, Bernie, Donald] +E [right]: ['Hillary', 'Bernie', 'Donald'] + +pandas/_libs/testing.pyx:183: AssertionError +_________________ TestReadHtml.test_multiple_header_rows[lxml] _________________ +[gw2] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_multiple_header_rows(self): + # Issue #13434 + expected_df = DataFrame( + data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] + ) + expected_df.columns = [ + ["Unnamed: 0_level_0", "Age", "Party"], + ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], + ] + html = expected_df.to_html(index=False) + html_df = self.read_html(html)[0] +> tm.assert_frame_equal(expected_df, html_df) + +pandas/tests/io/test_html.py:1079: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal + cpdef assert_almost_equal(a, b, +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +> raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) +E AssertionError: DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") are different +E +E DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") values are different (100.0 %) +E [index]: [0, 1, 2] +E [left]: [Hillary, Bernie, Donald] +E [right]: ['Hillary', 'Bernie', 'Donald'] + +pandas/_libs/testing.pyx:183: AssertionError +_________________ TestDataFrameFormatting.test_repr_truncation _________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_repr_truncation(self): + max_len = 20 + with 
option_context("display.max_colwidth", max_len): + df = DataFrame( + { + "A": np.random.randn(10), + "B": [ + tm.rands(np.random.randint(max_len - 1, max_len + 1)) + for i in range(10) + ], + } + ) + r = repr(df) + r = r[r.find("\n") + 1 :] + + adj = fmt._get_adjustment() + + for line, value in zip(r.split("\n"), df["B"]): + if adj.len(value) + 1 > max_len: + assert "..." in line + else: +> assert "..." not in line +E AssertionError: assert '...' not in '0 -0.426290...7OEA0dVWe...' +E '...' is contained here: +E 0 -0.426290 '2fAo1sX7OEA0dVWe... +E ? +++ + +pandas/tests/io/formats/test_format.py:234: AssertionError +________________ TestDataFrameFormatting.test_str_max_colwidth _________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_str_max_colwidth(self): + # GH 7856 + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "uncomfortably long line with lots of stuff", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) +> assert str(df) == ( + " a b c d\n" + "0 foo bar uncomfortably long line with lots of stuff 1\n" + "1 foo bar stuff 1" + ) +E assert " a ... 'stuff' 1" == ' a b ... stuff 1' +E - a b c d +E + a b c d +E ? + + + +E - 0 foo bar uncomfortably long line with lots of stuff 1 +E ? ^ ^^ ^^ +E + 0 'foo' 'bar' 'uncomfortably long line with lots of stuff' 1 +E ? ^ ^^^ ^^^ +... 
+E +E ...Full output truncated (3 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:524: AssertionError +_______________ TestDataFrameFormatting.test_to_string_truncate ________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_string_truncate(self): + # GH 9784 - dont truncate when calling DataFrame.to_string + df = pd.DataFrame( + [ + { + "a": "foo", + "b": "bar", + "c": "let's make this a very VERY long line that is longer " + "than the default 50 character limit", + "d": 1, + }, + {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, + ] + ) + df.set_index(["a", "b", "c"]) +> assert df.to_string() == ( + " a b " + " c d\n" + "0 foo bar let's make this a very VERY long line t" + "hat is longer than the default 50 character limit 1\n" + "1 foo bar " + " stuff 1" + ) +E assert " a ... 'stuff' 1" == ' a b ... stuff 1' +E - a b c d +E + a b c d +E ? + + + +E - 0 foo bar let's make this a very VERY long line that is longer than the default 50 character limit 1 +E ? ^ ^^ ^^ +E + 0 'foo' 'bar' 'let's make this a very VERY long line that is longer than the default 50 character limit' 1 +E ? ^ ^^^ ^^^ ... +E +E ...Full output truncated (3 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:551: AssertionError +____________ TestDataFrameFormatting.test_east_asian_unicode_false _____________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_east_asian_unicode_false(self): + # not aligned properly because of east asian width + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) +> assert repr(df) == expected +E assert " a...えええええ' 4" == ' a ...ええええええ 4' +E - a b +E + a b +E ? + +E - a あ 1 +E ? ^ +E + a 'あ' 1 +E ? ^ +... 
+E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:737: AssertionError +_____________ TestDataFrameFormatting.test_east_asian_unicode_true _____________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_east_asian_unicode_true(self): + # Enable Unicode option ----------------------------------------- + with option_context("display.unicode.east_asian_width", True): + + # mid col + df = DataFrame( + {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, + index=["a", "bb", "c", "ddd"], + ) + expected = ( + " a b\na あ 1\n" + "bb いいい 222\nc う 33333\n" + "ddd ええええええ 4" + ) +> assert repr(df) == expected +E assert " ...えええええ' 4" == ' ...ええええええ 4' +E - a b +E + a b +E ? + +E - a あ 1 +E ? ^ +E + a 'あ' 1 +E ? ^ +... +E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:875: AssertionError +_________________ TestDataFrameFormatting.test_index_with_nan __________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_index_with_nan(self): + # GH 2850 + df = DataFrame( + { + "id1": {0: "1a3", 1: "9h4"}, + "id2": {0: np.nan, 1: "d67"}, + "id3": {0: "78d", 1: "79d"}, + "value": {0: 123, 1: 64}, + } + ) + + # multi-index + y = df.set_index(["id1", "id2", "id3"]) + result = y.to_string() + expected = ( + " value\nid1 id2 id3 \n" + "1a3 NaN 78d 123\n9h4 d67 79d 64" + ) + assert result == expected + + # index + y = df.set_index("id2") + result = y.to_string() + expected = ( + " id1 id3 value\nid2 \n" + "NaN 1a3 78d 123\nd67 9h4 79d 64" + ) +> assert result == expected +E assert " id1 ... '79d' 64" == ' id1 id...4 79d 64' +E - id1 id3 value +E + id1 id3 value +E ? + + +E - id2 +E + id2 +E ? ++ +E - NaN 1a3 78d 123... 
+E +E ...Full output truncated (8 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:1398: AssertionError +____________________ TestDataFrameFormatting.test_to_string ____________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_string(self): + + # big mixed + biggie = DataFrame( + {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, + index=np.arange(200), + ) + + biggie.loc[:20, "A"] = np.nan + biggie.loc[:20, "B"] = np.nan + s = biggie.to_string() + + buf = StringIO() + retval = biggie.to_string(buf=buf) + assert retval is None + assert buf.getvalue() == s + + assert isinstance(s, str) + + # print in right order + result = biggie.to_string( + columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ + ) + lines = result.split("\n") + header = lines[0].strip().split() + joined = "\n".join(re.sub(r"\s+", " ", x).strip() for x in lines[1:]) + recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") +> tm.assert_series_equal(recons["B"], biggie["B"]) + +pandas/tests/io/formats/test_format.py:1475: +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ +pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal + cpdef assert_almost_equal(a, b, +_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ + +> raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) +E AssertionError: Series are different +E +E Series values are different (89.5 %) +E [index]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...] 
+E [left]: [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 'ReUMMYOzRc', 'm8JaaoOinz', 'sObNzAa9Sb', 'HRKG2ackB4', '0azcKU0pMp', '2mIvVr5itT', 'rGUpLDHyCl', '82UUP3PsXe', '05Wgq2rheA', 'E5jcBVizmI', 's2BXU0YHCQ', 'frVcLBb65o', 'wAMPDhKWPK', 'ASORqAK5Jk', 'YeJ7LtATFI', 'IzEShI7kI1', 'OABJwASxEY', 'Jr8okkyRpq', 'Rxj4uDCiyu', 'gmf0SkUai3', 'HpgiaSbjR2', 'xtZWYDfYEO', 'VUzBhy7urU', 'uLUGvTFnqx', 'FZx3FplLeC', 'U9hpEhF5ss', 'TpJdnJ7Nzd', '1phREHhRsM', 'NssXhnFpKV', 'hEqW5irrLk', 'Twuj74zdTH', '7ltutV1O47', '2Ipsj424K8', 'G3dbMMYkgz', 'rxAsSNQ8qn', 'RqrQj2Ozfo', 'A07eRJDOxX', 'x0Y1kwZMv2', 'k6jyXkGdWb', 'Yl5NkpyVKl', 'C03IGiW6zr', 'mVbiXcVTtM', 'VOmlVUmdh0', '481dbvfltM', 'lwNNDPVPSI', 'rha2YAX39o', 'zJzMvKWl7A', 'cWoM5F49zZ', 'zMqfkqXpdR', '7pvDnY8NR5', 'J8XExS3CMA', '5WNzD0xErD', 'cy9DDrahxY', 'IEgi5viuyF', 'hs8VgWm3C6', 'KP458GphgF', 'TO8oGkA5NL', 'dDuScWreSw', 'kyIHeh9Vwl', 'BZuhU8x2S9', 'f7p4PNZ8Vr', 'U1EiHy397b', 'OIvNL6oxOH', 'CuaVgCNtzX', 'vsFWaKUFbS', 'byfE947n2S', 'LctPcyhLuE', 't8JOJggu2o', 'nGJhNJvSSp', 'QmYVoLdp4Y', '1lr9fM7yVS', 'kNzzRNrcrm', '6uFClL9u62', 'gaTYFoh9zk', 'GoC88dpzsj', 'OLp31oEzCo', 'hdM3S6VYVH', 'jmBW6pvJbd', 'UpU6XuGjCP', ...] 
+E [right]: [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, ReUMMYOzRc, m8JaaoOinz, sObNzAa9Sb, HRKG2ackB4, 0azcKU0pMp, 2mIvVr5itT, rGUpLDHyCl, 82UUP3PsXe, 05Wgq2rheA, E5jcBVizmI, s2BXU0YHCQ, frVcLBb65o, wAMPDhKWPK, ASORqAK5Jk, YeJ7LtATFI, IzEShI7kI1, OABJwASxEY, Jr8okkyRpq, Rxj4uDCiyu, gmf0SkUai3, HpgiaSbjR2, xtZWYDfYEO, VUzBhy7urU, uLUGvTFnqx, FZx3FplLeC, U9hpEhF5ss, TpJdnJ7Nzd, 1phREHhRsM, NssXhnFpKV, hEqW5irrLk, Twuj74zdTH, 7ltutV1O47, 2Ipsj424K8, G3dbMMYkgz, rxAsSNQ8qn, RqrQj2Ozfo, A07eRJDOxX, x0Y1kwZMv2, k6jyXkGdWb, Yl5NkpyVKl, C03IGiW6zr, mVbiXcVTtM, VOmlVUmdh0, 481dbvfltM, lwNNDPVPSI, rha2YAX39o, zJzMvKWl7A, cWoM5F49zZ, zMqfkqXpdR, 7pvDnY8NR5, J8XExS3CMA, 5WNzD0xErD, cy9DDrahxY, IEgi5viuyF, hs8VgWm3C6, KP458GphgF, TO8oGkA5NL, dDuScWreSw, kyIHeh9Vwl, BZuhU8x2S9, f7p4PNZ8Vr, U1EiHy397b, OIvNL6oxOH, CuaVgCNtzX, vsFWaKUFbS, byfE947n2S, LctPcyhLuE, t8JOJggu2o, nGJhNJvSSp, QmYVoLdp4Y, 1lr9fM7yVS, kNzzRNrcrm, 6uFClL9u62, gaTYFoh9zk, GoC88dpzsj, OLp31oEzCo, hdM3S6VYVH, jmBW6pvJbd, UpU6XuGjCP, ...] + +pandas/_libs/testing.pyx:183: AssertionError +_______________ TestDataFrameFormatting.test_to_string_no_index ________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_string_no_index(self): + # GH 16839, GH 13032 + df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) + + df_s = df.to_string(index=False) + # Leading space is expected for positive numbers. + expected = " x y z\n 11 33 AAA\n 22 -44 " +> assert df_s == expected +E assert " x y ... 22 -44 ' '" == ' x y z...n 22 -44 ' +E - x y z +E + x y z +E ? + +E - 11 33 AAA +E ? ^ +E + 11 33 'AAA' +E ? ^ +... 
+E +E ...Full output truncated (3 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:1522: AssertionError +_______________ TestDataFrameFormatting.test_to_string_format_na _______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_string_format_na(self): + tm.reset_display_options() + df = DataFrame( + { + "A": [np.nan, -1, -2.1234, 3, 4], + "B": [np.nan, "foo", "foooo", "fooooo", "bar"], + } + ) + result = df.to_string() + + expected = ( + " A B\n" + "0 NaN NaN\n" + "1 -1.0000 foo\n" + "2 -2.1234 foooo\n" + "3 3.0000 fooooo\n" + "4 4.0000 bar" + ) +> assert result == expected +E assert " A ...0000 'bar'" == ' A ....0000 bar' +E - A B +E + A B +E ? + +E - 0 NaN NaN +E + 0 NaN NaN +E ? + +E - 1 -1.0000 foo... +E +E ...Full output truncated (16 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:1731: AssertionError +_____________________ TestDataFrameFormatting.test_period ______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_period(self): + # GH 12615 + df = pd.DataFrame( + { + "A": pd.period_range("2013-01", periods=4, freq="M"), + "B": [ + pd.Period("2011-01", freq="M"), + pd.Period("2011-02-01", freq="D"), + pd.Period("2011-03-01 09:00", freq="H"), + pd.Period("2011-04", freq="M"), + ], + "C": list("abcd"), + } + ) + exp = ( + " A B C\n" + "0 2013-01 2011-01 a\n" + "1 2013-02 2011-02-01 b\n" + "2 2013-03 2011-03-01 09:00 c\n" + "3 2013-04 2011-04 d" + ) +> assert str(df) == exp +E assert " A ... 2011-04 'd'" == ' A ... 2011-04 d' +E - A B C +E + A B C +E ? + +E - 0 2013-01 2011-01 a +E ? ^ +E + 0 2013-01 2011-01 'a' +E ? ^ +... 
+E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:2138: AssertionError +__________________ TestSeriesFormatting.test_to_string_mixed ___________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_string_mixed(self): + s = Series(["foo", np.nan, -1.23, 4.56]) + result = s.to_string() + expected = "0 foo\n" + "1 NaN\n" + "2 -1.23\n" + "3 4.56" + assert result == expected + + # but don't count NAs as floats + s = Series(["foo", np.nan, "bar", "baz"]) + result = s.to_string() + expected = "0 foo\n" + "1 NaN\n" + "2 bar\n" + "3 baz" +> assert result == expected +E assert "0 'foo'\n1...r'\n3 'baz'" == '0 foo\n1 ...bar\n3 baz' +E - 0 foo +E ? ^ +E + 0 'foo' +E ? ^ + +E - 1 NaN +E + 1 NaN +E ? +... +E +E ...Full output truncated (9 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:2210: AssertionError +_____________ TestSeriesFormatting.test_east_asian_unicode_series ______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_east_asian_unicode_series(self): + # not aligned properly because of east asian width + + # unicode index + s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) + expected = "あ a\nいい bb\nううう CCC\nええええ D\ndtype: object" +> assert repr(s) == expected +E assert "あ 'a'...dtype: object" == 'あ a\...dtype: object' +E - あ a +E ? ^ +E + あ 'a' +E ? ^ + +E - いい bb +E ? ^ +E + いい 'bb'... 
+E +E ...Full output truncated (11 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:2249: AssertionError +__________________ TestSeriesFormatting.test_format_explicit ___________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_format_explicit(self): + test_sers = gen_series_formatting() + with option_context("display.max_rows", 4, "display.show_dimensions", False): + res = repr(test_sers["onel"]) + exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object" +> assert exp == res +E assert '0 a\n1 ...dtype: object' == "0 'a'\n1 ...dtype: object" +E - 0 'a' +E ? ^ - +E + 0 a +E ? ^ +E - 1 'a' +E ? ^ - +E + 1 a... +E +E ...Full output truncated (14 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_format.py:2666: AssertionError +_____________________ test_to_html_unicode[df1-unicode_2] ______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +df = A +0 'σ' +expected = '\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0σ
' +datapath = .deco at 0x7f3c3989a1f0> + + @pytest.mark.parametrize( + "df,expected", + [ + (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"), + (DataFrame({"A": ["\u03c3"]}), "unicode_2"), + ], + ) + def test_to_html_unicode(df, expected, datapath): + expected = expected_html(datapath, expected) + result = df.to_html() +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 173 identical leading characters in diff, use -v to show +E - σ +E + 'σ' +E ? + + +E +E +E + +pandas/tests/io/formats/test_to_html.py:99: AssertionError +______________ test_to_html_escaped[kwargs0--escaped] ______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +kwargs = {}, string = "" +expected = '\n \n \n \n \n \n \n \n \n
co&l...p;amp;<type \'str\'><type \'str\'>
' +datapath = .deco at 0x7f3c33ab65e0> + + @pytest.mark.parametrize( + "kwargs,string,expected", + [ + (dict(), "", "escaped"), + (dict(escape=False), "bold", "escape_disabled"), + ], + ) + def test_to_html_escaped(kwargs, string, expected, datapath): + a = "strl2": {a: string, b: string}} + result = DataFrame(test_dict).to_html(**kwargs) + expected = expected_html(datapath, expected) +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 224 identical leading characters in diff, use -v to show +E - <type 'str'> +E + '<type 'str'>' +E ? + + +E - <type 'str'> +E + '<type 'str'>' +E ? + +... +E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:132: AssertionError +__________ test_to_html_escaped[kwargs1-bold-escape_disabled] ___________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +kwargs = {'escape': False}, string = 'bold' +expected = '\n \n \n \n \n \n \n \n \n
costri>ng2 &boldbold
' +datapath = .deco at 0x7f3c33ab6160> + + @pytest.mark.parametrize( + "kwargs,string,expected", + [ + (dict(), "", "escaped"), + (dict(escape=False), "bold", "escape_disabled"), + ], + ) + def test_to_html_escaped(kwargs, string, expected, datapath): + a = "strl2": {a: string, b: string}} + result = DataFrame(test_dict).to_html(**kwargs) + expected = expected_html(datapath, expected) +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 211 identical leading characters in diff, use -v to show +E - bold +E + 'bold' +E ? + + +E - bold +E + 'bold' +E ? + +... +E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:132: AssertionError +_____________ test_to_html_multiindex[columns0-left-multiindex_1] ______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +columns = MultiIndex([(0, 0), + (0, 1), + (1, 0), + (1, 1)], + names=['CL0', 'CL1']) +justify = 'left' +expected = '\n \n \n \n \n \n \n \n \n \n \n
CL00<... 1efgh
' +datapath = .deco at 0x7f3c33ab6820> + + @pytest.mark.parametrize( + "columns,justify,expected", + [ + ( + MultiIndex.from_tuples( + list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), + names=["CL0", "CL1"], + ), + "left", + "multiindex_1", + ), + ( + MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), + "right", + "multiindex_2", + ), + ], + ) + def test_to_html_multiindex(columns, justify, expected, datapath): + df = DataFrame([list("abcd"), list("efgh")], columns=columns) + result = df.to_html(justify=justify) + expected = expected_html(datapath, expected) +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 324 identical leading characters in diff, use -v to show +E - a +E + 'a' +E ? + + +E - b +E + 'b' +E ? + +... +E +E ...Full output truncated (25 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:376: AssertionError +_____________ test_to_html_multiindex[columns1-right-multiindex_2] _____________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +columns = MultiIndex([(0, 0), + (1, 1), + (2, 0), + (3, 1)], + ) +justify = 'right' +expected = '\n \n \n \n \n \n ... \n \n \n \n \n \n \n
011efgh
' +datapath = .deco at 0x7f3c33ab6790> + + @pytest.mark.parametrize( + "columns,justify,expected", + [ + ( + MultiIndex.from_tuples( + list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), + names=["CL0", "CL1"], + ), + "left", + "multiindex_1", + ), + ( + MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), + "right", + "multiindex_2", + ), + ], + ) + def test_to_html_multiindex(columns, justify, expected, datapath): + df = DataFrame([list("abcd"), list("efgh")], columns=columns) + result = df.to_html(justify=justify) + expected = expected_html(datapath, expected) +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 300 identical leading characters in diff, use -v to show +E - a +E + 'a' +E ? + + +E - b +E + 'b' +E ? + +... +E +E ...Full output truncated (25 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:376: AssertionError +______________________________ test_to_html_index ______________________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +datapath = .deco at 0x7f3c33ab6a60> + + def test_to_html_index(datapath): + # TODO: split this test + index = ["foo", "bar", "baz"] + df = DataFrame( + {"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]}, + columns=["A", "B", "C"], + index=index, + ) + expected_with_index = expected_html(datapath, "index_1") +> assert df.to_html() == expected_with_index +E AssertionError: assert '\n
' == '\n
' +E Skipping 245 identical leading characters in diff, use -v to show +E - one +E + 'one' +E ? + + +E +E +E bar... +E +E ...Full output truncated (15 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:410: AssertionError +______________ test_to_html_render_links[True-render_links_true] _______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +render_links = True +expected = '\n \n \n \n \n \n \n \n \n \n
foo<...th>10www.pydata.orgpydata.org
' +datapath = .deco at 0x7f3c33a40ca0> + + @pytest.mark.parametrize( + "render_links,expected", + [(True, "render_links_true"), (False, "render_links_false")], + ) + def test_to_html_render_links(render_links, expected, datapath): + # GH 2679 + data = [ + [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], + [0, "www.pydata.org", "pydata.org"], + ] + df = DataFrame(data, columns=["foo", "bar", None]) + + result = df.to_html(render_links=render_links) + expected = expected_html(datapath, expected) +> assert result == expected +E assert '\n
' == '\n
' +E Skipping 231 identical leading characters in diff, use -v to show +E - https://pandas.pydata.org/?q1=a&q2=b +E + 'https://pandas.pydata.org/?q1=a&q2=b' +E - pydata.org +E + 'pydata.org' +E ? + + +E ... +E +E ...Full output truncated (13 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:698: AssertionError +_____________ test_to_html_render_links[False-render_links_false] ______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +render_links = False +expected = '\n \n \n \n \n \n \n \n \n \n
foo<...th>10www.pydata.orgpydata.org
' +datapath = .deco at 0x7f3c33a40b80> + + @pytest.mark.parametrize( + "render_links,expected", + [(True, "render_links_true"), (False, "render_links_false")], + ) + def test_to_html_render_links(render_links, expected, datapath): + # GH 2679 + data = [ + [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], + [0, "www.pydata.org", "pydata.org"], + ] + df = DataFrame(data, columns=["foo", "bar", None]) + + result = df.to_html(render_links=render_links) + expected = expected_html(datapath, expected) +> assert result == expected +E AssertionError: assert '\n
' == '\n
' +E Skipping 231 identical leading characters in diff, use -v to show +E - https://pandas.pydata.org/?q1=a&q2=b +E + 'https://pandas.pydata.org/?q1=a&q2=b' +E ? + + +E - pydata.org +E + 'pydata.org' +E ? + +... +E +E ...Full output truncated (14 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_html.py:698: AssertionError +__________________________ TestToLatex.test_to_latex ___________________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +float_frame = A B C D +uj3aupNrPB 0.764807 -0.195406 0.356168 -0.179633 +RKPro2v73m -2.46....453160 -0.238971 +1etvcSah90 0.276267 1.277534 0.532934 -0.552912 +R2nHvbi4bL -0.744058 -0.476803 -0.115628 -0.261460 + + def test_to_latex(self, float_frame): + # it works! + float_frame.to_latex() + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex() + withindex_expected = r"""\begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{lrl} +E \toprule +E - {} & a & b \\ +E + {} & a & b \\ +E ? + +E \midrule +E - 0 & 1 & b1 \\... 
+E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:48: AssertionError +_______________________ TestToLatex.test_to_latex_format _______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +float_frame = A B C D +Wtc0ZRSd9n 0.818695 -0.157144 -0.145023 0.203286 +bkVL7MON9p 0.09....145206 -0.713457 +CAq6YHmEYL 0.443476 1.554402 1.031790 0.715314 +gRKxa8k8At -1.380405 0.488245 1.099418 -1.594554 + + def test_to_latex_format(self, float_frame): + # GH Bug #9402 + float_frame.to_latex(column_format="ccc") + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(column_format="ccc") + withindex_expected = r"""\begin{tabular}{ccc} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ccc} +E \toprule +E - {} & a & b \\ +E + {} & a & b \\ +E ? + +E \midrule +E - 0 & 1 & b1 \\... +E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:79: AssertionError +_____________________ TestToLatex.test_to_latex_multiindex _____________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_multiindex(self): + df = DataFrame({("x", "y"): ["a"]}) + result = df.to_latex() + expected = r"""\begin{tabular}{ll} + \toprule + {} & x \\ + {} & y \\ + \midrule + 0 & a \\ + \bottomrule + \end{tabular} + """ + +> assert result == expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ll} +E \toprule +E - {} & x \\ +E + {} & x \\ +E ? + +E - {} & y \\ +E + {} & y \\... 
+E +E ...Full output truncated (9 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:152: AssertionError +_______________________ TestToLatex.test_to_latex_escape _______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_escape(self): + a = "a" + b = "b" + + test_dict = {"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}} + + unescaped_result = DataFrame(test_dict).to_latex(escape=False) + escaped_result = DataFrame(test_dict).to_latex() # default: escape=True + + unescaped_expected = r"""\begin{tabular}{lll} + \toprule + {} & co$e^x$ & co^l1 \\ + \midrule + a & a & a \\ + b & b & b \\ + \bottomrule + \end{tabular} + """ + + escaped_expected = r"""\begin{tabular}{lll} + \toprule + {} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\ + \midrule + a & a & a \\ + b & b & b \\ + \bottomrule + \end{tabular} + """ + +> assert unescaped_result == unescaped_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E Skipping 61 identical leading characters in diff, use -v to show +E e +E - a & a & a \\ +E ? ^^ ^^ +E + a & 'a' & 'a' \\ +E ? ^ + ^ + +E - b & b & b \\... 
+E +E ...Full output truncated (6 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:372: AssertionError +___________________ TestToLatex.test_to_latex_special_escape ___________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_special_escape(self): + df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"]) + + escaped_result = df.to_latex() + escaped_expected = r"""\begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & a\textbackslash b\textbackslash c \\ + 1 & \textasciicircum a\textasciicircum b\textasciicircum c \\ + 2 & \textasciitilde a\textasciitilde b\textasciitilde c \\ + \bottomrule + \end{tabular} + """ +> assert escaped_result == escaped_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ll} +E \toprule +E - {} & 0 \\ +E + {} & 0 \\ +E ? + +E \midrule +E - 0 & a\textbackslash b\textbackslash c \\... +E +E ...Full output truncated (14 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:389: AssertionError +_____________________ TestToLatex.test_to_latex_longtable ______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_longtable(self): + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(longtable=True) + withindex_expected = r"""\begin{longtable}{lrl} + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{long...{longtable}\n' == '\\begin{long...{longtable}\n' +E Skipping 34 identical leading characters in diff, use -v to show +E - & a & b \\ +E + & a & b \\ +E ? + +E \midrule +E \endhead +E \midrule... 
+E +E ...Full output truncated (16 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:411: AssertionError +___________________ TestToLatex.test_to_latex_caption_label ____________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{table/tabular} environment" + the_label = "tab:table_tabular" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(caption=the_caption) + + expected_c = r"""\begin{table} + \centering + \caption{a table in a \texttt{table/tabular} environment} + \begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + \end{table} + """ +> assert result_c == expected_c +E AssertionError: assert '\\begin{tabl...\end{table}\n' == '\\begin{tabl...\end{table}\n' +E Skipping 115 identical leading characters in diff, use -v to show +E - & a & b \\ +E + & a & b \\ +E ? + +E \midrule +E - 0 & 1 & b1 \\ +E ? ^... 
+E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:464: AssertionError +______________ TestToLatex.test_to_latex_longtable_caption_label _______________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_longtable_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{longtable} environment" + the_label = "tab:longtable" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(longtable=True, caption=the_caption) + + expected_c = r"""\begin{longtable}{lrl} + \caption{a table in a \texttt{longtable} environment}\\ + \toprule + {} & a & b \\ + \midrule + \endhead + \midrule + \multicolumn{3}{r}{{Continued on next page}} \\ + \midrule + \endfoot + + \bottomrule + \endlastfoot + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \end{longtable} + """ +> assert result_c == expected_c +E AssertionError: assert '\\begin{long...{longtable}\n' == '\\begin{long...{longtable}\n' +E Skipping 90 identical leading characters in diff, use -v to show +E - & a & b \\ +E + & a & b \\ +E ? + +E \midrule +E \endhead +E \midrule... 
+E +E ...Full output truncated (16 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:530: AssertionError +________________ TestToLatex.test_to_latex_escape_special_chars ________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_escape_special_chars(self): + special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] + df = DataFrame(data=special_characters) + observed = df.to_latex() + expected = r"""\begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & \& \\ + 1 & \% \\ + 2 & \$ \\ + 3 & \# \\ + 4 & \_ \\ + 5 & \{ \\ + 6 & \} \\ + 7 & \textasciitilde \\ + 8 & \textasciicircum \\ + 9 & \textbackslash \\ + \bottomrule + \end{tabular} + """ + +> assert observed == expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ll} +E \toprule +E - {} & 0 \\ +E + {} & 0 \\ +E ? + +E \midrule +E - 0 & \& \\... +E +E ...Full output truncated (42 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:598: AssertionError +_____________________ TestToLatex.test_to_latex_no_header ______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_no_header(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(header=False) + withindex_expected = r"""\begin{tabular}{lrl} + \toprule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{lrl} +E \toprule +E - 0 & 1 & b1 \\ +E ? ^ +E + 0 & 1 & 'b1' \\ +E ? ^ + +E - 1 & 2 & b2 \\... 
+E +E ...Full output truncated (6 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:612: AssertionError +__________________ TestToLatex.test_to_latex_specified_header __________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_specified_header(self): + # GH 7124 + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(header=["AA", "BB"]) + withindex_expected = r"""\begin{tabular}{lrl} + \toprule + {} & AA & BB \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ + +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{lrl} +E \toprule +E - {} & AA & BB \\ +E + {} & AA & BB \\ +E ? + +E \midrule +E - 0 & 1 & b1 \\... +E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:639: AssertionError +______________________ TestToLatex.test_to_latex_decimal _______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +float_frame = A B C D +2gSI2RndbR 0.423719 -1.539780 -0.691211 -0.112028 +BnGEeeJ54Q -1.45....071262 -0.213009 +vgARWWzFsz 0.958321 0.239552 -0.934697 -0.215599 +9vVX63NXHQ 0.843613 2.314023 -0.550290 -0.708401 + + def test_to_latex_decimal(self, float_frame): + # GH 12031 + float_frame.to_latex() + + df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]}) + withindex_result = df.to_latex(decimal=",") + + withindex_expected = r"""\begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1,0 & b1 \\ + 1 & 2,1 & b2 \\ + \bottomrule + \end{tabular} + """ + +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E Skipping 34 identical leading characters in diff, use -v to show +E - a & b \\ +E + a & b \\ +E ? 
+ +E \midrule +E - 0 & 1,0 & b1 \\ +E ? ^... +E +E ...Full output truncated (9 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:688: AssertionError +_______________________ TestToLatex.test_to_latex_series _______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_series(self): + s = Series(["a", "b", "c"]) + withindex_result = s.to_latex() + withindex_expected = r"""\begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & a \\ + 1 & b \\ + 2 & c \\ + \bottomrule + \end{tabular} + """ +> assert withindex_result == withindex_expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ll} +E \toprule +E - {} & 0 \\ +E + {} & 0 \\ +E ? + +E \midrule +E - 0 & a \\... +E +E ...Full output truncated (14 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:703: AssertionError +_____________________ TestToLatex.test_to_latex_bold_rows ______________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_bold_rows(self): + # GH 16707 + df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + observed = df.to_latex(bold_rows=True) + expected = r"""\begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + \textbf{0} & 1 & b1 \\ + \textbf{1} & 2 & b2 \\ + \bottomrule + \end{tabular} + """ +> assert observed == expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{lrl} +E \toprule +E - {} & a & b \\ +E + {} & a & b \\ +E ? + +E \midrule +E - \textbf{0} & 1 & b1 \\... 
+E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:718: AssertionError +____________________ TestToLatex.test_to_latex_no_bold_rows ____________________ +[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_to_latex_no_bold_rows(self): + # GH 16707 + df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + observed = df.to_latex(bold_rows=False) + expected = r"""\begin{tabular}{lrl} + \toprule + {} & a & b \\ + \midrule + 0 & 1 & b1 \\ + 1 & 2 & b2 \\ + \bottomrule + \end{tabular} + """ +> assert observed == expected +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{lrl} +E \toprule +E - {} & a & b \\ +E + {} & a & b \\ +E ? + +E \midrule +E - 0 & 1 & b1 \\... +E +E ...Full output truncated (10 lines hidden), use '-vv' to show + +pandas/tests/io/formats/test_to_latex.py:733: AssertionError +___________________________ TestSeriesRepr.test_repr ___________________________ +[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = +datetime_series = 2000-01-03 0.254242 +2000-01-04 -0.951122 +2000-01-05 -0.567236 +2000-01-06 0.653409 +2000-01-07 1.750258 +200...2-08 1.424223 +2000-02-09 0.800410 +2000-02-10 0.542263 +2000-02-11 0.676446 +Freq: B, Name: ts, dtype: float64 +string_series = viK3MdxVn7 0.742605 +scvE0uHTYw -1.052345 +Bv933O5t9b 0.384895 +AH9RcVYr6i 0.779294 +zpAzWStAW1 0.822783 +Eh0...wcFrd58 -1.094470 +pQfdyl6mPL 0.548832 +MXmCRYkSNF -0.467209 +daZyob0osC -0.113301 +Name: (α, bar), dtype: float64 +object_series = mYai0pomyX 'jMRkmUlN9P' +dQlypkXvbe 'TA150g5Yr3' +bcRrHo5Yoe 'FynxFVZt5v' +VU4HUw7Cyj 'zp7XBvYrE8' +yK9fmCi27h '...'585znXWXuS' +mL7428VEve 'jUPPIg5T1J' +r2uMk60SBz 'Ks2lEcpjSW' +lGX1GOuEGg 'ji0H1UTC0r' +Name: objects, dtype: object + + def test_repr(self, datetime_series, string_series, object_series): + str(datetime_series) + str(string_series) + 
str(string_series.astype(int)) + str(object_series) + + str(Series(tm.randn(1000), index=np.arange(1000))) + str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1))) + + # empty + str(Series(dtype=object)) + + # with NaNs + string_series[5:7] = np.NaN + str(string_series) + + # with Nones + ots = datetime_series.astype("O") + ots[::2] = None + repr(ots) + + # various names + for name in [ + "", + 1, + 1.2, + "foo", + "\u03B1\u03B2\u03B3", + "loooooooooooooooooooooooooooooooooooooooooooooooooooong", + ("foo", "bar", "baz"), + (1, 2), + ("foo", 1, 2.3), + ("\u03B1", "\u03B2", "\u03B3"), + ("\u03B1", "bar"), + ]: + string_series.name = name + repr(string_series) + + biggie = Series( + tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz") + ) + repr(biggie) + + # 0 as name + ser = Series(np.random.randn(100), name=0) + rep_str = repr(ser) + assert "Name: 0" in rep_str + + # tidy repr + ser = Series(np.random.randn(1001), name=0) + rep_str = repr(ser) + assert "Name: 0" in rep_str + + ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) +> assert "\t" not in repr(ser) +E assert '\t' not in "a\\n\\r\\tf...type: object" +E '\t' is contained here: +E a\n\r\tf 'a +E +E b' +E ? + +E Name: a\n\r\td, dtype: object + +pandas/tests/series/test_repr.py:122: AssertionError +________________________ TestSeriesRepr.test_latex_repr ________________________ +[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_latex_repr(self): + result = r"""\begin{tabular}{ll} + \toprule + {} & 0 \\ + \midrule + 0 & $\alpha$ \\ + 1 & b \\ + 2 & c \\ + \bottomrule + \end{tabular} + """ + with option_context("display.latex.escape", False, "display.latex.repr", True): + s = Series([r"$\alpha$", "b", "c"]) +> assert result == s._repr_latex_() +E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' +E \begin{tabular}{ll} +E \toprule +E - {} & 0 \\ +E ? 
- +E + {} & 0 \\ +E \midrule +E - 0 & '$\alpha$' \\... +E +E ...Full output truncated (14 lines hidden), use '-vv' to show + +pandas/tests/series/test_repr.py:209: AssertionError +__________________ TestCategoricalRepr.test_categorical_repr ___________________ +[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python + +self = + + def test_categorical_repr(self): + a = Series(Categorical([1, 2, 3, 4])) + exp = ( + "0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" + ) + + assert exp == a.__str__() + + a = Series(Categorical(["a", "b"] * 25)) + exp = ( + "0 a\n1 b\n" + + " ..\n" + + "48 a\n49 b\n" + + "Length: 50, dtype: category\nCategories (2, object): [a, b]" + ) + with option_context("display.max_rows", 5): +> assert exp == repr(a) +E assert '0 a\n1 ...ject): [a, b]' == "0 'a'\n1 ...): ['a', 'b']" +E - 0 'a' +E ? ^ - +E + 0 a +E ? ^ +E - 1 'b' +E ? ^ - +E + 1 b... +E +E ...Full output truncated (17 lines hidden), use '-vv' to show + +pandas/tests/series/test_repr.py:276: AssertionError diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 2e047b5c4eb60..2e2f8f666f4e9 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -11,10 +11,10 @@ def test_repr(): df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype="string")}) - expected = " A\n0 a\n1 \n2 b" + expected = " A\n0 'a'\n1 \n2 'b'" assert repr(df) == expected - expected = "0 a\n1 \n2 b\nName: A, dtype: string" + expected = "0 'a'\n1 \n2 'b'\nName: A, dtype: string" assert repr(df.A) == expected expected = "\n['a', , 'b']\nLength: 3, dtype: string" diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index 6d786d9580542..b9fe2fc190d8d 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -66,9 +66,9 @@ def test_repr(self, float_frame): DataFrame().info(buf=buf) df = 
DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) - assert "\t" not in repr(df) - assert "\r" not in repr(df) - assert "a\n" not in repr(df) + assert "\t" in repr(df) + assert "\r" in repr(df) + assert "a\n" in repr(df) def test_repr_dimensions(self): df = DataFrame([[1, 2], [3, 4]]) @@ -123,7 +123,7 @@ def test_repr_unicode(self): df = DataFrame({"A": [uval, uval]}) result = repr(df) - ex_top = " A" + ex_top = " A" assert result.split("\n")[0].rstrip() == ex_top df = DataFrame({"A": [uval, uval]}) @@ -173,10 +173,10 @@ def test_repr_column_name_unicode_truncation_bug(self): def test_latex_repr(self): result = r"""\begin{tabular}{llll} \toprule -{} & 0 & 1 & 2 \\ +{} & 0 & 1 & 2 \\ \midrule -0 & $\alpha$ & b & c \\ -1 & 1 & 2 & 3 \\ +0 & '$\alpha$' & 'b' & 'c' \\ +1 & 1 & 2 & 3 \\ \bottomrule \end{tabular} """ diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 337a06b91e443..7846720c6db25 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -185,10 +185,10 @@ def test_series_equal_categorical_values_mismatch(check_less_precise): Series values are different \\(66\\.66667 %\\) \\[index\\]: \\[0, 1, 2\\] -\\[left\\]: \\[a, b, c\\] -Categories \\(3, object\\): \\[a, b, c\\] -\\[right\\]: \\[a, c, b\\] -Categories \\(3, object\\): \\[a, b, c\\]""" +\\[left\\]: \\['a', 'b', 'c'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\] +\\[right\\]: \\['a', 'c', 'b'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" s1 = Series(Categorical(["a", "b", "c"])) s2 = Series(Categorical(["a", "c", "b"])) From 5a93a676307529a72ae04d0c8372e336e0987ffa Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 21 May 2020 09:33:28 +0100 Subject: [PATCH 03/15] wip --- .travis.yml | 3 +- README.md | 1 + doc/source/ecosystem.rst | 18 +- doc/source/user_guide/computation.rst | 18 ++ doc/source/whatsnew/v1.1.0.rst | 3 +- 
pandas/_libs/tslibs/frequencies.pxd | 3 +- pandas/_libs/tslibs/frequencies.pyx | 61 +++-- pandas/_libs/tslibs/np_datetime.pxd | 2 - pandas/_libs/tslibs/np_datetime.pyx | 9 - pandas/_libs/tslibs/offsets.pyx | 30 ++- pandas/_libs/tslibs/period.pyx | 6 +- pandas/_libs/tslibs/resolution.pyx | 72 +----- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/_libs/tslibs/timestamps.pxd | 2 +- pandas/_libs/tslibs/timestamps.pyx | 26 +-- pandas/core/arrays/categorical.py | 12 +- pandas/core/arrays/datetimelike.py | 11 +- pandas/core/arrays/datetimes.py | 3 + pandas/core/arrays/integer.py | 4 + pandas/core/arrays/period.py | 3 +- pandas/core/arrays/sparse/array.py | 21 +- pandas/core/arrays/timedeltas.py | 23 +- pandas/core/dtypes/missing.py | 4 +- pandas/core/groupby/groupby.py | 164 +++++++------ pandas/core/groupby/ops.py | 15 +- pandas/core/indexes/base.py | 36 +-- pandas/core/indexes/category.py | 7 - pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 19 +- pandas/core/indexes/extension.py | 4 - pandas/core/indexes/numeric.py | 4 - pandas/core/indexes/period.py | 24 +- pandas/core/indexes/range.py | 12 +- pandas/core/internals/blocks.py | 2 +- pandas/core/nanops.py | 2 +- pandas/core/ops/common.py | 24 +- pandas/core/strings.py | 17 +- pandas/core/tools/datetimes.py | 185 +++------------ pandas/core/tools/times.py | 141 +++++++++++ pandas/core/window/rolling.py | 18 +- pandas/io/formats/format.py | 9 +- pandas/plotting/_matplotlib/converter.py | 13 +- pandas/plotting/_matplotlib/timeseries.py | 6 +- pandas/tests/arithmetic/test_datetime64.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 2 +- pandas/tests/arithmetic/test_timedelta64.py | 16 +- pandas/tests/arrays/boolean/test_logical.py | 2 +- pandas/tests/arrays/categorical/test_repr.py | 26 +-- .../tests/arrays/integer/test_arithmetic.py | 5 +- pandas/tests/arrays/sparse/test_array.py | 19 ++ pandas/tests/arrays/string_/test_string.py | 6 +- pandas/tests/arrays/test_datetimelike.py | 3 +- 
pandas/tests/frame/test_repr_info.py | 14 +- .../indexes/categorical/test_category.py | 2 +- .../tests/indexes/interval/test_interval.py | 2 +- pandas/tests/indexes/test_base.py | 10 + pandas/tests/tools/test_to_time.py | 12 +- .../tseries/frequencies/test_freq_code.py | 61 +++-- pandas/tests/tslibs/test_period_asfreq.py | 13 +- pandas/tests/util/test_assert_series_equal.py | 8 +- pandas/tests/window/test_expanding.py | 97 +++++++- pandas/tests/window/test_rolling.py | 219 ++++++++++++++++-- pandas/tseries/frequencies.py | 9 +- pandas/tseries/offsets.py | 83 +++---- pandas/util/_decorators.py | 6 +- 65 files changed, 986 insertions(+), 672 deletions(-) create mode 100644 pandas/core/tools/times.py diff --git a/.travis.yml b/.travis.yml index 7943ca370af1a..c5dbddacc6a43 100644 --- a/.travis.yml +++ b/.travis.yml @@ -75,8 +75,7 @@ matrix: before_install: - echo "before_install" - # set non-blocking IO on travis - # https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 + # Use blocking IO on travis. Ref: https://github.com/travis-ci/travis-ci/issues/8920#issuecomment-352661024 - python -c 'import os,sys,fcntl; flags = fcntl.fcntl(sys.stdout, fcntl.F_GETFL); fcntl.fcntl(sys.stdout, fcntl.F_SETFL, flags&~os.O_NONBLOCK);' - source ci/travis_process_gbq_encryption.sh - export PATH="$HOME/miniconda3/bin:$PATH" diff --git a/README.md b/README.md index 33dfbf10ff743..7edee8d3feeed 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ [![Downloads](https://anaconda.org/conda-forge/pandas/badges/downloads.svg)](https://pandas.pydata.org) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/pydata/pandas) [![Powered by NumFOCUS](https://img.shields.io/badge/powered%20by-NumFOCUS-orange.svg?style=flat&colorA=E1523D&colorB=007D8A)](https://numfocus.org) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## What is it? 
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index fd5e7c552fe0a..6c6a7f42d4b7e 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -30,7 +30,7 @@ substantial projects that you feel should be on this list, please let us know. Data cleaning and validation ---------------------------- -`pyjanitor `__ +`Pyjanitor `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Pyjanitor provides a clean API for cleaning data, using method chaining. @@ -115,7 +115,7 @@ It is very similar to the matplotlib plotting backend, but provides interactive web-based charts and maps. -`seaborn `__ +`Seaborn `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Seaborn is a Python visualization library based on @@ -136,7 +136,7 @@ provides a powerful, declarative and extremely general way to generate bespoke p Various implementations to other languages are available. A good implementation for Python users is `has2k1/plotnine `__. -`IPython Vega `__ +`IPython vega `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `IPython Vega `__ leverages `Vega @@ -147,7 +147,7 @@ A good implementation for Python users is `has2k1/plotnine `__ `Python API `__ enables interactive figures and web shareability. Maps, 2D, 3D, and live-streaming graphs are rendered with WebGL and `D3.js `__. The library supports plotting directly from a pandas DataFrame and cloud-based collaboration. Users of `matplotlib, ggplot for Python, and Seaborn `__ can convert figures into interactive web-based plots. Plots can be drawn in `IPython Notebooks `__ , edited with R or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly is free for unlimited sharing, and has `cloud `__, `offline `__, or `on-premise `__ accounts for private use. 
-`QtPandas `__ +`Qtpandas `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Spun off from the main pandas library, the `qtpandas `__ @@ -187,7 +187,7 @@ See :ref:`Options and Settings ` and :ref:`Available Options ` for pandas ``display.`` settings. -`quantopian/qgrid `__ +`Quantopian/qgrid `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ qgrid is "an interactive grid for sorting and filtering @@ -249,12 +249,12 @@ The following data feeds are available: * Stooq Index Data * MOEX Data -`quandl/Python `__ +`Quandl/Python `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Quandl API for Python wraps the Quandl REST API to return Pandas DataFrames with timeseries indexes. -`pydatastream `__ +`Pydatastream `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PyDatastream is a Python interface to the `Refinitiv Datastream (DWS) `__ @@ -384,7 +384,7 @@ Pandas provides an interface for defining system. The following libraries implement that interface to provide types not found in NumPy or pandas, which work well with pandas' data containers. -`cyberpandas`_ +`Cyberpandas`_ ~~~~~~~~~~~~~~ Cyberpandas provides an extension type for storing arrays of IP Addresses. These @@ -411,4 +411,4 @@ Library Accessor Classes Description .. _pdvega: https://altair-viz.github.io/pdvega/ .. _Altair: https://altair-viz.github.io/ .. _pandas_path: https://github.com/drivendataorg/pandas-path/ -.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html \ No newline at end of file +.. _pathlib.Path: https://docs.python.org/3/library/pathlib.html diff --git a/doc/source/user_guide/computation.rst b/doc/source/user_guide/computation.rst index d371f6d5f273c..cf630a9671013 100644 --- a/doc/source/user_guide/computation.rst +++ b/doc/source/user_guide/computation.rst @@ -648,6 +648,24 @@ from present information back to past information. This allows the rolling windo Currently, this feature is only implemented for time-based windows. 
For fixed windows, the closed parameter cannot be set and the rolling window will always have both endpoints closed. +.. _stats.iter_rolling_window: + +Iteration over window: +~~~~~~~~~~~~~~~~~~~~~~ + +.. versionadded:: 1.1.0 + +``Rolling`` and ``Expanding`` objects now support iteration. Be noted that ``min_periods`` is ignored in iteration. + +.. ipython:: + + In [1]: df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + In [2]: for i in df.rolling(2): + ...: print(i) + ...: + + .. _stats.moments.ts-versus-resampling: Time-aware rolling vs. resampling diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 73892da2cbf71..3b60085e9fa66 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -235,6 +235,7 @@ Other enhancements :class:`~pandas.io.stata.StataWriter`, :class:`~pandas.io.stata.StataWriter117`, and :class:`~pandas.io.stata.StataWriterUTF8` (:issue:`26599`). - :meth:`HDFStore.put` now accepts `track_times` parameter. Parameter is passed to ``create_table`` method of ``PyTables`` (:issue:`32682`). +- Make :class:`pandas.core.window.Rolling` and :class:`pandas.core.window.Expanding` iterable(:issue:`11704`) .. 
--------------------------------------------------------------------------- @@ -852,7 +853,7 @@ Sparse ^^^^^^ - Creating a :class:`SparseArray` from timezone-aware dtype will issue a warning before dropping timezone information, instead of doing so silently (:issue:`32501`) - Bug in :meth:`arrays.SparseArray.from_spmatrix` wrongly read scipy sparse matrix (:issue:`31991`) -- +- Bug in :meth:`Series.sum` with ``SparseArray`` raises ``TypeError`` (:issue:`25777`) ExtensionArray ^^^^^^^^^^^^^^ diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd index 1b7efb8c5dfdf..c96661aaab443 100644 --- a/pandas/_libs/tslibs/frequencies.pxd +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -1,7 +1,8 @@ +cdef dict attrname_to_abbrevs + cpdef str get_rule_month(object source, str default=*) cpdef get_freq_code(freqstr) -cpdef object get_freq(object freq) cpdef str get_base_alias(freqstr) cpdef int get_to_timestamp_base(int base) cpdef str get_freq_str(base, mult=*) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index d97a9fa0ba2fa..60f750da92091 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -124,8 +124,50 @@ _lite_rule_alias = { _dont_uppercase = {'MS', 'ms'} +# Map attribute-name resolutions to resolution abbreviations +_attrname_to_abbrevs = { + "year": "A", + "quarter": "Q", + "month": "M", + "day": "D", + "hour": "H", + "minute": "T", + "second": "S", + "millisecond": "L", + "microsecond": "U", + "nanosecond": "N", +} +cdef dict attrname_to_abbrevs = _attrname_to_abbrevs + + # ---------------------------------------------------------------------- +def get_freq_group(freq) -> int: + """ + Return frequency code group of given frequency str or offset. 
+ + Examples + -------- + >>> get_freq_group('W-MON') + 4000 + + >>> get_freq_group('W-FRI') + 4000 + """ + if is_offset_object(freq): + freq = freq.rule_code + + if isinstance(freq, str): + freq = attrname_to_abbrevs.get(freq, freq) + base, mult = get_freq_code(freq) + freq = base + elif isinstance(freq, int): + pass + else: + raise ValueError('input must be str, offset or int') + return (freq // 1000) * 1000 + + cpdef get_freq_code(freqstr): """ Return freq str or tuple to freq code and stride (mult) @@ -306,25 +348,6 @@ cpdef int get_to_timestamp_base(int base): return base -cpdef object get_freq(object freq): - """ - Return frequency code of given frequency str. - If input is not string, return input as it is. - - Examples - -------- - >>> get_freq('A') - 1000 - - >>> get_freq('3A') - 1000 - """ - if isinstance(freq, str): - base, mult = get_freq_code(freq) - freq = base - return freq - - # ---------------------------------------------------------------------- # Frequency comparison diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index c936d42b34db5..038632e1575c3 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -53,8 +53,6 @@ cdef extern from "src/datetime/np_datetime.h": npy_datetimestruct *result) nogil -cdef int reverse_ops[6] - cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts) diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index 9a8a8fdae6d2f..5ac0e4fa44bee 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -68,15 +68,6 @@ cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: # ---------------------------------------------------------------------- # Comparison -cdef int reverse_ops[6] - -reverse_ops[Py_LT] = Py_GT -reverse_ops[Py_LE] = Py_GE -reverse_ops[Py_EQ] = Py_EQ -reverse_ops[Py_NE] = Py_NE -reverse_ops[Py_GT] = 
Py_LT -reverse_ops[Py_GE] = Py_LE - cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: """ diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index bdd9a7da8842d..c113897e4fe82 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -23,7 +23,7 @@ from pandas._libs.tslibs.util cimport is_integer_object, is_datetime64_object from pandas._libs.tslibs.base cimport ABCTick, ABCTimestamp, is_tick_object -from pandas._libs.tslibs.ccalendar import MONTHS, DAYS +from pandas._libs.tslibs.ccalendar import MONTHS, DAYS, weekday_to_int, int_to_weekday from pandas._libs.tslibs.ccalendar cimport get_days_in_month, dayofweek from pandas._libs.tslibs.conversion cimport ( convert_datetime_to_tsobject, @@ -852,10 +852,13 @@ cdef class _Tick(ABCTick): self.normalize = False -class BusinessMixin: +class BusinessMixin(BaseOffset): """ Mixin to business types to provide related functions. """ + def __init__(self, n=1, normalize=False, offset=timedelta(0)): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "_offset", offset) @property def offset(self): @@ -879,7 +882,11 @@ class BusinessMixin: class BusinessHourMixin(BusinessMixin): _adjust_dst = False - def __init__(self, start="09:00", end="17:00", offset=timedelta(0)): + def __init__( + self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) + ): + BusinessMixin.__init__(self, n, normalize, offset) + # must be validated here to equality check if np.ndim(start) == 0: # i.e. 
not is_list_like @@ -923,7 +930,6 @@ class BusinessHourMixin(BusinessMixin): object.__setattr__(self, "start", start) object.__setattr__(self, "end", end) - object.__setattr__(self, "_offset", offset) def _repr_attrs(self) -> str: out = super()._repr_attrs() @@ -986,10 +992,16 @@ class CustomMixin: object.__setattr__(self, "calendar", calendar) -class WeekOfMonthMixin: +class WeekOfMonthMixin(BaseOffset): """ Mixin for methods common to WeekOfMonth and LastWeekOfMonth. """ + def __init__(self, n=1, normalize=False, weekday=0): + BaseOffset.__init__(self, n, normalize) + object.__setattr__(self, "weekday", weekday) + + if weekday < 0 or weekday > 6: + raise ValueError(f"Day must be 0<=day<=6, got {weekday}") @apply_wraps def apply(self, other): @@ -1010,6 +1022,14 @@ class WeekOfMonthMixin: return False return dt.day == self._get_offset_day(dt) + @property + def rule_code(self) -> str: + weekday = int_to_weekday.get(self.weekday, "") + if self.week == -1: + # LastWeekOfMonth + return f"{self._prefix}-{weekday}" + return f"{self._prefix}-{self.week + 1}{weekday}" + # ---------------------------------------------------------------------- # RelativeDelta Arithmetic diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index c5be5b1d96469..8af467c3b0950 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -53,6 +53,7 @@ from pandas._libs.tslibs.ccalendar cimport ( ) from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS from pandas._libs.tslibs.frequencies cimport ( + attrname_to_abbrevs, get_base_alias, get_freq_code, get_freq_str, @@ -60,7 +61,6 @@ from pandas._libs.tslibs.frequencies cimport ( get_to_timestamp_base, ) from pandas._libs.tslibs.parsing import parse_time_string -from pandas._libs.tslibs.resolution import Resolution from pandas._libs.tslibs.nattype cimport ( _nat_scalar_rules, NPY_NAT, @@ -708,6 +708,8 @@ cdef char* c_strftime(npy_datetimestruct *dts, char *fmt): # Conversion between date_info and 
npy_datetimestruct cdef inline int get_freq_group(int freq) nogil: + # Note: this is equivalent to libfrequencies.get_freq_group, specialized + # to integer argument. return (freq // 1000) * 1000 @@ -2431,7 +2433,7 @@ class Period(_Period): if freq is None: try: - freq = Resolution.get_freq(reso) + freq = attrname_to_abbrevs[reso] except KeyError: raise ValueError(f"Invalid frequency or could not " f"infer: {reso}") diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 80a3466f5a1aa..3d76483f76600 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -1,11 +1,11 @@ import numpy as np from numpy cimport ndarray, int64_t, int32_t -from pandas._libs.tslibs.util cimport get_nat, is_offset_object +from pandas._libs.tslibs.util cimport get_nat from pandas._libs.tslibs.np_datetime cimport ( npy_datetimestruct, dt64_to_dtstruct) -from pandas._libs.tslibs.frequencies cimport get_freq_code +from pandas._libs.tslibs.frequencies cimport attrname_to_abbrevs from pandas._libs.tslibs.timezones cimport ( is_utc, is_tzlocal, maybe_get_tz, get_dst_info) from pandas._libs.tslibs.ccalendar cimport get_days_in_month @@ -25,6 +25,7 @@ cdef: int RESO_HR = 5 int RESO_DAY = 6 + # ---------------------------------------------------------------------- cpdef resolution(const int64_t[:] stamps, tz=None): @@ -106,31 +107,6 @@ cdef inline int _reso_stamp(npy_datetimestruct *dts): return RESO_DAY -def get_freq_group(freq) -> int: - """ - Return frequency code group of given frequency str or offset. 
- - Examples - -------- - >>> get_freq_group('W-MON') - 4000 - - >>> get_freq_group('W-FRI') - 4000 - """ - if is_offset_object(freq): - freq = freq.rule_code - - if isinstance(freq, str): - base, mult = get_freq_code(freq) - freq = base - elif isinstance(freq, int): - pass - else: - raise ValueError('input must be str, offset or int') - return (freq // 1000) * 1000 - - class Resolution: # Note: cython won't allow us to reference the cdef versions at the @@ -163,7 +139,7 @@ class Resolution: RESO_HR: 60, RESO_DAY: 24} - _reso_str_bump_map = { + reso_str_bump_map = { 'D': 'H', 'H': 'T', 'T': 'S', @@ -174,19 +150,7 @@ class Resolution: _str_reso_map = {v: k for k, v in _reso_str_map.items()} - _reso_freq_map = { - 'year': 'A', - 'quarter': 'Q', - 'month': 'M', - 'day': 'D', - 'hour': 'H', - 'minute': 'T', - 'second': 'S', - 'millisecond': 'L', - 'microsecond': 'U', - 'nanosecond': 'N'} - - _freq_reso_map = {v: k for k, v in _reso_freq_map.items()} + _freq_reso_map = {v: k for k, v in attrname_to_abbrevs.items()} @classmethod def get_str(cls, reso: int) -> str: @@ -215,30 +179,6 @@ class Resolution: """ return cls._str_reso_map.get(resostr, cls.RESO_DAY) - @classmethod - def get_freq_group(cls, resostr: str) -> int: - """ - Return frequency str against resolution str. - - Examples - -------- - >>> f.Resolution.get_freq_group('day') - 4000 - """ - return get_freq_group(cls.get_freq(resostr)) - - @classmethod - def get_freq(cls, resostr: str) -> str: - """ - Return frequency str against resolution str. 
- - Examples - -------- - >>> f.Resolution.get_freq('day') - 'D' - """ - return cls._reso_freq_map[resostr] - @classmethod def get_str_from_freq(cls, freq: str) -> str: """ @@ -303,7 +243,7 @@ class Resolution: ) next_value = cls._reso_mult_map[start_reso] * value - next_name = cls._reso_str_bump_map[freq] + next_name = cls.reso_str_bump_map[freq] return cls.get_stride_from_decimal(next_value, next_name) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 03419a6267983..7a3af169a960e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -26,7 +26,7 @@ from pandas._libs.tslibs.base cimport ABCTimedelta, ABCTimestamp, is_tick_object from pandas._libs.tslibs.ccalendar cimport DAY_NANOS from pandas._libs.tslibs.np_datetime cimport ( - cmp_scalar, reverse_ops, td64_to_tdstruct, pandas_timedeltastruct) + cmp_scalar, td64_to_tdstruct, pandas_timedeltastruct) from pandas._libs.tslibs.nattype cimport ( checknull_with_nat, diff --git a/pandas/_libs/tslibs/timestamps.pxd b/pandas/_libs/tslibs/timestamps.pxd index 90f50e3af503c..88d21b19e1e37 100644 --- a/pandas/_libs/tslibs/timestamps.pxd +++ b/pandas/_libs/tslibs/timestamps.pxd @@ -15,7 +15,7 @@ cdef class _Timestamp(ABCTimestamp): cdef readonly: int64_t value, nanosecond object freq - list _date_attributes + cpdef bint _get_start_end_field(self, str field) cpdef _get_date_name_field(self, object field, object locale) cdef int64_t _maybe_convert_value_to_local(self) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 0982cf8e5da33..4f8b85240c79f 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -387,6 +387,10 @@ cdef class _Timestamp(ABCTimestamp): dtype=object, ) + elif not isinstance(self, _Timestamp): + # cython semantics, args have been switched and this is __radd__ + return other.__add__(self) + return NotImplemented def __sub__(self, other): @@ -1051,7 +1055,7 @@ 
timedelta}, default 'raise' return Period(self, freq=freq) @property - def dayofweek(self): + def dayofweek(self) -> int: """ Return day of the week. """ @@ -1092,7 +1096,7 @@ timedelta}, default 'raise' return self._get_date_name_field('month_name', locale) @property - def dayofyear(self): + def dayofyear(self) -> int: """ Return the day of the year. """ @@ -1115,7 +1119,7 @@ timedelta}, default 'raise' return ((self.month - 1) // 3) + 1 @property - def days_in_month(self): + def days_in_month(self) -> int: """ Return the number of days in the month. """ @@ -1428,16 +1432,7 @@ default 'raise' return base1 + base2 - def _has_time_component(self) -> bool: - """ - Returns if the Timestamp has a time component - in addition to the date part - """ - return (self.time() != _zero_time - or self.tzinfo is not None - or self.nanosecond != 0) - - def to_julian_date(self): + def to_julian_date(self) -> np.float64: """ Convert TimeStamp to a Julian Date. 0 Julian date is noon January 1, 4713 BC. @@ -1474,11 +1469,6 @@ default 'raise' np.array([self.value], dtype='i8'), tz=self.tz)[0] return Timestamp(normalized_value).tz_localize(self.tz) - def __radd__(self, other): - # __radd__ on cython extension types like _Timestamp is not used, so - # define it here instead - return self + other - # Add the min and max fields at the class level cdef int64_t _NS_UPPER_BOUND = np.iinfo(np.int64).max diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 737c130161246..01d36ae015926 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -58,12 +58,16 @@ from pandas.io.formats import console - +import csv def _cat_compare_op(op): opname = f"__{op.__name__}__" @unpack_zerodim_and_defer(opname) def func(self, other): + if is_list_like(other) and len(other) != len(self): + # TODO: Could this fail if the categories are listlike objects? 
+ raise ValueError("Lengths must match.") + if not self.ordered: if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: raise TypeError( @@ -1869,11 +1873,11 @@ def _repr_categories(self): if len(self.categories) > max_categories: num = max_categories // 2 - head = fmt.format_array(self.categories[:num], None) - tail = fmt.format_array(self.categories[-num:], None) + head = fmt.format_array(self.categories[:num], None, quoting=csv.QUOTE_NONNUMERIC) + tail = fmt.format_array(self.categories[-num:], None, quoting=csv.QUOTE_NONNUMERIC) category_strs = head + ["..."] + tail else: - category_strs = fmt.format_array(self.categories, None) + category_strs = fmt.format_array(self.categories, None, quoting=csv.QUOTE_NONNUMERIC) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs] diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index bbaa64dae3eea..708b0ea4da96d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -6,6 +6,7 @@ import numpy as np from pandas._libs import NaT, NaTType, Period, Timestamp, algos, iNaT, lib +from pandas._libs.tslibs.resolution import Resolution from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.timestamps import ( RoundTo, @@ -84,6 +85,9 @@ def _validate_comparison_value(self, other): elif not is_list_like(other): raise InvalidComparison(other) + elif len(other) != len(self): + raise ValueError("Lengths must match") + else: try: other = self._validate_listlike(other, opname, allow_object=True) @@ -1091,14 +1095,14 @@ def inferred_freq(self): @property # NB: override with cache_readonly in immutable subclasses def _resolution(self): - return frequencies.Resolution.get_reso_from_freq(self.freqstr) + return Resolution.get_reso_from_freq(self.freqstr) @property # NB: override with cache_readonly in immutable subclasses def resolution(self) -> str: """ Returns day, hour, 
minute, second, millisecond or microsecond """ - return frequencies.Resolution.get_str(self._resolution) + return Resolution.get_str(self._resolution) @classmethod def _validate_frequency(cls, index, freq, **kwargs): @@ -1234,6 +1238,9 @@ def _add_timedelta_arraylike(self, other): """ # overridden by PeriodArray + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + if isinstance(other, np.ndarray): # ndarray[timedelta64]; wrap in TimedeltaIndex for op from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 897c53c5c75d1..90088c370697e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -654,6 +654,9 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" + if len(self) != len(other): + raise ValueError("cannot add indices of unequal length") + if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) other = type(self)(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 3ca7e028913c6..5a90ea4a36a21 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -517,6 +517,8 @@ def cmp_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) + if len(self) != len(other): + raise ValueError("Lengths must match to compare") if other is libmissing.NA: # numpy does not handle pd.NA well as "other" scalar (it returns @@ -620,6 +622,8 @@ def integer_arithmetic_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) + if len(self) != len(other): + raise ValueError("Lengths must match") if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError("can only perform ops with numeric values") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 
5c700a53ceac4..3978161829481 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -746,8 +746,7 @@ def _check_timedeltalike_freq_compat(self, other): IncompatibleFrequency """ assert isinstance(self.freq, Tick) # checked by calling function - own_offset = frequencies.to_offset(self.freq.rule_code) - base_nanos = delta_to_nanoseconds(own_offset) + base_nanos = self.freq.base.nanos if isinstance(other, (timedelta, np.timedelta64, Tick)): nanos = delta_to_nanoseconds(other) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 2720c831bcff6..3cfd92d778823 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -13,6 +13,7 @@ import pandas._libs.sparse as splib from pandas._libs.sparse import BlockIndex, IntIndex, SparseIndex from pandas._libs.tslibs import NaT +from pandas._typing import Scalar import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning @@ -46,6 +47,7 @@ from pandas.core.construction import extract_array, sanitize_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import interpolate_2d +from pandas.core.nanops import check_below_min_count import pandas.core.ops as ops from pandas.core.ops.common import unpack_zerodim_and_defer @@ -1220,21 +1222,36 @@ def any(self, axis=0, *args, **kwargs): return values.any().item() - def sum(self, axis=0, *args, **kwargs): + def sum(self, axis: int = 0, min_count: int = 0, *args, **kwargs) -> Scalar: """ Sum of non-NA/null values + Parameters + ---------- + axis : int, default 0 + Not Used. NumPy compatibility. + min_count : int, default 0 + The required number of valid values to perform the summation. If fewer + than ``min_count`` valid values are present, the result will be the missing + value indicator for subarray type. + *args, **kwargs + Not Used. NumPy compatibility. 
+ Returns ------- - sum : float + scalar """ nv.validate_sum(args, kwargs) valid_vals = self._valid_sp_values sp_sum = valid_vals.sum() if self._null_fill_value: + if check_below_min_count(valid_vals.shape, None, min_count): + return na_value_for_dtype(self.dtype.subtype, compat=False) return sp_sum else: nsparse = self.sp_index.ngaps + if check_below_min_count(valid_vals.shape, None, min_count - nsparse): + return na_value_for_dtype(self.dtype.subtype, compat=False) return sp_sum + self.fill_value * nsparse def cumsum(self, axis=0, *args, **kwargs): diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4b84b3ea8b46a..bc215eec4c345 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -471,6 +471,10 @@ def __mul__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) + if len(other) != len(self) and not is_timedelta64_dtype(other.dtype): + # Exclude timedelta64 here so we correctly raise TypeError + # for that instead of ValueError + raise ValueError("Cannot multiply with unequal lengths") if is_object_dtype(other.dtype): # this multiplication will succeed only if all elements of other @@ -514,7 +518,10 @@ def __truediv__(self, other): # e.g. list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): # let numpy handle it return self._data / other @@ -564,7 +571,10 @@ def __rtruediv__(self, other): # e.g. 
list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide vectors with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): # let numpy handle it return other / self._data @@ -613,8 +623,10 @@ def __floordiv__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") - if is_timedelta64_dtype(other.dtype): + elif is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -666,7 +678,10 @@ def __rfloordiv__(self, other): # list, tuple other = np.array(other) - if is_timedelta64_dtype(other.dtype): + if len(other) != len(self): + raise ValueError("Cannot divide with unequal lengths") + + elif is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 443206754ba69..75188ad5b00eb 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -520,7 +520,9 @@ def na_value_for_dtype(dtype, compat: bool = True): return 0 return np.nan elif is_bool_dtype(dtype): - return False + if compat: + return False + return np.nan return np.nan diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d9b65f92ac0e1..55b9c28c74cb2 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -36,7 +36,6 @@ class providing the base-class of operations. 
from pandas._libs import Timestamp import pandas._libs.groupby as libgroupby from pandas._typing import FrameOrSeries, Scalar -from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly, doc @@ -192,6 +191,24 @@ class providing the base-class of operations. """, ) +_groupby_agg_method_template = """ +Compute {fname} of group values. + +Parameters +---------- +numeric_only : bool, default {no} + Include only float, int, boolean columns. If None, will attempt to use + everything, then use only numeric data. +min_count : int, default {mc} + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + +Returns +------- +Series or DataFrame + Computed {fname} of values within each group. +""" + _pipe_template = """ Apply a function `func` with arguments to this %(klass)s object and return the function's result. 
@@ -945,6 +962,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]): def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False): raise AbstractMethodError(self) + def _agg_general( + self, + numeric_only: bool = True, + min_count: int = -1, + *, + alias: str, + npfunc: Callable, + ): + self._set_group_selection() + + # try a cython aggregation if we can + try: + return self._cython_agg_general( + how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count, + ) + except DataError: + pass + except NotImplementedError as err: + if "function is not implemented for this dtype" in str( + err + ) or "category dtype not supported" in str(err): + # raised in _get_cython_function, in some cases can + # be trimmed by implementing cython funcs for more dtypes + pass + else: + raise + + # apply a non-cython aggregation + result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) + return result + def _cython_agg_general( self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 ): @@ -1438,74 +1486,36 @@ def size(self): result = self._obj_1d_constructor(result) return self._reindex_output(result, fill_value=0) - @classmethod - def _add_numeric_operations(cls): - """ - Add numeric operations to the GroupBy generically. - """ + @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) + def sum(self, numeric_only: bool = True, min_count: int = 0): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum + ) - def groupby_function( - name: str, - alias: str, - npfunc, - numeric_only: bool = True, - min_count: int = -1, - ): + @doc(_groupby_agg_method_template, fname="prod", no=True, mc=0) + def prod(self, numeric_only: bool = True, min_count: int = 0): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod + ) - _local_template = """ - Compute %(f)s of group values. 
- - Parameters - ---------- - numeric_only : bool, default %(no)s - Include only float, int, boolean columns. If None, will attempt to use - everything, then use only numeric data. - min_count : int, default %(mc)s - The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. - - Returns - ------- - Series or DataFrame - Computed %(f)s of values within each group. - """ - - @Substitution(name="groupby", f=name, no=numeric_only, mc=min_count) - @Appender(_common_see_also) - @Appender(_local_template) - def func(self, numeric_only=numeric_only, min_count=min_count): - self._set_group_selection() - - # try a cython aggregation if we can - try: - return self._cython_agg_general( - how=alias, - alt=npfunc, - numeric_only=numeric_only, - min_count=min_count, - ) - except DataError: - pass - except NotImplementedError as err: - if "function is not implemented for this dtype" in str( - err - ) or "category dtype not supported" in str(err): - # raised in _get_cython_function, in some cases can - # be trimmed by implementing cython funcs for more dtypes - pass - else: - raise - - # apply a non-cython aggregation - result = self.aggregate(lambda x: npfunc(x, axis=self.axis)) - return result - - set_function_name(func, name, cls) - - return func + @doc(_groupby_agg_method_template, fname="min", no=False, mc=-1) + def min(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min + ) + @doc(_groupby_agg_method_template, fname="max", no=False, mc=-1) + def max(self, numeric_only: bool = False, min_count: int = -1): + return self._agg_general( + numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max + ) + + @doc(_groupby_agg_method_template, fname="first", no=False, mc=-1) + def first(self, numeric_only: bool = False, min_count: int = -1): def first_compat(obj: FrameOrSeries, 
axis: int = 0): def first(x: Series): + """Helper function for first item that isn't NA. + """ x = x.array[notna(x.array)] if len(x) == 0: return np.nan @@ -1518,8 +1528,19 @@ def first(x: Series): else: raise TypeError(type(obj)) + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="first", + npfunc=first_compat, + ) + + @doc(_groupby_agg_method_template, fname="last", no=False, mc=-1) + def last(self, numeric_only: bool = False, min_count: int = -1): def last_compat(obj: FrameOrSeries, axis: int = 0): def last(x: Series): + """Helper function for last item that isn't NA. + """ x = x.array[notna(x.array)] if len(x) == 0: return np.nan @@ -1532,12 +1553,12 @@ def last(x: Series): else: raise TypeError(type(obj)) - cls.sum = groupby_function("sum", "add", np.sum, min_count=0) - cls.prod = groupby_function("prod", "prod", np.prod, min_count=0) - cls.min = groupby_function("min", "min", np.min, numeric_only=False) - cls.max = groupby_function("max", "max", np.max, numeric_only=False) - cls.first = groupby_function("first", "first", first_compat, numeric_only=False) - cls.last = groupby_function("last", "last", last_compat, numeric_only=False) + return self._agg_general( + numeric_only=numeric_only, + min_count=min_count, + alias="last", + npfunc=last_compat, + ) @Substitution(name="groupby") @Appender(_common_see_also) @@ -2637,9 +2658,6 @@ def _reindex_output( return output.reset_index(drop=True) -GroupBy._add_numeric_operations() - - @doc(GroupBy) def get_groupby( obj: NDFrame, diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 597a160995eef..74db87f46c5e2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -952,7 +952,9 @@ def _chop(self, sdata, slice_obj: slice) -> NDFrame: class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: - return sdata.iloc[slice_obj] + # fastpath equivalent to `sdata.iloc[slice_obj]` + mgr = 
sdata._mgr.get_slice(slice_obj) + return type(sdata)(mgr, name=sdata.name, fastpath=True) class FrameSplitter(DataSplitter): @@ -962,10 +964,13 @@ def fast_apply(self, f: F, sdata: FrameOrSeries, names): return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: - if self.axis == 0: - return sdata.iloc[slice_obj] - else: - return sdata.iloc[:, slice_obj] + # Fastpath equivalent to: + # if self.axis == 0: + # return sdata.iloc[slice_obj] + # else: + # return sdata.iloc[:, slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + return type(sdata)(mgr) def get_splitter( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b8a9827b5effd..d9828707b6164 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -54,7 +54,6 @@ ABCCategorical, ABCDataFrame, ABCDatetimeIndex, - ABCIntervalIndex, ABCMultiIndex, ABCPandasArray, ABCPeriodIndex, @@ -75,7 +74,6 @@ from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name -from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods @@ -109,8 +107,10 @@ def _make_comparison_op(op, cls): - @unpack_zerodim_and_defer(op.__name__) def cmp_method(self, other): + if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): + if other.ndim > 0 and len(self) != len(other): + raise ValueError("Lengths must match to compare") if is_object_dtype(self.dtype) and isinstance(other, ABCCategorical): left = type(other)(self._values, dtype=other.dtype) @@ -4101,37 +4101,13 @@ def append(self, other): return self._concat(to_concat, name) def _concat(self, to_concat, name): - - typs = _concat.get_dtype_kinds(to_concat) - - if len(typs) == 1: - return self._concat_same_dtype(to_concat, 
name=name) - return Index._concat_same_dtype(self, to_concat, name=name) - - def _concat_same_dtype(self, to_concat, name): """ - Concatenate to_concat which has the same class. + Concatenate multiple Index objects. """ - # must be overridden in specific classes - klasses = ( - ABCDatetimeIndex, - ABCTimedeltaIndex, - ABCPeriodIndex, - ExtensionArray, - ABCIntervalIndex, - ) - to_concat = [ - x.astype(object) if isinstance(x, klasses) else x for x in to_concat - ] - - self = to_concat[0] - attribs = self._get_attributes_dict() - attribs["name"] = name - to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] - res_values = np.concatenate(to_concat) - return Index(res_values, name=name) + result = _concat.concat_compat(to_concat) + return Index(result, name=name) def putmask(self, mask, value): """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 25df4a0bee737..2a79c83de7ef2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -738,13 +738,6 @@ def insert(self, loc: int, item): def _concat(self, to_concat, name): # if calling index is category, don't check dtype of others - return CategoricalIndex._concat_same_dtype(self, to_concat, name) - - def _concat_same_dtype(self, to_concat, name): - """ - Concatenate to_concat which has the same class - ValueError if other is not in the categories - """ codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat]) result = self._create_from_codes(codes, name=name) # if name is None, _create_from_codes sets self.name diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a12d5b64bb06c..2a7cd0eac04a6 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -8,6 +8,7 @@ from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.parsing import DateParseError from pandas._typing 
import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -41,7 +42,6 @@ from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name from pandas.core.sorting import ensure_key_mapped -from pandas.core.tools.datetimes import DateParseError from pandas.core.tools.timedeltas import to_timedelta from pandas.tseries.offsets import DateOffset, Tick diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5a89c45a3e425..6f1614d050cad 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -5,8 +5,9 @@ import numpy as np -from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts -from pandas._libs.tslibs import fields, parsing, timezones +from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib +from pandas._libs.tslibs import fields, parsing, resolution as libresolution, timezones +from pandas._libs.tslibs.frequencies import get_freq_group from pandas._typing import DtypeObj, Label from pandas.util._decorators import cache_readonly @@ -26,9 +27,9 @@ from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.extension import inherit_names -import pandas.core.tools.datetimes as tools +from pandas.core.tools.times import to_time -from pandas.tseries.frequencies import Resolution, to_offset +from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import prefix_mapping @@ -323,7 +324,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: def _mpl_repr(self): # how to represent ourselves to matplotlib - return libts.ints_to_pydatetime(self.asi8, self.tz) + return tslib.ints_to_pydatetime(self.asi8, self.tz) @property def _formatter_func(self): @@ -500,7 +501,7 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): if reso 
not in valid_resos: raise KeyError - grp = Resolution.get_freq_group(reso) + grp = get_freq_group(reso) per = Period(parsed, freq=(grp, 1)) start, end = per.start_time, per.end_time @@ -525,7 +526,7 @@ def _validate_partial_date_slice(self, reso: str): if ( self.is_monotonic and reso in ["day", "hour", "minute", "second"] - and self._resolution >= Resolution.get_reso(reso) + and self._resolution >= libresolution.Resolution.get_reso(reso) ): # These resolution/monotonicity validations came from GH3931, # GH3452 and GH2369. @@ -777,8 +778,8 @@ def indexer_between_time( indexer_at_time : Get index locations of values at particular time of day. DataFrame.between_time : Select values between particular times of day. """ - start_time = tools.to_time(start_time) - end_time = tools.to_time(end_time) + start_time = to_time(start_time) + end_time = to_time(end_time) time_micros = self._get_time_micros() start_micros = _time_to_micros(start_time) end_micros = _time_to_micros(end_time) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 664e49313507f..badf6502aa723 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -236,10 +236,6 @@ def insert(self, loc: int, item): # ExtensionIndex subclasses must override Index.insert raise AbstractMethodError(self) - def _concat_same_dtype(self, to_concat, name): - arr = type(self._data)._concat_same_type(to_concat) - return type(self)._simple_new(arr, name=name) - def _get_unique_index(self, dropna=False): if self.is_unique and not dropna: return self diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 06040166d0f9e..5020a25c88ff4 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -147,10 +147,6 @@ def _assert_safe_casting(cls, data, subarr): """ pass - def _concat_same_dtype(self, indexes, name): - result = type(indexes[0])(np.concatenate([x._values for x in indexes])) - return result.rename(name) - 
@property def is_all_dates(self) -> bool: """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b0b85f69396ba..14922000c9707 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -5,8 +5,9 @@ from pandas._libs import index as libindex from pandas._libs.lib import no_default -from pandas._libs.tslibs import Period, frequencies as libfrequencies, resolution -from pandas._libs.tslibs.parsing import parse_time_string +from pandas._libs.tslibs import Period +from pandas._libs.tslibs.frequencies import get_freq_group +from pandas._libs.tslibs.parsing import DateParseError, parse_time_string from pandas._typing import DtypeObj, Label from pandas.util._decorators import Appender, cache_readonly, doc @@ -42,9 +43,7 @@ from pandas.core.indexes.extension import inherit_names from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name -from pandas.core.tools.datetimes import DateParseError -from pandas.tseries import frequencies from pandas.tseries.offsets import DateOffset, Tick _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -278,15 +277,12 @@ def _maybe_convert_timedelta(self, other): of self.freq. Note IncompatibleFrequency subclasses ValueError. 
""" if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - offset = frequencies.to_offset(self.freq.rule_code) - if isinstance(offset, Tick): + if isinstance(self.freq, Tick): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta elif isinstance(other, DateOffset): - freqstr = other.rule_code - base = libfrequencies.get_base_alias(freqstr) - if base == self.freq.rule_code: + if other.base == self.freq.base: return other.n raise raise_on_incompatible(self, other) @@ -505,8 +501,8 @@ def get_loc(self, key, method=None, tolerance=None): # A string with invalid format raise KeyError(f"Cannot interpret '{key}' as period") from err - grp = resolution.Resolution.get_freq_group(reso) - freqn = resolution.get_freq_group(self.freq) + grp = get_freq_group(reso) + freqn = get_freq_group(self.freq) # _get_string_slice will handle cases where grp < freqn assert grp >= freqn @@ -577,13 +573,13 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime): if reso not in ["year", "month", "quarter", "day", "hour", "minute", "second"]: raise KeyError(reso) - grp = resolution.Resolution.get_freq_group(reso) + grp = get_freq_group(reso) iv = Period(parsed, freq=(grp, 1)) return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end")) def _validate_partial_date_slice(self, reso: str): - grp = resolution.Resolution.get_freq_group(reso) - freqn = resolution.get_freq_group(self.freq) + grp = get_freq_group(reso) + freqn = get_freq_group(self.freq) if not grp < freqn: # TODO: we used to also check for diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c34b8965ca36a..49a0f0fb7ae92 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -627,14 +627,18 @@ def join(self, other, how="left", level=None, return_indexers=False, sort=False) return super().join(other, how, level, return_indexers, sort) - def _concat_same_dtype(self, 
indexes, name): + def _concat(self, indexes, name): """ - Concatenates multiple RangeIndex instances. All members of "indexes" must - be of type RangeIndex; result will be RangeIndex if possible, Int64Index - otherwise. E.g.: + Overriding parent method for the case of all RangeIndex instances. + + When all members of "indexes" are of type RangeIndex: result will be + RangeIndex if possible, Int64Index otherwise. E.g.: indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) """ + if not all(isinstance(x, RangeIndex) for x in indexes): + return super()._concat(indexes, name) + start = step = next_ = None # Filter the empty indexes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3e2b5bdccd5d1..c052c6c9d7d1d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -251,7 +251,7 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, klass=type(self)) + return type(self)(values, placement=placement, ndim=ndim) def __repr__(self) -> str: # don't want to print out all of the items here diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0a9d6f2172ff8..6b8518d8a47a0 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1283,7 +1283,7 @@ def _maybe_null_out( def check_below_min_count( shape: Tuple[int, ...], mask: Optional[np.ndarray], min_count: int -): +) -> bool: """ Check for the `min_count` keyword. Returns True if below `min_count` (when missing value should be returned from the reduction). diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 1fb9398083884..515a0a5198d74 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -1,13 +1,10 @@ """ Boilerplate functions used in defining binary operations. 
""" -from collections import UserDict from functools import wraps from typing import Callable -import numpy as np - -from pandas._libs.lib import is_list_like, item_from_zerodim +from pandas._libs.lib import item_from_zerodim from pandas._typing import F from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries @@ -65,25 +62,6 @@ def new_method(self, other): other = item_from_zerodim(other) - if isinstance(self, (ABCSeries, ABCDataFrame)) and isinstance( - other, (ABCSeries, ABCDataFrame) - ): - # we dont require length matches - pass - elif is_list_like(other, allow_sets=False) and not isinstance( - other, (dict, UserDict) - ): - if len(other) != len(self): - if len(other) == 1 and not hasattr(other, "dtype"): - # i.e. unpack scalar list, but leave e.g. Categorical, - # for which the scalar behavior doesnt match the - # array behavior - other = other[0] - else: - raise ValueError( - "Lengths must match", self.shape, np.shape(other), type(other) - ) - return method(self, other) return new_method diff --git a/pandas/core/strings.py b/pandas/core/strings.py index bb62cd6b34722..b27ad744dbdba 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2975,7 +2975,7 @@ def encode(self, encoding, errors="strict"): _shared_docs[ "str_strip" ] = r""" - Remove leading and trailing characters. + Remove %(position)s characters. Strip whitespaces (including newlines) or a set of specified characters from each string in the Series/Index from %(side)s. 
@@ -3038,20 +3038,29 @@ def encode(self, encoding, errors="strict"): """ @Appender( - _shared_docs["str_strip"] % dict(side="left and right sides", method="strip") + _shared_docs["str_strip"] + % dict( + side="left and right sides", method="strip", position="leading and trailing" + ) ) @forbid_nonstring_types(["bytes"]) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="both") return self._wrap_result(result) - @Appender(_shared_docs["str_strip"] % dict(side="left side", method="lstrip")) + @Appender( + _shared_docs["str_strip"] + % dict(side="left side", method="lstrip", position="leading") + ) @forbid_nonstring_types(["bytes"]) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="left") return self._wrap_result(result) - @Appender(_shared_docs["str_strip"] % dict(side="right side", method="rstrip")) + @Appender( + _shared_docs["str_strip"] + % dict(side="right side", method="rstrip", position="trailing") + ) @forbid_nonstring_types(["bytes"]) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side="right") diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 829dd7f7b94c8..42bffa0374472 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,8 +1,9 @@ from collections import abc -from datetime import datetime, time +from datetime import datetime from functools import partial from itertools import islice -from typing import List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Optional, TypeVar, Union +import warnings import numpy as np @@ -28,28 +29,31 @@ is_numeric_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCDatetimeIndex, - ABCIndex, - ABCIndexClass, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import notna from pandas.arrays import DatetimeArray, IntegerArray from pandas.core import algorithms from 
pandas.core.algorithms import unique -from pandas.core.arrays.datetimes import tz_to_dtype +from pandas.core.arrays.datetimes import ( + maybe_convert_dtype, + objects_to_datetime64ns, + tz_to_dtype, +) +from pandas.core.indexes.base import Index +from pandas.core.indexes.datetimes import DatetimeIndex + +if TYPE_CHECKING: + from pandas import Series # noqa:F401 # --------------------------------------------------------------------- # types used in annotations -ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries] +ArrayConvertible = Union[list, tuple, ArrayLike, "Series"] Scalar = Union[int, float, str] DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime) DatetimeScalarOrArrayConvertible = Union[ - DatetimeScalar, list, tuple, ArrayLike, ABCSeries + DatetimeScalar, list, tuple, ArrayLike, "Series" ] @@ -156,7 +160,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): def _box_as_indexlike( dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None -) -> Union[ABCIndex, ABCDatetimeIndex]: +) -> Index: """ Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead @@ -176,7 +180,6 @@ def _box_as_indexlike( - DatetimeIndex if convertible to sole datetime64 type - general Index otherwise """ - from pandas import DatetimeIndex, Index if is_datetime64_dtype(dt_array): tz = "utc" if utc else None @@ -186,9 +189,9 @@ def _box_as_indexlike( def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, - cache_array: ABCSeries, + cache_array: "Series", name: Optional[str] = None, -) -> ABCIndexClass: +) -> "Index": """ Convert array of dates with a cache and wrap the result in an Index. 
@@ -235,7 +238,6 @@ def _return_parsed_timezone_results(result, timezones, tz, name): if tz is not None: # Convert to the same tz tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results]) - from pandas import Index return Index(tz_results, name=name) @@ -281,11 +283,6 @@ def _convert_listlike_datetimes( ------- Index-like of parsed dates """ - from pandas import DatetimeIndex - from pandas.core.arrays.datetimes import ( - maybe_convert_dtype, - objects_to_datetime64ns, - ) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") @@ -332,7 +329,6 @@ def _convert_listlike_datetimes( ) if errors == "ignore": - from pandas import Index result = Index(result, name=name) else: @@ -366,8 +362,6 @@ def _convert_listlike_datetimes( result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(result, name=name) elif errors == "ignore": - from pandas import Index - result = Index(arg, name=name) return result raise @@ -539,9 +533,7 @@ def _adjust_to_origin(arg, origin, unit): offset = offset // tslibs.Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition - if is_list_like(arg) and not isinstance( - arg, (ABCSeries, ABCIndexClass, np.ndarray) - ): + if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): arg = np.asarray(arg) arg = arg + offset return arg @@ -749,7 +741,7 @@ def to_datetime( result = arg._constructor(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)): result = _assemble_from_unit_mappings(arg, errors, tz) - elif isinstance(arg, ABCIndexClass): + elif isinstance(arg, Index): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, name=arg.name) @@ -944,131 +936,14 @@ def calc_with_mask(carg, mask): return None -# Fixed time formats for time parsing -_time_formats = [ - "%H:%M", - "%H%M", - "%I:%M%p", - "%I%M%p", - "%H:%M:%S", - 
"%H%M%S", - "%I:%M:%S%p", - "%I%M%S%p", -] - - -def _guess_time_format_for_array(arr): - # Try to guess the format based on the first non-NaN element - non_nan_elements = notna(arr).nonzero()[0] - if len(non_nan_elements): - element = arr[non_nan_elements[0]] - for time_format in _time_formats: - try: - datetime.strptime(element, time_format) - return time_format - except ValueError: - pass - - return None - - def to_time(arg, format=None, infer_time_format=False, errors="raise"): - """ - Parse time strings to time objects using fixed strptime formats ("%H:%M", - "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", - "%I%M%S%p") - - Use infer_time_format if all the strings are in the same format to speed - up conversion. - - Parameters - ---------- - arg : string in time format, datetime.time, list, tuple, 1-d array, Series - format : str, default None - Format used to convert arg into a time object. If None, fixed formats - are used. - infer_time_format: bool, default False - Infer the time format based on the first non-NaN element. If all - strings are in the same format, this will speed up conversion. 
- errors : {'ignore', 'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception - - If 'coerce', then invalid parsing will be set as None - - If 'ignore', then invalid parsing will return the input - - Returns - ------- - datetime.time - """ - - def _convert_listlike(arg, format): - - if isinstance(arg, (list, tuple)): - arg = np.array(arg, dtype="O") - - elif getattr(arg, "ndim", 1) > 1: - raise TypeError( - "arg must be a string, datetime, list, tuple, 1-d array, or Series" - ) - - arg = ensure_object(arg) - - if infer_time_format and format is None: - format = _guess_time_format_for_array(arg) - - times: List[Optional[time]] = [] - if format is not None: - for element in arg: - try: - times.append(datetime.strptime(element, format).time()) - except (ValueError, TypeError) as err: - if errors == "raise": - msg = ( - f"Cannot convert {element} to a time with given " - f"format {format}" - ) - raise ValueError(msg) from err - elif errors == "ignore": - return arg - else: - times.append(None) - else: - formats = _time_formats[:] - format_found = False - for element in arg: - time_object = None - for time_format in formats: - try: - time_object = datetime.strptime(element, time_format).time() - if not format_found: - # Put the found format in front - fmt = formats.pop(formats.index(time_format)) - formats.insert(0, fmt) - format_found = True - break - except (ValueError, TypeError): - continue - - if time_object is not None: - times.append(time_object) - elif errors == "raise": - raise ValueError(f"Cannot convert arg {arg} to a time") - elif errors == "ignore": - return arg - else: - times.append(None) - - return times - - if arg is None: - return arg - elif isinstance(arg, time): - return arg - elif isinstance(arg, ABCSeries): - values = _convert_listlike(arg._values, format) - return arg._constructor(values, index=arg.index, name=arg.name) - elif isinstance(arg, ABCIndexClass): - return _convert_listlike(arg, format) - elif 
is_list_like(arg): - return _convert_listlike(arg, format) + # GH#34145 + warnings.warn( + "`to_time` has been moved, should be imported from pandas.core.tools.times. " + "This alias will be removed in a future version.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.tools.times import to_time - return _convert_listlike(np.array([arg]), format)[0] + return to_time(arg, format, infer_time_format, errors) diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py new file mode 100644 index 0000000000000..3bac4cf0edb63 --- /dev/null +++ b/pandas/core/tools/times.py @@ -0,0 +1,141 @@ +from datetime import datetime, time +from typing import List, Optional + +import numpy as np + +from pandas._libs.lib import is_list_like + +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.missing import notna + +from pandas.core.indexes.base import Index + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + """ + Parse time strings to time objects using fixed strptime formats ("%H:%M", + "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p") + + Use infer_time_format if all the strings are in the same format to speed + up conversion. + + Parameters + ---------- + arg : string in time format, datetime.time, list, tuple, 1-d array, Series + format : str, default None + Format used to convert arg into a time object. If None, fixed formats + are used. + infer_time_format: bool, default False + Infer the time format based on the first non-NaN element. If all + strings are in the same format, this will speed up conversion. 
+ errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as None + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + datetime.time + """ + + def _convert_listlike(arg, format): + + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + arg = np.asarray(arg, dtype="O") + + if infer_time_format and format is None: + format = _guess_time_format_for_array(arg) + + times: List[Optional[time]] = [] + if format is not None: + for element in arg: + try: + times.append(datetime.strptime(element, format).time()) + except (ValueError, TypeError) as err: + if errors == "raise": + msg = ( + f"Cannot convert {element} to a time with given " + f"format {format}" + ) + raise ValueError(msg) from err + elif errors == "ignore": + return arg + else: + times.append(None) + else: + formats = _time_formats[:] + format_found = False + for element in arg: + time_object = None + for time_format in formats: + try: + time_object = datetime.strptime(element, time_format).time() + if not format_found: + # Put the found format in front + fmt = formats.pop(formats.index(time_format)) + formats.insert(0, fmt) + format_found = True + break + except (ValueError, TypeError): + continue + + if time_object is not None: + times.append(time_object) + elif errors == "raise": + raise ValueError(f"Cannot convert arg {arg} to a time") + elif errors == "ignore": + return arg + else: + times.append(None) + + return times + + if arg is None: + return arg + elif isinstance(arg, time): + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, format) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, Index): + return _convert_listlike(arg, format) + elif 
is_list_like(arg): + return _convert_listlike(arg, format) + + return _convert_listlike(np.array([arg]), format)[0] + + +# Fixed time formats for time parsing +_time_formats = [ + "%H:%M", + "%H%M", + "%I:%M%p", + "%I%M%p", + "%H:%M:%S", + "%H%M%S", + "%I:%M:%S%p", + "%I%M%S%p", +] + + +def _guess_time_format_for_array(arr): + # Try to guess the format based on the first non-NaN element + non_nan_elements = notna(arr).nonzero()[0] + if len(non_nan_elements): + element = arr[non_nan_elements[0]] + for time_format in _time_formats: + try: + datetime.strptime(element, time_format) + return time_format + except ValueError: + pass + + return None diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 660fca61fd21c..c615e18af68e6 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -247,8 +247,22 @@ def __repr__(self) -> str: return f"{self._window_type} [{attrs}]" def __iter__(self): - url = "https://github.com/pandas-dev/pandas/issues/11704" - raise NotImplementedError(f"See issue #11704 {url}") + window = self._get_window(win_type=None) + blocks, obj = self._create_blocks() + index = self._get_window_indexer(window=window) + + start, end = index.get_window_bounds( + num_values=len(obj), + min_periods=self.min_periods, + center=self.center, + closed=self.closed, + ) + # From get_window_bounds, those two should be equal in length of array + assert len(start) == len(end) + + for s, e in zip(start, end): + result = obj.iloc[slice(s, e)] + yield result def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: """Convert input to numpy arrays for Cython routines""" diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 68eeda431b9b4..54e4f6079827e 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -191,11 +191,12 @@ def _get_footer(self) -> str: return str(footer) def _get_formatted_values(self) -> List[str]: return format_array(
self.categorical._internal_get_values(), None, float_format=None, na_rep=self.na_rep, + quoting=2 ) def to_string(self) -> str: @@ -1086,6 +1088,7 @@ def format_array( justify: str = "right", decimal: str = ".", leading_space: Optional[bool] = None, + quoting=None ) -> List[str]: """ Format an array for printing. @@ -1148,11 +1151,11 @@ def format_array( justify=justify, decimal=decimal, leading_space=leading_space, + quoting=quoting ) - return fmt_obj.get_result() - +import csv class GenericArrayFormatter: def __init__( self, @@ -1237,8 +1240,6 @@ def _format(x): fmt_values.append(f" {_format(v)}") elif is_float_type[i]: fmt_values.append(float_format(v)) - elif isinstance(v, str): - fmt_values.append(f"'{v}'") else: if leading_space is False: # False specifically, so that the default is diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index c6d159d3d016b..1358ddf7005a3 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -11,8 +11,7 @@ import numpy as np from pandas._libs import lib, tslibs -from pandas._libs.tslibs import resolution -from pandas._libs.tslibs.frequencies import FreqGroup, get_freq +from pandas._libs.tslibs.frequencies import FreqGroup, get_freq_code, get_freq_group from pandas.core.dtypes.common import ( is_datetime64_ns_dtype, @@ -550,7 +549,7 @@ def _daily_finder(vmin, vmax, freq): elif freq == FreqGroup.FR_DAY: periodsperyear = 365 periodspermonth = 28 - elif resolution.get_freq_group(freq) == FreqGroup.FR_WK: + elif get_freq_group(freq) == FreqGroup.FR_WK: periodsperyear = 52 periodspermonth = 3 else: # pragma: no cover @@ -887,8 +886,8 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): if isinstance(freq, str): - freq = get_freq(freq) - fgroup = resolution.get_freq_group(freq) + freq = get_freq_code(freq)[0] + fgroup = get_freq_group(freq) if fgroup == FreqGroup.FR_ANN: return _annual_finder @@ -932,7 +931,7 @@ def __init__( 
plot_obj=None, ): if isinstance(freq, str): - freq = get_freq(freq) + freq = get_freq_code(freq)[0] self.freq = freq self.base = base (self.quarter, self.month, self.day) = (quarter, month, day) @@ -1011,7 +1010,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): if isinstance(freq, str): - freq = get_freq(freq) + freq = get_freq_code(freq)[0] self.format = None self.freq = freq self.locs = [] diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 3abce690cbe6b..f6e120e2f91e7 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -7,7 +7,7 @@ from pandas._libs.tslibs.frequencies import ( FreqGroup, get_base_alias, - get_freq, + get_freq_code, is_subperiod, is_superperiod, ) @@ -209,9 +209,9 @@ def _use_dynamic_x(ax, data): if freq is None: return False - # hack this for 0.10.1, creating more technical debt...sigh + # FIXME: hack this for 0.10.1, creating more technical debt...sigh if isinstance(data.index, ABCDatetimeIndex): - base = get_freq(freq) + base = get_freq_code(freq)[0] x = data.index if base <= FreqGroup.FR_DAY: return x[:1].is_normalized diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 0fb3cb1025639..8c480faa4ee81 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2206,7 +2206,7 @@ def test_sub_dti_dti(self): # different length raises ValueError dti1 = date_range("20130101", periods=3) dti2 = date_range("20130101", periods=4) - msg = "Lengths must match" + msg = "cannot add indices of unequal length" with pytest.raises(ValueError, match=msg): dti1 - dti2 diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index b6456a2141c06..a37339c66bf6e 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ 
b/pandas/tests/arithmetic/test_numeric.py @@ -649,7 +649,7 @@ def test_mul_datelike_raises(self, numeric_idx): def test_mul_size_mismatch_raises(self, numeric_idx): idx = numeric_idx - msg = "Lengths must match" + msg = "operands could not be broadcast together" with pytest.raises(ValueError, match=msg): idx * idx[0:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 180364420b021..65e3c6a07d4f3 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -451,7 +451,7 @@ def test_addition_ops(self): tm.assert_index_equal(result, expected) # unequal length - msg = "Lengths must match" + msg = "cannot add indices of unequal length" with pytest.raises(ValueError, match=msg): tdi + dti[0:1] with pytest.raises(ValueError, match=msg): @@ -1723,7 +1723,7 @@ def test_tdarr_div_length_mismatch(self, box_with_array): mismatched = [1, 2, 3, 4] rng = tm.box_expected(rng, box_with_array) - msg = "Lengths must match|Unable to coerce to Series" + msg = "Cannot divide vectors|Unable to coerce to Series" for obj in [mismatched, mismatched[:2]]: # one shorter, one longer for other in [obj, np.array(obj), pd.Index(obj)]: @@ -1905,14 +1905,12 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype="int64")) idx = tm.box_expected(idx, box_with_array) - msg = "|".join( - [ - "Lengths must match", # <- EA, Index, Series - "cannot use operands with types dtype", # <- DataFrame - "Unable to coerce to Series", # <- Series - ] + msg = ( + "cannot use operands with types dtype|" + "Cannot multiply with unequal lengths|" + "Unable to coerce to Series" ) - with pytest.raises((ValueError, TypeError), match=msg): + with pytest.raises(TypeError, match=msg): # length check before dtype check idx * idx[:3] with 
pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index a61746d46daeb..bf4775bbd7b32 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -46,7 +46,7 @@ def test_empty_ok(self, all_logical_operators): def test_logical_length_mismatch_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") - msg = "Lengths must match" + msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): getattr(a, op_name)([True, False]) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 3164b54653efd..f7e19a911ff94 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -14,10 +14,7 @@ class TestCategoricalReprWithFactor(TestCategorical): def test_print(self): - expected = [ - "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", - "Categories (3, object): ['a' < 'b' < 'c']", - ] + expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] expected = "\n".join(expected) actual = repr(self.factor) assert actual == expected @@ -27,9 +24,9 @@ class TestCategoricalRepr: def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) expected = [ - "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", + "[a, b, c, a, b, ..., b, c, a, b, c]", "Length: 600", - "Categories (3, object): ['a', 'b', 'c']", + "Categories (3, object): [a, b, c]", ] expected = "\n".join(expected) @@ -39,13 +36,13 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) - expected = "[], Categories (3, object): ['a', 'b', 'c']" + expected = "[], Categories (3, object): [a, b, c]" actual = repr(factor) assert actual == expected assert expected == actual factor = Categorical([], ["a", 
"b", "c"], ordered=True) - expected = "[], Categories (3, object): ['a' < 'b' < 'c']" + expected = "[], Categories (3, object): [a < b < c]" actual = repr(factor) assert expected == actual @@ -67,17 +64,17 @@ def test_print_none_width(self): def test_unicode_print(self): c = Categorical(["aaaaa", "bb", "cccc"] * 20) expected = """\ -['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] +[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] Length: 60 -Categories (3, object): ['aaaaa', 'bb', 'cccc']""" +Categories (3, object): [aaaaa, bb, cccc]""" assert repr(c) == expected c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) expected = """\ -['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] +[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 -Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa assert repr(c) == expected @@ -86,9 +83,9 @@ def test_unicode_print(self): with option_context("display.unicode.east_asian_width", True): c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) - expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] + expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 -Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa +Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa assert repr(c) == expected @@ -530,4 +527,5 @@ def test_categorical_index_repr_timedelta_ordered(self): def test_categorical_str_repr(self): result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" + breakpoint() assert result == expected diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index b7fdd8581101b..18f1dac3c13b2 100644 --- 
a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -232,9 +232,8 @@ def test_error(self, data, all_arithmetic_operators): result = opa(pd.DataFrame({"A": s})) assert result is NotImplemented - # msg = r"can only perform ops with 1-d structures" - msg = "Lengths must match" - with pytest.raises(ValueError, match=msg): + msg = r"can only perform ops with 1-d structures" + with pytest.raises(NotImplementedError, match=msg): opa(np.arange(len(s)).reshape(-1, len(s))) @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index f1e5050fa8a2e..8450253f853c3 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -983,6 +983,25 @@ def test_sum(self): out = SparseArray(data, fill_value=np.nan).sum() assert out == 40.0 + @pytest.mark.parametrize( + "arr", + [ + np.array([0, 1, np.nan, 1]), + np.array([0, 1, 1]), + np.array([True, True, False]), + ], + ) + @pytest.mark.parametrize("fill_value", [0, 1, np.nan, True, False]) + @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)]) + def test_sum_min_count(self, arr, fill_value, min_count, expected): + # https://github.com/pandas-dev/pandas/issues/25777 + sparray = SparseArray(arr, fill_value=fill_value) + result = sparray.sum(min_count=min_count) + if np.isnan(expected): + assert np.isnan(result) + else: + assert result == expected + def test_numpy_sum(self): data = np.arange(10).astype(float) out = np.sum(SparseArray(data)) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 2e2f8f666f4e9..6f9a1a5be4c43 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -11,10 +11,10 @@ def test_repr(): df = pd.DataFrame({"A": pd.array(["a", pd.NA, "b"], dtype="string")}) - expected 
= " A\n0 'a'\n1 \n2 'b'" + expected = " A\n0 a\n1 \n2 b" assert repr(df) == expected - expected = "0 'a'\n1 \n2 'b'\nName: A, dtype: string" + expected = "0 a\n1 \n2 b\nName: A, dtype: string" assert repr(df.A) == expected expected = "\n['a', , 'b']\nLength: 3, dtype: string" @@ -98,7 +98,7 @@ def test_add_2d(): a + b s = pd.Series(a) - with pytest.raises(ValueError, match="Lengths must match"): + with pytest.raises(ValueError, match="3 != 1"): s + b diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 61d78034f0747..d0bf5bb41bb2c 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -168,7 +168,8 @@ def test_concat_same_type(self): arr = self.array_cls(idx) result = arr._concat_same_type([arr[:-1], arr[1:], arr]) - expected = idx._concat_same_dtype([idx[:-1], idx[1:], idx], None) + arr2 = arr.astype(object) + expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) tm.assert_index_equal(self.index_cls(result), expected) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index b9fe2fc190d8d..6d786d9580542 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -66,9 +66,9 @@ def test_repr(self, float_frame): DataFrame().info(buf=buf) df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"]) - assert "\t" in repr(df) - assert "\r" in repr(df) - assert "a\n" in repr(df) + assert "\t" not in repr(df) + assert "\r" not in repr(df) + assert "a\n" not in repr(df) def test_repr_dimensions(self): df = DataFrame([[1, 2], [3, 4]]) @@ -123,7 +123,7 @@ def test_repr_unicode(self): df = DataFrame({"A": [uval, uval]}) result = repr(df) - ex_top = " A" + ex_top = " A" assert result.split("\n")[0].rstrip() == ex_top df = DataFrame({"A": [uval, uval]}) @@ -173,10 +173,10 @@ def test_repr_column_name_unicode_truncation_bug(self): def test_latex_repr(self): result = 
r"""\begin{tabular}{llll} \toprule -{} & 0 & 1 & 2 \\ +{} & 0 & 1 & 2 \\ \midrule -0 & '$\alpha$' & 'b' & 'c' \\ -1 & 1 & 2 & 3 \\ +0 & $\alpha$ & b & c \\ +1 & 1 & 2 & 3 \\ \bottomrule \end{tabular} """ diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 9765c77c6b60c..8a84090ea6e94 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -136,7 +136,7 @@ def test_append(self): tm.assert_index_equal(result, expected, exact=True) def test_append_to_another(self): - # hits Index._concat_same_dtype + # hits Index._concat fst = Index(["a", "b"]) snd = CategoricalIndex(["d", "e"]) result = fst.append(snd) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index fac9eb1c34dbf..997887cc18d61 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -579,7 +579,7 @@ def test_comparison(self): with pytest.raises(TypeError, match=msg): self.index > np.arange(2) - msg = "Lengths must match" + msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 9f235dcdbb295..466b491eb7a2c 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2423,6 +2423,16 @@ def test_index_repr_bool_nan(self): out2 = "Index([True, False, nan], dtype='object')" assert out2 == exp2 + @pytest.mark.filterwarnings("ignore:elementwise comparison failed:FutureWarning") + def test_index_with_tuple_bool(self): + # GH34123 + # TODO: remove tupleize_cols=False once correct behaviour is restored + # TODO: also this op right now produces FutureWarning from numpy + idx = Index([("a", "b"), ("b", "c"), ("c", "a")], tupleize_cols=False) + result = idx == ("c", "a",) + expected = np.array([False, 
False, True]) + tm.assert_numpy_array_equal(result, expected) + class TestIndexUtils: @pytest.mark.parametrize( diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 937570d89fb77..bfd347fd122c3 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -7,7 +7,8 @@ from pandas import Series import pandas._testing as tm -from pandas.core.tools.datetimes import to_time +from pandas.core.tools.datetimes import to_time as to_time_alias +from pandas.core.tools.times import to_time class TestToTime: @@ -57,3 +58,12 @@ def test_parsers_time(self): res = to_time(np.array(arg)) assert isinstance(res, list) assert res == expected_arr + + +def test_to_time_alias(): + expected = time(14, 15) + + with tm.assert_produces_warning(FutureWarning): + result = to_time_alias(expected) + + assert result == expected diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index be07f829dbae8..1c51ad0c45238 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -1,12 +1,14 @@ import pytest -from pandas._libs.tslibs import frequencies as libfrequencies, resolution from pandas._libs.tslibs.frequencies import ( FreqGroup, + _attrname_to_abbrevs, _period_code_map, - get_freq, get_freq_code, + get_freq_group, + get_to_timestamp_base, ) +from pandas._libs.tslibs.resolution import Resolution as _reso import pandas.tseries.offsets as offsets @@ -31,12 +33,12 @@ def period_code_item(request): ], ) def test_freq_code(freqstr, expected): - assert get_freq(freqstr) == expected + assert get_freq_code(freqstr)[0] == expected def test_freq_code_match(period_code_item): freqstr, code = period_code_item - assert get_freq(freqstr) == code + assert get_freq_code(freqstr)[0] == code @pytest.mark.parametrize( @@ -65,14 +67,14 @@ def test_freq_code_match(period_code_item): ], ) def test_freq_group(freqstr, 
expected): - assert resolution.get_freq_group(freqstr) == expected + assert get_freq_group(freqstr) == expected def test_freq_group_match(period_code_item): freqstr, code = period_code_item - str_group = resolution.get_freq_group(freqstr) - code_group = resolution.get_freq_group(code) + str_group = get_freq_group(freqstr) + code_group = get_freq_group(code) assert str_group == code_group == code // 1000 * 1000 @@ -82,14 +84,11 @@ def test_freq_group_match(period_code_item): [("D", "D"), ("W", "D"), ("M", "D"), ("S", "S"), ("T", "S"), ("H", "S")], ) def test_get_to_timestamp_base(freqstr, exp_freqstr): - tsb = libfrequencies.get_to_timestamp_base + tsb = get_to_timestamp_base assert tsb(get_freq_code(freqstr)[0]) == get_freq_code(exp_freqstr)[0] -_reso = resolution.Resolution - - @pytest.mark.parametrize( "freqstr,expected", [ @@ -111,13 +110,13 @@ def test_get_str_from_freq(freqstr, expected): @pytest.mark.parametrize("freq", ["A", "Q", "M", "D", "H", "T", "S", "L", "U", "N"]) def test_get_freq_roundtrip(freq): - result = _reso.get_freq(_reso.get_str_from_freq(freq)) + result = _attrname_to_abbrevs[_reso.get_str_from_freq(freq)] assert freq == result @pytest.mark.parametrize("freq", ["D", "H", "T", "S", "L", "U"]) def test_get_freq_roundtrip2(freq): - result = _reso.get_freq(_reso.get_str(_reso.get_reso_from_freq(freq))) + result = _attrname_to_abbrevs[_reso.get_str(_reso.get_reso_from_freq(freq))] assert freq == result @@ -156,31 +155,31 @@ def test_cat(args): "freq_input,expected", [ # Frequency string. - ("A", (get_freq("A"), 1)), - ("3D", (get_freq("D"), 3)), - ("-2M", (get_freq("M"), -2)), + ("A", (get_freq_code("A")[0], 1)), + ("3D", (get_freq_code("D")[0], 3)), + ("-2M", (get_freq_code("M")[0], -2)), # Tuple. 
- (("D", 1), (get_freq("D"), 1)), - (("A", 3), (get_freq("A"), 3)), - (("M", -2), (get_freq("M"), -2)), + (("D", 1), (get_freq_code("D")[0], 1)), + (("A", 3), (get_freq_code("A")[0], 3)), + (("M", -2), (get_freq_code("M")[0], -2)), ((5, "T"), (FreqGroup.FR_MIN, 5)), # Numeric Tuple. ((1000, 1), (1000, 1)), # Offsets. - (offsets.Day(), (get_freq("D"), 1)), - (offsets.Day(3), (get_freq("D"), 3)), - (offsets.Day(-2), (get_freq("D"), -2)), - (offsets.MonthEnd(), (get_freq("M"), 1)), - (offsets.MonthEnd(3), (get_freq("M"), 3)), - (offsets.MonthEnd(-2), (get_freq("M"), -2)), - (offsets.Week(), (get_freq("W"), 1)), - (offsets.Week(3), (get_freq("W"), 3)), - (offsets.Week(-2), (get_freq("W"), -2)), + (offsets.Day(), (get_freq_code("D")[0], 1)), + (offsets.Day(3), (get_freq_code("D")[0], 3)), + (offsets.Day(-2), (get_freq_code("D")[0], -2)), + (offsets.MonthEnd(), (get_freq_code("M")[0], 1)), + (offsets.MonthEnd(3), (get_freq_code("M")[0], 3)), + (offsets.MonthEnd(-2), (get_freq_code("M")[0], -2)), + (offsets.Week(), (get_freq_code("W")[0], 1)), + (offsets.Week(3), (get_freq_code("W")[0], 3)), + (offsets.Week(-2), (get_freq_code("W")[0], -2)), (offsets.Hour(), (FreqGroup.FR_HR, 1)), # Monday is weekday=0. 
- (offsets.Week(weekday=1), (get_freq("W-TUE"), 1)), - (offsets.Week(3, weekday=0), (get_freq("W-MON"), 3)), - (offsets.Week(-2, weekday=4), (get_freq("W-FRI"), -2)), + (offsets.Week(weekday=1), (get_freq_code("W-TUE")[0], 1)), + (offsets.Week(3, weekday=0), (get_freq_code("W-MON")[0], 3)), + (offsets.Week(-2, weekday=4), (get_freq_code("W-FRI")[0], -2)), ], ) def test_get_freq_code(freq_input, expected): diff --git a/pandas/tests/tslibs/test_period_asfreq.py b/pandas/tests/tslibs/test_period_asfreq.py index 5497cb65c5373..7205c3cc676cf 100644 --- a/pandas/tests/tslibs/test_period_asfreq.py +++ b/pandas/tests/tslibs/test_period_asfreq.py @@ -1,6 +1,6 @@ import pytest -from pandas._libs.tslibs.frequencies import get_freq +from pandas._libs.tslibs.frequencies import get_freq_code from pandas._libs.tslibs.period import period_asfreq, period_ordinal @@ -31,7 +31,10 @@ ], ) def test_intra_day_conversion_factors(freq1, freq2, expected): - assert period_asfreq(1, get_freq(freq1), get_freq(freq2), False) == expected + assert ( + period_asfreq(1, get_freq_code(freq1)[0], get_freq_code(freq2)[0], False) + == expected + ) @pytest.mark.parametrize( @@ -39,7 +42,7 @@ def test_intra_day_conversion_factors(freq1, freq2, expected): ) def test_period_ordinal_start_values(freq, expected): # information for Jan. 1, 1970. 
- assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq(freq)) == expected + assert period_ordinal(1970, 1, 1, 0, 0, 0, 0, 0, get_freq_code(freq)[0]) == expected @pytest.mark.parametrize( @@ -52,7 +55,7 @@ def test_period_ordinal_start_values(freq, expected): ], ) def test_period_ordinal_week(dt, expected): - args = dt + (get_freq("W"),) + args = dt + (get_freq_code("W")[0],) assert period_ordinal(*args) == expected @@ -74,5 +77,5 @@ def test_period_ordinal_week(dt, expected): ], ) def test_period_ordinal_business_day(day, expected): - args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq("B")) + args = (2013, 10, day, 0, 0, 0, 0, 0, get_freq_code("B")[0]) assert period_ordinal(*args) == expected diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 7846720c6db25..337a06b91e443 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -185,10 +185,10 @@ def test_series_equal_categorical_values_mismatch(check_less_precise): Series values are different \\(66\\.66667 %\\) \\[index\\]: \\[0, 1, 2\\] -\\[left\\]: \\['a', 'b', 'c'\\] -Categories \\(3, object\\): \\['a', 'b', 'c'\\] -\\[right\\]: \\['a', 'c', 'b'\\] -Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" +\\[left\\]: \\[a, b, c\\] +Categories \\(3, object\\): \\[a, b, c\\] +\\[right\\]: \\[a, c, b\\] +Categories \\(3, object\\): \\[a, b, c\\]""" s1 = Series(Categorical(["a", "b", "c"])) s2 = Series(Categorical(["a", "c", "b"])) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index aaa7e9a34fadf..b57467385d371 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -88,15 +88,6 @@ def test_missing_minp_zero(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) -def test_iter_raises(klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration 
over a Window - obj = klass([1, 2, 3, 4]) - with pytest.raises(NotImplementedError): - iter(obj.expanding(2)) - - def test_expanding_axis(axis_frame): # see gh-23372. df = DataFrame(np.ones((10, 20))) @@ -131,3 +122,91 @@ def test_expanding_count_default_min_periods_with_null_values(constructor): result = constructor(values).expanding().count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2), + (DataFrame(), [({}, [])], 1), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 2, + ), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 1, + ), + ], +) +def test_iter_expanding_dataframe(df, expected, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, 
actual) in zip(expected, df.expanding(min_periods)): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,min_periods", + [ + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 3), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 2), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], 1), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2), + (Series([np.nan, 2]), [([np.nan], [0]), ([np.nan, 2], [0, 1])], 2), + (Series([], dtype="int64"), [], 2), + ], +) +def test_iter_expanding_series(ser, expected, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip(expected, ser.expanding(min_periods)): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index a7582a86c0848..f9b0e6856337b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -7,7 +7,7 @@ import pandas.util._test_decorators as td import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, date_range import pandas._testing as tm from pandas.core.window import Rolling @@ -310,18 +310,6 @@ def test_multi_index_names(): assert result.index.names == [None, "1", "2"] -@pytest.mark.parametrize("klass", [pd.Series, pd.DataFrame]) -def test_iter_raises(klass): - # https://github.com/pandas-dev/pandas/issues/11704 - # Iteration over a Window - obj = klass([1, 2, 3, 4]) - - msg = "See issue #11704 https://github.com/pandas-dev/pandas/issues/11704" - - with pytest.raises(NotImplementedError, match=msg): - iter(obj.rolling(2)) - - def test_rolling_axis_sum(axis_frame): # see gh-23372. 
df = DataFrame(np.ones((10, 20))) @@ -470,3 +458,208 @@ def test_rolling_count_default_min_periods_with_null_values(constructor): result = constructor(values).rolling(3).count() expected = constructor(expected_counts) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "df,expected,window,min_periods", + [ + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + 3, + None, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + 2, + 3, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 1, + ), + ( + DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + 1, + 2, + ), + (DataFrame({"A": [1], "B": [4]}), [], 2, None), + (DataFrame({"A": [1], "B": [4]}), [], 2, 1), + (DataFrame(), [({}, [])], 2, None), + ( + DataFrame({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}), + [ + ({"A": [1.0], "B": [np.nan]}, [0]), + ({"A": [1, np.nan], "B": [np.nan, 5]}, [0, 1]), + ({"A": [1, np.nan, 3], "B": [np.nan, 5, 6]}, [0, 1, 2]), + ], + 3, + 2, + ), + ], +) +def test_iter_rolling_dataframe(df, expected, window, min_periods): + # GH 11704 + expected = [DataFrame(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, df.rolling(window, min_periods=min_periods) + ): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,window", + [ + ( + [ + ({"A": [1], "B": [4]}, [0]), + 
({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [2, 3], "B": [5, 6]}, [1, 2]), + ], + "2D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [1, 2], "B": [4, 5]}, [0, 1]), + ({"A": [1, 2, 3], "B": [4, 5, 6]}, [0, 1, 2]), + ], + "3D", + ), + ( + [ + ({"A": [1], "B": [4]}, [0]), + ({"A": [2], "B": [5]}, [1]), + ({"A": [3], "B": [6]}, [2]), + ], + "1D", + ), + ], +) +def test_iter_rolling_on_dataframe(expected, window): + # GH 11704 + df = DataFrame( + { + "A": [1, 2, 3, 4, 5], + "B": [4, 5, 6, 7, 8], + "C": date_range(start="2016-01-01", periods=5, freq="D"), + } + ) + + expected = [DataFrame(values, index=index) for (values, index) in expected] + for (expected, actual) in zip(expected, df.rolling(window, on="C")): + tm.assert_frame_equal(actual, expected) + + +@pytest.mark.parametrize( + "ser,expected,window, min_periods", + [ + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + None, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([1, 2, 3], [0, 1, 2])], + 3, + 1, + ), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), + (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 3), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), + (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 2), + (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), + (Series([], dtype="int64"), [], 2, 1), + ], +) +def test_iter_rolling_series(ser, expected, window, min_periods): + # GH 11704 + expected = [Series(values, index=index) for (values, index) in expected] + + for (expected, actual) in zip( + expected, ser.rolling(window, min_periods=min_periods) + ): + tm.assert_series_equal(actual, expected) + + +@pytest.mark.parametrize( + "expected,expected_index,window", + [ + ( + [[0], [1], [2], [3], [4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-02", periods=1, freq="D"), + date_range("2020-01-03", periods=1, freq="D"), + 
date_range("2020-01-04", periods=1, freq="D"), + date_range("2020-01-05", periods=1, freq="D"), + ], + "1D", + ), + ( + [[0], [0, 1], [1, 2], [2, 3], [3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-02", periods=2, freq="D"), + date_range("2020-01-03", periods=2, freq="D"), + date_range("2020-01-04", periods=2, freq="D"), + ], + "2D", + ), + ( + [[0], [0, 1], [0, 1, 2], [1, 2, 3], [2, 3, 4]], + [ + date_range("2020-01-01", periods=1, freq="D"), + date_range("2020-01-01", periods=2, freq="D"), + date_range("2020-01-01", periods=3, freq="D"), + date_range("2020-01-02", periods=3, freq="D"), + date_range("2020-01-03", periods=3, freq="D"), + ], + "3D", + ), + ], +) +def test_iter_rolling_datetime(expected, expected_index, window): + # GH 11704 + ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) + + expected = [ + Series(values, index=idx) for (values, idx) in zip(expected, expected_index) + ] + + for (expected, actual) in zip(expected, ser.rolling(window)): + tm.assert_series_equal(actual, expected) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 6213ea198f2cb..f907c5570bd18 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -11,8 +11,7 @@ from pandas._libs.tslibs.fields import build_field_sarray import pandas._libs.tslibs.frequencies as libfreqs from pandas._libs.tslibs.offsets import _offset_to_period_map -import pandas._libs.tslibs.resolution as libresolution -from pandas._libs.tslibs.resolution import Resolution +from pandas._libs.tslibs.resolution import Resolution, month_position_check from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.tzconversion import tz_convert from pandas.util._decorators import cache_readonly @@ -159,13 +158,13 @@ def to_offset(freq) -> Optional[DateOffset]: stride_sign = -1 if stride.startswith("-") else 1 if not stride: stride = 1 - if 
prefix in Resolution._reso_str_bump_map.keys(): + if prefix in Resolution.reso_str_bump_map: stride, name = Resolution.get_stride_from_decimal( float(stride), prefix ) stride = int(stride) offset = _get_offset(name) - offset = offset * int(np.fabs(stride) * stride_sign) + offset = offset * int(np.fabs(stride) * stride_sign) # type: ignore if delta is None: delta = offset else: @@ -403,7 +402,7 @@ def rep_stamp(self): return Timestamp(self.i8values[0]) def month_position_check(self): - return libresolution.month_position_check(self.fields, self.index.dayofweek) + return month_position_check(self.fields, self.index.dayofweek) @cache_readonly def mdiffs(self): diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6a2896309658e..88f77a8d7f054 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -296,7 +296,7 @@ def is_on_offset(self, dt): return True -class SingleConstructorOffset(BaseOffset): +class SingleConstructorMixin: _params = cache_readonly(BaseOffset._params.fget) freqstr = cache_readonly(BaseOffset.freqstr.fget) @@ -308,6 +308,10 @@ def _from_name(cls, suffix=None): return cls() +class SingleConstructorOffset(SingleConstructorMixin, BaseOffset): + pass + + class BusinessDay(BusinessMixin, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days. @@ -316,10 +320,6 @@ class BusinessDay(BusinessMixin, SingleConstructorOffset): _prefix = "B" _attributes = frozenset(["n", "normalize", "offset"]) - def __init__(self, n=1, normalize=False, offset=timedelta(0)): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - def _offset_str(self) -> str: def get_str(td): off_str = "" @@ -419,7 +419,15 @@ def is_on_offset(self, dt: datetime) -> bool: return dt.weekday() < 5 -class BusinessHourMixin(liboffsets.BusinessHourMixin): +class BusinessHour(SingleConstructorMixin, liboffsets.BusinessHourMixin): + """ + DateOffset subclass representing possibly n business hours. 
+ """ + + _prefix = "BH" + _anchor = 0 + _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) + @cache_readonly def next_bday(self): """ @@ -679,22 +687,6 @@ def _is_on_offset(self, dt): return False -class BusinessHour(BusinessHourMixin, SingleConstructorOffset): - """ - DateOffset subclass representing possibly n business hours. - """ - - _prefix = "BH" - _anchor = 0 - _attributes = frozenset(["n", "normalize", "start", "end", "offset"]) - - def __init__( - self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0) - ): - BaseOffset.__init__(self, n, normalize) - super().__init__(start=start, end=end, offset=offset) - - class CustomBusinessDay(CustomMixin, BusinessDay): """ DateOffset subclass representing custom business days excluding holidays. @@ -727,9 +719,7 @@ def __init__( calendar=None, offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessDay.__init__(self, n, normalize, offset) CustomMixin.__init__(self, weekmask, holidays, calendar) @apply_wraps @@ -772,7 +762,7 @@ def is_on_offset(self, dt: datetime) -> bool: return np.is_busday(day64, busdaycal=self.calendar) -class CustomBusinessHour(CustomMixin, BusinessHourMixin, SingleConstructorOffset): +class CustomBusinessHour(CustomMixin, BusinessHour): """ DateOffset subclass representing possibly n custom business days. 
""" @@ -794,11 +784,8 @@ def __init__( end="17:00", offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessHour.__init__(self, n, normalize, start=start, end=end, offset=offset) CustomMixin.__init__(self, weekmask, holidays, calendar) - BusinessHourMixin.__init__(self, start=start, end=end, offset=offset) # --------------------------------------------------------------------- @@ -898,9 +885,7 @@ def __init__( calendar=None, offset=timedelta(0), ): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) - + BusinessMixin.__init__(self, n, normalize, offset) CustomMixin.__init__(self, weekmask, holidays, calendar) @cache_readonly @@ -980,9 +965,9 @@ def __init__(self, n=1, normalize=False, day_of_month=None): BaseOffset.__init__(self, n, normalize) if day_of_month is None: - object.__setattr__(self, "day_of_month", self._default_day_of_month) - else: - object.__setattr__(self, "day_of_month", int(day_of_month)) + day_of_month = self._default_day_of_month + + object.__setattr__(self, "day_of_month", int(day_of_month)) if not self._min_day_of_month <= self.day_of_month <= 27: raise ValueError( "day_of_month must be " @@ -1308,7 +1293,7 @@ def _from_name(cls, suffix=None): return cls(weekday=weekday) -class WeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): +class WeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): """ Describes monthly dates like "the Tuesday of the 2nd week of each month". 
@@ -1334,12 +1319,9 @@ class WeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): _attributes = frozenset(["n", "normalize", "week", "weekday"]) def __init__(self, n=1, normalize=False, week=0, weekday=0): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "weekday", weekday) + liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) object.__setattr__(self, "week", week) - if self.weekday < 0 or self.weekday > 6: - raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") if self.week < 0 or self.week > 3: raise ValueError(f"Week must be 0<=week<=3, got {self.week}") @@ -1361,11 +1343,6 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (self.weekday - wday) % 7 return 1 + shift_days + self.week * 7 - @property - def rule_code(self) -> str: - weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return f"{self._prefix}-{self.week + 1}{weekday}" - @classmethod def _from_name(cls, suffix=None): if not suffix: @@ -1377,7 +1354,7 @@ def _from_name(cls, suffix=None): return cls(week=week, weekday=weekday) -class LastWeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): +class LastWeekOfMonth(SingleConstructorMixin, liboffsets.WeekOfMonthMixin): """ Describes monthly dates in last week of month like "the last Tuesday of each month". 
@@ -1401,14 +1378,11 @@ class LastWeekOfMonth(liboffsets.WeekOfMonthMixin, SingleConstructorOffset): _attributes = frozenset(["n", "normalize", "weekday"]) def __init__(self, n=1, normalize=False, weekday=0): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "weekday", weekday) + liboffsets.WeekOfMonthMixin.__init__(self, n, normalize, weekday) if self.n == 0: raise ValueError("N cannot be 0") - - if self.weekday < 0 or self.weekday > 6: - raise ValueError(f"Day must be 0<=day<=6, got {self.weekday}") + object.__setattr__(self, "week", -1) def _get_offset_day(self, other: datetime) -> int: """ @@ -1429,11 +1403,6 @@ def _get_offset_day(self, other: datetime) -> int: shift_days = (wday - self.weekday) % 7 return dim - shift_days - @property - def rule_code(self) -> str: - weekday = ccalendar.int_to_weekday.get(self.weekday, "") - return f"{self._prefix}-{weekday}" - @classmethod def _from_name(cls, suffix=None): if not suffix: diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 92bfce7ec9c83..80286d5f138ad 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -329,7 +329,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]: return decorate -def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: +def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]: """ A decorator take docstring templates, concatenate them and perform string substitution on it. @@ -345,8 +345,8 @@ def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]: *args : str or callable The string / docstring / docstring template to be appended in order after default docstring under function. - **kwargs : str - The string which would be used to format docstring template. + **kwargs + The objects which would be used to format docstring template. 
""" def decorator(func: F) -> F: From a3c6eda4fb15f623ac3f890bdd30addb7dbc93e9 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 09:49:06 +0100 Subject: [PATCH 04/15] wip --- pandas/io/formats/format.py | 9 +++------ pandas/tests/arrays/categorical/test_repr.py | 1 - 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 54e4f6079827e..89de749ee67de 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -191,13 +191,11 @@ def _get_footer(self) -> str: return str(footer) def _get_formatted_values(self) -> List[str]: - breakpoint() return format_array( self.categorical._internal_get_values(), None, float_format=None, na_rep=self.na_rep, - quoting=2 ) def to_string(self) -> str: @@ -1088,7 +1086,6 @@ def format_array( justify: str = "right", decimal: str = ".", leading_space: Optional[bool] = None, - quoting=None ) -> List[str]: """ Format an array for printing. @@ -1151,11 +1148,11 @@ def format_array( justify=justify, decimal=decimal, leading_space=leading_space, - quoting=quoting ) + return fmt_obj.get_result() -import csv + class GenericArrayFormatter: def __init__( self, @@ -1218,7 +1215,7 @@ def _format(x): pass return self.na_rep elif isinstance(x, PandasObject): - return str(x) + return f"'{str(x)}'" else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index f7e19a911ff94..33918b6d87520 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -527,5 +527,4 @@ def test_categorical_index_repr_timedelta_ordered(self): def test_categorical_str_repr(self): result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" - breakpoint() assert result == expected From 9277d382a7cf1dfc09ad868ad7cf86515cdbccc5 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 
18 Jun 2020 09:52:35 +0100 Subject: [PATCH 05/15] remove vscode failures --- failures | 1584 ------------------------------------------------------ 1 file changed, 1584 deletions(-) delete mode 100644 failures diff --git a/failures b/failures deleted file mode 100644 index b30ae124e6201..0000000000000 --- a/failures +++ /dev/null @@ -1,1584 +0,0 @@ -_ TestIntervalIndexRendering.test_repr_missing[Series-(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c\ndtype: object] _ -[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -constructor = -expected = '(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c\ndtype: object' - - @pytest.mark.parametrize( - "constructor,expected", - [ - ( - Series, - ( - "(0.0, 1.0] a\n" - "NaN b\n" - "(2.0, 3.0] c\n" - "dtype: object" - ), - ), - (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")), - ], - ) - def test_repr_missing(self, constructor, expected): - # GH 25984 - index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) - obj = constructor(list("abc"), index=index) - result = repr(obj) -> assert result == expected -E AssertionError: assert '(0.0, 1.0] ...dtype: object' == '(0.0, 1.0] ...dtype: object' -E - (0.0, 1.0] a -E ? ^ -E + (0.0, 1.0] 'a' -E ? ^ + -E - NaN b -E ? ^ -E + NaN 'b'... 
-E -E ...Full output truncated (7 lines hidden), use '-vv' to show - -pandas/tests/indexes/interval/test_formats.py:38: AssertionError -_ TestIntervalIndexRendering.test_repr_missing[DataFrame- 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c] _ -[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -constructor = -expected = ' 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c' - - @pytest.mark.parametrize( - "constructor,expected", - [ - ( - Series, - ( - "(0.0, 1.0] a\n" - "NaN b\n" - "(2.0, 3.0] c\n" - "dtype: object" - ), - ), - (DataFrame, (" 0\n(0.0, 1.0] a\nNaN b\n(2.0, 3.0] c")), - ], - ) - def test_repr_missing(self, constructor, expected): - # GH 25984 - index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)]) - obj = constructor(list("abc"), index=index) - result = repr(obj) -> assert result == expected -E assert " ...2.0, 3.0] 'c'" == ' ...(2.0, 3.0] c' -E - 0 -E + 0 -E ? + -E - (0.0, 1.0] a -E ? ^ -E + (0.0, 1.0] 'a' -E ? ^ +... -E -E ...Full output truncated (9 lines hidden), use '-vv' to show - -pandas/tests/indexes/interval/test_formats.py:38: AssertionError -_________________ TestReadHtml.test_multiple_header_rows[bs4] __________________ -[gw2] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_multiple_header_rows(self): - # Issue #13434 - expected_df = DataFrame( - data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] - ) - expected_df.columns = [ - ["Unnamed: 0_level_0", "Age", "Party"], - ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], - ] - html = expected_df.to_html(index=False) - html_df = self.read_html(html)[0] -> tm.assert_frame_equal(expected_df, html_df) - -pandas/tests/io/test_html.py:1079: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal - cpdef assert_almost_equal(a, b, -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -> 
raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) -E AssertionError: DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") are different -E -E DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") values are different (100.0 %) -E [index]: [0, 1, 2] -E [left]: [Hillary, Bernie, Donald] -E [right]: ['Hillary', 'Bernie', 'Donald'] - -pandas/_libs/testing.pyx:183: AssertionError -_________________ TestReadHtml.test_multiple_header_rows[lxml] _________________ -[gw2] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_multiple_header_rows(self): - # Issue #13434 - expected_df = DataFrame( - data=[("Hillary", 68, "D"), ("Bernie", 74, "D"), ("Donald", 69, "R")] - ) - expected_df.columns = [ - ["Unnamed: 0_level_0", "Age", "Party"], - ["Name", "Unnamed: 1_level_1", "Unnamed: 2_level_1"], - ] - html = expected_df.to_html(index=False) - html_df = self.read_html(html)[0] -> tm.assert_frame_equal(expected_df, html_df) - -pandas/tests/io/test_html.py:1079: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal - cpdef assert_almost_equal(a, b, -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -> raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) -E AssertionError: DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") are different -E -E DataFrame.iloc[:, 0] (column name="('Unnamed: 0_level_0', 'Name')") values are different (100.0 %) -E [index]: [0, 1, 2] -E [left]: [Hillary, Bernie, Donald] -E [right]: ['Hillary', 'Bernie', 'Donald'] - -pandas/_libs/testing.pyx:183: AssertionError -_________________ TestDataFrameFormatting.test_repr_truncation _________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_repr_truncation(self): - max_len = 20 - with 
option_context("display.max_colwidth", max_len): - df = DataFrame( - { - "A": np.random.randn(10), - "B": [ - tm.rands(np.random.randint(max_len - 1, max_len + 1)) - for i in range(10) - ], - } - ) - r = repr(df) - r = r[r.find("\n") + 1 :] - - adj = fmt._get_adjustment() - - for line, value in zip(r.split("\n"), df["B"]): - if adj.len(value) + 1 > max_len: - assert "..." in line - else: -> assert "..." not in line -E AssertionError: assert '...' not in '0 -0.426290...7OEA0dVWe...' -E '...' is contained here: -E 0 -0.426290 '2fAo1sX7OEA0dVWe... -E ? +++ - -pandas/tests/io/formats/test_format.py:234: AssertionError -________________ TestDataFrameFormatting.test_str_max_colwidth _________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_str_max_colwidth(self): - # GH 7856 - df = pd.DataFrame( - [ - { - "a": "foo", - "b": "bar", - "c": "uncomfortably long line with lots of stuff", - "d": 1, - }, - {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, - ] - ) - df.set_index(["a", "b", "c"]) -> assert str(df) == ( - " a b c d\n" - "0 foo bar uncomfortably long line with lots of stuff 1\n" - "1 foo bar stuff 1" - ) -E assert " a ... 'stuff' 1" == ' a b ... stuff 1' -E - a b c d -E + a b c d -E ? + + + -E - 0 foo bar uncomfortably long line with lots of stuff 1 -E ? ^ ^^ ^^ -E + 0 'foo' 'bar' 'uncomfortably long line with lots of stuff' 1 -E ? ^ ^^^ ^^^ +... 
-E -E ...Full output truncated (3 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:524: AssertionError -_______________ TestDataFrameFormatting.test_to_string_truncate ________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_string_truncate(self): - # GH 9784 - dont truncate when calling DataFrame.to_string - df = pd.DataFrame( - [ - { - "a": "foo", - "b": "bar", - "c": "let's make this a very VERY long line that is longer " - "than the default 50 character limit", - "d": 1, - }, - {"a": "foo", "b": "bar", "c": "stuff", "d": 1}, - ] - ) - df.set_index(["a", "b", "c"]) -> assert df.to_string() == ( - " a b " - " c d\n" - "0 foo bar let's make this a very VERY long line t" - "hat is longer than the default 50 character limit 1\n" - "1 foo bar " - " stuff 1" - ) -E assert " a ... 'stuff' 1" == ' a b ... stuff 1' -E - a b c d -E + a b c d -E ? + + + -E - 0 foo bar let's make this a very VERY long line that is longer than the default 50 character limit 1 -E ? ^ ^^ ^^ -E + 0 'foo' 'bar' 'let's make this a very VERY long line that is longer than the default 50 character limit' 1 -E ? ^ ^^^ ^^^ ... -E -E ...Full output truncated (3 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:551: AssertionError -____________ TestDataFrameFormatting.test_east_asian_unicode_false _____________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_east_asian_unicode_false(self): - # not aligned properly because of east asian width - - # mid col - df = DataFrame( - {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, - index=["a", "bb", "c", "ddd"], - ) - expected = ( - " a b\na あ 1\n" - "bb いいい 222\nc う 33333\n" - "ddd ええええええ 4" - ) -> assert repr(df) == expected -E assert " a...えええええ' 4" == ' a ...ええええええ 4' -E - a b -E + a b -E ? + -E - a あ 1 -E ? ^ -E + a 'あ' 1 -E ? ^ +... 
-E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:737: AssertionError -_____________ TestDataFrameFormatting.test_east_asian_unicode_true _____________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_east_asian_unicode_true(self): - # Enable Unicode option ----------------------------------------- - with option_context("display.unicode.east_asian_width", True): - - # mid col - df = DataFrame( - {"a": ["あ", "いいい", "う", "ええええええ"], "b": [1, 222, 33333, 4]}, - index=["a", "bb", "c", "ddd"], - ) - expected = ( - " a b\na あ 1\n" - "bb いいい 222\nc う 33333\n" - "ddd ええええええ 4" - ) -> assert repr(df) == expected -E assert " ...えええええ' 4" == ' ...ええええええ 4' -E - a b -E + a b -E ? + -E - a あ 1 -E ? ^ -E + a 'あ' 1 -E ? ^ +... -E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:875: AssertionError -_________________ TestDataFrameFormatting.test_index_with_nan __________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_index_with_nan(self): - # GH 2850 - df = DataFrame( - { - "id1": {0: "1a3", 1: "9h4"}, - "id2": {0: np.nan, 1: "d67"}, - "id3": {0: "78d", 1: "79d"}, - "value": {0: 123, 1: 64}, - } - ) - - # multi-index - y = df.set_index(["id1", "id2", "id3"]) - result = y.to_string() - expected = ( - " value\nid1 id2 id3 \n" - "1a3 NaN 78d 123\n9h4 d67 79d 64" - ) - assert result == expected - - # index - y = df.set_index("id2") - result = y.to_string() - expected = ( - " id1 id3 value\nid2 \n" - "NaN 1a3 78d 123\nd67 9h4 79d 64" - ) -> assert result == expected -E assert " id1 ... '79d' 64" == ' id1 id...4 79d 64' -E - id1 id3 value -E + id1 id3 value -E ? + + -E - id2 -E + id2 -E ? ++ -E - NaN 1a3 78d 123... 
-E -E ...Full output truncated (8 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:1398: AssertionError -____________________ TestDataFrameFormatting.test_to_string ____________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_string(self): - - # big mixed - biggie = DataFrame( - {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, - index=np.arange(200), - ) - - biggie.loc[:20, "A"] = np.nan - biggie.loc[:20, "B"] = np.nan - s = biggie.to_string() - - buf = StringIO() - retval = biggie.to_string(buf=buf) - assert retval is None - assert buf.getvalue() == s - - assert isinstance(s, str) - - # print in right order - result = biggie.to_string( - columns=["B", "A"], col_space=17, float_format="%.5f".__mod__ - ) - lines = result.split("\n") - header = lines[0].strip().split() - joined = "\n".join(re.sub(r"\s+", " ", x).strip() for x in lines[1:]) - recons = read_csv(StringIO(joined), names=header, header=None, sep=" ") -> tm.assert_series_equal(recons["B"], biggie["B"]) - -pandas/tests/io/formats/test_format.py:1475: -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ -pandas/_libs/testing.pyx:68: in pandas._libs.testing.assert_almost_equal - cpdef assert_almost_equal(a, b, -_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ - -> raise_assert_detail(obj, msg, lobj, robj, index_values=index_values) -E AssertionError: Series are different -E -E Series values are different (89.5 %) -E [index]: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...] 
-E [left]: [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 'ReUMMYOzRc', 'm8JaaoOinz', 'sObNzAa9Sb', 'HRKG2ackB4', '0azcKU0pMp', '2mIvVr5itT', 'rGUpLDHyCl', '82UUP3PsXe', '05Wgq2rheA', 'E5jcBVizmI', 's2BXU0YHCQ', 'frVcLBb65o', 'wAMPDhKWPK', 'ASORqAK5Jk', 'YeJ7LtATFI', 'IzEShI7kI1', 'OABJwASxEY', 'Jr8okkyRpq', 'Rxj4uDCiyu', 'gmf0SkUai3', 'HpgiaSbjR2', 'xtZWYDfYEO', 'VUzBhy7urU', 'uLUGvTFnqx', 'FZx3FplLeC', 'U9hpEhF5ss', 'TpJdnJ7Nzd', '1phREHhRsM', 'NssXhnFpKV', 'hEqW5irrLk', 'Twuj74zdTH', '7ltutV1O47', '2Ipsj424K8', 'G3dbMMYkgz', 'rxAsSNQ8qn', 'RqrQj2Ozfo', 'A07eRJDOxX', 'x0Y1kwZMv2', 'k6jyXkGdWb', 'Yl5NkpyVKl', 'C03IGiW6zr', 'mVbiXcVTtM', 'VOmlVUmdh0', '481dbvfltM', 'lwNNDPVPSI', 'rha2YAX39o', 'zJzMvKWl7A', 'cWoM5F49zZ', 'zMqfkqXpdR', '7pvDnY8NR5', 'J8XExS3CMA', '5WNzD0xErD', 'cy9DDrahxY', 'IEgi5viuyF', 'hs8VgWm3C6', 'KP458GphgF', 'TO8oGkA5NL', 'dDuScWreSw', 'kyIHeh9Vwl', 'BZuhU8x2S9', 'f7p4PNZ8Vr', 'U1EiHy397b', 'OIvNL6oxOH', 'CuaVgCNtzX', 'vsFWaKUFbS', 'byfE947n2S', 'LctPcyhLuE', 't8JOJggu2o', 'nGJhNJvSSp', 'QmYVoLdp4Y', '1lr9fM7yVS', 'kNzzRNrcrm', '6uFClL9u62', 'gaTYFoh9zk', 'GoC88dpzsj', 'OLp31oEzCo', 'hdM3S6VYVH', 'jmBW6pvJbd', 'UpU6XuGjCP', ...] 
-E [right]: [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, ReUMMYOzRc, m8JaaoOinz, sObNzAa9Sb, HRKG2ackB4, 0azcKU0pMp, 2mIvVr5itT, rGUpLDHyCl, 82UUP3PsXe, 05Wgq2rheA, E5jcBVizmI, s2BXU0YHCQ, frVcLBb65o, wAMPDhKWPK, ASORqAK5Jk, YeJ7LtATFI, IzEShI7kI1, OABJwASxEY, Jr8okkyRpq, Rxj4uDCiyu, gmf0SkUai3, HpgiaSbjR2, xtZWYDfYEO, VUzBhy7urU, uLUGvTFnqx, FZx3FplLeC, U9hpEhF5ss, TpJdnJ7Nzd, 1phREHhRsM, NssXhnFpKV, hEqW5irrLk, Twuj74zdTH, 7ltutV1O47, 2Ipsj424K8, G3dbMMYkgz, rxAsSNQ8qn, RqrQj2Ozfo, A07eRJDOxX, x0Y1kwZMv2, k6jyXkGdWb, Yl5NkpyVKl, C03IGiW6zr, mVbiXcVTtM, VOmlVUmdh0, 481dbvfltM, lwNNDPVPSI, rha2YAX39o, zJzMvKWl7A, cWoM5F49zZ, zMqfkqXpdR, 7pvDnY8NR5, J8XExS3CMA, 5WNzD0xErD, cy9DDrahxY, IEgi5viuyF, hs8VgWm3C6, KP458GphgF, TO8oGkA5NL, dDuScWreSw, kyIHeh9Vwl, BZuhU8x2S9, f7p4PNZ8Vr, U1EiHy397b, OIvNL6oxOH, CuaVgCNtzX, vsFWaKUFbS, byfE947n2S, LctPcyhLuE, t8JOJggu2o, nGJhNJvSSp, QmYVoLdp4Y, 1lr9fM7yVS, kNzzRNrcrm, 6uFClL9u62, gaTYFoh9zk, GoC88dpzsj, OLp31oEzCo, hdM3S6VYVH, jmBW6pvJbd, UpU6XuGjCP, ...] - -pandas/_libs/testing.pyx:183: AssertionError -_______________ TestDataFrameFormatting.test_to_string_no_index ________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_string_no_index(self): - # GH 16839, GH 13032 - df = DataFrame({"x": [11, 22], "y": [33, -44], "z": ["AAA", " "]}) - - df_s = df.to_string(index=False) - # Leading space is expected for positive numbers. - expected = " x y z\n 11 33 AAA\n 22 -44 " -> assert df_s == expected -E assert " x y ... 22 -44 ' '" == ' x y z...n 22 -44 ' -E - x y z -E + x y z -E ? + -E - 11 33 AAA -E ? ^ -E + 11 33 'AAA' -E ? ^ +... 
-E -E ...Full output truncated (3 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:1522: AssertionError -_______________ TestDataFrameFormatting.test_to_string_format_na _______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_string_format_na(self): - tm.reset_display_options() - df = DataFrame( - { - "A": [np.nan, -1, -2.1234, 3, 4], - "B": [np.nan, "foo", "foooo", "fooooo", "bar"], - } - ) - result = df.to_string() - - expected = ( - " A B\n" - "0 NaN NaN\n" - "1 -1.0000 foo\n" - "2 -2.1234 foooo\n" - "3 3.0000 fooooo\n" - "4 4.0000 bar" - ) -> assert result == expected -E assert " A ...0000 'bar'" == ' A ....0000 bar' -E - A B -E + A B -E ? + -E - 0 NaN NaN -E + 0 NaN NaN -E ? + -E - 1 -1.0000 foo... -E -E ...Full output truncated (16 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:1731: AssertionError -_____________________ TestDataFrameFormatting.test_period ______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_period(self): - # GH 12615 - df = pd.DataFrame( - { - "A": pd.period_range("2013-01", periods=4, freq="M"), - "B": [ - pd.Period("2011-01", freq="M"), - pd.Period("2011-02-01", freq="D"), - pd.Period("2011-03-01 09:00", freq="H"), - pd.Period("2011-04", freq="M"), - ], - "C": list("abcd"), - } - ) - exp = ( - " A B C\n" - "0 2013-01 2011-01 a\n" - "1 2013-02 2011-02-01 b\n" - "2 2013-03 2011-03-01 09:00 c\n" - "3 2013-04 2011-04 d" - ) -> assert str(df) == exp -E assert " A ... 2011-04 'd'" == ' A ... 2011-04 d' -E - A B C -E + A B C -E ? + -E - 0 2013-01 2011-01 a -E ? ^ -E + 0 2013-01 2011-01 'a' -E ? ^ +... 
-E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:2138: AssertionError -__________________ TestSeriesFormatting.test_to_string_mixed ___________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_string_mixed(self): - s = Series(["foo", np.nan, -1.23, 4.56]) - result = s.to_string() - expected = "0 foo\n" + "1 NaN\n" + "2 -1.23\n" + "3 4.56" - assert result == expected - - # but don't count NAs as floats - s = Series(["foo", np.nan, "bar", "baz"]) - result = s.to_string() - expected = "0 foo\n" + "1 NaN\n" + "2 bar\n" + "3 baz" -> assert result == expected -E assert "0 'foo'\n1...r'\n3 'baz'" == '0 foo\n1 ...bar\n3 baz' -E - 0 foo -E ? ^ -E + 0 'foo' -E ? ^ + -E - 1 NaN -E + 1 NaN -E ? +... -E -E ...Full output truncated (9 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:2210: AssertionError -_____________ TestSeriesFormatting.test_east_asian_unicode_series ______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_east_asian_unicode_series(self): - # not aligned properly because of east asian width - - # unicode index - s = Series(["a", "bb", "CCC", "D"], index=["あ", "いい", "ううう", "ええええ"]) - expected = "あ a\nいい bb\nううう CCC\nええええ D\ndtype: object" -> assert repr(s) == expected -E assert "あ 'a'...dtype: object" == 'あ a\...dtype: object' -E - あ a -E ? ^ -E + あ 'a' -E ? ^ + -E - いい bb -E ? ^ -E + いい 'bb'... 
-E -E ...Full output truncated (11 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:2249: AssertionError -__________________ TestSeriesFormatting.test_format_explicit ___________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_format_explicit(self): - test_sers = gen_series_formatting() - with option_context("display.max_rows", 4, "display.show_dimensions", False): - res = repr(test_sers["onel"]) - exp = "0 a\n1 a\n ..\n98 a\n99 a\ndtype: object" -> assert exp == res -E assert '0 a\n1 ...dtype: object' == "0 'a'\n1 ...dtype: object" -E - 0 'a' -E ? ^ - -E + 0 a -E ? ^ -E - 1 'a' -E ? ^ - -E + 1 a... -E -E ...Full output truncated (14 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_format.py:2666: AssertionError -_____________________ test_to_html_unicode[df1-unicode_2] ______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -df = A -0 'σ' -expected = '\n \n \n \n \n \n \n \n \n \n \n \n \n
A
0σ
' -datapath = .deco at 0x7f3c3989a1f0> - - @pytest.mark.parametrize( - "df,expected", - [ - (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"), - (DataFrame({"A": ["\u03c3"]}), "unicode_2"), - ], - ) - def test_to_html_unicode(df, expected, datapath): - expected = expected_html(datapath, expected) - result = df.to_html() -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 173 identical leading characters in diff, use -v to show -E - σ -E + 'σ' -E ? + + -E -E -E - -pandas/tests/io/formats/test_to_html.py:99: AssertionError -______________ test_to_html_escaped[kwargs0--escaped] ______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -kwargs = {}, string = "" -expected = '\n \n \n \n \n \n \n \n \n
co&l...p;amp;<type \'str\'><type \'str\'>
' -datapath = .deco at 0x7f3c33ab65e0> - - @pytest.mark.parametrize( - "kwargs,string,expected", - [ - (dict(), "", "escaped"), - (dict(escape=False), "bold", "escape_disabled"), - ], - ) - def test_to_html_escaped(kwargs, string, expected, datapath): - a = "strl2": {a: string, b: string}} - result = DataFrame(test_dict).to_html(**kwargs) - expected = expected_html(datapath, expected) -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 224 identical leading characters in diff, use -v to show -E - <type 'str'> -E + '<type 'str'>' -E ? + + -E - <type 'str'> -E + '<type 'str'>' -E ? + +... -E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:132: AssertionError -__________ test_to_html_escaped[kwargs1-bold-escape_disabled] ___________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -kwargs = {'escape': False}, string = 'bold' -expected = '\n \n \n \n \n \n \n \n \n
costri>ng2 &boldbold
' -datapath = .deco at 0x7f3c33ab6160> - - @pytest.mark.parametrize( - "kwargs,string,expected", - [ - (dict(), "", "escaped"), - (dict(escape=False), "bold", "escape_disabled"), - ], - ) - def test_to_html_escaped(kwargs, string, expected, datapath): - a = "strl2": {a: string, b: string}} - result = DataFrame(test_dict).to_html(**kwargs) - expected = expected_html(datapath, expected) -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 211 identical leading characters in diff, use -v to show -E - bold -E + 'bold' -E ? + + -E - bold -E + 'bold' -E ? + +... -E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:132: AssertionError -_____________ test_to_html_multiindex[columns0-left-multiindex_1] ______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -columns = MultiIndex([(0, 0), - (0, 1), - (1, 0), - (1, 1)], - names=['CL0', 'CL1']) -justify = 'left' -expected = '\n \n \n \n \n \n \n \n \n \n \n
CL00<... 1efgh
' -datapath = .deco at 0x7f3c33ab6820> - - @pytest.mark.parametrize( - "columns,justify,expected", - [ - ( - MultiIndex.from_tuples( - list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), - names=["CL0", "CL1"], - ), - "left", - "multiindex_1", - ), - ( - MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), - "right", - "multiindex_2", - ), - ], - ) - def test_to_html_multiindex(columns, justify, expected, datapath): - df = DataFrame([list("abcd"), list("efgh")], columns=columns) - result = df.to_html(justify=justify) - expected = expected_html(datapath, expected) -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 324 identical leading characters in diff, use -v to show -E - a -E + 'a' -E ? + + -E - b -E + 'b' -E ? + +... -E -E ...Full output truncated (25 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:376: AssertionError -_____________ test_to_html_multiindex[columns1-right-multiindex_2] _____________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -columns = MultiIndex([(0, 0), - (1, 1), - (2, 0), - (3, 1)], - ) -justify = 'right' -expected = '\n \n \n \n \n \n ... \n \n \n \n \n \n \n
011efgh
' -datapath = .deco at 0x7f3c33ab6790> - - @pytest.mark.parametrize( - "columns,justify,expected", - [ - ( - MultiIndex.from_tuples( - list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))), - names=["CL0", "CL1"], - ), - "left", - "multiindex_1", - ), - ( - MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))), - "right", - "multiindex_2", - ), - ], - ) - def test_to_html_multiindex(columns, justify, expected, datapath): - df = DataFrame([list("abcd"), list("efgh")], columns=columns) - result = df.to_html(justify=justify) - expected = expected_html(datapath, expected) -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 300 identical leading characters in diff, use -v to show -E - a -E + 'a' -E ? + + -E - b -E + 'b' -E ? + +... -E -E ...Full output truncated (25 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:376: AssertionError -______________________________ test_to_html_index ______________________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -datapath = .deco at 0x7f3c33ab6a60> - - def test_to_html_index(datapath): - # TODO: split this test - index = ["foo", "bar", "baz"] - df = DataFrame( - {"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]}, - columns=["A", "B", "C"], - index=index, - ) - expected_with_index = expected_html(datapath, "index_1") -> assert df.to_html() == expected_with_index -E AssertionError: assert '\n
' == '\n
' -E Skipping 245 identical leading characters in diff, use -v to show -E - one -E + 'one' -E ? + + -E -E -E bar... -E -E ...Full output truncated (15 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:410: AssertionError -______________ test_to_html_render_links[True-render_links_true] _______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -render_links = True -expected = '\n \n \n \n \n \n \n \n \n \n
foo<...th>10www.pydata.orgpydata.org
' -datapath = .deco at 0x7f3c33a40ca0> - - @pytest.mark.parametrize( - "render_links,expected", - [(True, "render_links_true"), (False, "render_links_false")], - ) - def test_to_html_render_links(render_links, expected, datapath): - # GH 2679 - data = [ - [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], - [0, "www.pydata.org", "pydata.org"], - ] - df = DataFrame(data, columns=["foo", "bar", None]) - - result = df.to_html(render_links=render_links) - expected = expected_html(datapath, expected) -> assert result == expected -E assert '\n
' == '\n
' -E Skipping 231 identical leading characters in diff, use -v to show -E - https://pandas.pydata.org/?q1=a&q2=b -E + 'https://pandas.pydata.org/?q1=a&q2=b' -E - pydata.org -E + 'pydata.org' -E ? + + -E ... -E -E ...Full output truncated (13 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:698: AssertionError -_____________ test_to_html_render_links[False-render_links_false] ______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -render_links = False -expected = '\n \n \n \n \n \n \n \n \n \n
foo<...th>10www.pydata.orgpydata.org
' -datapath = .deco at 0x7f3c33a40b80> - - @pytest.mark.parametrize( - "render_links,expected", - [(True, "render_links_true"), (False, "render_links_false")], - ) - def test_to_html_render_links(render_links, expected, datapath): - # GH 2679 - data = [ - [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"], - [0, "www.pydata.org", "pydata.org"], - ] - df = DataFrame(data, columns=["foo", "bar", None]) - - result = df.to_html(render_links=render_links) - expected = expected_html(datapath, expected) -> assert result == expected -E AssertionError: assert '\n
' == '\n
' -E Skipping 231 identical leading characters in diff, use -v to show -E - https://pandas.pydata.org/?q1=a&q2=b -E + 'https://pandas.pydata.org/?q1=a&q2=b' -E ? + + -E - pydata.org -E + 'pydata.org' -E ? + +... -E -E ...Full output truncated (14 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_html.py:698: AssertionError -__________________________ TestToLatex.test_to_latex ___________________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -float_frame = A B C D -uj3aupNrPB 0.764807 -0.195406 0.356168 -0.179633 -RKPro2v73m -2.46....453160 -0.238971 -1etvcSah90 0.276267 1.277534 0.532934 -0.552912 -R2nHvbi4bL -0.744058 -0.476803 -0.115628 -0.261460 - - def test_to_latex(self, float_frame): - # it works! - float_frame.to_latex() - - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - withindex_result = df.to_latex() - withindex_expected = r"""\begin{tabular}{lrl} - \toprule - {} & a & b \\ - \midrule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - """ - -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{lrl} -E \toprule -E - {} & a & b \\ -E + {} & a & b \\ -E ? + -E \midrule -E - 0 & 1 & b1 \\... 
-E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:48: AssertionError -_______________________ TestToLatex.test_to_latex_format _______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -float_frame = A B C D -Wtc0ZRSd9n 0.818695 -0.157144 -0.145023 0.203286 -bkVL7MON9p 0.09....145206 -0.713457 -CAq6YHmEYL 0.443476 1.554402 1.031790 0.715314 -gRKxa8k8At -1.380405 0.488245 1.099418 -1.594554 - - def test_to_latex_format(self, float_frame): - # GH Bug #9402 - float_frame.to_latex(column_format="ccc") - - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - withindex_result = df.to_latex(column_format="ccc") - withindex_expected = r"""\begin{tabular}{ccc} - \toprule - {} & a & b \\ - \midrule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - """ - -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ccc} -E \toprule -E - {} & a & b \\ -E + {} & a & b \\ -E ? + -E \midrule -E - 0 & 1 & b1 \\... -E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:79: AssertionError -_____________________ TestToLatex.test_to_latex_multiindex _____________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_multiindex(self): - df = DataFrame({("x", "y"): ["a"]}) - result = df.to_latex() - expected = r"""\begin{tabular}{ll} - \toprule - {} & x \\ - {} & y \\ - \midrule - 0 & a \\ - \bottomrule - \end{tabular} - """ - -> assert result == expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ll} -E \toprule -E - {} & x \\ -E + {} & x \\ -E ? + -E - {} & y \\ -E + {} & y \\... 
-E -E ...Full output truncated (9 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:152: AssertionError -_______________________ TestToLatex.test_to_latex_escape _______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_escape(self): - a = "a" - b = "b" - - test_dict = {"co$e^x$": {a: "a", b: "b"}, "co^l1": {a: "a", b: "b"}} - - unescaped_result = DataFrame(test_dict).to_latex(escape=False) - escaped_result = DataFrame(test_dict).to_latex() # default: escape=True - - unescaped_expected = r"""\begin{tabular}{lll} - \toprule - {} & co$e^x$ & co^l1 \\ - \midrule - a & a & a \\ - b & b & b \\ - \bottomrule - \end{tabular} - """ - - escaped_expected = r"""\begin{tabular}{lll} - \toprule - {} & co\$e\textasciicircum x\$ & co\textasciicircum l1 \\ - \midrule - a & a & a \\ - b & b & b \\ - \bottomrule - \end{tabular} - """ - -> assert unescaped_result == unescaped_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E Skipping 61 identical leading characters in diff, use -v to show -E e -E - a & a & a \\ -E ? ^^ ^^ -E + a & 'a' & 'a' \\ -E ? ^ + ^ + -E - b & b & b \\... 
-E -E ...Full output truncated (6 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:372: AssertionError -___________________ TestToLatex.test_to_latex_special_escape ___________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_special_escape(self): - df = DataFrame([r"a\b\c", r"^a^b^c", r"~a~b~c"]) - - escaped_result = df.to_latex() - escaped_expected = r"""\begin{tabular}{ll} - \toprule - {} & 0 \\ - \midrule - 0 & a\textbackslash b\textbackslash c \\ - 1 & \textasciicircum a\textasciicircum b\textasciicircum c \\ - 2 & \textasciitilde a\textasciitilde b\textasciitilde c \\ - \bottomrule - \end{tabular} - """ -> assert escaped_result == escaped_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ll} -E \toprule -E - {} & 0 \\ -E + {} & 0 \\ -E ? + -E \midrule -E - 0 & a\textbackslash b\textbackslash c \\... -E -E ...Full output truncated (14 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:389: AssertionError -_____________________ TestToLatex.test_to_latex_longtable ______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_longtable(self): - - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - withindex_result = df.to_latex(longtable=True) - withindex_expected = r"""\begin{longtable}{lrl} - \toprule - {} & a & b \\ - \midrule - \endhead - \midrule - \multicolumn{3}{r}{{Continued on next page}} \\ - \midrule - \endfoot - - \bottomrule - \endlastfoot - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \end{longtable} - """ -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{long...{longtable}\n' == '\\begin{long...{longtable}\n' -E Skipping 34 identical leading characters in diff, use -v to show -E - & a & b \\ -E + & a & b \\ -E ? + -E \midrule -E \endhead -E \midrule... 
-E -E ...Full output truncated (16 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:411: AssertionError -___________________ TestToLatex.test_to_latex_caption_label ____________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_caption_label(self): - # GH 25436 - the_caption = "a table in a \\texttt{table/tabular} environment" - the_label = "tab:table_tabular" - - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - - # test when only the caption is provided - result_c = df.to_latex(caption=the_caption) - - expected_c = r"""\begin{table} - \centering - \caption{a table in a \texttt{table/tabular} environment} - \begin{tabular}{lrl} - \toprule - {} & a & b \\ - \midrule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - \end{table} - """ -> assert result_c == expected_c -E AssertionError: assert '\\begin{tabl...\end{table}\n' == '\\begin{tabl...\end{table}\n' -E Skipping 115 identical leading characters in diff, use -v to show -E - & a & b \\ -E + & a & b \\ -E ? + -E \midrule -E - 0 & 1 & b1 \\ -E ? ^... 
-E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:464: AssertionError -______________ TestToLatex.test_to_latex_longtable_caption_label _______________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_longtable_caption_label(self): - # GH 25436 - the_caption = "a table in a \\texttt{longtable} environment" - the_label = "tab:longtable" - - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - - # test when only the caption is provided - result_c = df.to_latex(longtable=True, caption=the_caption) - - expected_c = r"""\begin{longtable}{lrl} - \caption{a table in a \texttt{longtable} environment}\\ - \toprule - {} & a & b \\ - \midrule - \endhead - \midrule - \multicolumn{3}{r}{{Continued on next page}} \\ - \midrule - \endfoot - - \bottomrule - \endlastfoot - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \end{longtable} - """ -> assert result_c == expected_c -E AssertionError: assert '\\begin{long...{longtable}\n' == '\\begin{long...{longtable}\n' -E Skipping 90 identical leading characters in diff, use -v to show -E - & a & b \\ -E + & a & b \\ -E ? + -E \midrule -E \endhead -E \midrule... 
-E -E ...Full output truncated (16 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:530: AssertionError -________________ TestToLatex.test_to_latex_escape_special_chars ________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_escape_special_chars(self): - special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] - df = DataFrame(data=special_characters) - observed = df.to_latex() - expected = r"""\begin{tabular}{ll} - \toprule - {} & 0 \\ - \midrule - 0 & \& \\ - 1 & \% \\ - 2 & \$ \\ - 3 & \# \\ - 4 & \_ \\ - 5 & \{ \\ - 6 & \} \\ - 7 & \textasciitilde \\ - 8 & \textasciicircum \\ - 9 & \textbackslash \\ - \bottomrule - \end{tabular} - """ - -> assert observed == expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ll} -E \toprule -E - {} & 0 \\ -E + {} & 0 \\ -E ? + -E \midrule -E - 0 & \& \\... -E -E ...Full output truncated (42 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:598: AssertionError -_____________________ TestToLatex.test_to_latex_no_header ______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_no_header(self): - # GH 7124 - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - withindex_result = df.to_latex(header=False) - withindex_expected = r"""\begin{tabular}{lrl} - \toprule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - """ - -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{lrl} -E \toprule -E - 0 & 1 & b1 \\ -E ? ^ -E + 0 & 1 & 'b1' \\ -E ? ^ + -E - 1 & 2 & b2 \\... 
-E -E ...Full output truncated (6 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:612: AssertionError -__________________ TestToLatex.test_to_latex_specified_header __________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_specified_header(self): - # GH 7124 - df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - withindex_result = df.to_latex(header=["AA", "BB"]) - withindex_expected = r"""\begin{tabular}{lrl} - \toprule - {} & AA & BB \\ - \midrule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - """ - -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{lrl} -E \toprule -E - {} & AA & BB \\ -E + {} & AA & BB \\ -E ? + -E \midrule -E - 0 & 1 & b1 \\... -E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:639: AssertionError -______________________ TestToLatex.test_to_latex_decimal _______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -float_frame = A B C D -2gSI2RndbR 0.423719 -1.539780 -0.691211 -0.112028 -BnGEeeJ54Q -1.45....071262 -0.213009 -vgARWWzFsz 0.958321 0.239552 -0.934697 -0.215599 -9vVX63NXHQ 0.843613 2.314023 -0.550290 -0.708401 - - def test_to_latex_decimal(self, float_frame): - # GH 12031 - float_frame.to_latex() - - df = DataFrame({"a": [1.0, 2.1], "b": ["b1", "b2"]}) - withindex_result = df.to_latex(decimal=",") - - withindex_expected = r"""\begin{tabular}{lrl} - \toprule - {} & a & b \\ - \midrule - 0 & 1,0 & b1 \\ - 1 & 2,1 & b2 \\ - \bottomrule - \end{tabular} - """ - -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E Skipping 34 identical leading characters in diff, use -v to show -E - a & b \\ -E + a & b \\ -E ? 
+ -E \midrule -E - 0 & 1,0 & b1 \\ -E ? ^... -E -E ...Full output truncated (9 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:688: AssertionError -_______________________ TestToLatex.test_to_latex_series _______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_series(self): - s = Series(["a", "b", "c"]) - withindex_result = s.to_latex() - withindex_expected = r"""\begin{tabular}{ll} - \toprule - {} & 0 \\ - \midrule - 0 & a \\ - 1 & b \\ - 2 & c \\ - \bottomrule - \end{tabular} - """ -> assert withindex_result == withindex_expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ll} -E \toprule -E - {} & 0 \\ -E + {} & 0 \\ -E ? + -E \midrule -E - 0 & a \\... -E -E ...Full output truncated (14 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:703: AssertionError -_____________________ TestToLatex.test_to_latex_bold_rows ______________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_bold_rows(self): - # GH 16707 - df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - observed = df.to_latex(bold_rows=True) - expected = r"""\begin{tabular}{lrl} - \toprule - {} & a & b \\ - \midrule - \textbf{0} & 1 & b1 \\ - \textbf{1} & 2 & b2 \\ - \bottomrule - \end{tabular} - """ -> assert observed == expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{lrl} -E \toprule -E - {} & a & b \\ -E + {} & a & b \\ -E ? + -E \midrule -E - \textbf{0} & 1 & b1 \\... 
-E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:718: AssertionError -____________________ TestToLatex.test_to_latex_no_bold_rows ____________________ -[gw1] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_to_latex_no_bold_rows(self): - # GH 16707 - df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) - observed = df.to_latex(bold_rows=False) - expected = r"""\begin{tabular}{lrl} - \toprule - {} & a & b \\ - \midrule - 0 & 1 & b1 \\ - 1 & 2 & b2 \\ - \bottomrule - \end{tabular} - """ -> assert observed == expected -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{lrl} -E \toprule -E - {} & a & b \\ -E + {} & a & b \\ -E ? + -E \midrule -E - 0 & 1 & b1 \\... -E -E ...Full output truncated (10 lines hidden), use '-vv' to show - -pandas/tests/io/formats/test_to_latex.py:733: AssertionError -___________________________ TestSeriesRepr.test_repr ___________________________ -[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = -datetime_series = 2000-01-03 0.254242 -2000-01-04 -0.951122 -2000-01-05 -0.567236 -2000-01-06 0.653409 -2000-01-07 1.750258 -200...2-08 1.424223 -2000-02-09 0.800410 -2000-02-10 0.542263 -2000-02-11 0.676446 -Freq: B, Name: ts, dtype: float64 -string_series = viK3MdxVn7 0.742605 -scvE0uHTYw -1.052345 -Bv933O5t9b 0.384895 -AH9RcVYr6i 0.779294 -zpAzWStAW1 0.822783 -Eh0...wcFrd58 -1.094470 -pQfdyl6mPL 0.548832 -MXmCRYkSNF -0.467209 -daZyob0osC -0.113301 -Name: (α, bar), dtype: float64 -object_series = mYai0pomyX 'jMRkmUlN9P' -dQlypkXvbe 'TA150g5Yr3' -bcRrHo5Yoe 'FynxFVZt5v' -VU4HUw7Cyj 'zp7XBvYrE8' -yK9fmCi27h '...'585znXWXuS' -mL7428VEve 'jUPPIg5T1J' -r2uMk60SBz 'Ks2lEcpjSW' -lGX1GOuEGg 'ji0H1UTC0r' -Name: objects, dtype: object - - def test_repr(self, datetime_series, string_series, object_series): - str(datetime_series) - str(string_series) - 
str(string_series.astype(int)) - str(object_series) - - str(Series(tm.randn(1000), index=np.arange(1000))) - str(Series(tm.randn(1000), index=np.arange(1000, 0, step=-1))) - - # empty - str(Series(dtype=object)) - - # with NaNs - string_series[5:7] = np.NaN - str(string_series) - - # with Nones - ots = datetime_series.astype("O") - ots[::2] = None - repr(ots) - - # various names - for name in [ - "", - 1, - 1.2, - "foo", - "\u03B1\u03B2\u03B3", - "loooooooooooooooooooooooooooooooooooooooooooooooooooong", - ("foo", "bar", "baz"), - (1, 2), - ("foo", 1, 2.3), - ("\u03B1", "\u03B2", "\u03B3"), - ("\u03B1", "bar"), - ]: - string_series.name = name - repr(string_series) - - biggie = Series( - tm.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz") - ) - repr(biggie) - - # 0 as name - ser = Series(np.random.randn(100), name=0) - rep_str = repr(ser) - assert "Name: 0" in rep_str - - # tidy repr - ser = Series(np.random.randn(1001), name=0) - rep_str = repr(ser) - assert "Name: 0" in rep_str - - ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"]) -> assert "\t" not in repr(ser) -E assert '\t' not in "a\\n\\r\\tf...type: object" -E '\t' is contained here: -E a\n\r\tf 'a -E -E b' -E ? + -E Name: a\n\r\td, dtype: object - -pandas/tests/series/test_repr.py:122: AssertionError -________________________ TestSeriesRepr.test_latex_repr ________________________ -[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_latex_repr(self): - result = r"""\begin{tabular}{ll} - \toprule - {} & 0 \\ - \midrule - 0 & $\alpha$ \\ - 1 & b \\ - 2 & c \\ - \bottomrule - \end{tabular} - """ - with option_context("display.latex.escape", False, "display.latex.repr", True): - s = Series([r"$\alpha$", "b", "c"]) -> assert result == s._repr_latex_() -E AssertionError: assert '\\begin{tabu...nd{tabular}\n' == '\\begin{tabu...nd{tabular}\n' -E \begin{tabular}{ll} -E \toprule -E - {} & 0 \\ -E ? 
- -E + {} & 0 \\ -E \midrule -E - 0 & '$\alpha$' \\... -E -E ...Full output truncated (14 lines hidden), use '-vv' to show - -pandas/tests/series/test_repr.py:209: AssertionError -__________________ TestCategoricalRepr.test_categorical_repr ___________________ -[gw0] linux -- Python 3.8.2 /home/marco/.conda/envs/pandas-dev/bin/python - -self = - - def test_categorical_repr(self): - a = Series(Categorical([1, 2, 3, 4])) - exp = ( - "0 1\n1 2\n2 3\n3 4\n" - + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]" - ) - - assert exp == a.__str__() - - a = Series(Categorical(["a", "b"] * 25)) - exp = ( - "0 a\n1 b\n" - + " ..\n" - + "48 a\n49 b\n" - + "Length: 50, dtype: category\nCategories (2, object): [a, b]" - ) - with option_context("display.max_rows", 5): -> assert exp == repr(a) -E assert '0 a\n1 ...ject): [a, b]' == "0 'a'\n1 ...): ['a', 'b']" -E - 0 'a' -E ? ^ - -E + 0 a -E ? ^ -E - 1 'b' -E ? ^ - -E + 1 b... -E -E ...Full output truncated (17 lines hidden), use '-vv' to show - -pandas/tests/series/test_repr.py:276: AssertionError From b978bf97cdb00a698efe06fc9572002ddafafd23 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 09:53:12 +0100 Subject: [PATCH 06/15] merge error --- pandas/_libs/tslibs/offsets.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 9ed1b38c77ce2..df43ebcfd9df2 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1218,9 +1218,6 @@ cdef class BusinessMixin(SingleConstructorOffset): """ Mixin to business types to provide related functions. 
""" - def __init__(self, n=1, normalize=False, offset=timedelta(0)): - BaseOffset.__init__(self, n, normalize) - object.__setattr__(self, "_offset", offset) cdef readonly: timedelta _offset From 32159369d808bf5933a234ebe34c891d100ca496 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 10:25:36 +0100 Subject: [PATCH 07/15] quotestring --- pandas/core/arrays/categorical.py | 13 +++------- pandas/io/formats/format.py | 8 +++++-- pandas/tests/arrays/categorical/test_repr.py | 25 +++++++++++--------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9d18a411ea0b9..80fe1ac7ce619 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,4 +1,3 @@ -import csv import operator from shutil import get_terminal_size from typing import Dict, Hashable, List, Type, Union, cast @@ -1875,17 +1874,11 @@ def _repr_categories(self): if len(self.categories) > max_categories: num = max_categories // 2 - head = fmt.format_array( - self.categories[:num], None, quoting=csv.QUOTE_NONNUMERIC - ) - tail = fmt.format_array( - self.categories[-num:], None, quoting=csv.QUOTE_NONNUMERIC - ) + head = fmt.format_array(self.categories[:num], None) + tail = fmt.format_array(self.categories[-num:], None) category_strs = head + ["..."] + tail else: - category_strs = fmt.format_array( - self.categories, None, quoting=csv.QUOTE_NONNUMERIC - ) + category_strs = fmt.format_array(self.categories, None) # Strip all leading spaces, which format_array adds for columns... 
category_strs = [x.strip() for x in category_strs] diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 89de749ee67de..6b0d76b2f6e45 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1196,7 +1196,11 @@ def _format_strings(self) -> List[str]: formatter = ( self.formatter if self.formatter is not None - else (lambda x: pprint_thing(x, escape_chars=("\t", "\r", "\n"))) + else ( + lambda x: pprint_thing( + x, escape_chars=("\t", "\r", "\n"), quote_strings=True + ) + ) ) def _format(x): @@ -1215,7 +1219,7 @@ def _format(x): pass return self.na_rep elif isinstance(x, PandasObject): - return f"'{str(x)}'" + return str(x) else: # object dtype return str(formatter(x)) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 33918b6d87520..3164b54653efd 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -14,7 +14,10 @@ class TestCategoricalReprWithFactor(TestCategorical): def test_print(self): - expected = ["[a, b, b, a, a, c, c, c]", "Categories (3, object): [a < b < c]"] + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + "Categories (3, object): ['a' < 'b' < 'c']", + ] expected = "\n".join(expected) actual = repr(self.factor) assert actual == expected @@ -24,9 +27,9 @@ class TestCategoricalRepr: def test_big_print(self): factor = Categorical([0, 1, 2, 0, 1, 2] * 100, ["a", "b", "c"], fastpath=True) expected = [ - "[a, b, c, a, b, ..., b, c, a, b, c]", + "['a', 'b', 'c', 'a', 'b', ..., 'b', 'c', 'a', 'b', 'c']", "Length: 600", - "Categories (3, object): [a, b, c]", + "Categories (3, object): ['a', 'b', 'c']", ] expected = "\n".join(expected) @@ -36,13 +39,13 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) - expected = "[], Categories (3, object): [a, b, c]" + expected = "[], Categories (3, object): ['a', 'b', 'c']" actual = repr(factor) assert 
actual == expected assert expected == actual factor = Categorical([], ["a", "b", "c"], ordered=True) - expected = "[], Categories (3, object): [a < b < c]" + expected = "[], Categories (3, object): ['a' < 'b' < 'c']" actual = repr(factor) assert expected == actual @@ -64,17 +67,17 @@ def test_print_none_width(self): def test_unicode_print(self): c = Categorical(["aaaaa", "bb", "cccc"] * 20) expected = """\ -[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] +['aaaaa', 'bb', 'cccc', 'aaaaa', 'bb', ..., 'bb', 'cccc', 'aaaaa', 'bb', 'cccc'] Length: 60 -Categories (3, object): [aaaaa, bb, cccc]""" +Categories (3, object): ['aaaaa', 'bb', 'cccc']""" assert repr(c) == expected c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) expected = """\ -[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] +['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected @@ -83,9 +86,9 @@ def test_unicode_print(self): with option_context("display.unicode.east_asian_width", True): c = Categorical(["ああああ", "いいいいい", "ううううううう"] * 20) - expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + expected = """['ああああ', 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', ..., 'いいいいい', 'ううううううう', 'ああああ', 'いいいいい', 'ううううううう'] Length: 60 -Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa +Categories (3, object): ['ああああ', 'いいいいい', 'ううううううう']""" # noqa assert repr(c) == expected From e6ce96f1b509a9738e86a77dbd99bee201754e5c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 11:34:34 +0100 Subject: [PATCH 08/15] only quote categorical --- pandas/core/arrays/categorical.py | 15 +++++++++++---- pandas/io/formats/format.py | 7 ++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/categorical.py 
b/pandas/core/arrays/categorical.py index 80fe1ac7ce619..2178fc31b537b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,3 +1,4 @@ +from csv import QUOTE_NONNUMERIC import operator from shutil import get_terminal_size from typing import Dict, Hashable, List, Type, Union, cast @@ -1874,11 +1875,17 @@ def _repr_categories(self): if len(self.categories) > max_categories: num = max_categories // 2 - head = fmt.format_array(self.categories[:num], None) - tail = fmt.format_array(self.categories[-num:], None) + head = fmt.format_array( + self.categories[:num], None, quoting=QUOTE_NONNUMERIC + ) + tail = fmt.format_array( + self.categories[-num:], None, quoting=QUOTE_NONNUMERIC + ) category_strs = head + ["..."] + tail else: - category_strs = fmt.format_array(self.categories, None) + category_strs = fmt.format_array( + self.categories, None, quoting=QUOTE_NONNUMERIC + ) # Strip all leading spaces, which format_array adds for columns... category_strs = [x.strip() for x in category_strs] @@ -1921,7 +1928,7 @@ def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( - self, length=length, na_rep=na_rep, footer=footer + self, length=length, na_rep=na_rep, footer=footer, quoting=QUOTE_NONNUMERIC ) result = formatter.to_string() return str(result) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6b0d76b2f6e45..0e4a9af93e6d4 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -166,12 +166,14 @@ def __init__( length: bool = True, na_rep: str = "NaN", footer: bool = True, + quoting: Optional[int] = None, ): self.categorical = categorical self.buf = buf if buf is not None else StringIO("") self.na_rep = na_rep self.length = length self.footer = footer + self.quoting = quoting def _get_footer(self) -> str: footer = "" @@ -196,6 +198,7 @@ def _get_formatted_values(self) -> List[str]: None, 
float_format=None, na_rep=self.na_rep, + quoting=self.quoting, ) def to_string(self) -> str: @@ -1086,6 +1089,7 @@ def format_array( justify: str = "right", decimal: str = ".", leading_space: Optional[bool] = None, + quoting: Optional[int] = None, ) -> List[str]: """ Format an array for printing. @@ -1148,6 +1152,7 @@ def format_array( justify=justify, decimal=decimal, leading_space=leading_space, + quoting=quoting, ) return fmt_obj.get_result() @@ -1198,7 +1203,7 @@ def _format_strings(self) -> List[str]: if self.formatter is not None else ( lambda x: pprint_thing( - x, escape_chars=("\t", "\r", "\n"), quote_strings=True + x, escape_chars=("\t", "\r", "\n"), quote_strings=self.quoting ) ) ) From 197038b25dff6528321848a63bf40a1b315e3d7c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 11:39:50 +0100 Subject: [PATCH 09/15] fix some failing tests due to changes --- pandas/tests/series/test_repr.py | 4 ++-- pandas/tests/util/test_assert_series_equal.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 77f942a9e32ec..b861b37b49f89 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -270,7 +270,7 @@ def test_categorical_repr(self): "0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" - + "Length: 50, dtype: category\nCategories (2, object): [a, b]" + + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']" ) with option_context("display.max_rows", 5): assert exp == repr(a) @@ -279,7 +279,7 @@ def test_categorical_repr(self): a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) exp = ( "0 a\n1 b\n" + "dtype: category\n" - "Categories (26, object): [a < b < c < d ... w < x < y < z]" + "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 
'w' < 'x' < 'y' < 'z']" ) assert exp == a.__str__() diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 337a06b91e443..7846720c6db25 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -185,10 +185,10 @@ def test_series_equal_categorical_values_mismatch(check_less_precise): Series values are different \\(66\\.66667 %\\) \\[index\\]: \\[0, 1, 2\\] -\\[left\\]: \\[a, b, c\\] -Categories \\(3, object\\): \\[a, b, c\\] -\\[right\\]: \\[a, c, b\\] -Categories \\(3, object\\): \\[a, b, c\\]""" +\\[left\\]: \\['a', 'b', 'c'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\] +\\[right\\]: \\['a', 'c', 'b'\\] +Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" s1 = Series(Categorical(["a", "b", "c"])) s2 = Series(Categorical(["a", "c", "b"])) From f594fa1563cd64797f33d6b1f32e2462dcd451bf Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 11:44:03 +0100 Subject: [PATCH 10/15] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/arrays/categorical/test_repr.py | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index f216418c3a8b0..b2d49bb0970a0 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -991,6 +991,7 @@ I/O - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`) - Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with difference dtypes when reading data using an iterator. 
(:issue:`31544`) - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) +- Repr of :class:`Categorical` was not distinguishing between int and str (:issue:`33676`) Plotting ^^^^^^^^ diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 3164b54653efd..735b062eae80e 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -528,6 +528,7 @@ def test_categorical_index_repr_timedelta_ordered(self): assert repr(i) == exp def test_categorical_str_repr(self): + # GH 33676 result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected From 4912ec3c0ce932e152a8bb58946d990015ef4ac0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Thu, 18 Jun 2020 11:57:32 +0100 Subject: [PATCH 11/15] set quote strings to boolean --- pandas/io/formats/format.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0e4a9af93e6d4..e2900cedb791a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -4,6 +4,7 @@ """ from contextlib import contextmanager +from csv import QUOTE_NONE from datetime import tzinfo import decimal from functools import partial @@ -1198,12 +1199,13 @@ def _format_strings(self) -> List[str]: else: float_format = self.float_format + quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE formatter = ( self.formatter if self.formatter is not None else ( lambda x: pprint_thing( - x, escape_chars=("\t", "\r", "\n"), quote_strings=self.quoting + x, escape_chars=("\t", "\r", "\n"), quote_strings=quote_strings ) ) ) From d57ae96eb998665a69a7b1a91f14fd92ffb3f653 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 22 Jun 2020 07:27:52 +0000 Subject: [PATCH 12/15] fix doctest --- 
pandas/core/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index e790b1d7f106e..1fa38ca24f589 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -743,8 +743,8 @@ def array(self) -> ExtensionArray: >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) >>> ser.array - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] """ raise AbstractMethodError(self) From aa62a24453028d607d2d7881829a8c146df9225c Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 23 Jun 2020 07:11:41 +0000 Subject: [PATCH 13/15] move whatsnew section, use partial, always quotenonnumeric in categorical formatter --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/arrays/categorical.py | 17 +++++++---------- pandas/io/formats/format.py | 22 ++++++++++------------ 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9967ff0ba60f9..7bce367a4c8a8 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -825,6 +825,7 @@ Categorical - Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`) - Bug where :class:`Categorical` comparison operator ``__ne__`` would incorrectly evaluate to ``False`` when either element was missing (:issue:`32276`) - :meth:`Categorical.fillna` now accepts :class:`Categorical` ``other`` argument (:issue:`32420`) +- Repr of :class:`Categorical` was not distinguishing between int and str (:issue:`33676`) Datetimelike ^^^^^^^^^^^^ @@ -997,7 +998,6 @@ I/O - Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`) - Bug in :meth:`~pandas.io.stata.StataReader` which 
resulted in categorical variables with difference dtypes when reading data using an iterator. (:issue:`31544`) - :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`) -- Repr of :class:`Categorical` was not distinguishing between int and str (:issue:`33676`) Plotting ^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2178fc31b537b..92a75df5c24a7 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,4 +1,5 @@ from csv import QUOTE_NONNUMERIC +from functools import partial import operator from shutil import get_terminal_size from typing import Dict, Hashable, List, Type, Union, cast @@ -1873,19 +1874,15 @@ def _repr_categories(self): ) from pandas.io.formats import format as fmt + format_array = partial(fmt.format_array, quoting=QUOTE_NONNUMERIC) + if len(self.categories) > max_categories: num = max_categories // 2 - head = fmt.format_array( - self.categories[:num], None, quoting=QUOTE_NONNUMERIC - ) - tail = fmt.format_array( - self.categories[-num:], None, quoting=QUOTE_NONNUMERIC - ) + head = format_array(self.categories[:num], None) + tail = format_array(self.categories[-num:], None) category_strs = head + ["..."] + tail else: - category_strs = fmt.format_array( - self.categories, None, quoting=QUOTE_NONNUMERIC - ) + category_strs = format_array(self.categories, None) # Strip all leading spaces, which format_array adds for columns... 
category_strs = [x.strip() for x in category_strs] @@ -1928,7 +1925,7 @@ def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( - self, length=length, na_rep=na_rep, footer=footer, quoting=QUOTE_NONNUMERIC + self, length=length, na_rep=na_rep, footer=footer ) result = formatter.to_string() return str(result) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 4a6780181332f..3a96a9ba8ad69 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -4,7 +4,7 @@ """ from contextlib import contextmanager -from csv import QUOTE_NONE +from csv import QUOTE_NONE, QUOTE_NONNUMERIC from datetime import tzinfo import decimal from functools import partial @@ -171,14 +171,13 @@ def __init__( length: bool = True, na_rep: str = "NaN", footer: bool = True, - quoting: Optional[int] = None, ): self.categorical = categorical self.buf = buf if buf is not None else StringIO("") self.na_rep = na_rep self.length = length self.footer = footer - self.quoting = quoting + self.quoting = QUOTE_NONNUMERIC def _get_footer(self) -> str: footer = "" @@ -1222,16 +1221,15 @@ def _format_strings(self) -> List[str]: else: float_format = self.float_format - quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE - formatter = ( - self.formatter - if self.formatter is not None - else ( - lambda x: pprint_thing( - x, escape_chars=("\t", "\r", "\n"), quote_strings=quote_strings - ) + if self.formatter is not None: + formatter = self.formatter + else: + quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE + formatter = partial( + pprint_thing, + escape_chars=("\t", "\r", "\n"), + quote_strings=quote_strings, ) - ) def _format(x): if self.na_rep is not None and is_scalar(x) and isna(x): From 9562313ca21652b5b4c4dbc5e68cc9295876bbc0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 23 Jun 2020 08:34:24 +0100 Subject: [PATCH 14/15] 
docstrings --- pandas/core/algorithms.py | 4 +-- pandas/core/arrays/base.py | 6 ++--- pandas/core/arrays/categorical.py | 25 ++++++++++--------- pandas/core/base.py | 4 +-- pandas/core/construction.py | 12 ++++----- pandas/core/dtypes/concat.py | 4 +-- pandas/core/series.py | 12 ++++----- web/pandas/community/blog/extension-arrays.md | 6 ++--- 8 files changed, 37 insertions(+), 36 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index dcf2015245518..9e3ca4cc53363 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -604,8 +604,8 @@ def factorize( >>> codes array([0, 0, 1]...) >>> uniques - [a, c] - Categories (3, object): [a, b, c] + ['a', 'c'] + Categories (3, object): ['a', 'b', 'c'] Notice that ``'b'`` is in ``uniques.categories``, despite not being present in ``cat.values``. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7f2c61ff7d955..373edd0491c98 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -847,13 +847,13 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray" >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat [a, b, c] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat(2) [a, a, b, b, c, c] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat([1, 2, 3]) [a, b, b, c, c, c] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] """ @Substitution(klass="ExtensionArray") diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 92a75df5c24a7..b60a9724b3350 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -278,7 +278,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) [a, b, c, a, b, c] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] Ordered `Categoricals` 
can be sorted according to the custom order of the categories and can have a min and max value. @@ -1131,10 +1131,10 @@ def map(self, mapper): >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat [a, b, c] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> cat.map(lambda x: x.upper()) [A, B, C] - Categories (3, object): [A, B, C] + Categories (3, object): ['A', 'B', 'C'] >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}) [first, second, third] Categories (3, object): [first, second, third] @@ -1874,15 +1874,16 @@ def _repr_categories(self): ) from pandas.io.formats import format as fmt - format_array = partial(fmt.format_array, quoting=QUOTE_NONNUMERIC) - + format_array = partial( + fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC + ) if len(self.categories) > max_categories: num = max_categories // 2 - head = format_array(self.categories[:num], None) - tail = format_array(self.categories[-num:], None) + head = format_array(self.categories[:num]) + tail = format_array(self.categories[-num:]) category_strs = head + ["..."] + tail else: - category_strs = format_array(self.categories, None) + category_strs = format_array(self.categories) # Strip all leading spaces, which format_array adds for columns... 
category_strs = [x.strip() for x in category_strs] @@ -2056,7 +2057,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: >>> c = pd.Categorical(list('aabca')) >>> c [a, a, b, c, a] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> c.categories Index(['a', 'b', 'c'], dtype='object') >>> c.codes @@ -2469,7 +2470,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> s.cat.categories Index(['a', 'b', 'c'], dtype='object') @@ -2523,7 +2524,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] >>> s.cat.set_categories(list("abcde")) 0 a @@ -2553,7 +2554,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] """ def __init__(self, data): diff --git a/pandas/core/base.py b/pandas/core/base.py index 1fa38ca24f589..813de491ffdb3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1481,8 +1481,8 @@ def factorize(self, sort=False, na_sentinel=-1): ... ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True ... 
) >>> ser - [apple, bread, bread, cheese, milk] - Categories (4, object): [apple < bread < cheese < milk] + ['apple', 'bread', 'bread', 'cheese', 'milk'] + Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk'] >>> ser.searchsorted('bread') 1 diff --git a/pandas/core/construction.py b/pandas/core/construction.py index b110a316a76d9..9ac661f97a56e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -217,15 +217,15 @@ def array( You can use the string alias for `dtype` >>> pd.array(['a', 'b', 'a'], dtype='category') - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] Or specify the actual dtype >>> pd.array(['a', 'b', 'a'], ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) - [a, b, a] - Categories (3, object): [a < b < c] + ['a', 'b', 'a'] + Categories (3, object): ['a' < 'b' < 'c'] If pandas does not infer a dedicated extension type a :class:`arrays.PandasArray` is returned. @@ -357,8 +357,8 @@ def extract_array(obj, extract_numpy: bool = False): Examples -------- >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) - [a, b, c] - Categories (3, object): [a, b, c] + ['a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] Other objects like lists, arrays, and DataFrames are just passed through. diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 71686bfc313fb..5c0963f7dde52 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -237,7 +237,7 @@ def union_categoricals( >>> union_categoricals([a, b], sort_categories=True) [b, c, a, b] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with the case of combining two categoricals of the same categories and order information (e.g. 
what @@ -267,7 +267,7 @@ def union_categoricals( >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) [a, b, c, c, b, a] - Categories (3, object): [a, b, c] + Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with a `CategoricalIndex`, or `Series` containing categorical data, but note that the resulting array will diff --git a/pandas/core/series.py b/pandas/core/series.py index cab8dd133b579..62dcb03e2d966 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -524,8 +524,8 @@ def values(self): array(['a', 'a', 'b', 'c'], dtype=object) >>> pd.Series(list('aabc')).astype('category').values - [a, a, b, c] - Categories (3, object): [a, b, c] + ['a', 'a', 'b', 'c'] + Categories (3, object): ['a', 'b', 'c'] Timezone aware datetime data is converted to UTC: @@ -1850,15 +1850,15 @@ def unique(self): appearance. >>> pd.Series(pd.Categorical(list('baabc'))).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] An ordered Categorical preserves the category ordering. >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), ... ordered=True)).unique() - [b, a, c] - Categories (3, object): [a < b < c] + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] """ result = super().unique() return result diff --git a/web/pandas/community/blog/extension-arrays.md b/web/pandas/community/blog/extension-arrays.md index ea8a9a28ba242..61a77738a259c 100644 --- a/web/pandas/community/blog/extension-arrays.md +++ b/web/pandas/community/blog/extension-arrays.md @@ -117,11 +117,11 @@ library). For example, consider `Categorical`, 1 b 2 a dtype: category -Categories (3, object): [a, b, c] +Categories (3, object): ['a', 'b', 'c'] >>> ser.values [a, b, a] -Categories (3, object): [a, b, c] +Categories (3, object): ['a', 'b', 'c'] ``` In this case `.values` is a Categorical, not a NumPy array. 
For period-dtype @@ -143,7 +143,7 @@ So with our Categorical example, ```python >>> ser.array [a, b, a] -Categories (3, object): [a, b, c] +Categories (3, object): ['a', 'b', 'c'] >>> ser.to_numpy() array(['a', 'b', 'a'], dtype=object) From 457abe31c478e8a9072efb2ded46afaef5fe75d7 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 24 Jun 2020 09:29:37 +0100 Subject: [PATCH 15/15] fix failing doctests --- pandas/core/arrays/base.py | 6 +-- pandas/core/arrays/categorical.py | 80 +++++++++++++++---------------- pandas/core/dtypes/concat.py | 16 +++---- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/reshape/tile.py | 8 ++-- 5 files changed, 56 insertions(+), 56 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 373edd0491c98..5565b85f8d59a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -846,13 +846,13 @@ def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, "ExtensionArray" -------- >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat - [a, b, c] + ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat(2) - [a, a, b, b, c, c] + ['a', 'a', 'b', 'b', 'c', 'c'] Categories (3, object): ['a', 'b', 'c'] >>> cat.repeat([1, 2, 3]) - [a, b, b, c, c, c] + ['a', 'b', 'b', 'c', 'c', 'c'] Categories (3, object): ['a', 'b', 'c'] """ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b60a9724b3350..5b62566e1cfe9 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -277,7 +277,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): Categories (3, int64): [1, 2, 3] >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) - [a, b, c, a, b, c] + ['a', 'b', 'c', 'a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] Ordered `Categoricals` can be sorted according to the custom order @@ -286,8 +286,8 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject): >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], 
ordered=True, ... categories=['c', 'b', 'a']) >>> c - [a, b, c, a, b, c] - Categories (3, object): [c < b < a] + ['a', 'b', 'c', 'a', 'b', 'c'] + Categories (3, object): ['c' < 'b' < 'a'] >>> c.min() 'c' """ @@ -600,8 +600,8 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): -------- >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) - [a, b, a, b] - Categories (2, object): [a < b] + ['a', 'b', 'a', 'b'] + Categories (2, object): ['a' < 'b'] """ dtype = CategoricalDtype._from_values_or_dtype( categories=categories, ordered=ordered, dtype=dtype @@ -661,13 +661,13 @@ def _set_categories(self, categories, fastpath=False): -------- >>> c = pd.Categorical(['a', 'b']) >>> c - [a, b] - Categories (2, object): [a, b] + ['a', 'b'] + Categories (2, object): ['a', 'b'] >>> c._set_categories(pd.Index(['a', 'c'])) >>> c - [a, c] - Categories (2, object): [a, c] + ['a', 'c'] + Categories (2, object): ['a', 'c'] """ if fastpath: new_dtype = CategoricalDtype._from_fastpath(categories, self.ordered) @@ -887,14 +887,14 @@ def rename_categories(self, new_categories, inplace=False): categories not in the dictionary are passed through >>> c.rename_categories({'a': 'A', 'c': 'C'}) - [A, A, b] - Categories (2, object): [A, b] + ['A', 'A', 'b'] + Categories (2, object): ['A', 'b'] You may also provide a callable to create the new categories >>> c.rename_categories(lambda x: x.upper()) - [A, A, B] - Categories (2, object): [A, B] + ['A', 'A', 'B'] + Categories (2, object): ['A', 'B'] """ inplace = validate_bool_kwarg(inplace, "inplace") cat = self if inplace else self.copy() @@ -1130,22 +1130,22 @@ def map(self, mapper): -------- >>> cat = pd.Categorical(['a', 'b', 'c']) >>> cat - [a, b, c] + ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] >>> cat.map(lambda x: x.upper()) - [A, B, C] + ['A', 'B', 'C'] Categories (3, object): ['A', 'B', 'C'] >>> cat.map({'a': 'first', 'b': 'second', 'c': 
'third'}) - [first, second, third] - Categories (3, object): [first, second, third] + ['first', 'second', 'third'] + Categories (3, object): ['first', 'second', 'third'] If the mapping is one-to-one the ordering of the categories is preserved: >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) >>> cat - [a, b, c] - Categories (3, object): [a < b < c] + ['a', 'b', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] >>> cat.map({'a': 3, 'b': 2, 'c': 1}) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] @@ -1780,29 +1780,29 @@ def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T: -------- >>> cat = pd.Categorical(['a', 'a', 'b']) >>> cat - [a, a, b] - Categories (2, object): [a, b] + ['a', 'a', 'b'] + Categories (2, object): ['a', 'b'] Specify ``allow_fill==False`` to have negative indices mean indexing from the right. >>> cat.take([0, -1, -2], allow_fill=False) - [a, b, a] - Categories (2, object): [a, b] + ['a', 'b', 'a'] + Categories (2, object): ['a', 'b'] With ``allow_fill=True``, indices equal to ``-1`` mean "missing" values that should be filled with the `fill_value`, which is ``np.nan`` by default. >>> cat.take([0, -1, -1], allow_fill=True) - [a, NaN, NaN] - Categories (2, object): [a, b] + ['a', NaN, NaN] + Categories (2, object): ['a', 'b'] The fill value can be specified. >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a') - [a, a, a] - Categories (2, object): [a, b] + ['a', 'a', 'a'] + Categories (2, object): ['a', 'b'] Specifying a fill value that's not in ``self.categories`` will raise a ``ValueError``. @@ -2056,7 +2056,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: -------- >>> c = pd.Categorical(list('aabca')) >>> c - [a, a, b, c, a] + ['a', 'a', 'b', 'c', 'a'] Categories (3, object): ['a', 'b', 'c'] >>> c.categories Index(['a', 'b', 'c'], dtype='object') @@ -2204,20 +2204,20 @@ def unique(self): order of appearance. 
>>> pd.Categorical(list("baabc")).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] >>> pd.Categorical(list("baabc"), categories=list("abc")).unique() - [b, a, c] - Categories (3, object): [b, a, c] + ['b', 'a', 'c'] + Categories (3, object): ['b', 'a', 'c'] An ordered Categorical preserves the category ordering. >>> pd.Categorical( ... list("baabc"), categories=list("abc"), ordered=True ... ).unique() - [b, a, c] - Categories (3, object): [a < b < c] + ['b', 'a', 'c'] + Categories (3, object): ['a' < 'b' < 'c'] """ # unlike np.unique, unique1d does not sort unique_codes = unique1d(self.codes) @@ -2483,7 +2483,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 a 5 a dtype: category - Categories (3, object): [c, b, a] + Categories (3, object): ['c', 'b', 'a'] >>> s.cat.reorder_categories(list("cba")) 0 a @@ -2493,7 +2493,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [c, b, a] + Categories (3, object): ['c', 'b', 'a'] >>> s.cat.add_categories(["d", "e"]) 0 a @@ -2503,7 +2503,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): [a, b, c, d, e] + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.remove_categories(["a", "c"]) 0 NaN @@ -2513,7 +2513,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 NaN 5 NaN dtype: category - Categories (1, object): [b] + Categories (1, object): ['b'] >>> s1 = s.cat.add_categories(["d", "e"]) >>> s1.cat.remove_unused_categories() @@ -2534,7 +2534,7 @@ class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (5, object): [a, b, c, d, e] + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] >>> s.cat.as_ordered() 0 a @@ -2544,7 +2544,7 @@ class 
CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): 4 c 5 c dtype: category - Categories (3, object): [a < b < c] + Categories (3, object): ['a' < 'b' < 'c'] >>> s.cat.as_unordered() 0 a diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 5c0963f7dde52..4b7c818f487ac 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -228,15 +228,15 @@ def union_categoricals( >>> a = pd.Categorical(["b", "c"]) >>> b = pd.Categorical(["a", "b"]) >>> union_categoricals([a, b]) - [b, c, a, b] - Categories (3, object): [b, c, a] + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] By default, the resulting categories will be ordered as they appear in the `categories` of the data. If you want the categories to be lexsorted, use `sort_categories=True` argument. >>> union_categoricals([a, b], sort_categories=True) - [b, c, a, b] + ['b', 'c', 'a', 'b'] Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with the case of combining two @@ -246,8 +246,8 @@ def union_categoricals( >>> a = pd.Categorical(["a", "b"], ordered=True) >>> b = pd.Categorical(["a", "b", "a"], ordered=True) >>> union_categoricals([a, b]) - [a, b, a, b, a] - Categories (2, object): [a < b] + ['a', 'b', 'a', 'b', 'a'] + Categories (2, object): ['a' < 'b'] Raises `TypeError` because the categories are ordered and not identical. 
@@ -266,7 +266,7 @@ def union_categoricals( >>> a = pd.Categorical(["a", "b", "c"], ordered=True) >>> b = pd.Categorical(["c", "b", "a"], ordered=True) >>> union_categoricals([a, b], ignore_order=True) - [a, b, c, c, b, a] + ['a', 'b', 'c', 'c', 'b', 'a'] Categories (3, object): ['a', 'b', 'c'] `union_categoricals` also works with a `CategoricalIndex`, or `Series` @@ -276,8 +276,8 @@ def union_categoricals( >>> a = pd.Series(["b", "c"], dtype='category') >>> b = pd.Series(["a", "b"], dtype='category') >>> union_categoricals([a, b]) - [b, c, a, b] - Categories (3, object): [b, c, a] + ['b', 'c', 'a', 'b'] + Categories (3, object): ['b', 'c', 'a'] """ from pandas import Categorical from pandas.core.arrays.categorical import recode_for_categories diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index b9d16ac5959e3..a9d2430717e4f 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -230,7 +230,7 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): 2 a 3 NaN dtype: category - Categories (2, object): [b < a] + Categories (2, object): ['b' < 'a'] An empty CategoricalDtype with a specific dtype can be created by providing an empty index. As follows, diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b6735282acaff..f7723bee532ff 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -150,16 +150,16 @@ def cut( >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), ... 3, labels=["bad", "medium", "good"]) - [bad, good, medium, medium, good, bad] - Categories (3, object): [bad < medium < good] + ['bad', 'good', 'medium', 'medium', 'good', 'bad'] + Categories (3, object): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, ... 
labels=["B", "A", "B"], ordered=False) - [B, B, A, A, B, B] - Categories (2, object): [A, B] + ['B', 'B', 'A', 'A', 'B', 'B'] + Categories (2, object): ['A', 'B'] ``labels=False`` implies you just want the bins back.