From 72b801168f0f34db49a6027c04845ff08a683e26 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Sep 2018 14:44:02 +0100 Subject: [PATCH 1/7] add code comment --- pandas/io/formats/html.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index a6b03c9c6dd23..d4e38dd00f077 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -342,6 +342,7 @@ def _write_body(self, indent): self._write_hierarchical_rows(fmt_values, indent) else: self._write_regular_rows(fmt_values, indent) +# GH 15019, GH 22783 add truncation logic below else: for i in range(min(len(self.frame), self.max_rows)): row = [fmt_values[j][i] for j in range(len(self.columns))] From a7b24248a75838ede41e50a1c073c9f03e33426b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 20 Sep 2018 15:04:02 +0100 Subject: [PATCH 2/7] add failing test cases --- pandas/tests/io/formats/conftest.py | 12 +++++++ .../formats/data/gh15019_expected_output.html | 31 +++++++++++++++++++ .../formats/data/gh22783_expected_output.html | 27 ++++++++++++++++ pandas/tests/io/formats/test_to_html.py | 25 +++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 pandas/tests/io/formats/conftest.py create mode 100644 pandas/tests/io/formats/data/gh15019_expected_output.html create mode 100644 pandas/tests/io/formats/data/gh22783_expected_output.html diff --git a/pandas/tests/io/formats/conftest.py b/pandas/tests/io/formats/conftest.py new file mode 100644 index 0000000000000..79afd2e95c08f --- /dev/null +++ b/pandas/tests/io/formats/conftest.py @@ -0,0 +1,12 @@ +import pytest + + +@pytest.fixture +def read_file(datapath): + """fixture factory to read text files from tests/io/formats/data""" + def _read_file(filename): + filepath = datapath('io', 'formats', 'data', filename) + with open(filepath) as f: + contents = f.read() + return contents + return _read_file diff --git a/pandas/tests/io/formats/data/gh15019_expected_output.html 
b/pandas/tests/io/formats/data/gh15019_expected_output.html new file mode 100644 index 0000000000000..8b4183dd19a03 --- /dev/null +++ b/pandas/tests/io/formats/data/gh15019_expected_output.html @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
01
1.7640520.400157
0.9787382.240893
......
0.950088-0.151357
-0.1032190.410599
+ \ No newline at end of file diff --git a/pandas/tests/io/formats/data/gh22783_expected_output.html b/pandas/tests/io/formats/data/gh22783_expected_output.html new file mode 100644 index 0000000000000..107db43c48639 --- /dev/null +++ b/pandas/tests/io/formats/data/gh22783_expected_output.html @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +
01...34
1.7640520.400157...2.2408931.867558
-0.9772780.950088...-0.1032190.410599
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 845fb1ee3dc3a..3885441c767f3 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -22,6 +22,15 @@ pass +@pytest.fixture +def expected_html(read_file): + def _expected_html(name): + filename = '.'.join([name, 'html']) + html = read_file(filename) + return html.rstrip() + return _expected_html + + class TestToHTML(object): def test_to_html_with_col_space(self): @@ -1905,6 +1914,22 @@ def test_to_html_multiindex_max_cols(self): """) assert result == expected + def test_to_html_truncation_index_false_max_rows(self, expected_html): + # GH 15019 + np.random.seed(seed=0) + df = pd.DataFrame(np.random.randn(5, 2)) + result = df.to_html(max_rows=4, index=False) + expected = expected_html('gh15019_expected_output') + assert result == expected + + def test_to_html_truncation_index_false_max_cols(self, expected_html): + # GH 22783 + np.random.seed(seed=0) + df = pd.DataFrame(np.random.randn(2, 5)) + result = df.to_html(max_cols=4, index=False) + expected = expected_html('gh22783_expected_output') + assert result == expected + def test_to_html_notebook_has_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html(notebook=True) From 8a82df5fa23a8687dfbaa117f2ccd1953dd9ff1f Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 21 Sep 2018 03:04:33 +0100 Subject: [PATCH 3/7] fix failing tests --- pandas/io/formats/html.py | 20 ++++++++++++++++--- pandas/tests/io/formats/conftest.py | 12 ----------- .../formats/data/gh15019_expected_output.html | 1 - pandas/tests/io/formats/test_to_html.py | 7 +++++-- 4 files changed, 22 insertions(+), 18 deletions(-) delete mode 100644 pandas/tests/io/formats/conftest.py diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index d4e38dd00f077..2d65c731169e4 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -306,6 +306,8 @@ def _column_header(): 
align = self.fmt.justify if truncate_h: + if self.fmt.index is False: + row_levels = 0 ins_col = row_levels + self.fmt.tr_col_num col_row.insert(ins_col, '...') @@ -342,10 +344,22 @@ def _write_body(self, indent): self._write_hierarchical_rows(fmt_values, indent) else: self._write_regular_rows(fmt_values, indent) -# GH 15019, GH 22783 add truncation logic below else: - for i in range(min(len(self.frame), self.max_rows)): - row = [fmt_values[j][i] for j in range(len(self.columns))] + truncate_h = self.fmt.truncate_h + truncate_v = self.fmt.truncate_v + ncols = len(self.fmt.tr_frame.columns) + nrows = len(self.fmt.tr_frame) + + row = [] + for i in range(nrows): + if truncate_v and i == (self.fmt.tr_row_num): + str_sep_row = ['...'] * len(row) + self.write_tr(str_sep_row, indent, + self.indent_delta, tags=None) + row = [fmt_values[j][i] for j in range(ncols)] + if truncate_h: + dot_col_ix = self.fmt.tr_col_num + row.insert(dot_col_ix, '...') self.write_tr(row, indent, self.indent_delta, tags=None) indent -= self.indent_delta diff --git a/pandas/tests/io/formats/conftest.py b/pandas/tests/io/formats/conftest.py deleted file mode 100644 index 79afd2e95c08f..0000000000000 --- a/pandas/tests/io/formats/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -import pytest - - -@pytest.fixture -def read_file(datapath): - """fixture factory to read text files from tests/io/formats/data""" - def _read_file(filename): - filepath = datapath('io', 'formats', 'data', filename) - with open(filepath) as f: - contents = f.read() - return contents - return _read_file diff --git a/pandas/tests/io/formats/data/gh15019_expected_output.html b/pandas/tests/io/formats/data/gh15019_expected_output.html index 8b4183dd19a03..5fb9d960f4465 100644 --- a/pandas/tests/io/formats/data/gh15019_expected_output.html +++ b/pandas/tests/io/formats/data/gh15019_expected_output.html @@ -28,4 +28,3 @@ - \ No newline at end of file diff --git a/pandas/tests/io/formats/test_to_html.py 
b/pandas/tests/io/formats/test_to_html.py index 3885441c767f3..b4aae9669cbf4 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -23,10 +23,13 @@ @pytest.fixture -def expected_html(read_file): +def expected_html(datapath): + """fixture factory to read html files from tests/io/formats/data""" def _expected_html(name): filename = '.'.join([name, 'html']) - html = read_file(filename) + filepath = datapath('io', 'formats', 'data', filename) + with open(filepath) as f: + html = f.read() return html.rstrip() return _expected_html From f028b7d547677689556a98ab0f84fa9d95b9731c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 21 Sep 2018 10:30:18 +0100 Subject: [PATCH 4/7] requested changes --- pandas/tests/io/formats/test_to_html.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index b4aae9669cbf4..1f3777b65d116 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -22,16 +22,12 @@ pass -@pytest.fixture -def expected_html(datapath): - """fixture factory to read html files from tests/io/formats/data""" - def _expected_html(name): - filename = '.'.join([name, 'html']) - filepath = datapath('io', 'formats', 'data', filename) - with open(filepath) as f: - html = f.read() - return html.rstrip() - return _expected_html +def expected_html(datapath, name): + filename = '.'.join([name, 'html']) + filepath = datapath('io', 'formats', 'data', filename) + with open(filepath) as f: + html = f.read() + return html.rstrip() class TestToHTML(object): @@ -1917,20 +1913,20 @@ def test_to_html_multiindex_max_cols(self): """) assert result == expected - def test_to_html_truncation_index_false_max_rows(self, expected_html): + def test_to_html_truncation_index_false_max_rows(self, datapath): # GH 15019 np.random.seed(seed=0) df = pd.DataFrame(np.random.randn(5, 2)) 
result = df.to_html(max_rows=4, index=False) - expected = expected_html('gh15019_expected_output') + expected = expected_html(datapath, 'gh15019_expected_output') assert result == expected - def test_to_html_truncation_index_false_max_cols(self, expected_html): + def test_to_html_truncation_index_false_max_cols(self, datapath): # GH 22783 np.random.seed(seed=0) df = pd.DataFrame(np.random.randn(2, 5)) result = df.to_html(max_cols=4, index=False) - expected = expected_html('gh22783_expected_output') + expected = expected_html(datapath, 'gh22783_expected_output') assert result == expected def test_to_html_notebook_has_style(self): From 4c1bca83282ad476f93dc99242f2096b2e51f9c4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 24 Sep 2018 12:54:30 +0100 Subject: [PATCH 5/7] add docstring --- pandas/tests/io/formats/test_to_html.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 1f3777b65d116..f7ad08a634ff9 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -23,6 +23,20 @@ def expected_html(datapath, name): + """ + Read HTML file from formats data directory. + + Parameters + ---------- + datapath : pytest fixture + The datapath fixture injected into a test by pytest. + name : str + The name of the HTML file without the suffix. + + Returns + ------- + str : contents of HTML file. 
+ """ filename = '.'.join([name, 'html']) filepath = datapath('io', 'formats', 'data', filename) with open(filepath) as f: From 2cd532ef27bfda44bf76f1eda5484ba36ad27a95 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 24 Sep 2018 12:54:46 +0100 Subject: [PATCH 6/7] add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9e2c20c78f489..e756d4cdbf2c5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -751,6 +751,7 @@ I/O - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) +- Bug in :meth:`DataFrame.to_html` with ``index=False`` where the truncation indicators (...) were missing 
on truncated DataFrame (:issue:`15019`, :issue:`22783`) Plotting ^^^^^^^^ From c6cb507c3e98a772a1af0c245c760cfdb23e00ae Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 12 Nov 2018 22:27:37 +0000 Subject: [PATCH 7/7] requested changes --- pandas/io/formats/html.py | 48 ++++++++++--------------- pandas/tests/io/formats/test_to_html.py | 23 +++++++----- 2 files changed, 33 insertions(+), 38 deletions(-) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 8957f5d101cd2..967e5fca5f711 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -305,7 +305,7 @@ def _column_header(): align = self.fmt.justify if truncate_h: - if self.fmt.index is False: + if not self.fmt.index: row_levels = 0 ins_col = row_levels + self.fmt.tr_col_num col_row.insert(ins_col, '...') @@ -338,28 +338,10 @@ def _write_body(self, indent): fmt_values[i] = self.fmt._format_col(i) # write values - if self.fmt.index: - if isinstance(self.frame.index, ABCMultiIndex): - self._write_hierarchical_rows(fmt_values, indent) - else: - self._write_regular_rows(fmt_values, indent) + if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex): + self._write_hierarchical_rows(fmt_values, indent) else: - truncate_h = self.fmt.truncate_h - truncate_v = self.fmt.truncate_v - ncols = len(self.fmt.tr_frame.columns) - nrows = len(self.fmt.tr_frame) - - row = [] - for i in range(nrows): - if truncate_v and i == (self.fmt.tr_row_num): - str_sep_row = ['...'] * len(row) - self.write_tr(str_sep_row, indent, - self.indent_delta, tags=None) - row = [fmt_values[j][i] for j in range(ncols)] - if truncate_h: - dot_col_ix = self.fmt.tr_col_num - row.insert(dot_col_ix, '...') - self.write_tr(row, indent, self.indent_delta, tags=None) + self._write_regular_rows(fmt_values, indent) indent -= self.indent_delta self.write('', indent) @@ -373,11 +355,16 @@ def _write_regular_rows(self, fmt_values, indent): ncols = len(self.fmt.tr_frame.columns) nrows = len(self.fmt.tr_frame) - fmt = 
self.fmt._get_formatter('__index__') - if fmt is not None: - index_values = self.fmt.tr_frame.index.map(fmt) + + if self.fmt.index: + fmt = self.fmt._get_formatter('__index__') + if fmt is not None: + index_values = self.fmt.tr_frame.index.map(fmt) + else: + index_values = self.fmt.tr_frame.index.format() + row_levels = 1 else: - index_values = self.fmt.tr_frame.index.format() + row_levels = 0 row = [] for i in range(nrows): @@ -385,17 +372,18 @@ def _write_regular_rows(self, fmt_values, indent): if truncate_v and i == (self.fmt.tr_row_num): str_sep_row = ['...'] * len(row) self.write_tr(str_sep_row, indent, self.indent_delta, - tags=None, nindex_levels=1) + tags=None, nindex_levels=row_levels) row = [] - row.append(index_values[i]) + if self.fmt.index: + row.append(index_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: - dot_col_ix = self.fmt.tr_col_num + 1 + dot_col_ix = self.fmt.tr_col_num + row_levels row.insert(dot_col_ix, '...') self.write_tr(row, indent, self.indent_delta, tags=None, - nindex_levels=1) + nindex_levels=row_levels) def _write_hierarchical_rows(self, fmt_values, indent): template = 'rowspan="{span}" valign="top"' diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index fbe0153633088..57a5f4e709db6 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1903,19 +1903,26 @@ def test_to_html_multiindex_max_cols(self): """) assert result == expected - def test_to_html_truncation_index_false_max_rows(self, datapath): + @pytest.mark.parametrize('index', [False, 0]) + def test_to_html_truncation_index_false_max_rows(self, datapath, index): # GH 15019 - np.random.seed(seed=0) - df = pd.DataFrame(np.random.randn(5, 2)) - result = df.to_html(max_rows=4, index=False) + data = [[1.764052, 0.400157], + [0.978738, 2.240893], + [1.867558, -0.977278], + [0.950088, -0.151357], + [-0.103219, 0.410599]] + df = pd.DataFrame(data) + result = 
df.to_html(max_rows=4, index=index) expected = expected_html(datapath, 'gh15019_expected_output') assert result == expected - def test_to_html_truncation_index_false_max_cols(self, datapath): + @pytest.mark.parametrize('index', [False, 0]) + def test_to_html_truncation_index_false_max_cols(self, datapath, index): # GH 22783 - np.random.seed(seed=0) - df = pd.DataFrame(np.random.randn(2, 5)) - result = df.to_html(max_cols=4, index=False) + data = [[1.764052, 0.400157, 0.978738, 2.240893, 1.867558], + [-0.977278, 0.950088, -0.151357, -0.103219, 0.410599]] + df = pd.DataFrame(data) + result = df.to_html(max_cols=4, index=index) expected = expected_html(datapath, 'gh22783_expected_output') assert result == expected