From 5b9a825aef2cd246b97879d8a70edb9a68b5dbff Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 12:04:20 +0700 Subject: [PATCH 01/17] ENH: specific col names for col_space GH 28929 --- pandas/io/formats/format.py | 18 +++++++++++++++--- pandas/io/formats/html.py | 31 +++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 17cc897136aad..8dfdc48b15165 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -83,6 +83,10 @@ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] ] FloatFormatType = Union[str, Callable, "EngFormatter"] +ColspaceType = Union[ + str, int, + List[Union[str, int]], Tuple[Union[str, int], ...], Mapping[Union[str, int], Union[str, int]] +] common_docstring = """ Parameters @@ -540,7 +544,7 @@ def __init__( self, frame: "DataFrame", columns: Optional[Sequence[str]] = None, - col_space: Optional[Union[str, int]] = None, + col_space: Optional[ColspaceType] = None, header: Union[bool, Sequence[str]] = True, index: bool = True, na_rep: str = "NaN", @@ -575,12 +579,20 @@ def __init__( self.formatters = formatters else: raise ValueError( - f"Formatters length({len(formatters)}) should match " + f"Formatters length({len( )}) should match " f"DataFrame number of columns({len(frame.columns)})" ) self.na_rep = na_rep self.decimal = decimal - self.col_space = col_space + if col_space is None: + self.col_space = {} + elif isinstance(col_space, (dict, str, int)) or len(frame.columns) == len(col_space): + self.col_space = col_space + else: + raise ValueError( + f"Col_space length({len(col_space)}) should match " + f"DataFrame number of columns({len(frame.columns)})" + ) self.header = header self.index = index self.line_width = line_width diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 1be0f977f9b20..d1e96971c17ce 100644 --- a/pandas/io/formats/html.py +++ 
b/pandas/io/formats/html.py @@ -55,8 +55,29 @@ def __init__( self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links - if isinstance(self.fmt.col_space, int): - self.fmt.col_space = f"{self.fmt.col_space}px" + if isinstance(self.fmt.col_space, dict): + self.fmt.col_space = { + column: f"{value}px" if isinstance(value, int) else value + for column, value in self.fmt.col_space.items() + } + elif isinstance(self.fmt.col_space, list): + self.fmt.col_space = dict( + zip( + self.frame.columns, + [ + f"{value}px" if isinstance(value, int) else value + for value in self.fmt.col_space + ], + ) + ) + elif isinstance(self.fmt.col_space, (int, str)): + col_space = f"{self.fmt.col_space}px" if isinstance(self.fmt.col_space, int) else self.fmt.col_space + self.fmt.col_space = {"": col_space} + self.fmt.col_space.update( + {column: col_space for column in self.frame.columns} + ) + else: + self.fmt.col_space = {} @property def show_row_idx_names(self) -> bool: @@ -122,9 +143,11 @@ def write_th( ------- A written cell. 
""" - if header and self.fmt.col_space is not None: + col_space = self.fmt.col_space.get(s,None) + + if header and col_space is not None: tags = tags or "" - tags += f'style="min-width: {self.fmt.col_space};"' + tags += f'style="min-width: {col_space};"' self._write_cell(s, kind="th", indent=indent, tags=tags) From d2a8ba82cfb80619c9577a3a3b80329c864b93e1 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 12:42:05 +0700 Subject: [PATCH 02/17] add tests and col name check --- pandas/io/formats/html.py | 6 +++++ pandas/tests/io/formats/test_to_html.py | 32 +++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index d1e96971c17ce..0f4b15d9a67dd 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -56,6 +56,12 @@ def __init__( self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links if isinstance(self.fmt.col_space, dict): + for column in self.fmt.col_space.keys(): + if column not in self.frame.columns: + raise ValueError( + f"Col_space is defined for an unknown column: {column}" + ) + self.fmt.col_space = { column: f"{value}px" if isinstance(value, int) else value for column, value in self.fmt.col_space.items() diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9a14022d6f776..23e6699adcd81 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -77,6 +77,38 @@ def test_to_html_with_col_space(col_space): assert "min-width" in h assert str(col_space) in h +# @pytest.mark.parametrize("col_space", [ +# [30, 40], +# [30, 40, 50], +# [30, 'foo', '20em'], +# {}, +# {'a': 'foo', 'b': 23, 'd':34}, +# {'a': 'foo', 'b': 223, 'd':34} + +# ]) +def test_to_html_with_column_specific_col_space(): + df = DataFrame(np.random.random(size=(3, 3)), columns=['a','b','c']) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" 
+ ) + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40]) + + + with pytest.raises(ValueError, match=msg): + df.to_html(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_html(col_space={'a': 'foo', 'b': 23, 'd':34}) + + result = df.to_html(col_space={'a': '2em', 'b': 23}) + hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + assert 'min-width: 2em;">a' in hdrs[1] + assert 'min-width: 23px;">b' in hdrs[2] + assert 'c' in hdrs[3] def test_to_html_with_empty_string_label(): # GH 3547, to_html regards empty string labels as repeated labels From e5549e0d29f456f5bdee7dbdbcdf6a19a0cf626f Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 12:47:36 +0700 Subject: [PATCH 03/17] run through black --- pandas/io/formats/format.py | 11 ++++++++--- pandas/io/formats/html.py | 8 ++++++-- pandas/tests/io/formats/test_to_html.py | 22 +++++++--------------- 3 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8dfdc48b15165..f79f14aeace23 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -84,8 +84,11 @@ ] FloatFormatType = Union[str, Callable, "EngFormatter"] ColspaceType = Union[ - str, int, - List[Union[str, int]], Tuple[Union[str, int], ...], Mapping[Union[str, int], Union[str, int]] + str, + int, + List[Union[str, int]], + Tuple[Union[str, int], ...], + Mapping[Union[str, int], Union[str, int]], ] common_docstring = """ @@ -586,7 +589,9 @@ def __init__( self.decimal = decimal if col_space is None: self.col_space = {} - elif isinstance(col_space, (dict, str, int)) or len(frame.columns) == len(col_space): + elif isinstance(col_space, (dict, str, int)) or len(frame.columns) == len( + col_space + ): self.col_space = col_space else: raise ValueError( diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 0f4b15d9a67dd..04ac115fd79ca 100644 --- 
a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -77,7 +77,11 @@ def __init__( ) ) elif isinstance(self.fmt.col_space, (int, str)): - col_space = f"{self.fmt.col_space}px" if isinstance(self.fmt.col_space, int) else self.fmt.col_space + col_space = ( + f"{self.fmt.col_space}px" + if isinstance(self.fmt.col_space, int) + else self.fmt.col_space + ) self.fmt.col_space = {"": col_space} self.fmt.col_space.update( {column: col_space for column in self.frame.columns} @@ -149,7 +153,7 @@ def write_th( ------- A written cell. """ - col_space = self.fmt.col_space.get(s,None) + col_space = self.fmt.col_space.get(s, None) if header and col_space is not None: tags = tags or "" diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 23e6699adcd81..fc858383279c7 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -77,17 +77,9 @@ def test_to_html_with_col_space(col_space): assert "min-width" in h assert str(col_space) in h -# @pytest.mark.parametrize("col_space", [ -# [30, 40], -# [30, 40, 50], -# [30, 'foo', '20em'], -# {}, -# {'a': 'foo', 'b': 23, 'd':34}, -# {'a': 'foo', 'b': 223, 'd':34} - -# ]) + def test_to_html_with_column_specific_col_space(): - df = DataFrame(np.random.random(size=(3, 3)), columns=['a','b','c']) + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) msg = ( "Col_space length\\(\\d+\\) should match " @@ -96,19 +88,19 @@ def test_to_html_with_column_specific_col_space(): with pytest.raises(ValueError, match=msg): df.to_html(col_space=[30, 40]) - with pytest.raises(ValueError, match=msg): df.to_html(col_space=[30, 40, 50, 60]) msg = "unknown column" with pytest.raises(ValueError, match=msg): - df.to_html(col_space={'a': 'foo', 'b': 23, 'd':34}) - - result = df.to_html(col_space={'a': '2em', 'b': 23}) + df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) + + result = df.to_html(col_space={"a": "2em", "b": 23}) hdrs = [x for x in 
result.split("\n") if re.search(r"\s]", x)] assert 'min-width: 2em;">a' in hdrs[1] assert 'min-width: 23px;">b' in hdrs[2] - assert 'c' in hdrs[3] + assert "c" in hdrs[3] + def test_to_html_with_empty_string_label(): # GH 3547, to_html regards empty string labels as repeated labels From 1ce2230553add37ad762e6bdc54d76f2d5d3d161 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 13:11:29 +0700 Subject: [PATCH 04/17] minor fix --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f79f14aeace23..50c34fbddc72a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -582,7 +582,7 @@ def __init__( self.formatters = formatters else: raise ValueError( - f"Formatters length({len( )}) should match " + f"Formatters length({len(formatters)}) should match " f"DataFrame number of columns({len(frame.columns)})" ) self.na_rep = na_rep From 8cbb7588f879d25f5d99e0a126d38088bc731068 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 13:15:43 +0700 Subject: [PATCH 05/17] add test --- pandas/tests/io/formats/test_to_html.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index fc858383279c7..65c09e7e69691 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -101,6 +101,12 @@ def test_to_html_with_column_specific_col_space(): assert 'min-width: 23px;">b' in hdrs[2] assert "c" in hdrs[3] + result = df.to_html(col_space=["1em", 2, 3]) + hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + assert 'min-width: 1em;">a' in hdrs[1] + assert 'min-width: 2px;">b' in hdrs[2] + assert 'min-width: 3px;">c' in hdrs[3] + def test_to_html_with_empty_string_label(): # GH 3547, to_html regards empty string labels as repeated labels From 5eabdcb99646e5589f2011b0acdaa95357e9d2b1 Mon Sep 17 00:00:00 
2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 17:26:31 +0700 Subject: [PATCH 06/17] col specific col_space for to_latex and to_string --- pandas/io/formats/format.py | 47 +++++++++++++++---------- pandas/io/formats/html.py | 38 +++----------------- pandas/tests/io/formats/test_to_html.py | 3 +- 3 files changed, 36 insertions(+), 52 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 50c34fbddc72a..1d4ccf58e6c2a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -84,6 +84,9 @@ ] FloatFormatType = Union[str, Callable, "EngFormatter"] ColspaceType = Union[ + Mapping[Union[str, int], Union[str, int]], +] +ColspaceArgType = Union[ str, int, List[Union[str, int]], @@ -543,11 +546,13 @@ class DataFrameFormatter(TableFormatter): __doc__ = __doc__ if __doc__ else "" __doc__ += common_docstring + return_docstring + col_space: ColspaceType + def __init__( self, frame: "DataFrame", columns: Optional[Sequence[str]] = None, - col_space: Optional[ColspaceType] = None, + col_space: Optional[ColspaceArgType] = None, header: Union[bool, Sequence[str]] = True, index: bool = True, na_rep: str = "NaN", @@ -589,15 +594,24 @@ def __init__( self.decimal = decimal if col_space is None: self.col_space = {} - elif isinstance(col_space, (dict, str, int)) or len(frame.columns) == len( - col_space - ): + elif isinstance(col_space, (int, str)): + self.col_space = {"": col_space} + self.col_space.update({column: col_space for column in self.frame.columns}) + elif isinstance(col_space, dict): + for column in col_space.keys(): + if column not in self.frame.columns and column != "": + raise ValueError( + f"Col_space is defined for an unknown column: {column}" + ) self.col_space = col_space else: - raise ValueError( - f"Col_space length({len(col_space)}) should match " - f"DataFrame number of columns({len(frame.columns)})" - ) + if len(frame.columns) != len(col_space): + raise ValueError( + f"Col_space length({len(col_space)}) should 
match " + f"DataFrame number of columns({len(frame.columns)})" + ) + self.col_space = dict(zip(self.frame.columns, col_space)) + self.header = header self.index = index self.line_width = line_width @@ -725,7 +739,7 @@ def _to_str_columns(self) -> List[List[str]]: """ # this method is not used by to_html where self.col_space # could be a string so safe to cast - self.col_space = cast(int, self.col_space) + col_space = {k: cast(int, v) for k, v in self.col_space.items()} frame = self.tr_frame # may include levels names also @@ -737,10 +751,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, - self.justify, - minimum=(self.col_space or 0), - adj=self.adj, + fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj, ) stringified.append(fmt_values) else: @@ -764,7 +775,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): cheader = str_columns[i] header_colwidth = max( - self.col_space or 0, *(self.adj.len(x) for x in cheader) + col_space.get(c, 0), *(self.adj.len(x) for x in cheader) ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( @@ -955,7 +966,7 @@ def _format_col(self, i: int) -> List[str]: formatter, float_format=self.float_format, na_rep=self.na_rep, - space=self.col_space, + space=self.col_space.get(frame.columns[i]), decimal=self.decimal, ) @@ -1048,10 +1059,10 @@ def show_col_idx_names(self) -> bool: def _get_formatted_index(self, frame: "DataFrame") -> List[str]: # Note: this is only used by to_string() and to_latex(), not by # to_html(). so safe to cast col_space here. 
- self.col_space = cast(int, self.col_space) + col_space = {k: cast(int, v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns - fmt = self._get_formatter("__index__") + fmt = self._get_formatter("") if isinstance(index, ABCMultiIndex): fmt_index = index.format( @@ -1066,7 +1077,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), justify="left", minimum=(self.col_space or 0), adj=self.adj + list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj, ) ) for x in fmt_index diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 04ac115fd79ca..e4127bfcb6c53 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -55,39 +55,11 @@ def __init__( self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links - if isinstance(self.fmt.col_space, dict): - for column in self.fmt.col_space.keys(): - if column not in self.frame.columns: - raise ValueError( - f"Col_space is defined for an unknown column: {column}" - ) - self.fmt.col_space = { - column: f"{value}px" if isinstance(value, int) else value - for column, value in self.fmt.col_space.items() - } - elif isinstance(self.fmt.col_space, list): - self.fmt.col_space = dict( - zip( - self.frame.columns, - [ - f"{value}px" if isinstance(value, int) else value - for value in self.fmt.col_space - ], - ) - ) - elif isinstance(self.fmt.col_space, (int, str)): - col_space = ( - f"{self.fmt.col_space}px" - if isinstance(self.fmt.col_space, int) - else self.fmt.col_space - ) - self.fmt.col_space = {"": col_space} - self.fmt.col_space.update( - {column: col_space for column in self.frame.columns} - ) - else: - self.fmt.col_space = {} + self.col_space = { + column: f"{value}px" if isinstance(value, int) else value + for column, value in self.fmt.col_space.items() + } @property def show_row_idx_names(self) -> bool: @@ -153,7 +125,7 @@ def write_th( ------- A 
written cell. """ - col_space = self.fmt.col_space.get(s, None) + col_space = self.col_space.get(s, None) if header and col_space is not None: tags = tags or "" diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 65c09e7e69691..515baefff26a0 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -95,8 +95,9 @@ def test_to_html_with_column_specific_col_space(): with pytest.raises(ValueError, match=msg): df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) - result = df.to_html(col_space={"a": "2em", "b": 23}) + result = df.to_html(col_space={"a": "2em", "b": 23, "": "10%"}) hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] + assert 'min-width: 10%;">' in hdrs[0] assert 'min-width: 2em;">a' in hdrs[1] assert 'min-width: 23px;">b' in hdrs[2] assert "c" in hdrs[3] From 02d9fcd92341f16ac8e08dd2480644d7c50ef86d Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Sun, 22 Mar 2020 17:56:37 +0700 Subject: [PATCH 07/17] minor fix --- pandas/io/formats/format.py | 2 +- pandas/tests/io/formats/test_to_html.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1d4ccf58e6c2a..9e06fbe225a84 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1062,7 +1062,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: col_space = {k: cast(int, v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns - fmt = self._get_formatter("") + fmt = self._get_formatter("__index__") if isinstance(index, ABCMultiIndex): fmt_index = index.format( diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 515baefff26a0..65c09e7e69691 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -95,9 +95,8 @@ def test_to_html_with_column_specific_col_space(): with 
pytest.raises(ValueError, match=msg): df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) - result = df.to_html(col_space={"a": "2em", "b": 23, "": "10%"}) + result = df.to_html(col_space={"a": "2em", "b": 23}) hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] - assert 'min-width: 10%;">' in hdrs[0] assert 'min-width: 2em;">a' in hdrs[1] assert 'min-width: 23px;">b' in hdrs[2] assert "c" in hdrs[3] From 0951dd3a9780e699ddcb2dc5eeae923ebf20d898 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Wed, 6 May 2020 14:49:31 +0700 Subject: [PATCH 08/17] add test fot to_string --- pandas/tests/io/formats/test_format.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c1850826926d8..3f4e4fd476068 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1047,6 +1047,30 @@ def test_to_string_with_col_space(self): no_header = df.to_string(col_space=20, header=False) assert len(with_header_row1) == len(no_header) + def test_to_string_with_column_specific_col_space(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + + msg = ( + "Col_space length\\(\\d+\\) should match " + "DataFrame number of columns\\(\\d+\\)" + ) + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40]) + + with pytest.raises(ValueError, match=msg): + df.to_string(col_space=[30, 40, 50, 60]) + + msg = "unknown column" + with pytest.raises(ValueError, match=msg): + df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) + + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) + # 3 separating space + each col_space for (id, a, b, c) + assert len(result.split("\n")[1]) == (3+1+10+11+12) + + result = df.to_string(col_space=[10,11,12]) + assert len(result.split("\n")[1]) == (3+1+10+11+12) + def test_to_string_truncate_indices(self): for index in [ tm.makeStringIndex, From 
3ad9422731f616b8491a3914f4bdffc7efbc4553 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Wed, 6 May 2020 15:44:23 +0700 Subject: [PATCH 09/17] run through black --- pandas/io/formats/latex.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 3a3ca84642d51..b50b77b7536cd 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -67,7 +67,7 @@ def write_result(self, buf: IO[str]) -> None: else: strcols = self.fmt._to_str_columns() - def get_col_type(dtype): + def get_col_type(dtype, col_space): if issubclass(dtype.type, np.number): return "r" else: @@ -107,7 +107,8 @@ def pad_empties(x): if self.column_format is None: dtypes = self.frame.dtypes._values - column_format = "".join(map(get_col_type, dtypes)) + col_spaces = [self.col_space[col] for col in self.frame.columns] + column_format = "".join(map(get_col_type, dtypes, col_spaces)) if self.fmt.index: index_format = "l" * self.frame.index.nlevels column_format = index_format + column_format From e1e0c90258eb0b06db05513a16803d7853630ad3 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Wed, 6 May 2020 16:14:40 +0700 Subject: [PATCH 10/17] black --- pandas/tests/io/formats/test_format.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3f4e4fd476068..7808afb4475b9 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1066,10 +1066,10 @@ def test_to_string_with_column_specific_col_space(self): result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) # 3 separating space + each col_space for (id, a, b, c) - assert len(result.split("\n")[1]) == (3+1+10+11+12) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) - result = df.to_string(col_space=[10,11,12]) - assert len(result.split("\n")[1]) == (3+1+10+11+12) + result = df.to_string(col_space=[10, 11, 
12]) + assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) def test_to_string_truncate_indices(self): for index in [ From b90b55faf550523b887fad2af8c251e0196ebbc7 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Wed, 6 May 2020 16:16:09 +0700 Subject: [PATCH 11/17] revert latext --- pandas/io/formats/latex.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index b50b77b7536cd..3a3ca84642d51 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -67,7 +67,7 @@ def write_result(self, buf: IO[str]) -> None: else: strcols = self.fmt._to_str_columns() - def get_col_type(dtype, col_space): + def get_col_type(dtype): if issubclass(dtype.type, np.number): return "r" else: @@ -107,8 +107,7 @@ def pad_empties(x): if self.column_format is None: dtypes = self.frame.dtypes._values - col_spaces = [self.col_space[col] for col in self.frame.columns] - column_format = "".join(map(get_col_type, dtypes, col_spaces)) + column_format = "".join(map(get_col_type, dtypes)) if self.fmt.index: index_format = "l" * self.frame.index.nlevels column_format = index_format + column_format From 11cc0db0bf30eb136786202e4711bac33f73b34b Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 7 May 2020 11:12:34 +0700 Subject: [PATCH 12/17] edit col_space type in docstrings --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2e56398db0c18..251fe9474153e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -780,7 +780,7 @@ def _repr_html_(self) -> Optional[str]: header="Write out the column names. 
If a list of strings " "is given, it is assumed to be aliases for the " "column names", - col_space_type="int", + col_space_type="int, list or dict of int", col_space="The minimum width of each column", ) @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) @@ -2241,7 +2241,7 @@ def to_parquet( @Substitution( header_type="bool", header="Whether to print column labels, default True", - col_space_type="str or int", + col_space_type="str or int, list or dict of int or str", col_space="The minimum width of each column in CSS length " "units. An int is assumed to be px units.\n\n" " .. versionadded:: 0.25.0\n" From 9d8fe9e7ef99218c29fd17f09502b48f86a63750 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 18 Jun 2020 11:19:28 +0700 Subject: [PATCH 13/17] split tests --- pandas/tests/io/formats/test_format.py | 5 ++++- pandas/tests/io/formats/test_to_html.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 2f1116500b400..2eb1916a5da7e 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1047,7 +1047,7 @@ def test_to_string_with_col_space(self): no_header = df.to_string(col_space=20, header=False) assert len(with_header_row1) == len(no_header) - def test_to_string_with_column_specific_col_space(self): + def test_to_string_with_column_specific_col_space_raises(self): df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) msg = ( @@ -1064,6 +1064,9 @@ def test_to_string_with_column_specific_col_space(self): with pytest.raises(ValueError, match=msg): df.to_string(col_space={"a": "foo", "b": 23, "d": 34}) + def test_to_string_with_column_specific_col_space(self): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) # 3 separating space + each col_space for (id, a, b, c) assert 
len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 65c09e7e69691..e85fd398964d0 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -78,7 +78,7 @@ def test_to_html_with_col_space(col_space): assert str(col_space) in h -def test_to_html_with_column_specific_col_space(): +def test_to_html_with_column_specific_col_space_raises(): df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) msg = ( @@ -95,6 +95,10 @@ def test_to_html_with_column_specific_col_space(): with pytest.raises(ValueError, match=msg): df.to_html(col_space={"a": "foo", "b": 23, "d": 34}) + +def test_to_html_with_column_specific_col_space(): + df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) + result = df.to_html(col_space={"a": "2em", "b": 23}) hdrs = [x for x in result.split("\n") if re.search(r"\s]", x)] assert 'min-width: 2em;">a' in hdrs[1] From aefe519de2577ae069d4d48a47592a69b9c39d3a Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 18 Jun 2020 11:27:17 +0700 Subject: [PATCH 14/17] fix typing --- pandas/io/formats/format.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index a7588fbd4e59c..0e5c4dd517894 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -38,7 +38,7 @@ from pandas._libs.tslib import format_array_from_datetime from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT from pandas._libs.tslibs.nattype import NaTType -from pandas._typing import FilePathOrBuffer +from pandas._typing import FilePathOrBuffer, Label from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import ( @@ -77,15 +77,12 @@ List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable] ] FloatFormatType = Union[str, Callable, "EngFormatter"] -ColspaceType = Union[ - 
Mapping[Union[str, int], Union[str, int]], -] +ColspaceType = Mapping[Label, Union[str, int]] ColspaceArgType = Union[ str, int, - List[Union[str, int]], - Tuple[Union[str, int], ...], - Mapping[Union[str, int], Union[str, int]], + Sequence[Union[str, int]], + Mapping[Label, Union[str, int]], ] common_docstring = """ From 788e0f24230290e527174f997ca3cea50f57fb29 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 18 Jun 2020 11:58:25 +0700 Subject: [PATCH 15/17] remove cast --- pandas/io/formats/format.py | 24 ++++++++++-------------- pandas/tests/io/formats/test_format.py | 2 +- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 0e5c4dd517894..8c36447827681 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -79,10 +79,7 @@ FloatFormatType = Union[str, Callable, "EngFormatter"] ColspaceType = Mapping[Label, Union[str, int]] ColspaceArgType = Union[ - str, - int, - Sequence[Union[str, int]], - Mapping[Label, Union[str, int]], + str, int, Sequence[Union[str, int]], Mapping[Label, Union[str, int]], ] common_docstring = """ @@ -728,10 +725,6 @@ def _to_str_columns(self) -> List[List[str]]: """ Render a DataFrame to a list of columns (as lists of strings). 
""" - # this method is not used by to_html where self.col_space - # could be a string so safe to cast - col_space = {k: cast(int, v) for k, v in self.col_space.items()} - frame = self.tr_frame # may include levels names also @@ -742,7 +735,10 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj, + fmt_values, + self.justify, + minimum=self.col_space.get(c, 0), + adj=self.adj, ) stringified.append(fmt_values) else: @@ -766,7 +762,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): cheader = str_columns[i] header_colwidth = max( - col_space.get(c, 0), *(self.adj.len(x) for x in cheader) + self.col_space.get(c, 0), *(self.adj.len(x) for x in cheader) ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( @@ -1048,9 +1044,6 @@ def show_col_idx_names(self) -> bool: return all((self.has_column_names, self.show_index_names, self.header)) def _get_formatted_index(self, frame: "DataFrame") -> List[str]: - # Note: this is only used by to_string() and to_latex(), not by - # to_html(). so safe to cast col_space here. 
- col_space = {k: cast(int, v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns fmt = self._get_formatter("__index__") @@ -1068,7 +1061,10 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj, + list(x), + justify="left", + minimum=self.col_space.get("", 0), + adj=self.adj, ) ) for x in fmt_index diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 2eb1916a5da7e..3c40a2ae8d6b8 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1066,7 +1066,7 @@ def test_to_string_with_column_specific_col_space_raises(self): def test_to_string_with_column_specific_col_space(self): df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"]) - + result = df.to_string(col_space={"a": 10, "b": 11, "c": 12}) # 3 separating space + each col_space for (id, a, b, c) assert len(result.split("\n")[1]) == (3 + 1 + 10 + 11 + 12) From 7323e804a9d9a7279d3db57253bcd8a4921b7525 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 18 Jun 2020 12:31:30 +0700 Subject: [PATCH 16/17] add whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 10522ff797c59..c1ab04a136e0f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -294,6 +294,7 @@ Other enhancements - :meth:`~pandas.io.json.read_json` now accepts `nrows` parameter. (:issue:`33916`). - :meth `~pandas.io.gbq.read_gbq` now allows to disable progress bar (:issue:`33360`). - :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`). 
+- :meth:`DataFrame.to_html` and :meth:`DataFrame.to_string`'s ``col_space`` parameter now accepts a list or dict to change only some specific columns' width (:issue:`28917`). .. --------------------------------------------------------------------------- From 7d14cff25d3a22b6d36351d1f84b0d81692bbb14 Mon Sep 17 00:00:00 2001 From: Quang Nguyen Date: Thu, 18 Jun 2020 13:38:24 +0700 Subject: [PATCH 17/17] Revert remove cast --- pandas/io/formats/format.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8c36447827681..68a88fee83187 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -593,6 +593,7 @@ def __init__( ) self.col_space = col_space else: + col_space = cast(Sequence, col_space) if len(frame.columns) != len(col_space): raise ValueError( f"Col_space length({len(col_space)}) should match " @@ -725,6 +726,10 @@ def _to_str_columns(self) -> List[List[str]]: """ Render a DataFrame to a list of columns (as lists of strings). 
""" + # this method is not used by to_html where self.col_space + # could be a string so safe to cast + col_space = {k: cast(int, v) for k, v in self.col_space.items()} + frame = self.tr_frame # may include levels names also @@ -735,10 +740,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): fmt_values = self._format_col(i) fmt_values = _make_fixed_width( - fmt_values, - self.justify, - minimum=self.col_space.get(c, 0), - adj=self.adj, + fmt_values, self.justify, minimum=col_space.get(c, 0), adj=self.adj, ) stringified.append(fmt_values) else: @@ -762,7 +764,7 @@ def _to_str_columns(self) -> List[List[str]]: for i, c in enumerate(frame): cheader = str_columns[i] header_colwidth = max( - self.col_space.get(c, 0), *(self.adj.len(x) for x in cheader) + col_space.get(c, 0), *(self.adj.len(x) for x in cheader) ) fmt_values = self._format_col(i) fmt_values = _make_fixed_width( @@ -1044,6 +1046,9 @@ def show_col_idx_names(self) -> bool: return all((self.has_column_names, self.show_index_names, self.header)) def _get_formatted_index(self, frame: "DataFrame") -> List[str]: + # Note: this is only used by to_string() and to_latex(), not by + # to_html(). so safe to cast col_space here. + col_space = {k: cast(int, v) for k, v in self.col_space.items()} index = frame.index columns = frame.columns fmt = self._get_formatter("__index__") @@ -1061,10 +1066,7 @@ def _get_formatted_index(self, frame: "DataFrame") -> List[str]: fmt_index = [ tuple( _make_fixed_width( - list(x), - justify="left", - minimum=self.col_space.get("", 0), - adj=self.adj, + list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj, ) ) for x in fmt_index