Skip to content

Commit ddcaad8

Browse files
committed
Merge remote-tracking branch 'upstream/master' into excel-read-shared-init-to-baseclass
2 parents a77a4c7 + 9feb3ad commit ddcaad8

File tree

9 files changed

+100
-31
lines changed

9 files changed

+100
-31
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ I/O
365365
- Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
366366
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
367367
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
368+
- Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`)
368369
- Fixed bug in loading objects from S3 that contain ``#`` characters in the URL (:issue:`25945`)
369370
- Adds ``use_bqstorage_api`` parameter to :func:`read_gbq` to speed up downloads of large data frames. This feature requires version 0.10.0 of the ``pandas-gbq`` library as well as the ``google-cloud-bigquery-storage`` and ``fastavro`` libraries. (:issue:`26104`)
370371

pandas/core/frame.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,9 @@ def _repr_html_(self):
658658

659659
@Substitution(header='Write out the column names. If a list of strings '
660660
'is given, it is assumed to be aliases for the '
661-
'column names')
661+
'column names',
662+
col_space_type='int',
663+
col_space='The minimum width of each column')
662664
@Substitution(shared_params=fmt.common_docstring,
663665
returns=fmt.return_docstring)
664666
def to_string(self, buf=None, columns=None, col_space=None, header=True,
@@ -2138,7 +2140,12 @@ def to_parquet(self, fname, engine='auto', compression='snappy',
21382140
compression=compression, index=index,
21392141
partition_cols=partition_cols, **kwargs)
21402142

2141-
@Substitution(header='Whether to print column labels, default True')
2143+
@Substitution(header='Whether to print column labels, default True',
2144+
col_space_type='str or int',
2145+
col_space='The minimum width of each column in CSS length '
2146+
'units. An int is assumed to be px units.\n\n'
2147+
' .. versionadded:: 0.25.0\n'
2148+
' Ability to use str')
21422149
@Substitution(shared_params=fmt.common_docstring,
21432150
returns=fmt.return_docstring)
21442151
def to_html(self, buf=None, columns=None, col_space=None, header=True,

pandas/core/groupby/grouper.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -522,21 +522,17 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,
522522
any_arraylike = any(isinstance(g, (list, tuple, Series, Index, np.ndarray))
523523
for g in keys)
524524

525-
try:
525+
# is this an index replacement?
526+
if (not any_callable and not any_arraylike and not any_groupers and
527+
match_axis_length and level is None):
526528
if isinstance(obj, DataFrame):
527-
all_in_columns_index = all(g in obj.columns or g in obj.index.names
528-
for g in keys)
529+
all_in_columns_index = all(g in obj.columns or g in
530+
obj.index.names for g in keys)
529531
elif isinstance(obj, Series):
530532
all_in_columns_index = all(g in obj.index.names for g in keys)
531-
else:
532-
all_in_columns_index = False
533-
except Exception:
534-
all_in_columns_index = False
535533

536-
if (not any_callable and not all_in_columns_index and
537-
not any_arraylike and not any_groupers and
538-
match_axis_length and level is None):
539-
keys = [com.asarray_tuplesafe(keys)]
534+
if not all_in_columns_index:
535+
keys = [com.asarray_tuplesafe(keys)]
540536

541537
if isinstance(level, (tuple, list)):
542538
if key is None:

pandas/core/internals/blocks.py

+11-13
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@
2727
CategoricalDtype, ExtensionDtype, PandasExtensionDtype)
2828
from pandas.core.dtypes.generic import (
2929
ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass,
30-
ABCSeries)
30+
ABCPandasArray, ABCSeries)
3131
from pandas.core.dtypes.missing import (
3232
_isna_compat, array_equivalent, isna, notna)
3333

3434
import pandas.core.algorithms as algos
3535
from pandas.core.arrays import (
36-
Categorical, DatetimeArray, ExtensionArray, TimedeltaArray)
36+
Categorical, DatetimeArray, ExtensionArray, PandasDtype, TimedeltaArray)
3737
from pandas.core.base import PandasObject
3838
import pandas.core.common as com
3939
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -576,23 +576,14 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
576576

577577
return self.make_block(Categorical(self.values, dtype=dtype))
578578

579-
# convert dtypes if needed
580579
dtype = pandas_dtype(dtype)
580+
581581
# astype processing
582582
if is_dtype_equal(self.dtype, dtype):
583583
if copy:
584584
return self.copy()
585585
return self
586586

587-
klass = None
588-
if is_sparse(self.values):
589-
# special case sparse, Series[Sparse].astype(object) is sparse
590-
klass = ExtensionBlock
591-
elif is_object_dtype(dtype):
592-
klass = ObjectBlock
593-
elif is_extension_array_dtype(dtype):
594-
klass = ExtensionBlock
595-
596587
try:
597588
# force the copy here
598589
if values is None:
@@ -624,7 +615,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
624615
pass
625616

626617
newb = make_block(values, placement=self.mgr_locs,
627-
klass=klass, ndim=self.ndim)
618+
ndim=self.ndim)
628619
except Exception: # noqa: E722
629620
if errors == 'raise':
630621
raise
@@ -3041,6 +3032,13 @@ def get_block_type(values, dtype=None):
30413032

30423033
def make_block(values, placement, klass=None, ndim=None, dtype=None,
30433034
fastpath=None):
3035+
# Ensure that we don't allow PandasArray / PandasDtype in internals.
3036+
# For now, blocks should be backed by ndarrays when possible.
3037+
if isinstance(values, ABCPandasArray):
3038+
values = values.to_numpy()
3039+
if isinstance(dtype, PandasDtype):
3040+
dtype = dtype.numpy_dtype
3041+
30443042
if fastpath is not None:
30453043
# GH#19265 pyarrow is passing this
30463044
warnings.warn("fastpath argument is deprecated, will be removed "

pandas/io/formats/format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
Buffer to write to.
4242
columns : sequence, optional, default None
4343
The subset of columns to write. Writes all columns by default.
44-
col_space : int, optional
45-
The minimum width of each column.
44+
col_space : %(col_space_type)s, optional
45+
%(col_space)s.
4646
header : bool, optional
4747
%(header)s.
4848
index : bool, optional, default True

pandas/io/formats/html.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def __init__(self, formatter, classes=None, border=None):
4545
self.border = border
4646
self.table_id = self.fmt.table_id
4747
self.render_links = self.fmt.render_links
48+
if isinstance(self.fmt.col_space, int):
49+
self.fmt.col_space = ('{colspace}px'
50+
.format(colspace=self.fmt.col_space))
4851

4952
@property
5053
def show_row_idx_names(self):
@@ -84,8 +87,30 @@ def write(self, s, indent=0):
8487
rs = pprint_thing(s)
8588
self.elements.append(' ' * indent + rs)
8689

87-
def write_th(self, s, indent=0, tags=None):
88-
if self.fmt.col_space is not None and self.fmt.col_space > 0:
90+
def write_th(self, s, header=False, indent=0, tags=None):
91+
"""
92+
Method for writing a formatted <th> cell.
93+
94+
If col_space is set on the formatter then that is used for
95+
the value of min-width.
96+
97+
Parameters
98+
----------
99+
s : object
100+
The data to be written inside the cell.
101+
header : boolean, default False
102+
Set to True if the <th> is for use inside <thead>. This will
103+
cause min-width to be set if there is one.
104+
indent : int, default 0
105+
The indentation level of the cell.
106+
tags : string, default None
107+
Tags to include in the cell.
108+
109+
Returns
110+
-------
111+
A written <th> cell.
112+
"""
113+
if header and self.fmt.col_space is not None:
89114
tags = (tags or "")
90115
tags += ('style="min-width: {colspace};"'
91116
.format(colspace=self.fmt.col_space))
@@ -136,7 +161,7 @@ def write_tr(self, line, indent=0, indent_delta=0, header=False,
136161
for i, s in enumerate(line):
137162
val_tag = tags.get(i, None)
138163
if header or (self.bold_rows and i < nindex_levels):
139-
self.write_th(s, indent, tags=val_tag)
164+
self.write_th(s, indent=indent, header=header, tags=val_tag)
140165
else:
141166
self.write_td(s, indent, tags=val_tag)
142167

pandas/tests/internals/test_internals.py

+20
Original file line numberDiff line numberDiff line change
@@ -1291,3 +1291,23 @@ def test_block_shape():
12911291

12921292
assert (a._data.blocks[0].mgr_locs.indexer ==
12931293
b._data.blocks[0].mgr_locs.indexer)
1294+
1295+
1296+
def test_make_block_no_pandas_array():
1297+
# https://github.com/pandas-dev/pandas/pull/24866
1298+
arr = pd.array([1, 2])
1299+
1300+
# PandasArray, no dtype
1301+
result = make_block(arr, slice(len(arr)))
1302+
assert result.is_integer is True
1303+
assert result.is_extension is False
1304+
1305+
# PandasArray, PandasDtype
1306+
result = make_block(arr, slice(len(arr)), dtype=arr.dtype)
1307+
assert result.is_integer is True
1308+
assert result.is_extension is False
1309+
1310+
# ndarray, PandasDtype
1311+
result = make_block(arr.to_numpy(), slice(len(arr)), dtype=arr.dtype)
1312+
assert result.is_integer is True
1313+
assert result.is_extension is False

pandas/tests/io/formats/test_to_html.py

+14
Original file line numberDiff line numberDiff line change
@@ -641,3 +641,17 @@ def test_to_html_round_column_headers():
641641
notebook = df.to_html(notebook=True)
642642
assert "0.55555" in html
643643
assert "0.556" in notebook
644+
645+
646+
@pytest.mark.parametrize("unit", ['100px', '10%', '5em', 150])
647+
def test_to_html_with_col_space_units(unit):
648+
# GH 25941
649+
df = DataFrame(np.random.random(size=(1, 3)))
650+
result = df.to_html(col_space=unit)
651+
result = result.split('tbody')[0]
652+
hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
653+
if isinstance(unit, int):
654+
unit = str(unit) + 'px'
655+
for h in hdrs:
656+
expected = '<th style="min-width: {unit};">'.format(unit=unit)
657+
assert expected in h

pandas/tests/series/test_internals.py

+8
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,14 @@ def test_constructor_no_pandas_array(self):
312312
tm.assert_series_equal(ser, result)
313313
assert isinstance(result._data.blocks[0], IntBlock)
314314

315+
def test_astype_no_pandas_dtype(self):
316+
# https://github.com/pandas-dev/pandas/pull/24866
317+
ser = pd.Series([1, 2], dtype="int64")
318+
# Don't have PandasDtype in the public API, so we use `.array.dtype`,
319+
# which is a PandasDtype.
320+
result = ser.astype(ser.array.dtype)
321+
tm.assert_series_equal(result, ser)
322+
315323
def test_from_array(self):
316324
result = pd.Series(pd.array(['1H', '2H'], dtype='timedelta64[ns]'))
317325
assert result._data.blocks[0].is_extension is False

0 commit comments

Comments
 (0)