Skip to content

Commit 3e4df3b

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 9ff1343 + 87d7855 commit 3e4df3b

File tree

17 files changed

+753
-449
lines changed

17 files changed

+753
-449
lines changed

ci/deps/actions-39-slow.yaml

+13-9
Original file line numberDiff line numberDiff line change
@@ -14,25 +14,29 @@ dependencies:
1414

1515
# pandas dependencies
1616
- beautifulsoup4
17-
- fsspec>=0.7.4, <2021.6.0
17+
- bottleneck
18+
- fsspec>=0.8.0, <2021.6.0
19+
- gcsfs
1820
- html5lib
21+
- jinja2
1922
- lxml
2023
- matplotlib
24+
- moto>=1.3.14
25+
- flask
2126
- numexpr
2227
- numpy
2328
- openpyxl
24-
- patsy
25-
- psycopg2
26-
- pymysql
29+
- pyarrow
2730
- pytables
2831
- python-dateutil
2932
- pytz
30-
- s3fs>=0.4.0
31-
- moto>=1.3.14
33+
- s3fs>=0.4.2
3234
- scipy
3335
- sqlalchemy
3436
- xlrd
3537
- xlsxwriter
36-
- moto
37-
- flask
38-
- numba
38+
- xlwt
39+
- pyreadstat
40+
- pip
41+
- pip:
42+
- pyxlsb

ci/deps/actions-39.yaml

+23-4
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,30 @@ dependencies:
1212
- hypothesis>=5.5.3
1313

1414
# pandas dependencies
15+
- beautifulsoup4
16+
- bottleneck
17+
- fsspec>=0.8.0, <2021.6.0
18+
- gcsfs
19+
- html5lib
20+
- jinja2
21+
- lxml
22+
- matplotlib
23+
- moto>=1.3.14
24+
- flask
25+
- numexpr
1526
- numpy
27+
- openpyxl
28+
- pyarrow
29+
- pytables
1630
- python-dateutil
1731
- pytz
18-
19-
# optional dependencies
20-
- pytables
32+
- s3fs>=0.4.2
2133
- scipy
22-
- pyarrow=1.0
34+
- sqlalchemy
35+
- xlrd
36+
- xlsxwriter
37+
- xlwt
38+
- pyreadstat
39+
- pip
40+
- pip:
41+
- pyxlsb

doc/source/user_guide/style.ipynb

+13-9
Original file line numberDiff line numberDiff line change
@@ -1190,9 +1190,9 @@
11901190
"cell_type": "markdown",
11911191
"metadata": {},
11921192
"source": [
1193-
"In version 0.20.0 the ability to customize the bar chart further was given. You can now have the `df.style.bar` be centered on zero or midpoint value (in addition to the already existing way of having the min value at the left side of the cell), and you can pass a list of `[color_negative, color_positive]`.\n",
1193+
"Additional keyword arguments give more control over centering and positioning, and you can pass a list of `[color_negative, color_positive]` to highlight lower and higher values.\n",
11941194
"\n",
1195-
"Here's how you can change the above with the new `align='mid'` option:"
1195+
"Here's how you can change the above with the new `align` option, combined with setting `vmin` and `vmax` limits, the `width` of the figure, and underlying CSS `props` of cells, leaving space to display the text and the bars:"
11961196
]
11971197
},
11981198
{
@@ -1201,7 +1201,8 @@
12011201
"metadata": {},
12021202
"outputs": [],
12031203
"source": [
1204-
"df2.style.bar(subset=['A', 'B'], align='mid', color=['#d65f5f', '#5fba7d'])"
1204+
"df2.style.bar(align=0, vmin=-2.5, vmax=2.5, color=['#d65f5f', '#5fba7d'],\n",
1205+
" width=60, props=\"width: 120px; border-right: 1px solid black;\").format('{:.3f}', na_rep=\"\")"
12051206
]
12061207
},
12071208
{
@@ -1225,28 +1226,31 @@
12251226
"\n",
12261227
"# Test series\n",
12271228
"test1 = pd.Series([-100,-60,-30,-20], name='All Negative')\n",
1228-
"test2 = pd.Series([10,20,50,100], name='All Positive')\n",
1229-
"test3 = pd.Series([-10,-5,0,90], name='Both Pos and Neg')\n",
1229+
"test2 = pd.Series([-10,-5,0,90], name='Both Pos and Neg')\n",
1230+
"test3 = pd.Series([10,20,50,100], name='All Positive')\n",
1231+
"test4 = pd.Series([100, 103, 101, 102], name='Large Positive')\n",
1232+
"\n",
12301233
"\n",
12311234
"head = \"\"\"\n",
12321235
"<table>\n",
12331236
" <thead>\n",
12341237
" <th>Align</th>\n",
12351238
" <th>All Negative</th>\n",
1236-
" <th>All Positive</th>\n",
12371239
" <th>Both Neg and Pos</th>\n",
1240+
" <th>All Positive</th>\n",
1241+
" <th>Large Positive</th>\n",
12381242
" </thead>\n",
12391243
" </tbody>\n",
12401244
"\n",
12411245
"\"\"\"\n",
12421246
"\n",
1243-
"aligns = ['left','zero','mid']\n",
1247+
"aligns = ['left', 'right', 'zero', 'mid', 'mean', 99]\n",
12441248
"for align in aligns:\n",
12451249
" row = \"<tr><th>{}</th>\".format(align)\n",
1246-
" for series in [test1,test2,test3]:\n",
1250+
" for series in [test1,test2,test3, test4]:\n",
12471251
" s = series.copy()\n",
12481252
" s.name=''\n",
1249-
" row += \"<td>{}</td>\".format(s.to_frame().style.bar(align=align, \n",
1253+
" row += \"<td>{}</td>\".format(s.to_frame().style.hide_index().bar(align=align, \n",
12501254
" color=['#d65f5f', '#5fba7d'], \n",
12511255
" width=100).render()) #testn['width']\n",
12521256
" row += '</tr>'\n",

doc/source/whatsnew/v1.3.1.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Fixed regressions
1717
- Pandas could not be built on PyPy (:issue:`42355`)
1818
- :class:`DataFrame` constructed with an older version of pandas could not be unpickled (:issue:`42345`)
1919
- Performance regression in constructing a :class:`DataFrame` from a dictionary of dictionaries (:issue:`42338`)
20+
- Fixed regression in :meth:`DataFrame.agg` dropping values when the DataFrame had an Extension Array dtype, a duplicate index, and ``axis=1`` (:issue:`42380`)
2021
-
2122

2223
.. ---------------------------------------------------------------------------
@@ -25,7 +26,7 @@ Fixed regressions
2526

2627
Bug fixes
2728
~~~~~~~~~
28-
-
29+
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
2930
-
3031

3132
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ enhancement2
3030
Other enhancements
3131
^^^^^^^^^^^^^^^^^^
3232
- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
33+
- Additional options added to :meth:`.Styler.bar` to control alignment and display (:issue:`26070`)
3334
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
3435
-
3536

@@ -160,7 +161,7 @@ Deprecations
160161
Performance improvements
161162
~~~~~~~~~~~~~~~~~~~~~~~~
162163
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
163-
-
164+
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
164165

165166
.. ---------------------------------------------------------------------------
166167

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -3342,8 +3342,8 @@ def transpose(self, *args, copy: bool = False) -> DataFrame:
33423342
values = self.values
33433343

33443344
new_values = [arr_type._from_sequence(row, dtype=dtype) for row in values]
3345-
result = self._constructor(
3346-
dict(zip(self.index, new_values)), index=self.columns
3345+
result = type(self)._from_arrays(
3346+
new_values, index=self.columns, columns=self.index
33473347
)
33483348

33493349
else:

pandas/core/groupby/generic.py

+44-26
Original file line numberDiff line numberDiff line change
@@ -1308,42 +1308,35 @@ def _transform_general(self, func, *args, **kwargs):
13081308
gen = self.grouper.get_iterator(obj, axis=self.axis)
13091309
fast_path, slow_path = self._define_paths(func, *args, **kwargs)
13101310

1311-
for name, group in gen:
1312-
if group.size == 0:
1313-
continue
1311+
# Determine whether to use slow or fast path by evaluating on the first group.
1312+
# Need to handle the case of an empty generator and process the result so that
1313+
# it does not need to be computed again.
1314+
try:
1315+
name, group = next(gen)
1316+
except StopIteration:
1317+
pass
1318+
else:
13141319
object.__setattr__(group, "name", name)
1315-
1316-
# Try slow path and fast path.
13171320
try:
13181321
path, res = self._choose_path(fast_path, slow_path, group)
13191322
except TypeError:
13201323
return self._transform_item_by_item(obj, fast_path)
13211324
except ValueError as err:
13221325
msg = "transform must return a scalar value for each group"
13231326
raise ValueError(msg) from err
1324-
1325-
if isinstance(res, Series):
1326-
1327-
# we need to broadcast across the
1328-
# other dimension; this will preserve dtypes
1329-
# GH14457
1330-
if res.index.is_(obj.index):
1331-
r = concat([res] * len(group.columns), axis=1)
1332-
r.columns = group.columns
1333-
r.index = group.index
1334-
else:
1335-
r = self.obj._constructor(
1336-
np.concatenate([res.values] * len(group.index)).reshape(
1337-
group.shape
1338-
),
1339-
columns=group.columns,
1340-
index=group.index,
1341-
)
1342-
1343-
applied.append(r)
1344-
else:
1327+
if group.size > 0:
1328+
res = _wrap_transform_general_frame(self.obj, group, res)
13451329
applied.append(res)
13461330

1331+
# Compute and process with the remaining groups
1332+
for name, group in gen:
1333+
if group.size == 0:
1334+
continue
1335+
object.__setattr__(group, "name", name)
1336+
res = path(group)
1337+
res = _wrap_transform_general_frame(self.obj, group, res)
1338+
applied.append(res)
1339+
13471340
concat_index = obj.columns if self.axis == 0 else obj.index
13481341
other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1
13491342
concatenated = concat(applied, axis=self.axis, verify_integrity=False)
@@ -1853,3 +1846,28 @@ def func(df):
18531846
return self._python_apply_general(func, self._obj_with_exclusions)
18541847

18551848
boxplot = boxplot_frame_groupby
1849+
1850+
1851+
def _wrap_transform_general_frame(
1852+
obj: DataFrame, group: DataFrame, res: DataFrame | Series
1853+
) -> DataFrame:
1854+
from pandas import concat
1855+
1856+
if isinstance(res, Series):
1857+
# we need to broadcast across the
1858+
# other dimension; this will preserve dtypes
1859+
# GH14457
1860+
if res.index.is_(obj.index):
1861+
res_frame = concat([res] * len(group.columns), axis=1)
1862+
res_frame.columns = group.columns
1863+
res_frame.index = group.index
1864+
else:
1865+
res_frame = obj._constructor(
1866+
np.concatenate([res.values] * len(group.index)).reshape(group.shape),
1867+
columns=group.columns,
1868+
index=group.index,
1869+
)
1870+
assert isinstance(res_frame, DataFrame)
1871+
return res_frame
1872+
else:
1873+
return res

0 commit comments

Comments
 (0)