Skip to content

Commit 3d65dbf

Browse files
author
MarcoGorelli
committed
Merge remote-tracking branch 'upstream/main' into allow-mixed-iso
2 parents 6e6d579 + 0cee41f commit 3d65dbf

File tree

6 files changed

+116
-6
lines changed

6 files changed

+116
-6
lines changed

.circleci/config.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@ jobs:
1414
steps:
1515
- checkout
1616
- run: .circleci/setup_env.sh
17-
- run: PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH ci/run_tests.sh
17+
- run: >
18+
PATH=$HOME/miniconda3/envs/pandas-dev/bin:$HOME/miniconda3/condabin:$PATH
19+
LD_PRELOAD=$HOME/miniconda3/envs/pandas-dev/lib/libgomp.so.1:$LD_PRELOAD
20+
ci/run_tests.sh
1821
1922
workflows:
2023
test:

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ Other enhancements
160160
- Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`)
161161
- Improve exception message when using :func:`assert_frame_equal` on a :class:`DataFrame` to include the column that is compared (:issue:`50323`)
162162
- Improved error message for :func:`merge_asof` when join-columns were duplicated (:issue:`50102`)
163+
- Added support for extension array dtypes to :func:`get_dummies` (:issue:`32430`)
163164
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
164165
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
165166
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
@@ -1106,6 +1107,7 @@ Groupby/resample/rolling
11061107
- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` would raise incorrectly when grouper had ``axis=1`` for ``"idxmin"`` and ``"idxmax"`` arguments (:issue:`45986`)
11071108
- Bug in :class:`.DataFrameGroupBy` would raise when used with an empty DataFrame, categorical grouper, and ``dropna=False`` (:issue:`50634`)
11081109
- Bug in :meth:`.SeriesGroupBy.value_counts` did not respect ``sort=False`` (:issue:`50482`)
1110+
- Bug in :meth:`.DataFrameGroupBy.resample` raised ``KeyError`` when selecting columns with a list of keys while resampling on a time index (:issue:`50840`)
11091111
-
11101112

11111113
Reshaping

pandas/core/resample.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1202,7 +1202,7 @@ def _gotitem(self, key, ndim, subset=None):
12021202

12031203
# Try to select from a DataFrame, falling back to a Series
12041204
try:
1205-
if isinstance(key, list) and self.key not in key:
1205+
if isinstance(key, list) and self.key not in key and self.key is not None:
12061206
key.append(self.key)
12071207
groupby = self._groupby[key]
12081208
except IndexError:

pandas/core/reshape/encoding.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
is_integer_dtype,
1717
is_list_like,
1818
is_object_dtype,
19+
pandas_dtype,
1920
)
2021

2122
from pandas.core.arrays import SparseArray
@@ -240,9 +241,9 @@ def _get_dummies_1d(
240241

241242
if dtype is None:
242243
dtype = np.dtype(bool)
243-
dtype = np.dtype(dtype)
244+
_dtype = pandas_dtype(dtype)
244245

245-
if is_object_dtype(dtype):
246+
if is_object_dtype(_dtype):
246247
raise ValueError("dtype=object is not a valid dtype for get_dummies")
247248

248249
def get_empty_frame(data) -> DataFrame:
@@ -317,7 +318,12 @@ def get_empty_frame(data) -> DataFrame:
317318

318319
else:
319320
# take on axis=1 + transpose to ensure ndarray layout is column-major
320-
dummy_mat = np.eye(number_of_cols, dtype=dtype).take(codes, axis=1).T
321+
eye_dtype: NpDtype
322+
if isinstance(_dtype, np.dtype):
323+
eye_dtype = _dtype
324+
else:
325+
eye_dtype = np.bool_
326+
dummy_mat = np.eye(number_of_cols, dtype=eye_dtype).take(codes, axis=1).T
321327

322328
if not dummy_na:
323329
# reset NaN GH4446
@@ -327,7 +333,7 @@ def get_empty_frame(data) -> DataFrame:
327333
# remove first GH12042
328334
dummy_mat = dummy_mat[:, 1:]
329335
dummy_cols = dummy_cols[1:]
330-
return DataFrame(dummy_mat, index=index, columns=dummy_cols)
336+
return DataFrame(dummy_mat, index=index, columns=dummy_cols, dtype=_dtype)
331337

332338

333339
def from_dummies(

pandas/tests/resample/test_resampler_grouper.py

+79
Original file line numberDiff line numberDiff line change
@@ -536,3 +536,82 @@ def test_groupby_resample_size_all_index_same():
536536
),
537537
)
538538
tm.assert_series_equal(result, expected)
539+
540+
541+
def test_groupby_resample_on_index_with_list_of_keys():
542+
# GH 50840
543+
df = DataFrame(
544+
data={
545+
"group": [0, 0, 0, 0, 1, 1, 1, 1],
546+
"val": [3, 1, 4, 1, 5, 9, 2, 6],
547+
},
548+
index=Series(
549+
date_range(start="2016-01-01", periods=8),
550+
name="date",
551+
),
552+
)
553+
result = df.groupby("group").resample("2D")[["val"]].mean()
554+
expected = DataFrame(
555+
data={
556+
"val": [2.0, 2.5, 7.0, 4.0],
557+
},
558+
index=Index(
559+
data=[
560+
(0, Timestamp("2016-01-01")),
561+
(0, Timestamp("2016-01-03")),
562+
(1, Timestamp("2016-01-05")),
563+
(1, Timestamp("2016-01-07")),
564+
],
565+
name=("group", "date"),
566+
),
567+
)
568+
tm.assert_frame_equal(result, expected)
569+
570+
571+
def test_groupby_resample_on_index_with_list_of_keys_multi_columns():
572+
# GH 50876
573+
df = DataFrame(
574+
data={
575+
"group": [0, 0, 0, 0, 1, 1, 1, 1],
576+
"first_val": [3, 1, 4, 1, 5, 9, 2, 6],
577+
"second_val": [2, 7, 1, 8, 2, 8, 1, 8],
578+
"third_val": [1, 4, 1, 4, 2, 1, 3, 5],
579+
},
580+
index=Series(
581+
date_range(start="2016-01-01", periods=8),
582+
name="date",
583+
),
584+
)
585+
result = df.groupby("group").resample("2D")[["first_val", "second_val"]].mean()
586+
expected = DataFrame(
587+
data={
588+
"first_val": [2.0, 2.5, 7.0, 4.0],
589+
"second_val": [4.5, 4.5, 5.0, 4.5],
590+
},
591+
index=Index(
592+
data=[
593+
(0, Timestamp("2016-01-01")),
594+
(0, Timestamp("2016-01-03")),
595+
(1, Timestamp("2016-01-05")),
596+
(1, Timestamp("2016-01-07")),
597+
],
598+
name=("group", "date"),
599+
),
600+
)
601+
tm.assert_frame_equal(result, expected)
602+
603+
604+
def test_groupby_resample_on_index_with_list_of_keys_missing_column():
605+
# GH 50876
606+
df = DataFrame(
607+
data={
608+
"group": [0, 0, 0, 0, 1, 1, 1, 1],
609+
"val": [3, 1, 4, 1, 5, 9, 2, 6],
610+
},
611+
index=Series(
612+
date_range(start="2016-01-01", periods=8),
613+
name="date",
614+
),
615+
)
616+
with pytest.raises(KeyError, match="Columns not found"):
617+
df.groupby("group").resample("2D")[["val_not_in_dataframe"]].mean()

pandas/tests/reshape/test_get_dummies.py

+20
Original file line numberDiff line numberDiff line change
@@ -657,3 +657,23 @@ def test_get_dummies_with_string_values(self, values):
657657

658658
with pytest.raises(TypeError, match=msg):
659659
get_dummies(df, columns=values)
660+
661+
def test_get_dummies_ea_dtype_series(self, any_numeric_ea_dtype):
662+
# GH#32430
663+
ser = Series(list("abca"))
664+
result = get_dummies(ser, dtype=any_numeric_ea_dtype)
665+
expected = DataFrame(
666+
{"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], "c": [0, 0, 1, 0]},
667+
dtype=any_numeric_ea_dtype,
668+
)
669+
tm.assert_frame_equal(result, expected)
670+
671+
def test_get_dummies_ea_dtype_dataframe(self, any_numeric_ea_dtype):
672+
# GH#32430
673+
df = DataFrame({"x": list("abca")})
674+
result = get_dummies(df, dtype=any_numeric_ea_dtype)
675+
expected = DataFrame(
676+
{"x_a": [1, 0, 0, 1], "x_b": [0, 1, 0, 0], "x_c": [0, 0, 1, 0]},
677+
dtype=any_numeric_ea_dtype,
678+
)
679+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)