diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 2f0f40de92056..a28e20a636ce2 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -288,6 +288,26 @@ def time_values_mixed_wide(self): self.df_mixed_wide.values +class ToRecords: + def setup(self): + N = 100_000 + data = np.random.randn(N, 2) + mi = MultiIndex.from_arrays( + [ + np.arange(N), + date_range("1970-01-01", periods=N, freq="ms"), + ] + ) + self.df = DataFrame(data) + self.df_mi = DataFrame(data, index=mi) + + def time_to_records(self): + self.df.to_records(index=True) + + def time_to_records_multiindex(self): + self.df_mi.to_records(index=True) + + class Repr: def setup(self): nrows = 10000 diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8a947e4505ec5..ac2faa673ca48 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -701,6 +701,7 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`) - Performance improvement in :meth:`.GroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`) - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`) +- Performance improvement in :meth:`DataFrame.to_records` when the index is a :class:`MultiIndex` (:issue:`47263`) - Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`) - Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`) - Performance improvement in :meth:`DataFrame.join` when left and/or right are empty (:issue:`46015`) @@ -762,6 +763,7 @@ Conversion - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) - Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) - Bug in metaclass of generic abstract dtypes causing :meth:`DataFrame.apply` and :meth:`Series.apply` to raise for the built-in function ``type`` (:issue:`46684`) +- Bug in :meth:`DataFrame.to_records` returning inconsistent numpy types if the index was a :class:`MultiIndex` (:issue:`47263`) - Bug in :meth:`DataFrame.to_dict` for ``orient="list"`` or ``orient="index"`` was not returning native types (:issue:`46751`) Strings diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 54ee5ed2f35d1..ed5ecd67b1ef3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2416,13 +2416,10 @@ def to_records( dtype=[('I', 'S1'), ('A', '