Commit c2d61c3

Merging in updated master in order to make CI checks pass
2 parents a36d450 + 6ca8757, commit c2d61c3

File tree: 27 files changed, +491 −124 lines

.github/workflows/sdist.yml (+64, new file)

name: sdist

on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
      - 1.2.x
      - 1.3.x
    paths-ignore:
      - "doc/**"

jobs:
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    defaults:
      run:
        shell: bash -l {0}

    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9"]

    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip setuptools wheel

          # GH 39416
          pip install numpy

      - name: Build pandas sdist
        run: |
          pip list
          python setup.py sdist --formats=gztar

      - uses: conda-incubator/setup-miniconda@v2
        with:
          activate-environment: pandas-sdist
          python-version: ${{ matrix.python-version }}

      - name: Install pandas from sdist
        run: |
          conda list
          python -m pip install dist/*.gz

      - name: Import pandas
        run: |
          cd ..
          conda list
          python -c "import pandas; pandas.show_versions();"
asv_bench/benchmarks/algos/isin.py (+10)

@@ -325,3 +325,13 @@ def setup(self, dtype, series_type):

     def time_isin(self, dtypes, series_type):
         self.series.isin(self.values)
+
+
+class IsInWithLongTupples:
+    def setup(self):
+        t = tuple(range(1000))
+        self.series = Series([t] * 1000)
+        self.values = [t]
+
+    def time_isin(self):
+        self.series.isin(self.values)
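The new `IsInWithLongTupples` benchmark measures membership tests where every element of the Series is the same long tuple. A minimal sketch of what it exercises, outside the asv harness:

```python
import pandas as pd

# What the IsInWithLongTupples benchmark exercises: Series.isin where the
# elements are long tuples. Comparing identical tuple objects is the case
# sped up by the identity fast path added in khash_python.h.
t = tuple(range(1000))
s = pd.Series([t] * 5)
mask = s.isin([t])
print(mask.all())  # every element is the same tuple object -> True
```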

doc/source/user_guide/indexing.rst (+4 −5)

@@ -1523,18 +1523,17 @@ Looking up values by index/column labels
 ----------------------------------------

 Sometimes you want to extract a set of values given a sequence of row labels
-and column labels, this can be achieved by ``DataFrame.melt`` combined by filtering the corresponding
-rows with ``DataFrame.loc``. For instance:
+and column labels, this can be achieved by ``pandas.factorize`` and NumPy indexing.
+For instance:

 .. ipython:: python

    df = pd.DataFrame({'col': ["A", "A", "B", "B"],
                       'A': [80, 23, np.nan, 22],
                       'B': [80, 55, 76, 67]})
    df
-   melt = df.melt('col')
-   melt = melt.loc[melt['col'] == melt['variable'], 'value']
-   melt.reset_index(drop=True)
+   idx, cols = pd.factorize(df['col'])
+   df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]

 Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method
 which was deprecated in version 1.2.0.
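The recipe the docs now recommend can be run end-to-end; `factorize` turns the per-row column labels into integer codes, and NumPy fancy indexing picks one cell per row:

```python
import numpy as np
import pandas as pd

# The lookup recipe from the updated docs: one value per row, selected by
# the label stored in the 'col' column.
df = pd.DataFrame({'col': ["A", "A", "B", "B"],
                   'A': [80, 23, np.nan, 22],
                   'B': [80, 55, 76, 67]})

# factorize maps each label to an integer code (idx) plus the unique
# labels (cols); reindexing the columns by cols aligns codes to columns.
idx, cols = pd.factorize(df['col'])
result = df.reindex(cols, axis=1).to_numpy()[np.arange(len(df)), idx]
print(result)  # [80. 23. 76. 67.]
```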

doc/source/whatsnew/v1.2.5.rst (+7 −27)

@@ -1,7 +1,7 @@
 .. _whatsnew_125:

-What's new in 1.2.5 (May ??, 2021)
-----------------------------------
+What's new in 1.2.5 (June 22, 2021)
+-----------------------------------

 These are the changes in pandas 1.2.5. See :ref:`release` for a full changelog
 including other versions of pandas.

@@ -14,32 +14,12 @@ including other versions of pandas.

 Fixed regressions
 ~~~~~~~~~~~~~~~~~
-- Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
+- Fixed regression in :func:`concat` between two :class:`DataFrame` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
 - Fixed regression in :meth:`DataFrame.sum` and :meth:`DataFrame.prod` when ``min_count`` and ``numeric_only`` are both given (:issue:`41074`)
-- Regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`)
-- Regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace is a NumPy float array (:issue:`40371`)
-- Regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
-
-.. ---------------------------------------------------------------------------
-
-.. _whatsnew_125.bug_fixes:
-
-Bug fixes
-~~~~~~~~~
-
--
-
-.. ---------------------------------------------------------------------------
-
-.. _whatsnew_125.other:
-
-Other
-~~~~~
-
--
-
+- Fixed regression in :func:`read_csv` when using ``memory_map=True`` with an non-UTF8 encoding (:issue:`40986`)
+- Fixed regression in :meth:`DataFrame.replace` and :meth:`Series.replace` when the values to replace is a NumPy float array (:issue:`40371`)
+- Fixed regression in :func:`ExcelFile` when a corrupt file is opened but not closed (:issue:`41778`)
+- Fixed regression in :meth:`DataFrame.astype` with ``dtype=str`` failing to convert ``NaN`` in categorical columns (:issue:`41797`)

 .. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.3.0.rst (+3)

@@ -269,12 +269,14 @@ Other enhancements
 - :meth:`read_csv` and :meth:`read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
 - :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
+- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising with ``object`` data containing ``pd.NA`` even when ``skipna=True`` (:issue:`37501`)
 - :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`)
 - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when ``data`` is a NumPy ``ndarray`` (:issue:`40908`)
 - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`)
 - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`)
 - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`)
 - Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`)
+- :meth:`DataFrame.explode` now supports exploding multiple columns. Its ``column`` argument now also accepts a list of str or tuples for exploding on multiple columns at the same time (:issue:`39240`)

@@ -914,6 +916,7 @@ Datetimelike
 - Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise a ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`)
 - Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype or a ``timedelta`` object out of bounds for ``timedelta64[ns]`` dtype (:issue:`38792`, :issue:`38965`)
 - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
+- Bug in :meth:`DatetimeIndex.intersection` giving incorrect results with non-Tick frequencies with ``n != 1`` (:issue:`42104`)
 - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
 - Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
 - Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`)

doc/source/whatsnew/v1.4.0.rst (+1 −1)

@@ -96,7 +96,7 @@ Other API changes

 Deprecations
 ~~~~~~~~~~~~
--
+- Deprecated :meth:`Index.is_type_compatible` (:issue:`42113`)
 -

 .. ---------------------------------------------------------------------------

pandas/_libs/src/klib/khash_python.h (+3)

@@ -226,6 +226,9 @@ int PANDAS_INLINE tupleobject_cmp(PyTupleObject* a, PyTupleObject* b){


 int PANDAS_INLINE pyobject_cmp(PyObject* a, PyObject* b) {
+    if (a == b) {
+        return 1;
+    }
     if (Py_TYPE(a) == Py_TYPE(b)) {
         // special handling for some built-in types which could have NaNs
         // as we would like to have them equivalent, but the usual
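The added identity check short-circuits before any value comparison runs. In Python terms, a hypothetical sketch of why that matters (the helper name is illustrative, not the C symbol's actual semantics in full):

```python
import math

# Sketch of the comparison logic with the new fast path: two references to
# the *same* object always compare equal, even when value equality fails,
# as it does for NaN (NaN != NaN).
def pyobject_cmp(a, b):
    if a is b:       # new fast path: identical objects always match
        return True
    return a == b    # fall back to value equality

nan = float("nan")
print(pyobject_cmp(nan, nan))           # True: same object
print(pyobject_cmp(nan, float("nan")))  # False: distinct NaN objects
```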

pandas/_libs/tslibs/timestamps.pyx (+8 −1)

@@ -129,6 +129,13 @@ cdef inline object create_timestamp_from_ts(int64_t value,
     return ts_base


+def _unpickle_timestamp(value, freq, tz):
+    # GH#41949 don't warn on unpickle if we have a freq
+    ts = Timestamp(value, tz=tz)
+    ts._set_freq(freq)
+    return ts
+
+
 # ----------------------------------------------------------------------

 def integer_op_not_supported(obj):

@@ -725,7 +732,7 @@ cdef class _Timestamp(ABCTimestamp):

     def __reduce__(self):
         object_state = self.value, self._freq, self.tzinfo
-        return (Timestamp, object_state)
+        return (_unpickle_timestamp, object_state)

     # -----------------------------------------------------------------
     # Rendering Methods
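The change routes unpickling through a module-level helper instead of the `Timestamp` constructor, so extra state (`freq`) can be restored without the constructor-time deprecation warning. A minimal sketch of the same `__reduce__` pattern with hypothetical names (`Stamp`, `_unpickle_stamp` are illustrative, not pandas internals):

```python
import pickle

def _unpickle_stamp(value, freq, tz):
    # Restore freq outside __init__, mirroring how the patch avoids
    # triggering constructor-time warnings on unpickle.
    ts = Stamp(value, tz=tz)
    ts.freq = freq
    return ts

class Stamp:
    def __init__(self, value, tz=None):
        self.value = value
        self.tz = tz
        self.freq = None

    def __reduce__(self):
        # (callable, args): pickle calls _unpickle_stamp(*args) on load
        return (_unpickle_stamp, (self.value, self.freq, self.tz))

s = Stamp(42, tz="UTC")
s.freq = "D"
s2 = pickle.loads(pickle.dumps(s))
print(s2.value, s2.freq, s2.tz)
```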

pandas/core/algorithms.py (+5 −1)

@@ -140,7 +140,11 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
             return np.asarray(values).view("uint8"), values.dtype
         else:
             # i.e. all-bool Categorical, BooleanArray
-            return np.asarray(values).astype("uint8", copy=False), values.dtype
+            try:
+                return np.asarray(values).astype("uint8", copy=False), values.dtype
+            except TypeError:
+                # GH#42107 we have pd.NAs present
+                return np.asarray(values), values.dtype

     elif is_integer_dtype(values.dtype):
         return np.asarray(values), values.dtype

pandas/core/arrays/categorical.py (+5 −1)

@@ -26,6 +26,7 @@
     NaT,
     algos as libalgos,
     hashtable as htable,
+    lib,
 )
 from pandas._libs.arrays import NDArrayBacked
 from pandas._libs.lib import no_default

@@ -523,14 +524,17 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
             try:
                 new_cats = np.asarray(self.categories)
                 new_cats = new_cats.astype(dtype=dtype, copy=copy)
+                fill_value = lib.item_from_zerodim(np.array(np.nan).astype(dtype))
             except (
                 TypeError,  # downstream error msg for CategoricalIndex is misleading
                 ValueError,
             ):
                 msg = f"Cannot cast {self.categories.dtype} dtype to {dtype}"
                 raise ValueError(msg)

-            result = take_nd(new_cats, ensure_platform_int(self._codes))
+            result = take_nd(
+                new_cats, ensure_platform_int(self._codes), fill_value=fill_value
+            )

             return result
pandas/core/frame.py (+74 −23)

@@ -8144,16 +8144,27 @@ def stack(self, level: Level = -1, dropna: bool = True):

         return result.__finalize__(self, method="stack")

-    def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
+    def explode(
+        self,
+        column: str | tuple | list[str | tuple],
+        ignore_index: bool = False,
+    ) -> DataFrame:
         """
         Transform each element of a list-like to a row, replicating index values.

         .. versionadded:: 0.25.0

         Parameters
         ----------
-        column : str or tuple
-            Column to explode.
+        column : str or tuple or list thereof
+            Column(s) to explode.
+            For multiple columns, specify a non-empty list where each
+            element is a str or tuple; the list-like values in all
+            specified columns must have matching lengths on each row.
+
+            .. versionadded:: 1.3.0
+                Multi-column explode
+
         ignore_index : bool, default False
             If True, the resulting index will be labeled 0, 1, …, n - 1.

@@ -8168,7 +8179,10 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:
         Raises
         ------
         ValueError :
-            if columns of the frame are not unique.
+            * If columns of the frame are not unique.
+            * If the specified columns to explode are an empty list.
+            * If the specified columns to explode have non-matching
+              element counts on a row.

         See Also
         --------

@@ -8187,32 +8201,69 @@ def explode(self, column: str | tuple, ignore_index: bool = False) -> DataFrame:

         Examples
         --------
-        >>> df = pd.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
+        >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
+        ...                    'B': 1,
+        ...                    'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})
         >>> df
-                   A  B
-        0  [1, 2, 3]  1
-        1        foo  1
-        2         []  1
-        3     [3, 4]  1
+                   A  B          C
+        0  [0, 1, 2]  1  [a, b, c]
+        1        foo  1        NaN
+        2         []  1         []
+        3     [3, 4]  1     [d, e]
+
+        Single-column explode.

         >>> df.explode('A')
-             A  B
-        0    1  1
-        0    2  1
-        0    3  1
-        1  foo  1
-        2  NaN  1
-        3    3  1
-        3    4  1
-        """
-        if not (is_scalar(column) or isinstance(column, tuple)):
-            raise ValueError("column must be a scalar")
+             A  B          C
+        0    0  1  [a, b, c]
+        0    1  1  [a, b, c]
+        0    2  1  [a, b, c]
+        1  foo  1        NaN
+        2  NaN  1         []
+        3    3  1     [d, e]
+        3    4  1     [d, e]
+
+        Multi-column explode.
+
+        >>> df.explode(list('AC'))
+             A  B    C
+        0    0  1    a
+        0    1  1    b
+        0    2  1    c
+        1  foo  1  NaN
+        2  NaN  1  NaN
+        3    3  1    d
+        3    4  1    e
+        """
         if not self.columns.is_unique:
             raise ValueError("columns must be unique")

+        columns: list[str | tuple]
+        if is_scalar(column) or isinstance(column, tuple):
+            assert isinstance(column, (str, tuple))
+            columns = [column]
+        elif isinstance(column, list) and all(
+            map(lambda c: is_scalar(c) or isinstance(c, tuple), column)
+        ):
+            if not column:
+                raise ValueError("column must be nonempty")
+            if len(column) > len(set(column)):
+                raise ValueError("column must be unique")
+            columns = column
+        else:
+            raise ValueError("column must be a scalar, tuple, or list thereof")
+
         df = self.reset_index(drop=True)
-        result = df[column].explode()
-        result = df.drop([column], axis=1).join(result)
+        if len(columns) == 1:
+            result = df[columns[0]].explode()
+        else:
+            mylen = lambda x: len(x) if is_list_like(x) else -1
+            counts0 = self[columns[0]].apply(mylen)
+            for c in columns[1:]:
+                if not all(counts0 == self[c].apply(mylen)):
+                    raise ValueError("columns must have matching element counts")
+            result = DataFrame({c: df[c].explode() for c in columns})
+        result = df.drop(columns, axis=1).join(result)
         if ignore_index:
             result.index = ibase.default_index(len(result))
         else:
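The new docstring example can be run end-to-end against pandas ≥ 1.3, where `explode` accepts a list of columns; rows must have matching list lengths in the exploded columns, and empty lists become NaN:

```python
import numpy as np
import pandas as pd

# Multi-column explode from the new docstring: 'A' and 'C' are exploded
# together, pairing element i of each row's 'A' list with element i of
# its 'C' list; 'B' is broadcast unchanged.
df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]],
                   'B': 1,
                   'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]})

out = df.explode(list('AC'))
print(out)
```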

pandas/core/groupby/groupby.py (+5 −1)

@@ -1519,7 +1519,11 @@ def _bool_agg(self, val_test, skipna):

         def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
             if is_object_dtype(vals):
-                vals = np.array([bool(x) for x in vals])
+                # GH#37501: don't raise on pd.NA when skipna=True
+                if skipna:
+                    vals = np.array([bool(x) if not isna(x) else True for x in vals])
+                else:
+                    vals = np.array([bool(x) for x in vals])
             elif isinstance(vals, BaseMaskedArray):
                 vals = vals._data.astype(bool, copy=False)
             else:
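The effect of this change (GH#37501) is visible from the public API: object-dtype data containing `pd.NA` no longer raises in `GroupBy.any`/`GroupBy.all` when `skipna=True` (the default). A small sketch:

```python
import pandas as pd

# Previously, bool(pd.NA) raised TypeError inside GroupBy.any; with the
# fix, NA values are skipped when skipna=True (the default).
df = pd.DataFrame({"key": ["a", "a", "b"],
                   "val": ["x", pd.NA, pd.NA]})   # object dtype with pd.NA

res = df.groupby("key")["val"].any()
print(res)  # group 'a' has a truthy value -> True
```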
