vamsi-verma-s
diff --git a/‎.github/workflows/python-dev.yml
+3-3 b/‎.github/workflows/python-dev.yml
+3-3
diff --git a/‎.github/workflows/wheels.yml
+2-12 b/‎.github/workflows/wheels.yml
+2-12
diff --git a/‎.pre-commit-config.yaml
+7 b/‎.pre-commit-config.yaml
+7
diff --git a/‎README.md
-2 b/‎README.md
-2
diff --git a/‎asv_bench/benchmarks/index_object.py
+9 b/‎asv_bench/benchmarks/index_object.py
+9
diff --git a/‎doc/source/development/maintaining.rst
+43 b/‎doc/source/development/maintaining.rst
+43
diff --git a/‎doc/source/user_guide/io.rst
+8 b/‎doc/source/user_guide/io.rst
+8
diff --git a/‎doc/source/whatsnew/v1.5.1.rst
+1 b/‎doc/source/whatsnew/v1.5.1.rst
+1
diff --git a/‎doc/source/whatsnew/v1.6.0.rst
+6-1 b/‎doc/source/whatsnew/v1.6.0.rst
+6-1
diff --git a/‎environment.yml
+1-1 b/‎environment.yml
+1-1
diff --git a/‎pandas/_libs/parsers.pyx
+13-3 b/‎pandas/_libs/parsers.pyx
+13-3
diff --git a/‎pandas/_libs/tslibs/parsing.pyx
-4 b/‎pandas/_libs/tslibs/parsing.pyx
-4
diff --git a/‎pandas/_libs/tslibs/timedeltas.pxd
+1 b/‎pandas/_libs/tslibs/timedeltas.pxd
+1
@@ -54,7 +54,7 @@ jobs:
         os: [ubuntu-latest, macOS-latest, windows-latest]
 
     name: actions-311-dev
-    timeout-minutes: 80
+    timeout-minutes: 120
 
     concurrency:
       #https://github.community/t/concurrecy-not-work-for-push/183068/7
@@ -75,7 +75,7 @@ jobs:
       run: |
         python --version
         python -m pip install --upgrade pip setuptools wheel
-        python -m pip install git+https://github.com/numpy/numpy.git
+        python -m pip install -i https://pypi.anaconda.org/scipy-wheels-nightly/simple numpy
         python -m pip install git+https://github.com/nedbat/coveragepy.git
         python -m pip install python-dateutil pytz cython hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-cov pytest-asyncio>=0.17
         python -m pip list
@@ -84,7 +84,7 @@ jobs:
     - name: Build Pandas
       run: |
         python setup.py build_ext -q -j1
-        python -m pip install -e . --no-build-isolation --no-use-pep517
+        python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
 
     - name: Build Version
       run: |
 
@@ -54,7 +54,6 @@ jobs:
         # TODO: support PyPy?
         python: [["cp38", "3.8"], ["cp39", "3.9"], ["cp310", "3.10"],  ["cp311", "3.11-dev"]]# "pp38", "pp39"]
     env:
-      IS_32_BIT: ${{ matrix.buildplat[1] == 'win32' }}
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
     steps:
@@ -72,15 +71,6 @@ jobs:
         uses: pypa/[email protected]
         env:
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
-          CIBW_ENVIRONMENT: IS_32_BIT='${{ env.IS_32_BIT }}'
-          # We can't test directly with cibuildwheel, since we need to have to wheel location
-          # to mount into the docker image
-          CIBW_TEST_COMMAND_LINUX: "python {project}/ci/test_wheels.py"
-          CIBW_TEST_COMMAND_MACOS: "python {project}/ci/test_wheels.py"
-          CIBW_TEST_REQUIRES: hypothesis==6.52.1 pytest>=6.2.5 pytest-xdist pytest-asyncio>=0.17
-          CIBW_REPAIR_WHEEL_COMMAND_WINDOWS: "python ci/fix_wheels.py {wheel} {dest_dir}"
-          CIBW_ARCHS_MACOS: x86_64 universal2
-          CIBW_BUILD_VERBOSITY: 3
 
       # Used to test the built wheels
       - uses: actions/setup-python@v3
@@ -118,7 +108,7 @@ jobs:
 
       - name: Upload wheels
         if: success()
-        shell: bash
+        shell: bash -el {0}
         env:
           PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
           PANDAS_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.PANDAS_NIGHTLY_UPLOAD_TOKEN }}
@@ -195,7 +185,7 @@ jobs:
 
       - name: Upload sdist
         if: success()
-        shell: bash
+        shell: bash -el {0}
         env:
           PANDAS_STAGING_UPLOAD_TOKEN: ${{ secrets.PANDAS_STAGING_UPLOAD_TOKEN }}
           PANDAS_NIGHTLY_UPLOAD_TOKEN: ${{ secrets.PANDAS_NIGHTLY_UPLOAD_TOKEN }}
 
@@ -226,6 +226,13 @@ repos:
         entry: python scripts/no_bool_in_generic.py
         language: python
         files: ^pandas/core/generic\.py$
+    -   id: no-return-exception
+        name: Use raise instead of return for exceptions
+        language: pygrep
+        entry: 'return [A-Za-z]+(Error|Exit|Interrupt|Exception|Iteration)'
+        files: ^pandas/
+        types: [python]
+        exclude: ^pandas/tests/
     -   id: pandas-errors-documented
         name: Ensure pandas errors are documented in doc/source/reference/testing.rst
         entry: python scripts/pandas_errors_documented.py
 
@@ -128,8 +128,6 @@ or for installing in [development mode](https://pip.pypa.io/en/latest/cli/pip_in
 python -m pip install -e . --no-build-isolation --no-use-pep517
 ```
 
-If you have `make`, you can also use `make develop` to run the same command.
-
 or alternatively
 
 ```sh
 
@@ -65,6 +65,15 @@ def time_datetime_difference_disjoint(self):
         self.datetime_left.difference(self.datetime_right)
 
 
+class UnionWithDuplicates:
+    def setup(self):
+        self.left = Index(np.repeat(np.arange(1000), 100))
+        self.right = Index(np.tile(np.arange(500, 1500), 50))
+
+    def time_union_with_duplicates(self):
+        self.left.union(self.right)
+
+
 class Range:
     def setup(self):
         self.idx_inc = RangeIndex(start=0, stop=10**6, step=3)
 
@@ -121,6 +121,49 @@ Here's a typical workflow for triaging a newly opened issue.
   unless it's known that this issue should be addressed in a specific release (say
    because it's a large regression).
 
+.. _maintaining.regressions:
+
+Investigating regressions
+-------------------------
+
+Regressions are bugs that unintentionally break previously working code. The common way
+to investigate regressions is by using
+`git bisect <https://git-scm.com/docs/git-bisect>`_,
+which finds the first commit that introduced the bug.
+
+For example: a user reports that ``pd.Series([1, 1]).sum()`` returns ``3``
+in pandas version ``1.5.0`` while in version ``1.4.0`` it returned ``2``. To begin,
+create a file ``t.py`` in your pandas directory, which contains
+
+.. code-block:: python
+
+    import pandas as pd
+    assert pd.Series([1, 1]).sum() == 2
+
+and then run::
+
+    git bisect start
+    git bisect good v1.4.0
+    git bisect bad v1.5.0
+    git bisect run bash -c "python setup.py build_ext -j 4; python t.py"
+
+This finds the first commit that changed the behavior. The C extensions have to be
+rebuilt at every step, so the search can take a while.
+
+Exit bisect and rebuild the current version::
+
+    git bisect reset
+    python setup.py build_ext -j 4
+
+Report your findings under the corresponding issue and ping the commit author to get
+their input.
+
+.. note::
+    In the ``bisect run`` command above, commits are considered good if ``t.py`` exits
+    with ``0`` and bad otherwise. When raising an exception is the desired behavior,
+    wrap the code in an appropriate ``try/except`` statement. See :issue:`35685` for
+    more examples.
+
 .. _maintaining.closing:
 
 Closing issues
 
@@ -197,6 +197,14 @@ dtype : Type name or dict of column -> type, default ``None``
      Support for defaultdict was added. Specify a defaultdict as input where
      the default determines the dtype of the columns which are not explicitly
      listed.
+
+use_nullable_dtypes : bool = False
+    Whether or not to use nullable dtypes as default when reading data. If
+    set to True, nullable dtypes are used for all dtypes that have a nullable
+    implementation, even if no nulls are present.
+
+    .. versionadded:: 2.0
+
 engine : {``'c'``, ``'python'``, ``'pyarrow'``}
   Parser engine to use. The C and pyarrow engines are faster, while the python engine
   is currently more feature-complete. Multithreading is currently only supported by
 
@@ -79,6 +79,7 @@ Fixed regressions
 - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
 - Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
 - Fixed regression in :func:`to_datetime` when ``arg`` was a date string with nanosecond and ``format`` contained ``%f`` would raise a ``ValueError`` (:issue:`48767`)
+- Fixed regression in :func:`assert_frame_equal` raising for :class:`MultiIndex` with :class:`Categorical` and ``check_like=True`` (:issue:`48975`)
 - Fixed regression in :meth:`DataFrame.fillna` replacing wrong values for ``datetime64[ns]`` dtype and ``inplace=True`` (:issue:`48863`)
 - Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
 - Fixed regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`)
 
@@ -32,6 +32,7 @@ Other enhancements
 - :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`)
 - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`)
 - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`)
+- Added new argument ``use_nullable_dtypes`` to :func:`read_csv` to enable automatic conversion to nullable dtypes (:issue:`36712`)
 - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`)
 - Added metadata propagation for binary operators on :class:`DataFrame` (:issue:`28283`)
 - :class:`.CategoricalConversionWarning`, :class:`.InvalidComparison`, :class:`.InvalidVersion`, :class:`.LossySetitemError`, and :class:`.NoBufferPresent` are now exposed in ``pandas.errors`` (:issue:`27656`)
@@ -118,6 +119,7 @@ Other API changes
 - Passing ``nanoseconds`` greater than 999 or less than 0 in :class:`Timestamp` now raises a ``ValueError`` (:issue:`48538`, :issue:`48255`)
 - :func:`read_csv`: specifying an incorrect number of columns with ``index_col`` now raises ``ParserError`` instead of ``IndexError`` when using the c parser.
 - :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting datetime64 data to any of "datetime64[s]", "datetime64[ms]", "datetime64[us]" will return an object with the given resolution instead of coercing back to "datetime64[ns]" (:issue:`48928`)
+- :meth:`DataFrame.astype`, :meth:`Series.astype`, and :meth:`DatetimeIndex.astype` casting timedelta64 data to any of "timedelta64[s]", "timedelta64[ms]", "timedelta64[us]" will return an object with the given resolution instead of coercing to "float64" dtype (:issue:`48963`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -140,6 +142,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.difference` (:issue:`48606`)
 - Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`)
 - Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`)
+- Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`)
 - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`)
 - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`)
 - Performance improvement for :class:`DatetimeIndex` constructor passing a list (:issue:`48609`)
@@ -151,6 +154,7 @@ Performance improvements
 - Performance improvement in ``var`` for nullable dtypes (:issue:`48379`).
 - Performance improvements to :func:`read_sas` (:issue:`47403`, :issue:`47405`, :issue:`47656`, :issue:`48502`)
 - Memory improvement in :meth:`RangeIndex.sort_values` (:issue:`48801`)
+- Performance improvement in :class:`DataFrameGroupBy` and :class:`SeriesGroupBy` when ``by`` is a categorical type and ``sort=False`` (:issue:`48976`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_160.bug_fixes:
@@ -219,11 +223,12 @@ Missing
 
 MultiIndex
 ^^^^^^^^^^
+- Bug in :meth:`MultiIndex.argsort` raising ``TypeError`` when index contains :attr:`NA` (:issue:`48495`)
 - Bug in :meth:`MultiIndex.difference` losing extension array dtype (:issue:`48606`)
 - Bug in :class:`MultiIndex.set_levels` raising ``IndexError`` when setting empty level (:issue:`48636`)
 - Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`)
 - Bug in :meth:`MultiIndex.intersection` losing extension array (:issue:`48604`)
-- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`)
+- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`, :issue:`48900`)
 - Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
 - Bug in :meth:`MultiIndex.symmetric_difference` losing extension array (:issue:`48607`)
 -
 
@@ -100,7 +100,7 @@ dependencies:
   - natsort  # DataFrame.sort_values doctest
   - numpydoc
   - pandas-dev-flaker=0.5.0
-  - pydata-sphinx-theme
+  - pydata-sphinx-theme<0.11
   - pytest-cython  # doctest
   - sphinx
   - sphinx-panels
 
@@ -342,6 +342,7 @@ cdef class TextReader:
         object index_col
         object skiprows
         object dtype
+        bint use_nullable_dtypes
         object usecols
         set unnamed_cols  # set[str]
 
@@ -380,7 +381,8 @@ cdef class TextReader:
                   bint mangle_dupe_cols=True,
                   float_precision=None,
                   bint skip_blank_lines=True,
-                  encoding_errors=b"strict"):
+                  encoding_errors=b"strict",
+                  use_nullable_dtypes=False):
 
         # set encoding for native Python and C library
         if isinstance(encoding_errors, str):
@@ -505,6 +507,7 @@ cdef class TextReader:
         # - DtypeObj
         # - dict[Any, DtypeObj]
         self.dtype = dtype
+        self.use_nullable_dtypes = use_nullable_dtypes
 
         # XXX
         self.noconvert = set()
@@ -933,6 +936,7 @@ cdef class TextReader:
             bint na_filter = 0
             int64_t num_cols
             dict result
+            bint use_nullable_dtypes
 
         start = self.parser_start
 
@@ -1053,8 +1057,14 @@ cdef class TextReader:
                     self._free_na_set(na_hashset)
 
             # don't try to upcast EAs
-            if na_count > 0 and not is_extension_array_dtype(col_dtype):
-                col_res = _maybe_upcast(col_res)
+            if (
+                na_count > 0 and not is_extension_array_dtype(col_dtype)
+                or self.use_nullable_dtypes
+            ):
+                use_nullable_dtypes = self.use_nullable_dtypes and col_dtype is None
+                col_res = _maybe_upcast(
+                    col_res, use_nullable_dtypes=use_nullable_dtypes
+                )
 
             if col_res is None:
                 raise ParserError(f'Unable to parse column {i}')
 
@@ -963,10 +963,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
         datetime format string (for `strftime` or `strptime`),
         or None if it can't be guessed.
     """
-
-    if not isinstance(dt_str, str):
-        return None
-
     day_attribute_and_format = (('day',), '%d', 2)
 
     # attr name, format, padding (if any)
 
@@ -25,3 +25,4 @@ cdef class _Timedelta(timedelta):
     cdef _ensure_components(_Timedelta self)
     cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op)
     cdef _Timedelta _as_reso(self, NPY_DATETIMEUNIT reso, bint round_ok=*)
+    cpdef _maybe_cast_to_matching_resos(self, _Timedelta other)