sthagen
diff --git a/‎.pre-commit-config.yaml
+4-3 b/‎.pre-commit-config.yaml
+4-3
diff --git a/‎ci/deps/actions-38-db.yaml
+1-1 b/‎ci/deps/actions-38-db.yaml
+1-1
diff --git a/‎ci/deps/azure-windows-38.yaml
+1-1 b/‎ci/deps/azure-windows-38.yaml
+1-1
diff --git a/‎doc/source/user_guide/duplicates.rst
+1 b/‎doc/source/user_guide/duplicates.rst
+1
diff --git a/‎doc/source/whatsnew/v1.3.2.rst
+1-1 b/‎doc/source/whatsnew/v1.3.2.rst
+1-1
diff --git a/‎doc/source/whatsnew/v1.4.0.rst
+3-1 b/‎doc/source/whatsnew/v1.4.0.rst
+3-1
diff --git a/‎environment.yml
+1-1 b/‎environment.yml
+1-1
diff --git a/‎flake8/cython.cfg
+15-1 b/‎flake8/cython.cfg
+15-1
diff --git a/‎pandas/_libs/algos.pyx
+12-6 b/‎pandas/_libs/algos.pyx
+12-6
diff --git a/‎pandas/_libs/khash.pxd
+4-4 b/‎pandas/_libs/khash.pxd
+4-4
diff --git a/‎pandas/_libs/lib.pyx
+3-3 b/‎pandas/_libs/lib.pyx
+3-3
diff --git a/‎pandas/_libs/parsers.pyx
+3-3 b/‎pandas/_libs/parsers.pyx
+3-3
diff --git a/‎pandas/_libs/tslibs/conversion.pyx
+2-2 b/‎pandas/_libs/tslibs/conversion.pyx
+2-2
diff --git a/‎pandas/_libs/tslibs/np_datetime.pyx
+1-1 b/‎pandas/_libs/tslibs/np_datetime.pyx
+1-1
diff --git a/‎pandas/_libs/tslibs/offsets.pyx
+2-2 b/‎pandas/_libs/tslibs/offsets.pyx
+2-2
diff --git a/‎pandas/_libs/tslibs/period.pyx
+1-1 b/‎pandas/_libs/tslibs/period.pyx
+1-1
diff --git a/‎pandas/_libs/tslibs/strptime.pyx
+4-4 b/‎pandas/_libs/tslibs/strptime.pyx
+4-4
diff --git a/‎pandas/_libs/tslibs/timedeltas.pyx
+2-1 b/‎pandas/_libs/tslibs/timedeltas.pyx
+2-1
diff --git a/‎pandas/_libs/tslibs/timestamps.pyx
+1-1 b/‎pandas/_libs/tslibs/timestamps.pyx
+1-1
diff --git a/‎pandas/_testing/__init__.py
+1-1 b/‎pandas/_testing/__init__.py
+1-1
diff --git a/‎pandas/core/arrays/categorical.py
+4-1 b/‎pandas/core/arrays/categorical.py
+4-1
diff --git a/‎pandas/core/arrays/interval.py
+7-1 b/‎pandas/core/arrays/interval.py
+7-1
diff --git a/‎pandas/core/dtypes/dtypes.py
+1-1 b/‎pandas/core/dtypes/dtypes.py
+1-1
diff --git a/‎pandas/core/groupby/generic.py
+1-1 b/‎pandas/core/groupby/generic.py
+1-1
diff --git a/‎pandas/core/groupby/grouper.py
+1-1 b/‎pandas/core/groupby/grouper.py
+1-1
@@ -9,7 +9,7 @@ repos:
     -   id: absolufy-imports
         files: ^pandas/
 -   repo: https://github.com/python/black
-    rev: 21.6b0
+    rev: 21.7b0
     hooks:
     -   id: black
 -   repo: https://github.com/codespell-project/codespell
@@ -44,6 +44,7 @@ repos:
             - flake8-bugbear==21.3.2
             - pandas-dev-flaker==0.2.0
     -   id: flake8
+        alias: flake8-cython
         name: flake8 (cython)
         types: [cython]
         args: [--append-config=flake8/cython.cfg]
@@ -53,11 +54,11 @@ repos:
         types: [text]
         args: [--append-config=flake8/cython-template.cfg]
 -   repo: https://github.com/PyCQA/isort
-    rev: 5.9.2
+    rev: 5.9.3
     hooks:
     -   id: isort
 -   repo: https://github.com/asottile/pyupgrade
-    rev: v2.21.0
+    rev: v2.23.3
     hooks:
     -   id: pyupgrade
         args: [--py38-plus]
 
@@ -15,7 +15,7 @@ dependencies:
   - beautifulsoup4
   - botocore>=1.11
   - dask
-  - fastparquet>=0.4.0, < 0.7.0
+  - fastparquet>=0.4.0
   - fsspec>=0.7.4, <2021.6.0
   - gcsfs>=0.6.0
   - geopandas
 
@@ -15,7 +15,7 @@ dependencies:
   # pandas dependencies
   - blosc
   - bottleneck
-  - fastparquet>=0.4.0, <0.7.0
+  - fastparquet>=0.4.0
   - flask
   - fsspec>=0.8.0, <2021.6.0
   - matplotlib=3.3.2
 
@@ -28,6 +28,7 @@ duplicates present. The output can't be determined, and so pandas raises.
 
 .. ipython:: python
    :okexcept:
+   :okwarning:
 
    s1 = pd.Series([0, 1, 2], index=["a", "b", "b"])
    s1.reindex(["a", "b", "c"])
 
@@ -44,7 +44,7 @@ Bug fixes
 
 Other
 ~~~~~
--
+- :meth:`pandas.read_parquet` now supports reading nullable dtypes with ``fastparquet`` versions above 0.7.1.
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -34,7 +34,7 @@ Other enhancements
 - :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`)
 -  Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
 - :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`)
-- Add keyword ``levels`` to :meth:`.Styler.hide_index` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
+- Add keyword ``level`` to :meth:`.Styler.hide_index` and :meth:`.Styler.hide_columns` for optionally controlling hidden levels in a MultiIndex (:issue:`25475`)
 - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
 - Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
 - Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
@@ -162,6 +162,7 @@ Deprecations
 - Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)
 - Creating an empty Series without a dtype will now raise a more visible ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`30017`)
 - Deprecated the 'kind' argument in :meth:`Index.get_slice_bound`, :meth:`Index.slice_indexer`, :meth:`Index.slice_locs`; in a future version passing 'kind' will raise (:issue:`42857`)
+- Deprecated :meth:`Index.reindex` with a non-unique index (:issue:`42568`)
 -
 
 .. ---------------------------------------------------------------------------
@@ -232,6 +233,7 @@ Interval
 Indexing
 ^^^^^^^^
 - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`)
+- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` with a :class:`MultiIndex` when indexing with a tuple in which one of the levels is also a tuple (:issue:`27591`)
 - Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`)
 - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`)
 - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`)
 
@@ -99,7 +99,7 @@ dependencies:
   - xlwt
   - odfpy
 
-  - fastparquet>=0.4.0, <0.7.0  # pandas.read_parquet, DataFrame.to_parquet
+  - fastparquet>=0.4.0  # pandas.read_parquet, DataFrame.to_parquet
   - pyarrow>=0.17.0  # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
   - python-snappy  # required by pyarrow
 
 
@@ -1,3 +1,17 @@
 [flake8]
 filename = *.pyx,*.pxd
-select=E501,E302,E203,E111,E114,E221,E303,E128,E231,E126,E265,E305,E301,E127,E261,E271,E129,W291,E222,E241,E123,F403,C400,C401,C402,C403,C404,C405,C406,C407,C408,C409,C410,C411
+extend_ignore=
+    # whitespace before '('
+    E211,
+    # missing whitespace around operator
+    E225,
+    # missing whitespace around arithmetic operator
+    E226,
+    # missing whitespace around bitwise or shift operator
+    E227,
+    # ambiguous variable name (# FIXME maybe this one can be fixed)
+    E741,
+    # invalid syntax
+    E999,
+    # invalid escape sequence (# FIXME maybe this one can be fixed)
+    W605,
@@ -274,16 +274,22 @@ cdef inline numeric kth_smallest_c(numeric* arr, Py_ssize_t k, Py_ssize_t n) nog
         j = m
 
         while 1:
-            while arr[i] < x: i += 1
-            while x < arr[j]: j -= 1
+            while arr[i] < x:
+                i += 1
+            while x < arr[j]:
+                j -= 1
             if i <= j:
                 swap(&arr[i], &arr[j])
-                i += 1; j -= 1
+                i += 1
+                j -= 1
 
-            if i > j: break
+            if i > j:
+                break
 
-        if j < k: l = i
-        if k < i: m = j
+        if j < k:
+            l = i
+        if k < i:
+            m = j
     return arr[k]
 
 
 
@@ -26,20 +26,20 @@ cdef extern from "khash_python.h":
         double imag
 
     bint are_equivalent_khcomplex128_t \
-    "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil
+        "kh_complex_hash_equal" (khcomplex128_t a, khcomplex128_t b) nogil
 
     ctypedef struct khcomplex64_t:
         float real
         float imag
 
     bint are_equivalent_khcomplex64_t \
-    "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil
+        "kh_complex_hash_equal" (khcomplex64_t a, khcomplex64_t b) nogil
 
     bint are_equivalent_float64_t \
-    "kh_floats_hash_equal" (float64_t a, float64_t b) nogil
+        "kh_floats_hash_equal" (float64_t a, float64_t b) nogil
 
     bint are_equivalent_float32_t \
-    "kh_floats_hash_equal" (float32_t a, float32_t b) nogil
+        "kh_floats_hash_equal" (float32_t a, float32_t b) nogil
 
     uint32_t kh_python_hash_func(object key)
     bint kh_python_hash_equal(object a, object b)
 
@@ -2107,9 +2107,9 @@ cpdef bint is_interval_array(ndarray values):
                 return False
             elif numeric:
                 if not (
-                        util.is_float_object(val.left)
-                        or util.is_integer_object(val.left)
-                    ):
+                    util.is_float_object(val.left)
+                    or util.is_integer_object(val.left)
+                ):
                     # i.e. datetime64 or timedelta64
                     return False
             elif td64:
 
@@ -356,7 +356,7 @@ cdef class TextReader:
                   thousands=None,       # bytes | str
                   dtype=None,
                   usecols=None,
-                  on_bad_lines = ERROR,
+                  on_bad_lines=ERROR,
                   bint na_filter=True,
                   na_values=None,
                   na_fvalues=None,
@@ -1442,7 +1442,7 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
 
             if na_filter:
                 if kh_get_str_starts_item(na_hashset, word):
-                # is in NA values
+                    # is in NA values
                     na_count += 1
                     codes[i] = NA
                     continue
@@ -1578,7 +1578,7 @@ cdef inline int _try_double_nogil(parser_t *parser,
                             strcasecmp(word, cposinfty) == 0):
                         data[0] = INF
                     elif (strcasecmp(word, cneginf) == 0 or
-                            strcasecmp(word, cneginfty) == 0 ):
+                            strcasecmp(word, cneginfty) == 0):
                         data[0] = NEGINF
                     else:
                         return 1
 
@@ -200,7 +200,7 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def ensure_datetime64ns(arr: ndarray, copy: bool=True):
+def ensure_datetime64ns(arr: ndarray, copy: bool = True):
     """
     Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
 
@@ -260,7 +260,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool=True):
     return result
 
 
-def ensure_timedelta64ns(arr: ndarray, copy: bool=True):
+def ensure_timedelta64ns(arr: ndarray, copy: bool = True):
     """
     Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'
 
 
@@ -38,7 +38,7 @@ cdef extern from "src/datetime/np_datetime.h":
     void pandas_timedelta_to_timedeltastruct(npy_timedelta val,
                                              NPY_DATETIMEUNIT fr,
                                              pandas_timedeltastruct *result
-                                            ) nogil
+                                             ) nogil
 
     npy_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS
 
 
@@ -1454,7 +1454,7 @@ cdef class BusinessHour(BusinessMixin):
 
     def __init__(
             self, n=1, normalize=False, start="09:00", end="17:00", offset=timedelta(0)
-        ):
+    ):
         BusinessMixin.__init__(self, n, normalize, offset)
 
         # must be validated here to equality check
@@ -3897,7 +3897,7 @@ cdef ndarray[int64_t] _shift_bdays(const int64_t[:] i8other, int periods):
     return result.base
 
 
-def shift_month(stamp: datetime, months: int, day_opt: object=None) -> datetime:
+def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetime:
     """
     Given a datetime (or Timestamp) `stamp`, an integer `months` and an
     option `day_opt`, return a new datetimelike that many months later,
 
@@ -197,7 +197,7 @@ cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil:
             return <freq_conv_func>asfreq_BtoW
         elif to_group == FR_BUS:
             return <freq_conv_func>no_op
-        elif to_group  in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
+        elif to_group in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
             return <freq_conv_func>asfreq_BtoDT
         else:
             return <freq_conv_func>nofunc
 
@@ -199,17 +199,17 @@ def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='
                 year = int(found_dict['Y'])
             elif parse_code == 2:
                 month = int(found_dict['m'])
-            elif parse_code == 3:
             # elif group_key == 'B':
+            elif parse_code == 3:
                 month = locale_time.f_month.index(found_dict['B'].lower())
-            elif parse_code == 4:
             # elif group_key == 'b':
+            elif parse_code == 4:
                 month = locale_time.a_month.index(found_dict['b'].lower())
-            elif parse_code == 5:
             # elif group_key == 'd':
+            elif parse_code == 5:
                 day = int(found_dict['d'])
-            elif parse_code == 6:
             # elif group_key == 'H':
+            elif parse_code == 6:
                 hour = int(found_dict['H'])
             elif parse_code == 7:
                 hour = int(found_dict['I'])
 
@@ -641,7 +641,8 @@ def _binary_op_method_timedeltalike(op, name):
             return NaT
 
         elif is_datetime64_object(other) or (
-           PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)):
+            PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)
+        ):
             # this case is for a datetime object that is specifically
             # *not* a Timestamp, as the Timestamp case will be
             # handled after `_validate_ops_compat` returns False below
 
@@ -1958,7 +1958,7 @@ default 'raise'
                  self.second / 3600.0 +
                  self.microsecond / 3600.0 / 1e+6 +
                  self.nanosecond / 3600.0 / 1e+9
-                ) / 24.0)
+                 ) / 24.0)
 
 
 # Aliases
 
@@ -219,7 +219,7 @@ def box_expected(expected, box_cls, transpose=True):
         else:
             expected = pd.array(expected)
     elif box_cls is Index:
-        expected = Index(expected)
+        expected = Index._with_infer(expected)
     elif box_cls is Series:
         expected = Series(expected)
     elif box_cls is DataFrame:
 
@@ -2031,7 +2031,9 @@ def _validate_listlike(self, value):
         from pandas import Index
 
         # tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914
-        to_add = Index(value, tupleize_cols=False).difference(self.categories)
+        to_add = Index._with_infer(value, tupleize_cols=False).difference(
+            self.categories
+        )
 
         # no assignments of values not in categories, but it's always ok to set
         # something to np.nan
@@ -2741,6 +2743,7 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]:
         # as values but its codes are by def [0, ..., len(n_categories) - 1]
         cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype)
         cat = Categorical.from_codes(cat_codes, dtype=values.dtype)
+
         categories = CategoricalIndex(cat)
         codes = values.codes
     else:
 
@@ -16,7 +16,10 @@
 
 from pandas._config import get_option
 
-from pandas._libs import NaT
+from pandas._libs import (
+    NaT,
+    lib,
+)
 from pandas._libs.interval import (
     VALID_CLOSED,
     Interval,
@@ -225,6 +228,9 @@ def __new__(
             left, right, infer_closed = intervals_to_interval_bounds(
                 data, validate_closed=closed is None
             )
+            if left.dtype == object:
+                left = lib.maybe_convert_objects(left)
+                right = lib.maybe_convert_objects(right)
             closed = closed or infer_closed
 
         return cls._simple_new(
 
@@ -529,7 +529,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index:
                 f"Parameter 'categories' must be list-like, was {repr(categories)}"
             )
         elif not isinstance(categories, ABCIndex):
-            categories = Index(categories, tupleize_cols=False)
+            categories = Index._with_infer(categories, tupleize_cols=False)
 
         if not fastpath:
 
 
@@ -455,7 +455,7 @@ def _get_index() -> Index:
             if self.grouper.nkeys > 1:
                 index = MultiIndex.from_tuples(keys, names=self.grouper.names)
             else:
-                index = Index(keys, name=self.grouper.names[0])
+                index = Index._with_infer(keys, name=self.grouper.names[0])
             return index
 
         if isinstance(values[0], dict):
 
@@ -646,7 +646,7 @@ def group_index(self) -> Index:
             return self._group_index
 
         uniques = self._codes_and_uniques[1]
-        return Index(uniques, name=self.name)
+        return Index._with_infer(uniques, name=self.name)
 
     @cache_readonly
     def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
Original file line number	Diff line number	Diff line change
`@@ -44,7 +44,7 @@ Bug fixes`
`44`	`44`
`45`	`45`	`Other`
`46`	`46`	`~~~~~`
`47`		`--`
	`47`	+- :meth:`pandas.read_parquet` now supports reading nullable dtypes with ``fastparquet`` versions above 0.7.1.
`48`	`48`	`-`
`49`	`49`
`50`	`50`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -529,7 +529,7 @@ def validate_categories(categories, fastpath: bool = False) -> Index:`
`529`	`529`	`f"Parameter 'categories' must be list-like, was {repr(categories)}"`
`530`	`530`	`)`
`531`	`531`	`elif not isinstance(categories, ABCIndex):`
`532`		`- categories = Index(categories, tupleize_cols=False)`
	`532`	`+ categories = Index._with_infer(categories, tupleize_cols=False)`
`533`	`533`
`534`	`534`	`if not fastpath:`
`535`	`535`