Skip to content

Commit fa28d95

Browse files
Merge remote-tracking branch 'upstream/master' into fixup-bool
2 parents de2d4f6 + 1cc40fa commit fa28d95

File tree

192 files changed

+3084
-1313
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

192 files changed

+3084
-1313
lines changed

.github/workflows/ci.yml

+9-11
Original file line numberDiff line numberDiff line change
@@ -155,35 +155,33 @@ jobs:
155155
run: |
156156
source activate pandas-dev
157157
158-
pytest pandas/tests/frame/methods
159-
pytest pandas/tests/frame/test_constructors.py
160-
pytest pandas/tests/frame/test_*
161-
pytest pandas/tests/frame/test_reductions.py
158+
pytest pandas/tests/frame/
162159
pytest pandas/tests/reductions/
163160
pytest pandas/tests/generic/test_generic.py
164161
pytest pandas/tests/arithmetic/
165162
pytest pandas/tests/groupby/
166163
pytest pandas/tests/resample/
167164
pytest pandas/tests/reshape/merge
168-
169-
pytest pandas/tests/series/methods
170-
pytest pandas/tests/series/test_*
165+
pytest pandas/tests/series/
171166
172167
# indexing subset (temporary since other tests don't pass yet)
173-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
174-
pytest pandas/tests/frame/indexing/test_where.py
175-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
176-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177168
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178169
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179170
180171
pytest pandas/tests/api/
172+
pytest pandas/tests/arrays/
181173
pytest pandas/tests/base/
182174
pytest pandas/tests/computation/
183175
pytest pandas/tests/config/
184176
pytest pandas/tests/dtypes/
185177
pytest pandas/tests/generic/
186178
pytest pandas/tests/indexes/
179+
pytest pandas/tests/io/test_* -m "not slow and not clipboard"
180+
pytest pandas/tests/io/excel/ -m "not slow and not clipboard"
181+
pytest pandas/tests/io/formats/ -m "not slow and not clipboard"
182+
pytest pandas/tests/io/parser/ -m "not slow and not clipboard"
183+
pytest pandas/tests/io/sas/ -m "not slow and not clipboard"
184+
pytest pandas/tests/io/xml/ -m "not slow and not clipboard"
187185
pytest pandas/tests/libs/
188186
pytest pandas/tests/plotting/
189187
pytest pandas/tests/scalar/

.pre-commit-config.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ repos:
1616
- id: codespell
1717
types_or: [python, rst, markdown]
1818
files: ^(pandas|doc)/
19-
exclude: ^pandas/tests/
2019
- repo: https://github.com/pre-commit/pre-commit-hooks
2120
rev: v3.4.0
2221
hooks:
@@ -95,7 +94,7 @@ repos:
9594
entry: python scripts/check_for_inconsistent_pandas_namespace.py
9695
language: python
9796
types: [python]
98-
files: ^pandas/tests/frame/
97+
files: ^pandas/tests/
9998
- id: incorrect-code-directives
10099
name: Check for incorrect code block or IPython directives
101100
language: pygrep

doc/source/_static/css/pandas.css

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
:root {
44
/* Use softer blue from bootstrap's default info color */
5-
--color-info: 23, 162, 184;
5+
--pst-color-info: 23, 162, 184;
66
}
77

88
/* Getting started index page */

doc/source/whatsnew/v1.3.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -591,7 +591,7 @@ Reshaping
591591
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``datetime64`` and ``timedelta64`` dtypes (:issue:`39574`)
592592
- Bug in :meth:`DataFrame.pivot_table` returning a ``MultiIndex`` for a single value when operating on an empty ``DataFrame`` (:issue:`13483`)
593593
- Allow :class:`Index` to be passed to the :func:`numpy.all` function (:issue:`40180`)
594-
-
594+
- Bug in :meth:`DataFrame.stack` not preserving ``CategoricalDtype`` in a ``MultiIndex`` (:issue:`36991`)
595595

596596
Sparse
597597
^^^^^^
@@ -613,7 +613,7 @@ Other
613613
- Bug in :func:`pandas.api.types.infer_dtype` not recognizing Series, Index or array with a period dtype (:issue:`23553`)
614614
- Bug in :func:`pandas.api.types.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`)
615615
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
616-
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
616+
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`, :issue:`40334`)
617617
- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`)
618618
- Bug in :meth:`Series.where` with numeric dtype and ``other = None`` not casting to ``nan`` (:issue:`39761`)
619619
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)

environment.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- conda-forge
44
dependencies:
55
# required
6-
- numpy>=1.16.5, <1.20 # gh-39513
6+
- numpy>=1.16.5
77
- python=3
88
- python-dateutil>=2.7.3
99
- pytz
@@ -113,5 +113,5 @@ dependencies:
113113
- tabulate>=0.8.3 # DataFrame.to_markdown
114114
- natsort # DataFrame.sort_values
115115
- pip:
116-
- git+https://github.com/pandas-dev/pydata-sphinx-theme.git@2488b7defbd3d753dd5fcfc890fc4a7e79d25103
116+
- git+https://github.com/pydata/pydata-sphinx-theme.git@master
117117
- numpydoc < 1.2 # 2021-02-09 1.2dev breaking CI

pandas/_libs/algos_take_helper.pxi.in

+2-2
Original file line numberDiff line numberDiff line change
@@ -230,10 +230,10 @@ ctypedef fused take_t:
230230
object
231231

232232

233-
cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
233+
cdef _take_2d(ndarray[take_t, ndim=2] values, ndarray[intp_t, ndim=2] idx):
234234
cdef:
235235
Py_ssize_t i, j, N, K
236-
ndarray[Py_ssize_t, ndim=2, cast=True] indexer = idx
236+
ndarray[intp_t, ndim=2, cast=True] indexer = idx
237237
ndarray[take_t, ndim=2] result
238238

239239
N, K = (<object>values).shape

pandas/_libs/internals.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ from pandas._libs.util cimport is_integer_object
2525

2626

2727
@cython.final
28+
@cython.freelist(32)
2829
cdef class BlockPlacement:
2930
# __slots__ = '_as_slice', '_as_array', '_len'
3031
cdef:

pandas/_libs/parsers.pyx

+17-114
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ from cpython.ref cimport (
3636
from cpython.unicode cimport (
3737
PyUnicode_AsUTF8String,
3838
PyUnicode_Decode,
39+
PyUnicode_DecodeUTF8,
3940
)
4041

4142

@@ -321,7 +322,6 @@ cdef class TextReader:
321322
bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns
322323
uint64_t parser_start
323324
list clocks
324-
char *c_encoding
325325
const char *encoding_errors
326326
kh_str_starts_t *false_set
327327
kh_str_starts_t *true_set
@@ -337,7 +337,7 @@ cdef class TextReader:
337337
object skiprows
338338
object dtype
339339
object usecols
340-
list dtype_cast_order
340+
list dtype_cast_order # list[np.dtype]
341341
set unnamed_cols
342342
set noconvert
343343

@@ -381,7 +381,6 @@ cdef class TextReader:
381381
encoding_errors=b"strict"):
382382

383383
# set encoding for native Python and C library
384-
self.c_encoding = NULL
385384
if isinstance(encoding_errors, str):
386385
encoding_errors = encoding_errors.encode("utf-8")
387386
Py_INCREF(encoding_errors)
@@ -638,7 +637,6 @@ cdef class TextReader:
638637
char *word
639638
object name, old_name
640639
uint64_t hr, data_line = 0
641-
StringPath path = _string_path(self.c_encoding)
642640
list header = []
643641
set unnamed_cols = set()
644642

@@ -678,8 +676,8 @@ cdef class TextReader:
678676
for i in range(field_count):
679677
word = self.parser.words[start + i]
680678

681-
name = PyUnicode_Decode(word, strlen(word),
682-
self.c_encoding, self.encoding_errors)
679+
name = PyUnicode_DecodeUTF8(word, strlen(word),
680+
self.encoding_errors)
683681

684682
# We use this later when collecting placeholder names.
685683
old_name = name
@@ -987,8 +985,7 @@ cdef class TextReader:
987985
f"for column {name} - only the converter will "
988986
f"be used"), ParserWarning,
989987
stacklevel=5)
990-
results[i] = _apply_converter(conv, self.parser, i, start, end,
991-
self.c_encoding)
988+
results[i] = _apply_converter(conv, self.parser, i, start, end)
992989
continue
993990

994991
# Collect the list of NaN values associated with the column.
@@ -1102,8 +1099,7 @@ cdef class TextReader:
11021099
# TODO: I suspect that _categorical_convert could be
11031100
# optimized when dtype is an instance of CategoricalDtype
11041101
codes, cats, na_count = _categorical_convert(
1105-
self.parser, i, start, end, na_filter,
1106-
na_hashset, self.c_encoding)
1102+
self.parser, i, start, end, na_filter, na_hashset)
11071103

11081104
# Method accepts list of strings, not encoded ones.
11091105
true_values = [x.decode() for x in self.true_values]
@@ -1199,14 +1195,8 @@ cdef class TextReader:
11991195
cdef _string_convert(self, Py_ssize_t i, int64_t start, int64_t end,
12001196
bint na_filter, kh_str_starts_t *na_hashset):
12011197

1202-
cdef StringPath path = _string_path(self.c_encoding)
1203-
1204-
if path == UTF8:
1205-
return _string_box_utf8(self.parser, i, start, end, na_filter,
1206-
na_hashset, self.encoding_errors)
1207-
elif path == ENCODED:
1208-
return _string_box_decode(self.parser, i, start, end,
1209-
na_filter, na_hashset, self.c_encoding)
1198+
return _string_box_utf8(self.parser, i, start, end, na_filter,
1199+
na_hashset, self.encoding_errors)
12101200

12111201
def _get_converter(self, i, name):
12121202
if self.converters is None:
@@ -1336,18 +1326,6 @@ def _maybe_upcast(arr):
13361326
return arr
13371327

13381328

1339-
cdef enum StringPath:
1340-
UTF8
1341-
ENCODED
1342-
1343-
1344-
# factored out logic to pick string converter
1345-
cdef inline StringPath _string_path(char *encoding):
1346-
if encoding != NULL and encoding != b"utf-8":
1347-
return ENCODED
1348-
return UTF8
1349-
1350-
13511329
# ----------------------------------------------------------------------
13521330
# Type conversions / inference support code
13531331

@@ -1406,68 +1384,10 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
14061384
return result, na_count
14071385

14081386

1409-
cdef _string_box_decode(parser_t *parser, int64_t col,
1410-
int64_t line_start, int64_t line_end,
1411-
bint na_filter, kh_str_starts_t *na_hashset,
1412-
char *encoding):
1413-
cdef:
1414-
int na_count = 0
1415-
Py_ssize_t i, size, lines
1416-
coliter_t it
1417-
const char *word = NULL
1418-
ndarray[object] result
1419-
1420-
int ret = 0
1421-
kh_strbox_t *table
1422-
1423-
char *errors = "strict"
1424-
1425-
object pyval
1426-
1427-
object NA = na_values[np.object_]
1428-
khiter_t k
1429-
1430-
table = kh_init_strbox()
1431-
lines = line_end - line_start
1432-
result = np.empty(lines, dtype=np.object_)
1433-
coliter_setup(&it, parser, col, line_start)
1434-
1435-
for i in range(lines):
1436-
COLITER_NEXT(it, word)
1437-
1438-
if na_filter:
1439-
if kh_get_str_starts_item(na_hashset, word):
1440-
# in the hash table
1441-
na_count += 1
1442-
result[i] = NA
1443-
continue
1444-
1445-
k = kh_get_strbox(table, word)
1446-
1447-
# in the hash table
1448-
if k != table.n_buckets:
1449-
# this increments the refcount, but need to test
1450-
pyval = <object>table.vals[k]
1451-
else:
1452-
# box it. new ref?
1453-
size = strlen(word)
1454-
pyval = PyUnicode_Decode(word, size, encoding, errors)
1455-
1456-
k = kh_put_strbox(table, word, &ret)
1457-
table.vals[k] = <PyObject *>pyval
1458-
1459-
result[i] = pyval
1460-
1461-
kh_destroy_strbox(table)
1462-
1463-
return result, na_count
1464-
1465-
14661387
@cython.boundscheck(False)
14671388
cdef _categorical_convert(parser_t *parser, int64_t col,
14681389
int64_t line_start, int64_t line_end,
1469-
bint na_filter, kh_str_starts_t *na_hashset,
1470-
char *encoding):
1390+
bint na_filter, kh_str_starts_t *na_hashset):
14711391
"Convert column data into codes, categories"
14721392
cdef:
14731393
int na_count = 0
@@ -1480,7 +1400,6 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
14801400
int64_t current_category = 0
14811401

14821402
char *errors = "strict"
1483-
StringPath path = _string_path(encoding)
14841403

14851404
int ret = 0
14861405
kh_str_t *table
@@ -1516,16 +1435,9 @@ cdef _categorical_convert(parser_t *parser, int64_t col,
15161435

15171436
# parse and box categories to python strings
15181437
result = np.empty(table.n_occupied, dtype=np.object_)
1519-
if path == ENCODED:
1520-
for k in range(table.n_buckets):
1521-
if kh_exist_str(table, k):
1522-
size = strlen(table.keys[k])
1523-
result[table.vals[k]] = PyUnicode_Decode(
1524-
table.keys[k], size, encoding, errors)
1525-
elif path == UTF8:
1526-
for k in range(table.n_buckets):
1527-
if kh_exist_str(table, k):
1528-
result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
1438+
for k in range(table.n_buckets):
1439+
if kh_exist_str(table, k):
1440+
result[table.vals[k]] = PyUnicode_FromString(table.keys[k])
15291441

15301442
kh_destroy_str(table)
15311443
return np.asarray(codes), result, na_count
@@ -2064,13 +1976,11 @@ for k in list(na_values):
20641976

20651977

20661978
cdef _apply_converter(object f, parser_t *parser, int64_t col,
2067-
int64_t line_start, int64_t line_end,
2068-
char* c_encoding):
1979+
int64_t line_start, int64_t line_end):
20691980
cdef:
20701981
Py_ssize_t i, lines
20711982
coliter_t it
20721983
const char *word = NULL
2073-
char *errors = "strict"
20741984
ndarray[object] result
20751985
object val
20761986

@@ -2079,17 +1989,10 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
20791989

20801990
coliter_setup(&it, parser, col, line_start)
20811991

2082-
if c_encoding == NULL or c_encoding == b'utf-8':
2083-
for i in range(lines):
2084-
COLITER_NEXT(it, word)
2085-
val = PyUnicode_FromString(word)
2086-
result[i] = f(val)
2087-
else:
2088-
for i in range(lines):
2089-
COLITER_NEXT(it, word)
2090-
val = PyUnicode_Decode(word, strlen(word),
2091-
c_encoding, errors)
2092-
result[i] = f(val)
1992+
for i in range(lines):
1993+
COLITER_NEXT(it, word)
1994+
val = PyUnicode_FromString(word)
1995+
result[i] = f(val)
20931996

20941997
return lib.maybe_convert_objects(result)
20951998

pandas/_testing/asserters.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -976,8 +976,8 @@ def assert_series_equal(
976976
left_values = left._values
977977
right_values = right._values
978978
# Only check exact if dtype is numeric
979-
if is_extension_array_dtype(left_values) and is_extension_array_dtype(
980-
right_values
979+
if isinstance(left_values, ExtensionArray) and isinstance(
980+
right_values, ExtensionArray
981981
):
982982
assert_extension_array_equal(
983983
left_values,

pandas/_typing.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747
from pandas.core.dtypes.dtypes import ExtensionDtype
4848

4949
from pandas import Interval
50-
from pandas.core.arrays.base import ExtensionArray # noqa: F401
50+
from pandas.core.arrays.base import ExtensionArray
5151
from pandas.core.frame import DataFrame
5252
from pandas.core.generic import NDFrame # noqa: F401
5353
from pandas.core.groupby.generic import (
@@ -74,8 +74,8 @@
7474

7575
# array-like
7676

77-
AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
78-
ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray)
77+
ArrayLike = Union["ExtensionArray", np.ndarray]
78+
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
7979

8080
# scalars
8181

0 commit comments

Comments
 (0)