Skip to content

Commit 559040d

Browse files
committed
Merge remote-tracking branch 'upstream/master' into pd_array-in-core
2 parents 0551498 + 9903a54 commit 559040d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+396
-240
lines changed

asv_bench/benchmarks/reshape.py

+36
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,42 @@ def time_unstack(self):
5353
self.df.unstack(1)
5454

5555

56+
class ReshapeExtensionDtype:
57+
58+
params = ["datetime64[ns, US/Pacific]", "Period[s]"]
59+
param_names = ["dtype"]
60+
61+
def setup(self, dtype):
62+
lev = pd.Index(list("ABCDEFGHIJ"))
63+
ri = pd.Index(range(1000))
64+
mi = MultiIndex.from_product([lev, ri], names=["foo", "bar"])
65+
66+
index = date_range("2016-01-01", periods=10000, freq="s", tz="US/Pacific")
67+
if dtype == "Period[s]":
68+
index = index.tz_localize(None).to_period("s")
69+
70+
ser = pd.Series(index, index=mi)
71+
df = ser.unstack("bar")
72+
# roundtrips -> df.stack().equals(ser)
73+
74+
self.ser = ser
75+
self.df = df
76+
77+
def time_stack(self, dtype):
78+
self.df.stack()
79+
80+
def time_unstack_fast(self, dtype):
81+
# last level -> doesn't have to make copies
82+
self.ser.unstack("bar")
83+
84+
def time_unstack_slow(self, dtype):
85+
# first level -> must make copies
86+
self.ser.unstack("foo")
87+
88+
def time_transpose(self, dtype):
89+
self.df.T
90+
91+
5692
class Unstack:
5793

5894
params = ["int", "category"]

doc/source/user_guide/window.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ The following formula is used to compute exponentially weighted mean with an inp
581581

582582
.. math::
583583
584-
y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{t-i}}{0.5^\frac{t_{t} - t_{i}}{\lambda}},
584+
y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{t-i}}{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda}},
585585
586586
587587
ExponentialMovingWindow also has an ``ignore_na`` argument, which determines how

doc/source/whatsnew/v1.3.0.rst

+5
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ Other enhancements
140140
- :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files.
141141
- Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`)
142142
- Add support for unary operators in :class:`FloatingArray` (:issue:`38749`)
143+
- :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. ``pd.RangeIndex(range(3))`` (:issue:`12067`)
143144
- :meth:`round` being enabled for the nullable integer and floating dtypes (:issue:`38844`)
145+
- :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
144146

145147
.. ---------------------------------------------------------------------------
146148
@@ -437,6 +439,7 @@ Timezones
437439
Numeric
438440
^^^^^^^
439441
- Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`)
442+
- Bug in :meth:`DataFrame.sort_values` raising an :class:`IndexError` for empty ``by`` (:issue:`40258`)
440443
- Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`)
441444
- Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`)
442445
- Bug in :meth:`DataFrame.rank` with ``np.inf`` and mixture of ``np.nan`` and ``np.inf`` (:issue:`32593`)
@@ -534,6 +537,7 @@ I/O
534537
- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
535538
- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
536539
- Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
540+
- Bug in :func:`read_excel` dropping empty values from single-column spreadsheets (:issue:`39808`)
537541

538542
Period
539543
^^^^^^
@@ -570,6 +574,7 @@ Groupby/resample/rolling
570574
- Bug in :meth:`DataFrameGroupBy.sample` where column selection was not applied to sample result (:issue:`39928`)
571575
- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would incorrectly raise a ``ValueError`` when providing ``times`` (:issue:`40164`)
572576
- Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`)
577+
- :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`)
573578

574579
Reshaping
575580
^^^^^^^^^

pandas/_libs/parsers.pyx

+28-25
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@ from libc.string cimport (
2020
import cython
2121
from cython import Py_ssize_t
2222

23-
from cpython.bytes cimport PyBytes_AsString
23+
from cpython.bytes cimport (
24+
PyBytes_AsString,
25+
PyBytes_FromString,
26+
)
2427
from cpython.exc cimport (
2528
PyErr_Fetch,
2629
PyErr_Occurred,
2730
)
2831
from cpython.object cimport PyObject
29-
from cpython.ref cimport Py_XDECREF
32+
from cpython.ref cimport (
33+
Py_INCREF,
34+
Py_XDECREF,
35+
)
3036
from cpython.unicode cimport (
3137
PyUnicode_AsUTF8String,
3238
PyUnicode_Decode,
@@ -143,7 +149,7 @@ cdef extern from "parser/tokenizer.h":
143149
enum: ERROR_OVERFLOW
144150

145151
ctypedef void* (*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
146-
int *status)
152+
int *status, const char *encoding_errors)
147153
ctypedef int (*io_cleanup)(void *src)
148154

149155
ctypedef struct parser_t:
@@ -255,8 +261,8 @@ cdef extern from "parser/tokenizer.h":
255261

256262
int parser_trim_buffers(parser_t *self)
257263

258-
int tokenize_all_rows(parser_t *self) nogil
259-
int tokenize_nrows(parser_t *self, size_t nrows) nogil
264+
int tokenize_all_rows(parser_t *self, const char *encoding_errors) nogil
265+
int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) nogil
260266

261267
int64_t str_to_int64(char *p_item, int64_t int_min,
262268
int64_t int_max, int *error, char tsep) nogil
@@ -293,7 +299,7 @@ cdef extern from "parser/io.h":
293299
size_t *bytes_read, int *status)
294300

295301
void* buffer_rd_bytes(void *source, size_t nbytes,
296-
size_t *bytes_read, int *status)
302+
size_t *bytes_read, int *status, const char *encoding_errors)
297303

298304

299305
cdef class TextReader:
@@ -316,6 +322,7 @@ cdef class TextReader:
316322
uint64_t parser_start
317323
list clocks
318324
char *c_encoding
325+
const char *encoding_errors
319326
kh_str_starts_t *false_set
320327
kh_str_starts_t *true_set
321328

@@ -370,10 +377,15 @@ cdef class TextReader:
370377
bint verbose=False,
371378
bint mangle_dupe_cols=True,
372379
float_precision=None,
373-
bint skip_blank_lines=True):
380+
bint skip_blank_lines=True,
381+
encoding_errors=b"strict"):
374382

375383
# set encoding for native Python and C library
376384
self.c_encoding = NULL
385+
if isinstance(encoding_errors, str):
386+
encoding_errors = encoding_errors.encode("utf-8")
387+
Py_INCREF(encoding_errors)
388+
self.encoding_errors = PyBytes_AsString(encoding_errors)
377389

378390
self.parser = parser_new()
379391
self.parser.chunksize = tokenize_chunksize
@@ -558,13 +570,7 @@ cdef class TextReader:
558570
pass
559571

560572
def __dealloc__(self):
561-
parser_free(self.parser)
562-
if self.true_set:
563-
kh_destroy_str_starts(self.true_set)
564-
self.true_set = NULL
565-
if self.false_set:
566-
kh_destroy_str_starts(self.false_set)
567-
self.false_set = NULL
573+
self.close()
568574
parser_del(self.parser)
569575

570576
def close(self):
@@ -632,7 +638,6 @@ cdef class TextReader:
632638
char *word
633639
object name, old_name
634640
uint64_t hr, data_line = 0
635-
char *errors = "strict"
636641
StringPath path = _string_path(self.c_encoding)
637642
list header = []
638643
set unnamed_cols = set()
@@ -673,11 +678,8 @@ cdef class TextReader:
673678
for i in range(field_count):
674679
word = self.parser.words[start + i]
675680

676-
if path == UTF8:
677-
name = PyUnicode_FromString(word)
678-
elif path == ENCODED:
679-
name = PyUnicode_Decode(word, strlen(word),
680-
self.c_encoding, errors)
681+
name = PyUnicode_Decode(word, strlen(word),
682+
self.c_encoding, self.encoding_errors)
681683

682684
# We use this later when collecting placeholder names.
683685
old_name = name
@@ -831,7 +833,7 @@ cdef class TextReader:
831833
int status
832834

833835
with nogil:
834-
status = tokenize_nrows(self.parser, nrows)
836+
status = tokenize_nrows(self.parser, nrows, self.encoding_errors)
835837

836838
if self.parser.warn_msg != NULL:
837839
print(self.parser.warn_msg, file=sys.stderr)
@@ -859,7 +861,7 @@ cdef class TextReader:
859861
'the whole file')
860862
else:
861863
with nogil:
862-
status = tokenize_all_rows(self.parser)
864+
status = tokenize_all_rows(self.parser, self.encoding_errors)
863865

864866
if self.parser.warn_msg != NULL:
865867
print(self.parser.warn_msg, file=sys.stderr)
@@ -1201,7 +1203,7 @@ cdef class TextReader:
12011203

12021204
if path == UTF8:
12031205
return _string_box_utf8(self.parser, i, start, end, na_filter,
1204-
na_hashset)
1206+
na_hashset, self.encoding_errors)
12051207
elif path == ENCODED:
12061208
return _string_box_decode(self.parser, i, start, end,
12071209
na_filter, na_hashset, self.c_encoding)
@@ -1352,7 +1354,8 @@ cdef inline StringPath _string_path(char *encoding):
13521354

13531355
cdef _string_box_utf8(parser_t *parser, int64_t col,
13541356
int64_t line_start, int64_t line_end,
1355-
bint na_filter, kh_str_starts_t *na_hashset):
1357+
bint na_filter, kh_str_starts_t *na_hashset,
1358+
const char *encoding_errors):
13561359
cdef:
13571360
int error, na_count = 0
13581361
Py_ssize_t i, lines
@@ -1391,7 +1394,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col,
13911394
pyval = <object>table.vals[k]
13921395
else:
13931396
# box it. new ref?
1394-
pyval = PyUnicode_FromString(word)
1397+
pyval = PyUnicode_Decode(word, strlen(word), "utf-8", encoding_errors)
13951398

13961399
k = kh_put_strbox(table, word, &ret)
13971400
table.vals[k] = <PyObject *>pyval

pandas/_libs/src/parser/io.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
163163
}
164164

165165
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
166-
int *status) {
166+
int *status, const char *encoding_errors) {
167167
PyGILState_STATE state;
168168
PyObject *result, *func, *args, *tmp;
169169

@@ -191,7 +191,7 @@ void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
191191
*status = CALLING_READ_FAILED;
192192
return NULL;
193193
} else if (!PyBytes_Check(result)) {
194-
tmp = PyUnicode_AsUTF8String(result);
194+
tmp = PyUnicode_AsEncodedString(result, "utf-8", encoding_errors);
195195
Py_DECREF(result);
196196
if (tmp == NULL) {
197197
PyGILState_Release(state);

pandas/_libs/src/parser/io.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,6 @@ void *buffer_file_bytes(void *source, size_t nbytes, size_t *bytes_read,
6464
int *status);
6565

6666
void *buffer_rd_bytes(void *source, size_t nbytes, size_t *bytes_read,
67-
int *status);
67+
int *status, const char *encoding_errors);
6868

6969
#endif // PANDAS__LIBS_SRC_PARSER_IO_H_

pandas/_libs/src/parser/tokenizer.c

+12-8
Original file line numberDiff line numberDiff line change
@@ -553,13 +553,15 @@ int parser_set_skipfirstnrows(parser_t *self, int64_t nrows) {
553553
return 0;
554554
}
555555

556-
static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
556+
static int parser_buffer_bytes(parser_t *self, size_t nbytes,
557+
const char *encoding_errors) {
557558
int status;
558559
size_t bytes_read;
559560

560561
status = 0;
561562
self->datapos = 0;
562-
self->data = self->cb_io(self->source, nbytes, &bytes_read, &status);
563+
self->data = self->cb_io(self->source, nbytes, &bytes_read, &status,
564+
encoding_errors);
563565
TRACE((
564566
"parser_buffer_bytes self->cb_io: nbytes=%zu, datalen: %d, status=%d\n",
565567
nbytes, bytes_read, status));
@@ -1334,7 +1336,8 @@ int parser_trim_buffers(parser_t *self) {
13341336
all : tokenize all the data vs. certain number of rows
13351337
*/
13361338

1337-
int _tokenize_helper(parser_t *self, size_t nrows, int all) {
1339+
int _tokenize_helper(parser_t *self, size_t nrows, int all,
1340+
const char *encoding_errors) {
13381341
int status = 0;
13391342
uint64_t start_lines = self->lines;
13401343

@@ -1350,7 +1353,8 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
13501353
if (!all && self->lines - start_lines >= nrows) break;
13511354

13521355
if (self->datapos == self->datalen) {
1353-
status = parser_buffer_bytes(self, self->chunksize);
1356+
status = parser_buffer_bytes(self, self->chunksize,
1357+
encoding_errors);
13541358

13551359
if (status == REACHED_EOF) {
13561360
// close out last line
@@ -1383,13 +1387,13 @@ int _tokenize_helper(parser_t *self, size_t nrows, int all) {
13831387
return status;
13841388
}
13851389

1386-
int tokenize_nrows(parser_t *self, size_t nrows) {
1387-
int status = _tokenize_helper(self, nrows, 0);
1390+
int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors) {
1391+
int status = _tokenize_helper(self, nrows, 0, encoding_errors);
13881392
return status;
13891393
}
13901394

1391-
int tokenize_all_rows(parser_t *self) {
1392-
int status = _tokenize_helper(self, -1, 1);
1395+
int tokenize_all_rows(parser_t *self, const char *encoding_errors) {
1396+
int status = _tokenize_helper(self, -1, 1, encoding_errors);
13931397
return status;
13941398
}
13951399

pandas/_libs/src/parser/tokenizer.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ typedef enum {
8585
} QuoteStyle;
8686

8787
typedef void *(*io_callback)(void *src, size_t nbytes, size_t *bytes_read,
88-
int *status);
88+
int *status, const char *encoding_errors);
8989
typedef int (*io_cleanup)(void *src);
9090

9191
typedef struct parser_t {
@@ -196,9 +196,9 @@ void parser_del(parser_t *self);
196196

197197
void parser_set_default_options(parser_t *self);
198198

199-
int tokenize_nrows(parser_t *self, size_t nrows);
199+
int tokenize_nrows(parser_t *self, size_t nrows, const char *encoding_errors);
200200

201-
int tokenize_all_rows(parser_t *self);
201+
int tokenize_all_rows(parser_t *self, const char *encoding_errors);
202202

203203
// Have parsed / type-converted a chunk of data
204204
// and want to free memory from the token stream

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -5818,7 +5818,7 @@ def sort_values( # type: ignore[override]
58185818
keys, orders=ascending, na_position=na_position, key=key
58195819
)
58205820
indexer = ensure_platform_int(indexer)
5821-
else:
5821+
elif len(by):
58225822

58235823
by = by[0]
58245824
k = self._get_label_or_level_values(by, axis=axis)
@@ -5833,6 +5833,8 @@ def sort_values( # type: ignore[override]
58335833
indexer = nargsort(
58345834
k, kind=kind, ascending=ascending, na_position=na_position, key=key
58355835
)
5836+
else:
5837+
return self.copy()
58365838

58375839
new_data = self._mgr.take(
58385840
indexer, axis=self._get_block_manager_axis(axis), verify=False

pandas/core/groupby/generic.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,7 @@
8181
validate_func_kwargs,
8282
)
8383
from pandas.core.apply import GroupByApply
84-
from pandas.core.arrays import (
85-
Categorical,
86-
ExtensionArray,
87-
)
84+
from pandas.core.arrays import Categorical
8885
from pandas.core.base import (
8986
DataError,
9087
SpecificationError,
@@ -1123,8 +1120,7 @@ def py_fallback(values: ArrayLike) -> ArrayLike:
11231120
obj: FrameOrSeriesUnion
11241121

11251122
# call our grouper again with only this block
1126-
if isinstance(values, ExtensionArray) or values.ndim == 1:
1127-
# TODO(EA2D): special case not needed with 2D EAs
1123+
if values.ndim == 1:
11281124
obj = Series(values)
11291125
else:
11301126
# TODO special case not needed with ArrayManager

0 commit comments

Comments
 (0)