taytzehao
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎asv_bench/benchmarks/io/style.py
Lines changed: 4 additions & 4 deletions b/‎asv_bench/benchmarks/io/style.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎asv_bench/benchmarks/strings.py
Lines changed: 10 additions & 5 deletions b/‎asv_bench/benchmarks/strings.py
Lines changed: 10 additions & 5 deletions
diff --git a/‎ci/code_checks.sh
Lines changed: 28 additions & 78 deletions b/‎ci/code_checks.sh
Lines changed: 28 additions & 78 deletions
diff --git a/‎doc/source/whatsnew/v1.2.5.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/whatsnew/v1.2.5.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/whatsnew/v1.3.0.rst
Lines changed: 5 additions & 2 deletions b/‎doc/source/whatsnew/v1.3.0.rst
Lines changed: 5 additions & 2 deletions
diff --git a/‎pandas/_libs/src/ujson/python/objToJSON.c
Lines changed: 29 additions & 94 deletions b/‎pandas/_libs/src/ujson/python/objToJSON.c
Lines changed: 29 additions & 94 deletions
@@ -104,6 +104,7 @@ asv_bench/env/
 asv_bench/html/
 asv_bench/results/
 asv_bench/pandas/
+test-data.xml
 
 # Documentation generated files #
 #################################
 
@@ -17,19 +17,19 @@ def setup(self, cols, rows):
 
     def time_apply_render(self, cols, rows):
         self._style_apply()
-        self.st.render()
+        self.st._render_html()
 
     def peakmem_apply_render(self, cols, rows):
         self._style_apply()
-        self.st.render()
+        self.st._render_html()
 
     def time_classes_render(self, cols, rows):
         self._style_classes()
-        self.st.render()
+        self.st._render_html()
 
     def peakmem_classes_render(self, cols, rows):
         self._style_classes()
-        self.st.render()
+        self.st._render_html()
 
     def _style_apply(self):
         def _apply_func(s):
 
@@ -213,13 +213,18 @@ def time_cat(self, other_cols, sep, na_rep, na_frac):
 
 class Contains:
 
-    params = [True, False]
-    param_names = ["regex"]
+    params = (["str", "string", "arrow_string"], [True, False])
+    param_names = ["dtype", "regex"]
+
+    def setup(self, dtype, regex):
+        from pandas.core.arrays.string_arrow import ArrowStringDtype  # noqa: F401
 
-    def setup(self, regex):
-        self.s = Series(tm.makeStringIndex(10 ** 5))
+        try:
+            self.s = Series(tm.makeStringIndex(10 ** 5), dtype=dtype)
+        except ImportError:
+            raise NotImplementedError
 
-    def time_contains(self, regex):
+    def time_contains(self, dtype, regex):
         self.s.str.contains("A", regex=regex)
 
 
 
@@ -106,84 +106,34 @@ fi
 ### DOCTESTS ###
 if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
 
-    # Individual files
-
-    MSG='Doctests accessor.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/accessor.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests aggregation.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/aggregation.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests base.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/base.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests construction.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/construction.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests frame.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/frame.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests generic.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/generic.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests series.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/series.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests strings.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/strings/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests sql.py' ; echo $MSG
-    pytest -q --doctest-modules pandas/io/sql.py
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    # Directories
-
-    MSG='Doctests arrays'; echo $MSG
-    pytest -q --doctest-modules pandas/core/arrays/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests computation' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/computation/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests dtypes'; echo $MSG
-    pytest -q --doctest-modules pandas/core/dtypes/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests groupby' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/groupby/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests indexes' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/indexes/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests ops' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/ops/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests reshape' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/reshape/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests tools' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/tools/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests window' ; echo $MSG
-    pytest -q --doctest-modules pandas/core/window/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Doctests tseries' ; echo $MSG
-    pytest -q --doctest-modules pandas/tseries/
+    MSG='Doctests for individual files' ; echo $MSG
+    pytest -q --doctest-modules \
+      pandas/core/accessor.py \
+      pandas/core/aggregation.py \
+      pandas/core/algorithms.py \
+      pandas/core/base.py \
+      pandas/core/construction.py \
+      pandas/core/frame.py \
+      pandas/core/generic.py \
+      pandas/core/indexers.py \
+      pandas/core/nanops.py \
+      pandas/core/series.py \
+      pandas/io/sql.py
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Doctests for directories' ; echo $MSG
+    pytest -q --doctest-modules \
+      pandas/core/arrays/ \
+      pandas/core/computation/ \
+      pandas/core/dtypes/ \
+      pandas/core/groupby/ \
+      pandas/core/indexes/ \
+      pandas/core/ops/ \
+      pandas/core/reshape/ \
+      pandas/core/strings/ \
+      pandas/core/tools/ \
+      pandas/core/window/ \
+      pandas/tseries/
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 fi
 
@@ -15,7 +15,7 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
--
+- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
 -
 
 .. ---------------------------------------------------------------------------
 
@@ -609,7 +609,7 @@ Deprecations
 - Deprecated using :func:`merge` or :func:`join` on a different number of levels (:issue:`34862`)
 - Deprecated the use of ``**kwargs`` in :class:`.ExcelWriter`; use the keyword argument ``engine_kwargs`` instead (:issue:`40430`)
 - Deprecated the ``level`` keyword for :class:`DataFrame` and :class:`Series` aggregations; use groupby instead (:issue:`39983`)
-- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories` is deprecated and will be removed in a future version (:issue:`37643`)
+- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword  and already existing columns (:issue:`22818`)
 
 .. ---------------------------------------------------------------------------
@@ -705,8 +705,9 @@ Conversion
 - Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
 - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
 - Bug in :meth:`StringArray.astype` falling back to numpy and raising when converting to ``dtype='categorical'`` (:issue:`40450`)
+- Bug in :func:`factorize` where, when given an array with a numeric numpy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`)
 - Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
--
+- Bug in :func:`qcut` raising an error when taking ``Float64Dtype`` as input (:issue:`40730`)
 
 Strings
 ^^^^^^^
@@ -847,6 +848,8 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`)
 - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`)
 - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`)
+- Bug in :meth:`DataFrame.rolling` returning a mean of zero for an all-``NaN`` window with ``min_periods=0`` if the calculation is not numerically stable (:issue:`41053`)
+- Bug in :meth:`DataFrame.rolling` returning a non-zero sum for an all-``NaN`` window with ``min_periods=0`` if the calculation is not numerically stable (:issue:`41053`)
 
 Reshaping
 ^^^^^^^^^
 
@@ -83,7 +83,6 @@ typedef struct __PdBlockContext {
     int ncols;
     int transpose;
 
-    int *cindices;             // frame column -> block column map
     NpyArrContext **npyCtxts;  // NpyArrContext for each column
 } PdBlockContext;
 
@@ -294,7 +293,12 @@ static int is_simple_frame(PyObject *obj) {
     if (!mgr) {
         return 0;
     }
-    int ret = (get_attr_length(mgr, "blocks") <= 1);
+    int ret;
+    if (PyObject_HasAttrString(mgr, "blocks")) {
+        ret = (get_attr_length(mgr, "blocks") <= 1);
+    } else {
+        ret = 0;
+    }
 
     Py_DECREF(mgr);
     return ret;
@@ -656,16 +660,10 @@ void PdBlockPassThru_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
 }
 
 void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
-    PyObject *obj, *blocks, *block, *values, *tmp;
-    PyArrayObject *locs;
+    PyObject *obj, *values, *arrays, *array;
     PdBlockContext *blkCtxt;
     NpyArrContext *npyarr;
     Py_ssize_t i;
-    NpyIter *iter;
-    NpyIter_IterNextFunc *iternext;
-    npy_int64 **dataptr;
-    npy_int64 colIdx;
-    npy_intp idx;
 
     obj = (PyObject *)_obj;
 
@@ -687,7 +685,6 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
 
     if (blkCtxt->ncols == 0) {
         blkCtxt->npyCtxts = NULL;
-        blkCtxt->cindices = NULL;
 
         GET_TC(tc)->iterNext = NpyArr_iterNextNone;
         return;
@@ -701,104 +698,45 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) {
         return;
     }
 
-    blkCtxt->cindices = PyObject_Malloc(sizeof(int) * blkCtxt->ncols);
-    if (!blkCtxt->cindices) {
-        PyErr_NoMemory();
-        GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-        return;
-    }
-
-    blocks = get_sub_attr(obj, "_mgr", "blocks");
-    if (!blocks) {
+    arrays = get_sub_attr(obj, "_mgr", "column_arrays");
+    if (!arrays) {
         GET_TC(tc)->iterNext = NpyArr_iterNextNone;
         return;
-    } else if (!PyTuple_Check(blocks)) {
-        PyErr_SetString(PyExc_TypeError, "blocks must be a tuple!");
-        goto BLKRET;
     }
 
-    // force transpose so each NpyArrContext strides down its column
-    GET_TC(tc)->transpose = 1;
-
-    for (i = 0; i < PyObject_Length(blocks); i++) {
-        block = PyTuple_GET_ITEM(blocks, i);
-        if (!block) {
+    for (i = 0; i < PyObject_Length(arrays); i++) {
+        array = PyList_GET_ITEM(arrays, i);
+        if (!array) {
             GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
+            goto ARR_RET;
         }
 
-        tmp = PyObject_CallMethod(block, "get_block_values_for_json", NULL);
-        if (!tmp) {
+        // ensure we have a numpy array (i.e. np.asarray)
+        values = PyObject_CallMethod(array, "__array__", NULL);
+        if ((!values) || (!PyArray_CheckExact(values))) {
+            // Didn't get a numpy array
             ((JSONObjectEncoder *)tc->encoder)->errorMsg = "";
             GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
-        }
-
-        values = PyArray_Transpose((PyArrayObject *)tmp, NULL);
-        Py_DECREF(tmp);
-        if (!values) {
-            GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
-        }
-
-        locs = (PyArrayObject *)get_sub_attr(block, "mgr_locs", "as_array");
-        if (!locs) {
-            Py_DECREF(values);
-            GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
+            goto ARR_RET;
         }
 
-        iter = NpyIter_New(locs, NPY_ITER_READONLY, NPY_KEEPORDER,
-                           NPY_NO_CASTING, NULL);
-        if (!iter) {
-            Py_DECREF(values);
-            Py_DECREF(locs);
-            GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
-        }
-        iternext = NpyIter_GetIterNext(iter, NULL);
-        if (!iternext) {
-            NpyIter_Deallocate(iter);
-            Py_DECREF(values);
-            Py_DECREF(locs);
-            GET_TC(tc)->iterNext = NpyArr_iterNextNone;
-            goto BLKRET;
-        }
-        dataptr = (npy_int64 **)NpyIter_GetDataPtrArray(iter);
-        do {
-            colIdx = **dataptr;
-            idx = NpyIter_GetIterIndex(iter);
+        GET_TC(tc)->newObj = values;
 
-            blkCtxt->cindices[colIdx] = idx;
+        // init a dedicated context for this column
+        NpyArr_iterBegin(obj, tc);
+        npyarr = GET_TC(tc)->npyarr;
 
-            // Reference freed in Pdblock_iterend
-            Py_INCREF(values);
-            GET_TC(tc)->newObj = values;
-
-            // init a dedicated context for this column
-            NpyArr_iterBegin(obj, tc);
-            npyarr = GET_TC(tc)->npyarr;
-
-            // set the dataptr to our desired column and initialise
-            if (npyarr != NULL) {
-                npyarr->dataptr += npyarr->stride * idx;
-                NpyArr_iterNext(obj, tc);
-            }
-            GET_TC(tc)->itemValue = NULL;
-            ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL;
-
-            blkCtxt->npyCtxts[colIdx] = npyarr;
-            GET_TC(tc)->newObj = NULL;
-        } while (iternext(iter));
+        GET_TC(tc)->itemValue = NULL;
+        ((PyObjectEncoder *)tc->encoder)->npyCtxtPassthru = NULL;
 
-        NpyIter_Deallocate(iter);
-        Py_DECREF(values);
-        Py_DECREF(locs);
+        blkCtxt->npyCtxts[i] = npyarr;
+        GET_TC(tc)->newObj = NULL;
     }
     GET_TC(tc)->npyarr = blkCtxt->npyCtxts[0];
+    goto ARR_RET;
 
-BLKRET:
-    Py_DECREF(blocks);
+ARR_RET:
+    Py_DECREF(arrays);
 }
 
 void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
@@ -830,9 +768,6 @@ void PdBlock_iterEnd(JSOBJ obj, JSONTypeContext *tc) {
         if (blkCtxt->npyCtxts) {
             PyObject_Free(blkCtxt->npyCtxts);
         }
-        if (blkCtxt->cindices) {
-            PyObject_Free(blkCtxt->cindices);
-        }
         PyObject_Free(blkCtxt);
     }
 }
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ including other versions of pandas.`
`15`	`15`	`Fixed regressions`
`16`	`16`	`~~~~~~~~~~~~~~~~~`
`17`	`17`	- Regression in :func:`concat` between two :class:`DataFrames` where one has an :class:`Index` that is all-None and the other is :class:`DatetimeIndex` incorrectly raising (:issue:`40841`)
`18`		`--`
	`18`	+- Regression in :func:`read_csv` when using ``memory_map=True`` with a non-UTF8 encoding (:issue:`40986`)
`19`	`19`	`-`
`20`	`20`
`21`	`21`	`.. ---------------------------------------------------------------------------`