
Commit 88b05e8

Merge remote-tracking branch 'upstream/master' into nullable_string_dtype
2 parents c095cd4 + 36502e9

File tree

97 files changed, +2009 -974 lines changed


codecov.yml (+1 -1)

@@ -8,7 +8,7 @@ coverage:
   status:
     project:
       default:
-        target: '82'
+        target: '72'
     patch:
       default:
         target: '50'

doc/cheatsheet/Pandas_Cheat_Sheet.pdf (binary)

9.56 KB -> 9.14 KB (binary file not shown)

doc/source/ecosystem.rst (+10 -1)

@@ -98,7 +98,8 @@ which can be used for a wide variety of time series data mining tasks.
 Visualization
 -------------
 
-While :ref:`pandas has built-in support for data visualization with matplotlib <visualization>`,
+`Pandas has its own Styler class for table visualization <user_guide/style.ipynb>`_, and while
+:ref:`pandas also has built-in support for data visualization through charts with matplotlib <visualization>`,
 there are a number of other pandas-compatible libraries.
 
 `Altair <https://altair-viz.github.io/>`__

@@ -368,6 +369,14 @@ far exceeding the performance of the native ``df.to_sql`` method. Internally, it
 Microsoft's BCP utility, but the complexity is fully abstracted away from the end user.
 Rigorously tested, it is a complete replacement for ``df.to_sql``.
 
+`Deltalake <https://pypi.org/project/deltalake>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Deltalake python package lets you access tables stored in
+`Delta Lake <https://delta.io/>`__ natively in Python without the need to use Spark or
+JVM. It provides the ``delta_table.to_pyarrow_table().to_pandas()`` method to convert
+any Delta table into Pandas dataframe.
+
 
 .. _ecosystem.out-of-core:

doc/source/user_guide/index.rst (+1 -1)

@@ -38,12 +38,12 @@ Further information on any specific method can be obtained in the
    integer_na
    boolean
    visualization
+   style
    computation
    groupby
    window
    timeseries
    timedeltas
-   style
    options
    enhancingperf
    scale

doc/source/user_guide/style.ipynb (+794 -404)

Large diffs are not rendered by default.

doc/source/user_guide/visualization.rst (+6 -3)

@@ -2,9 +2,12 @@
 
 {{ header }}
 
-*************
-Visualization
-*************
+*******************
+Chart Visualization
+*******************
+
+This section demonstrates visualization through charting. For information on
+visualization of tabular data please see the section on `Table Visualization <style.ipynb>`_.
 
 We use the standard convention for referencing the matplotlib API:

doc/source/whatsnew/v1.3.0.rst (+26 -0)

@@ -110,6 +110,30 @@ both XPath 1.0 and XSLT 1.0 is available. (:issue:`27554`)
 
 For more, see :ref:`io.xml` in the user guide on IO tools.
 
+.. _whatsnew_130.dataframe_honors_copy_with_dict:
+
+DataFrame constructor honors ``copy=False`` with dict
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When passing a dictionary to :class:`DataFrame` with ``copy=False``,
+a copy will no longer be made (:issue:`32960`)
+
+.. ipython:: python
+
+    arr = np.array([1, 2, 3])
+    df = pd.DataFrame({"A": arr, "B": arr.copy()}, copy=False)
+    df
+
+``df["A"]`` remains a view on ``arr``:
+
+.. ipython:: python
+
+    arr[0] = 0
+    assert df.iloc[0, 0] == 0
+
+The default behavior when not passing ``copy`` will remain unchanged, i.e.
+a copy will be made.
+
 .. _whatsnew_130.enhancements.other:
 
 Other enhancements
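The ``copy=False`` behavior added in this whatsnew entry can also be exercised as a plain script, outside the rendered ``.. ipython::`` blocks. This sketch assumes pandas 1.3 or later (with the classic, non-Copy-on-Write semantics), where a dict passed with ``copy=False`` is honored.

```python
import numpy as np
import pandas as pd

arr = np.array([1, 2, 3])

# copy=False: column "A" can share memory with arr (pandas >= 1.3),
# while "B" is built from an independent copy taken up front.
df = pd.DataFrame({"A": arr, "B": arr.copy()}, copy=False)

arr[0] = 0  # mutate the source ndarray

# On pandas >= 1.3 without Copy-on-Write, "A" reflects the mutation
# because it is a view on arr; "B" stays [1, 2, 3] regardless.
print(df["A"].tolist(), df["B"].tolist())
```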
@@ -546,6 +570,8 @@ Conversion
 - Bug in creating a :class:`DataFrame` from an empty ``np.recarray`` not retaining the original dtypes (:issue:`40121`)
 - Bug in :class:`DataFrame` failing to raise ``TypeError`` when constructing from a ``frozenset`` (:issue:`40163`)
 - Bug in :class:`Index` construction silently ignoring a passed ``dtype`` when the data cannot be cast to that dtype (:issue:`21311`)
+- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
+-
 
 Strings
 ^^^^^^^

pandas/_libs/algos.pyx (+4 -58)

@@ -794,68 +794,14 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray:
     return indexer
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None):
-    cdef:
-        Py_ssize_t i, N
-        algos_t val
-        uint8_t prev_mask
-        int lim, fill_count = 0
-
-    N = len(values)
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    val = values[N - 1]
-    prev_mask = mask[N - 1]
-    for i in range(N - 1, -1, -1):
-        if mask[i]:
-            if fill_count >= lim:
-                continue
-            fill_count += 1
-            values[i] = val
-            mask[i] = prev_mask
-        else:
-            fill_count = 0
-            val = values[i]
-            prev_mask = mask[i]
+    pad_inplace(values[::-1], mask[::-1], limit=limit)
 
 
-@cython.boundscheck(False)
-@cython.wraparound(False)
 def backfill_2d_inplace(algos_t[:, :] values,
                         const uint8_t[:, :] mask,
                         limit=None):
-    cdef:
-        Py_ssize_t i, j, N, K
-        algos_t val
-        int lim, fill_count = 0
-
-    K, N = (<object>values).shape
-
-    # GH#2778
-    if N == 0:
-        return
-
-    lim = validate_limit(N, limit)
-
-    for j in range(K):
-        fill_count = 0
-        val = values[j, N - 1]
-        for i in range(N - 1, -1, -1):
-            if mask[j, i]:
-                if fill_count >= lim:
-                    continue
-                fill_count += 1
-                values[j, i] = val
-            else:
-                fill_count = 0
-                val = values[j, i]
+    pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit)
 
 
 @cython.boundscheck(False)

@@ -987,10 +933,10 @@ def rank_1d(
     * max: highest rank in group
     * first: ranks assigned in order they appear in the array
     * dense: like 'min', but rank always increases by 1 between groups
-    ascending : boolean, default True
+    ascending : bool, default True
        False for ranks by high (1) to low (N)
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
-    pct : boolean, default False
+    pct : bool, default False
        Compute percentage rank of data within each group
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
     * keep: leave NA values where they are
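The refactor above replaces the hand-written backward loops with forward fill (pad) applied to reversed views: because a reversed memoryview shares memory with the original, padding it backfills the original in place. A minimal NumPy sketch of the same trick, with a pure-Python ``pad_inplace`` standing in for the Cython one:

```python
import numpy as np

def pad_inplace(values, mask, limit=None):
    """Forward-fill masked (missing) positions in place; `mask`
    marks missing entries, mirroring the Cython pad_inplace."""
    n = len(values)
    if n == 0:
        return
    lim = n if limit is None else limit
    fill_count = 0
    val, prev_mask = values[0], mask[0]
    for i in range(n):
        if mask[i]:
            if fill_count >= lim:
                continue
            fill_count += 1
            values[i] = val
            mask[i] = prev_mask
        else:
            fill_count = 0
            val, prev_mask = values[i], mask[i]

def backfill_inplace(values, mask, limit=None):
    # Backfill is forward fill run over reversed views; values[::-1]
    # is a NumPy view, so the writes land in the original arrays.
    pad_inplace(values[::-1], mask[::-1], limit=limit)

vals = np.array([1.0, 0.0, 0.0, 4.0])
mask = np.array([False, True, True, False])
backfill_inplace(vals, mask)  # the two masked slots take the value 4.0
```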

pandas/_libs/groupby.pyx (+25 -43)

@@ -402,9 +402,9 @@ def group_any_all(uint8_t[::1] out,
         ordering matching up to the corresponding record in `values`
     values : array containing the truth value of each element
     mask : array indicating whether a value is na or not
-    val_test : str {'any', 'all'}
+    val_test : {'any', 'all'}
         String object dictating whether to use any or all truth testing
-    skipna : boolean
+    skipna : bool
         Flag to ignore nan values during truth testing
 
     Notes

@@ -455,11 +455,11 @@ ctypedef fused complexfloating_t:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_add(complexfloating_t[:, ::1] out,
-               int64_t[::1] counts,
-               ndarray[complexfloating_t, ndim=2] values,
-               const intp_t[:] labels,
-               Py_ssize_t min_count=0):
+def group_add(complexfloating_t[:, ::1] out,
+              int64_t[::1] counts,
+              ndarray[complexfloating_t, ndim=2] values,
+              const intp_t[:] labels,
+              Py_ssize_t min_count=0):
     """
     Only aggregates on axis=0 using Kahan summation
     """

@@ -506,19 +506,13 @@ def _group_add(complexfloating_t[:, ::1] out,
                 out[i, j] = sumx[i, j]
 
 
-group_add_float32 = _group_add['float32_t']
-group_add_float64 = _group_add['float64_t']
-group_add_complex64 = _group_add['float complex']
-group_add_complex128 = _group_add['double complex']
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_prod(floating[:, ::1] out,
-                int64_t[::1] counts,
-                ndarray[floating, ndim=2] values,
-                const intp_t[:] labels,
-                Py_ssize_t min_count=0):
+def group_prod(floating[:, ::1] out,
+               int64_t[::1] counts,
+               ndarray[floating, ndim=2] values,
+               const intp_t[:] labels,
+               Py_ssize_t min_count=0):
     """
     Only aggregates on axis=0
     """

@@ -560,19 +554,15 @@ def _group_prod(floating[:, ::1] out,
                 out[i, j] = prodx[i, j]
 
 
-group_prod_float32 = _group_prod['float']
-group_prod_float64 = _group_prod['double']
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 @cython.cdivision(True)
-def _group_var(floating[:, ::1] out,
-               int64_t[::1] counts,
-               ndarray[floating, ndim=2] values,
-               const intp_t[:] labels,
-               Py_ssize_t min_count=-1,
-               int64_t ddof=1):
+def group_var(floating[:, ::1] out,
+              int64_t[::1] counts,
+              ndarray[floating, ndim=2] values,
+              const intp_t[:] labels,
+              Py_ssize_t min_count=-1,
+              int64_t ddof=1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, ct, oldmean

@@ -619,17 +609,13 @@ def _group_var(floating[:, ::1] out,
                 out[i, j] /= (ct - ddof)
 
 
-group_var_float32 = _group_var['float']
-group_var_float64 = _group_var['double']
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def _group_mean(floating[:, ::1] out,
-                int64_t[::1] counts,
-                ndarray[floating, ndim=2] values,
-                const intp_t[::1] labels,
-                Py_ssize_t min_count=-1):
+def group_mean(floating[:, ::1] out,
+               int64_t[::1] counts,
+               ndarray[floating, ndim=2] values,
+               const intp_t[::1] labels,
+               Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count, y, t

@@ -675,10 +661,6 @@ def _group_mean(floating[:, ::1] out,
                 out[i, j] = sumx[i, j] / count
 
 
-group_mean_float32 = _group_mean['float']
-group_mean_float64 = _group_mean['double']
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def group_ohlc(floating[:, ::1] out,

@@ -1083,10 +1065,10 @@ def group_rank(float64_t[:, ::1] out,
     * max: highest rank in group
     * first: ranks assigned in order they appear in the array
     * dense: like 'min', but rank always increases by 1 between groups
-    ascending : boolean, default True
+    ascending : bool, default True
        False for ranks by high (1) to low (N)
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
-    pct : boolean, default False
+    pct : bool, default False
        Compute percentage rank of data within each group
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
     * keep: leave NA values where they are
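The renamed ``group_add`` is documented as aggregating along axis 0 with Kahan summation. A plain-Python sketch of per-group Kahan-compensated sums (the names and 1-D shape here are illustrative, not the Cython signatures):

```python
import numpy as np

def group_add(values, labels, ngroups):
    """Per-group sums with Kahan compensation: each addition
    tracks the low-order bits lost to rounding and feeds them
    back into the next addition for that group."""
    out = np.zeros(ngroups)
    comp = np.zeros(ngroups)  # running compensation per group
    for val, lab in zip(values, labels):
        if lab < 0:
            continue  # negative label = NA group, skipped
        y = val - comp[lab]          # re-inject the lost low bits
        t = out[lab] + y             # one (possibly rounding) add
        comp[lab] = (t - out[lab]) - y  # recover what rounding dropped
        out[lab] = t
    return out

sums = group_add(np.array([1.5, 2.25, 0.5, 3.0]),
                 np.array([0, 1, 0, 1]), ngroups=2)
```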

pandas/_libs/hashtable_class_helper.pxi.in (+9 -9)

@@ -523,15 +523,15 @@ cdef class {{name}}HashTable(HashTable):
         any value "val" satisfying val != val is considered missing.
         If na_value is not None, then _additionally_, any value "val"
         satisfying val == na_value is considered missing.
-    ignore_na : boolean, default False
+    ignore_na : bool, default False
         Whether NA-values should be ignored for calculating the uniques. If
         True, the labels corresponding to missing values will be set to
         na_sentinel.
     mask : ndarray[bool], optional
         If not None, the mask is used as indicator for missing values
         (True = missing, False = valid) instead of `na_value` or
         condition "val != val".
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
         Whether the mapping of the original array values to their location
         in the vector of uniques should be returned.

@@ -625,7 +625,7 @@ cdef class {{name}}HashTable(HashTable):
     ----------
     values : ndarray[{{dtype}}]
         Array of values of which unique will be calculated
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
         Whether the mapping of the original array values to their location
         in the vector of uniques should be returned.

@@ -906,11 +906,11 @@ cdef class StringHashTable(HashTable):
         that is not a string is considered missing. If na_value is
         not None, then _additionally_ any value "val" satisfying
         val == na_value is considered missing.
-    ignore_na : boolean, default False
+    ignore_na : bool, default False
         Whether NA-values should be ignored for calculating the uniques. If
         True, the labels corresponding to missing values will be set to
         na_sentinel.
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
         Whether the mapping of the original array values to their location
         in the vector of uniques should be returned.

@@ -998,7 +998,7 @@ cdef class StringHashTable(HashTable):
     ----------
     values : ndarray[object]
         Array of values of which unique will be calculated
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
        Whether the mapping of the original array values to their location
        in the vector of uniques should be returned.

@@ -1181,11 +1181,11 @@ cdef class PyObjectHashTable(HashTable):
         any value "val" satisfying val != val is considered missing.
         If na_value is not None, then _additionally_, any value "val"
         satisfying val == na_value is considered missing.
-    ignore_na : boolean, default False
+    ignore_na : bool, default False
         Whether NA-values should be ignored for calculating the uniques. If
         True, the labels corresponding to missing values will be set to
         na_sentinel.
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
         Whether the mapping of the original array values to their location
         in the vector of uniques should be returned.

@@ -1251,7 +1251,7 @@ cdef class PyObjectHashTable(HashTable):
     ----------
     values : ndarray[object]
         Array of values of which unique will be calculated
-    return_inverse : boolean, default False
+    return_inverse : bool, default False
         Whether the mapping of the original array values to their location
         in the vector of uniques should be returned.
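The docstrings touched in this file all describe the same hash-table factorize contract: a vector of uniques plus labels mapping each value back into that vector, with missing values (``val != val``) mapped to ``na_sentinel`` when ``ignore_na`` is set. A dict-based Python sketch of that contract (illustrative only, not the khash implementation):

```python
import math

def factorize(values, na_sentinel=-1, ignore_na=True):
    """Return (uniques, labels): labels[i] is the position of
    values[i] in uniques, or na_sentinel for missing values."""
    table = {}     # value -> position in uniques
    uniques = []
    labels = []
    for val in values:
        # "val != val" missing-value rule: only NaN fails self-equality
        if ignore_na and isinstance(val, float) and math.isnan(val):
            labels.append(na_sentinel)
            continue
        if val not in table:
            table[val] = len(uniques)
            uniques.append(val)
        labels.append(table[val])
    return uniques, labels

uniques, labels = factorize(["a", float("nan"), "b", "a"])
```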

0 commit comments
