From 42d2347f2df73c4d704413c63a9bf78a797dbc3a Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Sat, 20 Jun 2020 17:56:08 +0200
Subject: [PATCH 1/8] TST: regression tests for indexing sparse dataframe with
 iterable

closes #34526
---
 pandas/tests/frame/indexing/test_indexing.py | 22 ++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 3865ea64ee479..9ef1cade6b7e5 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -20,9 +20,11 @@
     isna,
     notna,
 )
+import pandas.util._test_decorators as td
 import pandas._testing as tm
 from pandas.arrays import SparseArray
 import pandas.core.common as com
+from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.indexing import IndexingError
 
 from pandas.tseries.offsets import BDay
@@ -1921,6 +1923,26 @@ def test_getitem_sparse_column(self):
         result = df.loc[:, "A"]
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
+    @td.skip_if_no_scipy
+    def test_locindexer_from_spmatrix(self, spmatrix_t):
+        import scipy.sparse
+        spmatrix_t = getattr(scipy.sparse, spmatrix_t)
+
+        spmatrix = spmatrix_t([[1.0, 0.0], [0.0, 0.0]], dtype=np.float64)
+        df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
+
+        # regression test for #34526
+        itr_idx = [1]
+        result = df.loc[itr_idx].values
+        expected = spmatrix.toarray()[itr_idx]
+        tm.assert_numpy_array_equal(result, expected)
+
+        # regression test for #34540
+        result_t = df.loc[itr_idx].dtypes.values
+        expected_t = np.full(2, SparseDtype(np.float64, fill_value=0))
+        tm.assert_numpy_array_equal(result_t, expected_t)
+
     def test_setitem_with_unaligned_tz_aware_datetime_column(self):
         # GH 12981
         # Assignment of unaligned offset-aware datetime series.

From 3ce4a673c72a7619d21ea5f94dbc639b19d89576 Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Sat, 20 Jun 2020 19:02:14 +0200
Subject: [PATCH 2/8] Reorganise sparse indexing tests into a separate file

---
 pandas/tests/frame/indexing/test_indexing.py | 36 ---------------
 pandas/tests/frame/indexing/test_sparse.py   | 47 ++++++++++++++++++++
 2 files changed, 47 insertions(+), 36 deletions(-)
 create mode 100644 pandas/tests/frame/indexing/test_sparse.py

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 9ef1cade6b7e5..82a04e93dbcf1 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -22,9 +22,7 @@
 )
 import pandas.util._test_decorators as td
 import pandas._testing as tm
-from pandas.arrays import SparseArray
 import pandas.core.common as com
-from pandas.core.arrays.sparse import SparseDtype
 from pandas.core.indexing import IndexingError
 
 from pandas.tseries.offsets import BDay
@@ -1909,40 +1907,6 @@ def test_getitem_ix_float_duplicates(self):
         expect = df.iloc[[1, -1], 0]
         tm.assert_series_equal(df.loc[0.2, "a"], expect)
 
-    def test_getitem_sparse_column(self):
-        # https://github.com/pandas-dev/pandas/issues/23559
-        data = SparseArray([0, 1])
-        df = pd.DataFrame({"A": data})
-        expected = pd.Series(data, name="A")
-        result = df["A"]
-        tm.assert_series_equal(result, expected)
-
-        result = df.iloc[:, 0]
-        tm.assert_series_equal(result, expected)
-
-        result = df.loc[:, "A"]
-        tm.assert_series_equal(result, expected)
-
-    @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
-    @td.skip_if_no_scipy
-    def test_locindexer_from_spmatrix(self, spmatrix_t):
-        import scipy.sparse
-        spmatrix_t = getattr(scipy.sparse, spmatrix_t)
-
-        spmatrix = spmatrix_t([[1.0, 0.0], [0.0, 0.0]], dtype=np.float64)
-        df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
-
-        # regression test for #34526
-        itr_idx = [1]
-        result = df.loc[itr_idx].values
-        expected = spmatrix.toarray()[itr_idx]
-        tm.assert_numpy_array_equal(result, expected)
-
-        # regression test for #34540
-        result_t = df.loc[itr_idx].dtypes.values
-        expected_t = np.full(2, SparseDtype(np.float64, fill_value=0))
-        tm.assert_numpy_array_equal(result_t, expected_t)
-
     def test_setitem_with_unaligned_tz_aware_datetime_column(self):
         # GH 12981
         # Assignment of unaligned offset-aware datetime series.
diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py
new file mode 100644
index 0000000000000..b22eaad329582
--- /dev/null
+++ b/pandas/tests/frame/indexing/test_sparse.py
@@ -0,0 +1,47 @@
+import numpy as np
+import pandas as pd
+
+import pandas.util._test_decorators as td
+import pandas._testing as tm
+
+from pandas.arrays import SparseArray
+from pandas.core.arrays.sparse import SparseDtype
+
+import pytest
+
+
+class TestSparseDataFrameIndexing:
+    def test_getitem_sparse_column(self):
+        # https://github.com/pandas-dev/pandas/issues/23559
+        data = SparseArray([0, 1])
+        df = pd.DataFrame({"A": data})
+        expected = pd.Series(data, name="A")
+        result = df["A"]
+        tm.assert_series_equal(result, expected)
+
+        result = df.iloc[:, 0]
+        tm.assert_series_equal(result, expected)
+
+        result = df.loc[:, "A"]
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
+    @td.skip_if_no_scipy
+    def test_locindexer_from_spmatrix(self, spmatrix_t):
+        import scipy.sparse
+
+        spmatrix_t = getattr(scipy.sparse, spmatrix_t)
+
+        spmatrix = spmatrix_t([[1.0, 0.0], [0.0, 0.0]], dtype=np.float64)
+        df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
+
+        # regression test for #34526
+        itr_idx = [1]
+        result = df.loc[itr_idx].values
+        expected = spmatrix.toarray()[itr_idx]
+        tm.assert_numpy_array_equal(result, expected)
+
+        # regression test for #34540
+        result = df.loc[itr_idx].dtypes.values
+        expected = np.full(2, SparseDtype(np.float64, fill_value=0))
+        tm.assert_numpy_array_equal(result, expected)

From a82683ae5203db7d8fd264ced4eb70f06e85ee53 Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 24 Jun 2020 12:17:44 +0200
Subject: [PATCH 3/8] sparse/array.py: fix the dtype when indexing only sparse
 elements

---
 pandas/core/arrays/sparse/array.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 4996a10002c63..b18a58da3950f 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -866,11 +866,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray:
 
         if self.sp_index.npoints == 0:
             # Avoid taking from the empty self.sp_values
-            taken = np.full(
-                sp_indexer.shape,
-                fill_value=fill_value,
-                dtype=np.result_type(type(fill_value)),
-            )
+            _dtype = np.result_type(self.dtype.subtype, type(fill_value))
+            taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype)
         else:
             taken = self.sp_values.take(sp_indexer)
 

From ac82243b6a137f064d7463605935c8527827ae78 Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 24 Jun 2020 15:17:39 +0200
Subject: [PATCH 4/8] indexing/test_sparse.py: more thorough indexing by
 iterable tests

---
 pandas/tests/frame/indexing/test_sparse.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py
index b22eaad329582..362295cbfa779 100644
--- a/pandas/tests/frame/indexing/test_sparse.py
+++ b/pandas/tests/frame/indexing/test_sparse.py
@@ -26,22 +26,27 @@ def test_getitem_sparse_column(self):
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
+    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex])
     @td.skip_if_no_scipy
-    def test_locindexer_from_spmatrix(self, spmatrix_t):
+    def test_locindexer_from_spmatrix(self, spmatrix_t, dtype):
         import scipy.sparse
 
         spmatrix_t = getattr(scipy.sparse, spmatrix_t)
 
-        spmatrix = spmatrix_t([[1.0, 0.0], [0.0, 0.0]], dtype=np.float64)
+        # The bug is triggered by a sparse matrix with columns that hold only fill
+        # values.  So the recipe below generates a rectangular matrix of dimension
+        # (5, 7) with ones on the diagonal, leaving the last two columns all zeros.
+        rows, cols = 5, 7
+        spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
         df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
 
         # regression test for #34526
-        itr_idx = [1]
+        itr_idx = range(2, rows)
         result = df.loc[itr_idx].values
         expected = spmatrix.toarray()[itr_idx]
         tm.assert_numpy_array_equal(result, expected)
 
         # regression test for #34540
         result = df.loc[itr_idx].dtypes.values
-        expected = np.full(2, SparseDtype(np.float64, fill_value=0))
+        expected = np.full(cols, SparseDtype(dtype, fill_value=0))
         tm.assert_numpy_array_equal(result, expected)

From c785d9e0353c6a5dc3fc0a68368b2de485874175 Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 24 Jun 2020 15:25:36 +0200
Subject: [PATCH 5/8] indexing/test_indexing.py: remove unused import

---
 pandas/tests/frame/indexing/test_indexing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
index 82a04e93dbcf1..3fa3c9303806f 100644
--- a/pandas/tests/frame/indexing/test_indexing.py
+++ b/pandas/tests/frame/indexing/test_indexing.py
@@ -20,7 +20,6 @@
     isna,
     notna,
 )
-import pandas.util._test_decorators as td
 import pandas._testing as tm
 import pandas.core.common as com
 from pandas.core.indexing import IndexingError

From d015d032424a1b4baf2d37b46f60056f996e1ffb Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 24 Jun 2020 16:03:43 +0200
Subject: [PATCH 6/8] indexing/test_sparse.py: make isort happy

---
 pandas/tests/frame/indexing/test_sparse.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py
index 362295cbfa779..0aead625a7955 100644
--- a/pandas/tests/frame/indexing/test_sparse.py
+++ b/pandas/tests/frame/indexing/test_sparse.py
@@ -1,14 +1,13 @@
 import numpy as np
-import pandas as pd
+import pytest
 
 import pandas.util._test_decorators as td
-import pandas._testing as tm
 
+import pandas as pd
+import pandas._testing as tm
 from pandas.arrays import SparseArray
 from pandas.core.arrays.sparse import SparseDtype
 
-import pytest
-
 
 class TestSparseDataFrameIndexing:
     def test_getitem_sparse_column(self):

From d1446df05e8c4bbdb7d43442bcab1ffc4749202d Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 24 Jun 2020 16:06:49 +0200
Subject: [PATCH 7/8] indexing/test_sparse.py: resolve deprecation warning from
 np_dev

---
 pandas/tests/frame/indexing/test_sparse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/indexing/test_sparse.py b/pandas/tests/frame/indexing/test_sparse.py
index 0aead625a7955..876fbe212c466 100644
--- a/pandas/tests/frame/indexing/test_sparse.py
+++ b/pandas/tests/frame/indexing/test_sparse.py
@@ -25,7 +25,7 @@ def test_getitem_sparse_column(self):
         tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
-    @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex])
+    @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
     @td.skip_if_no_scipy
     def test_locindexer_from_spmatrix(self, spmatrix_t, dtype):
         import scipy.sparse

From bbd193c8ac92718f1db91bee8e4cace334d58c21 Mon Sep 17 00:00:00 2001
From: Suvayu Ali <fatkasuvayu+linux@gmail.com>
Date: Wed, 8 Jul 2020 00:02:18 +0200
Subject: [PATCH 8/8] v1.1.0.rst: add entry under bug fixes

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index cee41f248fc60..386fe3ce2160f 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -1124,6 +1124,7 @@ Sparse
 - Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
 - The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s  string representation (:issue:`34352`)
 - Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`)
+- Bug where :class:`arrays.SparseArray` returned the incorrect type when indexing a sparse :class:`DataFrame` with an iterable (:issue:`34526`, :issue:`34540`)
 
 ExtensionArray
 ^^^^^^^^^^^^^^