From 7804303df362de7a340590c38037413578cb6b11 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Fri, 15 Sep 2017 08:38:36 +0900
Subject: [PATCH 01/11] BUG: Fix make_sparse mask generation not to cast when
 dtype is object

---
 pandas/core/sparse/array.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index f965c91999a03..be1b94cff57c2 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -19,6 +19,7 @@
 from pandas.core.dtypes.common import (
     _ensure_platform_int,
     is_float, is_integer,
+    is_object_dtype,
     is_integer_dtype,
     is_bool_dtype,
     is_list_like,
@@ -789,7 +790,16 @@ def make_sparse(arr, kind='block', fill_value=None):
         if is_string_dtype(arr):
             arr = arr.astype(object)
 
-        mask = arr != fill_value
+        if is_object_dtype(arr.dtype):
+            mask = []
+            for e in arr:
+                if type(e) is type(fill_value):
+                    mask.append(e != fill_value)
+                else:
+                    mask.append(True)
+            mask = np.array(mask)
+        else:
+            mask = arr != fill_value
 
     length = len(arr)
     if length != mask.size:

From b64c123fe508b8d7ef6d2f655630aa4764a971b3 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Tue, 19 Sep 2017 00:37:35 +0900
Subject: [PATCH 02/11] TST: Add test of the make_sparse mask generation

---
 pandas/tests/sparse/test_array.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index b0a9182a265fe..63050f6ce6fec 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -61,6 +61,12 @@ def test_constructor_object_dtype(self):
         assert arr.dtype == np.object
         assert arr.fill_value == 'A'
 
+        data = [False, 0, 100.0, 0.0]
+        arr = SparseArray(data, dtype=np.object, fill_value=False)
+        assert arr.dtype == np.object
+        assert arr.fill_value is False
+        assert (arr == np.array(data, dtype=np.object)).to_dense().all()
+
     def test_constructor_spindex_dtype(self):
         arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
         tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))

From bff0ac0305e106af8ac4674a243f54456e517f79 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Fri, 22 Sep 2017 00:11:24 +0900
Subject: [PATCH 03/11] TST: Add GitHub PR number comment on the test

---
 pandas/tests/sparse/test_array.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index 63050f6ce6fec..fc6d1b7579fdf 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -61,6 +61,7 @@ def test_constructor_object_dtype(self):
         assert arr.dtype == np.object
         assert arr.fill_value == 'A'
 
+        # GH 17574
         data = [False, 0, 100.0, 0.0]
         arr = SparseArray(data, dtype=np.object, fill_value=False)
         assert arr.dtype == np.object

From 7190704e45b439c6bf0cba5c84407118ec31cfa4 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Fri, 22 Sep 2017 09:48:58 +0900
Subject: [PATCH 04/11] BUG: Fix the element-wise mask generation method in
 make_sparse

---
 pandas/core/sparse/array.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index be1b94cff57c2..7cba94f09b01f 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -791,13 +791,12 @@ def make_sparse(arr, kind='block', fill_value=None):
             arr = arr.astype(object)
 
         if is_object_dtype(arr.dtype):
-            mask = []
-            for e in arr:
-                if type(e) is type(fill_value):
-                    mask.append(e != fill_value)
-                else:
-                    mask.append(True)
-            mask = np.array(mask)
+            mask = np.ones(len(arr), dtype=np.bool)
+            fv_type = type(fill_value)
+
+            itr = (type(x) is fv_type for x in arr)
+            cond = np.fromiter(itr, dtype=np.bool)
+            mask[cond] = arr[cond] != fill_value
         else:
             mask = arr != fill_value
 

From c9d674a3b88f43dd26ed3a67723878d92459d2e9 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Fri, 22 Sep 2017 11:29:05 +0900
Subject: [PATCH 05/11] DOC: Add the description of make_sparse bug fix in
 whatsnew note

---
 doc/source/whatsnew/v0.21.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 23a98d59554e9..448ed279cdf23 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -544,6 +544,7 @@ Sparse
 
 - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`)
 - Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`)
+- Bug in :func:`make_sparse` treating numeric/boolean values that compare equal but differ in type (e.g. ``0``, ``0.0`` and ``False``) as the same value when the array ``dtype`` is ``object`` (:issue:`17574`)
 
 
 Reshaping

From 7727c5a12dac328098b65b5effedef9031bb5f47 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Fri, 22 Sep 2017 11:40:43 +0900
Subject: [PATCH 06/11] TST: Fix wrong array comparison in the make_sparse
 bug-fix test

---
 pandas/tests/sparse/test_array.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index fc6d1b7579fdf..f653ee50982ad 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -66,7 +66,9 @@ def test_constructor_object_dtype(self):
         arr = SparseArray(data, dtype=np.object, fill_value=False)
         assert arr.dtype == np.object
         assert arr.fill_value is False
-        assert (arr == np.array(data, dtype=np.object)).to_dense().all()
+        arr_expected = np.array(data, dtype=np.object)
+        it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
+        assert np.fromiter(it, dtype=np.bool).all()
 
     def test_constructor_spindex_dtype(self):
         arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))

From 26cb4cae711488dc43d0c0239718947342d8d8d1 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Sat, 23 Sep 2017 00:47:58 +0900
Subject: [PATCH 07/11] BUG: Simplified the mask generation method in
 make_sparse

---
 pandas/core/sparse/array.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 7cba94f09b01f..bc8faaf432184 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -794,9 +794,9 @@ def make_sparse(arr, kind='block', fill_value=None):
             mask = np.ones(len(arr), dtype=np.bool)
             fv_type = type(fill_value)
 
-            itr = (type(x) is fv_type for x in arr)
-            cond = np.fromiter(itr, dtype=np.bool)
-            mask[cond] = arr[cond] != fill_value
+            for i, x in enumerate(arr):
+                if type(x) is fv_type:
+                    mask[i] = arr[i] != fill_value
         else:
             mask = arr != fill_value
 

From 8e01026b4f3e8befb58d950c3d3e10f23d5e0e9c Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Mon, 25 Sep 2017 00:40:58 +0900
Subject: [PATCH 08/11] TST: Add the SparseArray constructor performance test

---
 asv_bench/benchmarks/sparse.py | 54 +++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index 7259e8cdb7d61..4fd38fc20d412 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -2,7 +2,7 @@
 
 from .pandas_vb_common import *
 import scipy.sparse
-from pandas import SparseSeries, SparseDataFrame
+from pandas import SparseSeries, SparseDataFrame, SparseArray
 
 
 class sparse_series_to_frame(object):
@@ -23,6 +23,58 @@ def time_sparse_series_to_frame(self):
         SparseDataFrame(self.series)
 
 
+class sparse_array_constructor(object):
+    goal_time = 0.2
+
+    def setup(self):
+        np.random.seed(1)
+        self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64)
+        self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64)
+
+        self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
+        self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
+
+        self.object_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
+        self.object_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
+
+    def make_numeric_array(self, length, dense_size, fill_value, dtype):
+        arr = np.array([fill_value] * length, dtype=dtype)
+        indexer = np.unique(np.random.randint(0, length, dense_size))
+        arr[indexer] = np.random.randint(0, 100, len(indexer))
+        return (arr, fill_value, dtype)
+
+    def make_object_array(self, length, dense_size, fill_value):
+        elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object)
+        arr = np.array([fill_value] * length, dtype=np.object)
+        indexer = np.unique(np.random.randint(0, length, dense_size))
+        arr[indexer] = np.random.choice(elems, len(indexer))
+        return (arr, fill_value, np.object)
+
+    def time_sparse_array_constructor_int64_10percent(self):
+        arr, fill_value, dtype = self.int64_10percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_int64_1percent(self):
+        arr, fill_value, dtype = self.int64_1percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_float64_10percent(self):
+        arr, fill_value, dtype = self.float64_10percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_float64_1percent(self):
+        arr, fill_value, dtype = self.float64_1percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_object_10percent(self):
+        arr, fill_value, dtype = self.object_10percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_object_1percent(self):
+        arr, fill_value, dtype = self.object_1percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+
 class sparse_frame_constructor(object):
     goal_time = 0.2
 

From a48f95726faf686e50c3d3725e71c9e60be13425 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Mon, 25 Sep 2017 02:05:05 +0900
Subject: [PATCH 09/11] DOC: Add the description of make_sparse mask generation
 routine

---
 pandas/core/sparse/array.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index bc8faaf432184..72bc3fbb3e8d1 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -791,6 +791,9 @@ def make_sparse(arr, kind='block', fill_value=None):
             arr = arr.astype(object)
 
         if is_object_dtype(arr.dtype):
+            # element-wise equality check method in numpy doesn't treat
+            # each element type, eg. 0, 0.0, and False are treated as
+            # same. So we have to check the both of its type and value.
             mask = np.ones(len(arr), dtype=np.bool)
             fv_type = type(fill_value)
 

From 35890726d2ea19758439f8d8448ab3323c7d0493 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Tue, 26 Sep 2017 22:57:03 +0900
Subject: [PATCH 10/11] PERF: Reimplement the SparseArray mask create method
 for object ndarray by Cython

---
 pandas/_libs/sparse.pyx     | 19 +++++++++++++++++++
 pandas/core/sparse/array.py |  7 +------
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx
index 1cc7f5ace95ea..fac678e531c8b 100644
--- a/pandas/_libs/sparse.pyx
+++ b/pandas/_libs/sparse.pyx
@@ -848,3 +848,22 @@ def reindex_integer(ndarray[float64_t, ndim=1] values,
                     IntIndex sparse_index,
                     ndarray[int32_t, ndim=1] indexer):
     pass
+
+
+# -----------------------------------------------------------------------------
+# SparseArray mask creation operations
+
+def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value):
+    cdef object value
+    cdef Py_ssize_t i
+    cdef Py_ssize_t new_length = len(arr)
+    cdef ndarray[int8_t, ndim=1] mask
+
+    mask = np.ones(new_length, dtype=np.int8)
+
+    for i in range(new_length):
+        value = arr[i]
+        if value == fill_value and type(value) == type(fill_value):
+            mask[i] = 0
+
+    return mask.view(dtype=np.bool)
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 72bc3fbb3e8d1..3b45a013734c9 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -794,12 +794,7 @@ def make_sparse(arr, kind='block', fill_value=None):
             # element-wise equality check method in numpy doesn't treat
             # each element type, eg. 0, 0.0, and False are treated as
             # same. So we have to check the both of its type and value.
-            mask = np.ones(len(arr), dtype=np.bool)
-            fv_type = type(fill_value)
-
-            for i, x in enumerate(arr):
-                if type(x) is fv_type:
-                    mask[i] = arr[i] != fill_value
+            mask = splib.make_mask_object_ndarray(arr, fill_value)
         else:
             mask = arr != fill_value
 

From 48687b32710ff0be3fa8753aaffe8dd3f76d94d6 Mon Sep 17 00:00:00 2001
From: Licht-T <licht-t@outlook.jp>
Date: Wed, 27 Sep 2017 00:41:19 +0900
Subject: [PATCH 11/11] TST: Add asv tests for the object dtype SparseArray
 with NaN fill value

---
 asv_bench/benchmarks/sparse.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py
index 4fd38fc20d412..a435882bbca71 100644
--- a/asv_bench/benchmarks/sparse.py
+++ b/asv_bench/benchmarks/sparse.py
@@ -34,8 +34,11 @@ def setup(self):
         self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64)
         self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64)
 
-        self.object_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
-        self.object_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
+        self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan)
+        self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan)
+
+        self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0)
+        self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0)
 
     def make_numeric_array(self, length, dense_size, fill_value, dtype):
         arr = np.array([fill_value] * length, dtype=dtype)
@@ -66,12 +69,20 @@ def time_sparse_array_constructor_float64_1percent(self):
         arr, fill_value, dtype = self.float64_1percent
         SparseArray(arr, fill_value=fill_value, dtype=dtype)
 
-    def time_sparse_array_constructor_object_10percent(self):
-        arr, fill_value, dtype = self.object_10percent
+    def time_sparse_array_constructor_object_nan_fill_value_10percent(self):
+        arr, fill_value, dtype = self.object_nan_fill_value_10percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_object_nan_fill_value_1percent(self):
+        arr, fill_value, dtype = self.object_nan_fill_value_1percent
+        SparseArray(arr, fill_value=fill_value, dtype=dtype)
+
+    def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self):
+        arr, fill_value, dtype = self.object_non_nan_fill_value_10percent
         SparseArray(arr, fill_value=fill_value, dtype=dtype)
 
-    def time_sparse_array_constructor_object_1percent(self):
-        arr, fill_value, dtype = self.object_1percent
+    def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self):
+        arr, fill_value, dtype = self.object_non_nan_fill_value_1percent
         SparseArray(arr, fill_value=fill_value, dtype=dtype)