From 33281d5a25422e5b129752b1072117bb9a0ae2f9 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 19 Feb 2020 07:44:39 -0600
Subject: [PATCH 1/3] BUG: Pickle NA objects
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

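Define ``NAType.__reduce__`` to return the string "NA" so that unpickling
gives back the existing ``NA`` singleton instead of creating a new object.

According to
https://docs.python.org/3/library/pickle.html#object.__reduce__,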

> If a string is returned, the string should be interpreted as the name
> of a global variable. It should be the object’s local name relative to
> its module; the pickle module searches the module namespace to determine
> the object’s module. This behaviour is typically useful for singletons.

Closes https://github.com/pandas-dev/pandas/issues/31847
---
 doc/source/whatsnew/v1.0.2.rst        | 1 +
 pandas/_libs/missing.pyx              | 3 +++
 pandas/tests/scalar/test_na_scalar.py | 8 ++++++++
 3 files changed, 12 insertions(+)

diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst
index c9031ac1ae9fe..0201f29fa87e4 100644
--- a/doc/source/whatsnew/v1.0.2.rst
+++ b/doc/source/whatsnew/v1.0.2.rst
@@ -37,6 +37,7 @@ Bug fixes
 **I/O**
 
 - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`)
+- Fixed pickling of ``pandas.NA``. Previously a new object was returned, which broke computations relying on ``NA`` being a singleton (:issue:`31847`)
 - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`).
 
 
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index 4d17a6f883c1c..c54cb652d7b21 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -364,6 +364,9 @@ class NAType(C_NAType):
         exponent = 31 if is_32bit else 61
         return 2 ** exponent - 1
 
+    def __reduce__(self):
+        return "NA"
+
     # Binary arithmetic and comparison ops -> propagate
 
     __add__ = _create_binary_propagating_op("__add__")
diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py
index dcb9d66708724..0bb7d3e547c9c 100644
--- a/pandas/tests/scalar/test_na_scalar.py
+++ b/pandas/tests/scalar/test_na_scalar.py
@@ -1,3 +1,5 @@
+import pickle
+
 import numpy as np
 import pytest
 
@@ -267,3 +269,9 @@ def test_integer_hash_collision_set():
     assert len(result) == 2
     assert NA in result
     assert hash(NA) in result
+
+
+def test_pickle_roundtrip():
+    # https://github.com/pandas-dev/pandas/issues/31847
+    result = pickle.loads(pickle.dumps(pd.NA))
+    assert result is pd.NA

From 8827a504284158e2ead0f76be3202959ce643bda Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 19 Feb 2020 08:56:00 -0600
Subject: [PATCH 2/3] TST: Add pickle round-trip tests for NA via pandas I/O and Series

---
 pandas/tests/scalar/test_na_scalar.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py
index 0bb7d3e547c9c..6dff0eb8ae91f 100644
--- a/pandas/tests/scalar/test_na_scalar.py
+++ b/pandas/tests/scalar/test_na_scalar.py
@@ -275,3 +275,23 @@ def test_pickle_roundtrip():
     # https://github.com/pandas-dev/pandas/issues/31847
     result = pickle.loads(pickle.dumps(pd.NA))
     assert result is pd.NA
+
+
+def test_pickle_roundtrip_pandas():
+    with tm.ensure_clean("data.pkl") as f:
+        with open(f):
+            pd.to_pickle(pd.NA, f)
+        with open(f):
+            result = pd.read_pickle(f)
+    assert result is pd.NA
+
+
+def test_pickle_roundtrip_series():
+    s = pd.Series(pd.array([1, 2, pd.NA]))
+    with tm.ensure_clean("data.pkl") as f:
+        with open(f):
+            pd.to_pickle(s, f)
+        with open(f):
+            result = pd.read_pickle(f)
+
+    tm.assert_series_equal(result, s)

From d3c6bce9b3e3310d4d6077b3edb6040a2c5a181a Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 2 Mar 2020 11:24:10 -0600
Subject: [PATCH 3/3] TST: Use tm.round_trip_pickle and parametrize NA container pickle tests

---
 pandas/tests/scalar/test_na_scalar.py | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/pandas/tests/scalar/test_na_scalar.py b/pandas/tests/scalar/test_na_scalar.py
index 6dff0eb8ae91f..07656de2e9062 100644
--- a/pandas/tests/scalar/test_na_scalar.py
+++ b/pandas/tests/scalar/test_na_scalar.py
@@ -278,20 +278,17 @@ def test_pickle_roundtrip():
 
 
 def test_pickle_roundtrip_pandas():
-    with tm.ensure_clean("data.pkl") as f:
-        with open(f):
-            pd.to_pickle(pd.NA, f)
-        with open(f):
-            result = pd.read_pickle(f)
+    result = tm.round_trip_pickle(pd.NA)
     assert result is pd.NA
 
 
-def test_pickle_roundtrip_series():
-    s = pd.Series(pd.array([1, 2, pd.NA]))
-    with tm.ensure_clean("data.pkl") as f:
-        with open(f):
-            pd.to_pickle(s, f)
-        with open(f):
-            result = pd.read_pickle(f)
-
-    tm.assert_series_equal(result, s)
+@pytest.mark.parametrize(
+    "values, dtype", [([1, 2, pd.NA], "Int64"), (["A", "B", pd.NA], "string")]
+)
+@pytest.mark.parametrize("as_frame", [True, False])
+def test_pickle_roundtrip_containers(as_frame, values, dtype):
+    s = pd.Series(pd.array(values, dtype=dtype))
+    if as_frame:
+        s = s.to_frame(name="A")
+    result = tm.round_trip_pickle(s)
+    tm.assert_equal(result, s)