From e232067e75170f4d00161f7bce51a952155d1942 Mon Sep 17 00:00:00 2001
From: Kenneth <24253983+aerymilts@users.noreply.github.com>
Date: Sun, 18 Mar 2018 18:44:40 +0800
Subject: [PATCH 1/2] BUG: fixed json_normalize for subrecords with NoneTypes

TST: additional coverage for the test cases

DOC: added changes to whatsnew/v0.23.0.txt

BUG: changed how nan is declared for backward compatibility - python2.7
---
 doc/source/whatsnew/v0.23.0.txt        |  1 +
 pandas/io/json/normalize.py            |  5 ++-
 pandas/tests/io/json/test_normalize.py | 49 ++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index cfe28edd175b6..f0fc62f455fd1 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -979,6 +979,7 @@ I/O
 - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`, :issue:`9155`, :issue:`19900`)
 - Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
 - Bug in :func:`read_pickle` when unpickling objects with :class:`TimedeltaIndex` or :class:`Float64Index` created with pandas prior to version 0.20 (:issue:`19939`)
+- Bug in :func:`pandas.io.json.json_normalize` where subrecords were not properly normalized if any subrecord value was ``None`` (:issue:`20030`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py
index c7901f4352d00..549204abd3caf 100644
--- a/pandas/io/json/normalize.py
+++ b/pandas/io/json/normalize.py
@@ -80,6 +80,8 @@ def nested_to_record(ds, prefix="", sep=".", level=0):
                 if level != 0:  # so we skip copying for top level, common case
                     v = new_d.pop(k)
                     new_d[newkey] = v
+                elif v is None:  # drop top-level None (k still present)
+                    new_d.pop(k)
                 continue
             else:
                 v = new_d.pop(k)
@@ -189,7 +191,8 @@ def _pull_field(js, spec):
         data = [data]
 
     if record_path is None:
-        if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
+        if any(isinstance(x, dict)  # any record, not just data[0]
+               for y in data for x in compat.itervalues(y)):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 1cceae32cd748..3249f8ed78c7f 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -54,6 +54,17 @@ def state_data():
          'state': 'Ohio'}]
 
 
+@pytest.fixture
+def author_missing_data():
+    # GH20030: first record's 'info' is None; only the second has subrecords
+    info = {'created_at': '11/08/1993', 'last_updated': '26/05/2012'}
+    author_name = {'first': 'Jane', 'last_name': 'Doe'}
+    return [
+        {'info': None},
+        {'info': info, 'author_name': author_name},
+    ]
+
+
 class TestJSONNormalize(object):
 
     def test_simple_records(self):
@@ -226,6 +237,21 @@ def test_non_ascii_key(self):
         result = json_normalize(json.loads(testjson))
         tm.assert_frame_equal(result, expected)
 
+    def test_missing_field(self, author_missing_data):
+        result = json_normalize(author_missing_data)
+        ex_data = [
+            {'author_name.first': float('nan'),
+             'author_name.last_name': float('nan'),
+             'info.created_at': float('nan'),
+             'info.last_updated': float('nan')},
+            {'author_name.first': 'Jane',
+             'author_name.last_name': 'Doe',
+             'info.created_at': '11/08/1993',
+             'info.last_updated': '26/05/2012'}
+        ]
+        expected = DataFrame(ex_data)
+        tm.assert_frame_equal(result, expected)
+
 
 class TestNestedToRecord(object):
 
@@ -322,3 +348,26 @@ def test_json_normalize_errors(self):
                             ['general', 'trade_version']],
                       errors='raise'
                       )
+
+    def test_nonetype_dropping(self):
+        data = [
+            {'info': None,
+             'author_name':
+             {'first': 'Smith', 'last_name': 'Appleseed'}
+             },
+            {'info':
+                {'created_at': '11/08/1993', 'last_updated': '26/05/2012'},
+             'author_name':
+                {'first': 'Jane', 'last_name': 'Doe'}
+             }
+        ]
+        result = nested_to_record(data)
+        expected = [
+            {'author_name.first': 'Smith',
+             'author_name.last_name': 'Appleseed'},
+            {'author_name.first': 'Jane',
+             'author_name.last_name': 'Doe',
+             'info.created_at': '11/08/1993',
+             'info.last_updated': '26/05/2012'}]
+
+        assert result == expected

From 76d2d910e995d1c4f0a7ce649b786d53e0faefab Mon Sep 17 00:00:00 2001
From: Kenneth <24253983+aerymilts@users.noreply.github.com>
Date: Tue, 20 Mar 2018 10:31:01 +0800
Subject: [PATCH 2/2] TST: updated test purposes and how nan is declared
 (#20030)

---
 pandas/tests/io/json/test_normalize.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index 3249f8ed78c7f..0fabaf747b6de 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -238,12 +238,14 @@ def test_non_ascii_key(self):
         tm.assert_frame_equal(result, expected)
 
     def test_missing_field(self, author_missing_data):
+        # GH20030: Checks for robustness of json_normalize - should
+        # unnest records where only the first record has a None value
         result = json_normalize(author_missing_data)
         ex_data = [
-            {'author_name.first': float('nan'),
-             'author_name.last_name': float('nan'),
-             'info.created_at': float('nan'),
-             'info.last_updated': float('nan')},
+            {'author_name.first': np.nan,
+             'author_name.last_name': np.nan,
+             'info.created_at': np.nan,
+             'info.last_updated': np.nan},
             {'author_name.first': 'Jane',
              'author_name.last_name': 'Doe',
              'info.created_at': '11/08/1993',
@@ -350,6 +352,8 @@ def test_json_normalize_errors(self):
                       )
 
     def test_nonetype_dropping(self):
+        # GH20030: Checks that None values are dropped in nested_to_record
+        # to prevent additional columns of nans when passed to DataFrame
         data = [
             {'info': None,
              'author_name':