From bcfbf186725efde89aa003decddb7137e73377b6 Mon Sep 17 00:00:00 2001
From: dickreuter <dickreuter@yahoo.com>
Date: Wed, 26 Oct 2016 21:14:55 +0100
Subject: [PATCH 1/5] Avoid exception in pandas.io.json.json_normalize when
 keys listed in the meta parameter do not occur in every item of the list.
 Added documentation and test for issue #14505. Added keyword errors
 {'raise'|'ignore'}. Shortened what's new. Removed commas in dictionary for
 linting compatibility. Updated doc.

---
 pandas/io/json.py                      | 15 +++++++-
 pandas/io/tests/json/test_json_norm.py | 53 ++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/pandas/io/json.py b/pandas/io/json.py
index 878506a6ddc05..ab4c30df1b4c8 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -723,7 +723,9 @@ def nested_to_record(ds, prefix="", level=0):
 
 def json_normalize(data, record_path=None, meta=None,
                    meta_prefix=None,
-                   record_prefix=None):
+                   record_prefix=None,
+                   errors='raise'):
+
     """
     "Normalize" semi-structured JSON data into a flat table
 
@@ -740,6 +742,8 @@ def json_normalize(data, record_path=None, meta=None,
         If True, prefix records with dotted (?) path, e.g. foo.bar.field if
         path to records is ['foo', 'bar']
     meta_prefix : string, default None
+    error: {'raise', 'ignore'}, default 'raise'
+        * ignore: will ignore keyErrors if keys listed in meta are not always present
 
     Returns
     -------
@@ -839,7 +843,14 @@ def _recursive_extract(data, path, seen_meta, level=0):
                     if level + 1 > len(val):
                         meta_val = seen_meta[key]
                     else:
-                        meta_val = _pull_field(obj, val[level:])
+                        try:
+                            meta_val = _pull_field(obj, val[level:])
+                        except KeyError as e:
+                            if errors == 'ignore':
+                                meta_val = np.nan
+                            else:
+                                raise KeyError(
+                                    "Try running with errors='ignore' as the following key may not always be present: " + str(e))
                     meta_vals[key].append(meta_val)
 
                 records.extend(recs)
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index 4848db97194d9..4877728d9ec52 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -225,6 +225,59 @@ def test_nested_flattens(self):
 
         self.assertEqual(result, expected)
 
+
+    def test_json_normalise_fix(self):
+        # issue 14505
+        j = {
+            "Trades": [{
+                "general": {
+                    "tradeid": 100,
+                    "trade_version": 1,
+                    "stocks": [{
+
+                        "symbol": "AAPL",
+                        "name": "Apple",
+                        "price": "0"
+
+                    }, {
+
+                        "symbol": "GOOG",
+                        "name": "Google",
+                        "price": "0"
+
+                    }
+                    ]
+                }
+            }, {
+                "general": {
+                    "tradeid": 100,
+                    "stocks": [{
+
+                        "symbol": "AAPL",
+                        "name": "Apple",
+                        "price": "0"
+
+                    }, {
+                        "symbol": "GOOG",
+                        "name": "Google",
+                        "price": "0"
+
+                    }
+                    ]
+                }
+            }
+            ]
+        }
+        j = json_normalize(data=j['Trades'], record_path=[['general', 'stocks']],
+                           meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='ignore')
+        expected={'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
+         'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
+         'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
+         'price': {0: '0', 1: '0', 2: '0', 3: '0'},
+         'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
+
+        self.assertEqual(j.fillna('').to_dict(), expected)
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
                          '--pdb-failure', '-s'], exit=False)

From d29858878f6c4f4be59307867004e75c41e6709a Mon Sep 17 00:00:00 2001
From: dickreuter <dickreuter@yahoo.com>
Date: Fri, 18 Nov 2016 17:19:11 +0000
Subject: [PATCH 2/5] Fixed as instructed on the pull request page

---
 doc/source/whatsnew/v0.20.0.txt        |  2 ++
 pandas/io/json.py                      |  5 +++--
 pandas/io/tests/json/test_json_norm.py | 14 ++++++++++----
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 8d88a7b4fb215..33d17a193c791 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -61,6 +61,8 @@ Other enhancements
 - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value  (:issue:`14154`)
 - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`)
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
+- ``pandas.io.json.json_normalize`` If meta keys are not always present a new option to set errors="ignore" (:issue:`14583`)
+
 
 .. _whatsnew_0200.api_breaking:
 
diff --git a/pandas/io/json.py b/pandas/io/json.py
index ab4c30df1b4c8..6066cdc92f10d 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -849,8 +849,9 @@ def _recursive_extract(data, path, seen_meta, level=0):
                             if errors == 'ignore':
                                 meta_val = np.nan
                             else:
-                                raise KeyError(
-                                    "Try running with errors='ignore' as the following key may not always be present: " + str(e))
+                                raise \
+                                    KeyError(
+                                    "Try running with errors='ignore' as key may not always be present: %s", e)
                     meta_vals[key].append(meta_val)
 
                 records.extend(recs)
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index 4877728d9ec52..fdbb4c699882e 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -226,9 +226,9 @@ def test_nested_flattens(self):
         self.assertEqual(result, expected)
 
 
-    def test_json_normalise_fix(self):
-        # issue 14505
-        j = {
+    def test_json_normalize_errors(self):
+        # If meta keys are not always present a new option to set errors='ignore' has been implemented (:issue:`14583`)
+        i = {
             "Trades": [{
                 "general": {
                     "tradeid": 100,
@@ -268,7 +268,7 @@ def test_json_normalise_fix(self):
             }
             ]
         }
-        j = json_normalize(data=j['Trades'], record_path=[['general', 'stocks']],
+        j = json_normalize(data=i['Trades'], record_path=[['general', 'stocks']],
                            meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='ignore')
         expected={'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
          'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
@@ -278,6 +278,12 @@ def test_json_normalise_fix(self):
 
         self.assertEqual(j.fillna('').to_dict(), expected)
 
+        self.assertRaises(KeyError,
+                json_normalize, data=i['Trades'], record_path=[['general', 'stocks']],
+                           meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='raise'
+                          )
+
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb',
                          '--pdb-failure', '-s'], exit=False)

From 2028924c8ef63bbb9f9f39752d525cc12b77e1df Mon Sep 17 00:00:00 2001
From: dickreuter <dickreuter@yahoo.com>
Date: Fri, 2 Dec 2016 15:14:51 +0000
Subject: [PATCH 3/5] doc changes

---
 doc/source/whatsnew/v0.20.0.txt        | 1 +
 pandas/io/json.py                      | 7 +++----
 pandas/io/tests/json/test_json_norm.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 33d17a193c791..6b9f380ded918 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -62,6 +62,7 @@ Other enhancements
 - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`)
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
 - ``pandas.io.json.json_normalize`` If meta keys are not always present a new option to set errors="ignore" (:issue:`14583`)
+- ``pandas.io.json.json_normalize`` gained the option ``errors='ignore'|raise``; the default is raise and is backward compatible. (:issue:`14583`)
 
 
 .. _whatsnew_0200.api_breaking:
diff --git a/pandas/io/json.py b/pandas/io/json.py
index 6066cdc92f10d..327c764d08541 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -743,7 +743,8 @@ def json_normalize(data, record_path=None, meta=None,
         path to records is ['foo', 'bar']
     meta_prefix : string, default None
     error: {'raise', 'ignore'}, default 'raise'
-        * ignore: will ignore keyErrors if keys listed in meta are not always present
+        * ignore: will ignore KeyError if keys listed in meta are not always present
+        * raise: will raise KeyError if keys listed in meta are not always present
 
     Returns
     -------
@@ -849,9 +850,7 @@ def _recursive_extract(data, path, seen_meta, level=0):
                             if errors == 'ignore':
                                 meta_val = np.nan
                             else:
-                                raise \
-                                    KeyError(
-                                    "Try running with errors='ignore' as key may not always be present: %s", e)
+                                raise KeyError("Try running with errors='ignore' as key %s is not always present.", e)
                     meta_vals[key].append(meta_val)
 
                 records.extend(recs)
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index fdbb4c699882e..ff2627c82d0c9 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -227,7 +227,7 @@ def test_nested_flattens(self):
 
 
     def test_json_normalize_errors(self):
-        # If meta keys are not always present a new option to set errors='ignore' has been implemented (:issue:`14583`)
+        # GH14583: If meta keys are not always present a new option to set errors='ignore' has been implemented
         i = {
             "Trades": [{
                 "general": {

From 3c942065525b1a1b76387e11a34a14b878d34024 Mon Sep 17 00:00:00 2001
From: dickreuter <dickreuter@yahoo.com>
Date: Thu, 8 Dec 2016 00:16:43 +0000
Subject: [PATCH 4/5] shortened lines to pass linting

---
 doc/source/whatsnew/v0.20.0.txt        |  2 --
 pandas/io/json.py                      | 11 ++++++++---
 pandas/io/tests/json/test_json_norm.py | 15 ++++++---------
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 6b9f380ded918..7efaaaa39871c 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -9,7 +9,6 @@ users upgrade to this version.
 
 Highlights include:
 
-- Building pandas for development now requires ``cython >= 0.23`` (:issue:`14831`)
 
 Check the :ref:`API Changes <whatsnew_0200.api_breaking>` and :ref:`deprecations <whatsnew_0200.deprecations>` before updating.
 
@@ -61,7 +60,6 @@ Other enhancements
 - The ``usecols`` argument in ``pd.read_csv`` now accepts a callable function as a value  (:issue:`14154`)
 - ``pd.DataFrame.plot`` now prints a title above each subplot if ``suplots=True`` and ``title`` is a list of strings (:issue:`14753`)
 - ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
-- ``pandas.io.json.json_normalize`` If meta keys are not always present a new option to set errors="ignore" (:issue:`14583`)
 - ``pandas.io.json.json_normalize`` gained the option ``errors='ignore'|raise``; the default is raise and is backward compatible. (:issue:`14583`)
 
 
diff --git a/pandas/io/json.py b/pandas/io/json.py
index 327c764d08541..ead111e4724f0 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -743,8 +743,12 @@ def json_normalize(data, record_path=None, meta=None,
         path to records is ['foo', 'bar']
     meta_prefix : string, default None
     error: {'raise', 'ignore'}, default 'raise'
-        * ignore: will ignore KeyError if keys listed in meta are not always present
-        * raise: will raise KeyError if keys listed in meta are not always present
+        * ignore: will ignore KeyError if keys listed in meta are not
+        always present
+        * raise: will raise KeyError if keys listed in meta are not
+        always present
+        
+        .. versionadded:: 0.20.0
 
     Returns
     -------
@@ -850,7 +854,8 @@ def _recursive_extract(data, path, seen_meta, level=0):
                             if errors == 'ignore':
                                 meta_val = np.nan
                             else:
-                                raise KeyError("Try running with errors='ignore' as key %s is not always present.", e)
+                                raise KeyError("Try running with errors='ignore'"
+                                               "as key %s is not always present.", e)
                     meta_vals[key].append(meta_val)
 
                 records.extend(recs)
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index ff2627c82d0c9..85815e9c74d28 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -227,7 +227,8 @@ def test_nested_flattens(self):
 
 
     def test_json_normalize_errors(self):
-        # GH14583: If meta keys are not always present a new option to set errors='ignore' has been implemented
+        # GH14583: If meta keys are not always present
+        # a new option to set errors='ignore' has been implemented
         i = {
             "Trades": [{
                 "general": {
@@ -238,13 +239,10 @@ def test_json_normalize_errors(self):
                         "symbol": "AAPL",
                         "name": "Apple",
                         "price": "0"
-
                     }, {
-
                         "symbol": "GOOG",
                         "name": "Google",
                         "price": "0"
-
                     }
                     ]
                 }
@@ -252,16 +250,13 @@ def test_json_normalize_errors(self):
                 "general": {
                     "tradeid": 100,
                     "stocks": [{
-
                         "symbol": "AAPL",
                         "name": "Apple",
                         "price": "0"
-
                     }, {
                         "symbol": "GOOG",
                         "name": "Google",
                         "price": "0"
-
                     }
                     ]
                 }
@@ -269,7 +264,8 @@ def test_json_normalize_errors(self):
             ]
         }
         j = json_normalize(data=i['Trades'], record_path=[['general', 'stocks']],
-                           meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='ignore')
+                           meta=[['general', 'tradeid'], ['general', 'trade_version']],
+                           errors='ignore')
         expected={'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
          'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
          'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
@@ -280,7 +276,8 @@ def test_json_normalize_errors(self):
 
         self.assertRaises(KeyError,
                 json_normalize, data=i['Trades'], record_path=[['general', 'stocks']],
-                           meta=[['general', 'tradeid'], ['general', 'trade_version']], errors='raise'
+                           meta=[['general', 'tradeid'], ['general', 'trade_version']],
+                          errors='raise'
                           )
 
 

From 701c14092267425542aa8eb367298c035d9e8610 Mon Sep 17 00:00:00 2001
From: dickreuter <dickreuter@yahoo.com>
Date: Sun, 11 Dec 2016 03:40:47 +0000
Subject: [PATCH 5/5] adjusted formatting

---
 pandas/io/json.py                      |  7 ++++---
 pandas/io/tests/json/test_json_norm.py | 23 +++++++++++++----------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/pandas/io/json.py b/pandas/io/json.py
index ead111e4724f0..da540a8797578 100644
--- a/pandas/io/json.py
+++ b/pandas/io/json.py
@@ -747,7 +747,6 @@ def json_normalize(data, record_path=None, meta=None,
         always present
         * raise: will raise KeyError if keys listed in meta are not
         always present
-        
         .. versionadded:: 0.20.0
 
     Returns
@@ -854,8 +853,10 @@ def _recursive_extract(data, path, seen_meta, level=0):
                             if errors == 'ignore':
                                 meta_val = np.nan
                             else:
-                                raise KeyError("Try running with errors='ignore'"
-                                               "as key %s is not always present.", e)
+                                # KeyError does not %-format extra args
+                                raise KeyError("Try running with "
+                                               "errors='ignore' as key %s "
+                                               "is not always present" % e)
                     meta_vals[key].append(meta_val)
 
                 records.extend(recs)
diff --git a/pandas/io/tests/json/test_json_norm.py b/pandas/io/tests/json/test_json_norm.py
index 85815e9c74d28..36110898448ea 100644
--- a/pandas/io/tests/json/test_json_norm.py
+++ b/pandas/io/tests/json/test_json_norm.py
@@ -225,7 +225,6 @@ def test_nested_flattens(self):
 
         self.assertEqual(result, expected)
 
-
     def test_json_normalize_errors(self):
         # GH14583: If meta keys are not always present
         # a new option to set errors='ignore' has been implemented
@@ -263,20 +262,24 @@ def test_json_normalize_errors(self):
             }
             ]
         }
-        j = json_normalize(data=i['Trades'], record_path=[['general', 'stocks']],
-                           meta=[['general', 'tradeid'], ['general', 'trade_version']],
+        j = json_normalize(data=i['Trades'],
+                           record_path=[['general', 'stocks']],
+                           meta=[['general', 'tradeid'],
+                                 ['general', 'trade_version']],
                            errors='ignore')
-        expected={'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
-         'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
-         'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
-         'price': {0: '0', 1: '0', 2: '0', 3: '0'},
-         'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
+        expected = {'general.trade_version': {0: 1.0, 1: 1.0, 2: '', 3: ''},
+                    'general.tradeid': {0: 100, 1: 100, 2: 100, 3: 100},
+                    'name': {0: 'Apple', 1: 'Google', 2: 'Apple', 3: 'Google'},
+                    'price': {0: '0', 1: '0', 2: '0', 3: '0'},
+                    'symbol': {0: 'AAPL', 1: 'GOOG', 2: 'AAPL', 3: 'GOOG'}}
 
         self.assertEqual(j.fillna('').to_dict(), expected)
 
         self.assertRaises(KeyError,
-                json_normalize, data=i['Trades'], record_path=[['general', 'stocks']],
-                           meta=[['general', 'tradeid'], ['general', 'trade_version']],
+                          json_normalize, data=i['Trades'],
+                          record_path=[['general', 'stocks']],
+                          meta=[['general', 'tradeid'],
+                                ['general', 'trade_version']],
                           errors='raise'
                           )