ENH: json_normalize now takes a user-specified separator

jowens · jreback · commit 8edc40e548ef · 2017-03-28T15:14:16.000-04:00
closes #14883
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -303,6 +303,7 @@ Other Enhancements
 - ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
 - ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
 - The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements
+
 - ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
 - ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
 - ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).
@@ -318,6 +319,9 @@ Other Enhancements
 - Re-enable the ``parse_dates`` keyword of ``read_excel`` to parse string columns as dates (:issue:`14326`)
 - Added ``.empty`` property to subclasses of ``Index``. (:issue:`15270`)
 
+- ``pandas.io.json.json_normalize()`` has gained a ``sep`` option that accepts a ``str`` used to separate the joined field names; the default is ``'.'``, which is backward compatible. (:issue:`14883`)
+
+
 .. _ISO 8601 duration: https://en.wikipedia.org/wiki/ISO_8601#Durations
 
 
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -67,6 +67,21 @@ def test_empty_array(self):
         expected = DataFrame()
         tm.assert_frame_equal(result, expected)
 
+    def test_simple_normalize_with_default_separator(self):
+        result = json_normalize({'A': {'A': 1, 'B': 2}})
+        expected = DataFrame([[1, 2]], columns=['A.A', 'A.B'])
+        tm.assert_frame_equal(result, expected)
+
+    def test_simple_normalize_with_user_specified_separator(self):
+        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep='_')
+        expected = DataFrame([[1, 2]], columns=['A_A', 'A_B'])
+        tm.assert_frame_equal(result, expected)
+
+    def test_simple_normalize_with_user_specified_unicode_separator(self):
+        result = json_normalize({'A': {'A': 1, 'B': 2}}, sep=u'\u03c3')
+        expected = DataFrame([[1, 2]], columns=[u'A\u03c3A', u'A\u03c3B'])
+        tm.assert_frame_equal(result, expected)
+
     def test_more_deeply_nested(self):
         data = [{'country': 'USA',
                  'states': [{'name': 'California',