Skip to content

Commit fa3deef

Browse files
committed
CLN: reorg pandas/io/json to sub-dirs
1 parent f93714b commit fa3deef

File tree

5 files changed

+256
-246
lines changed

5 files changed

+256
-246
lines changed

pandas/io/json/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .json import to_json, read_json, loads, dumps # noqa
2+
from .normalize import json_normalize # noqa
3+
4+
# Remove the ``json`` and ``normalize`` names from this package's namespace;
# presumably so that only the functions imported above are exposed here
# (NOTE(review): confirm nothing relies on ``pandas.io.json.json``).
del json, normalize # noqa

pandas/io/json.py renamed to pandas/io/json/json.py

+1-245
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# pylint: disable-msg=E1101,W0613,W0603
22

33
import os
4-
import copy
5-
from collections import defaultdict
64
import numpy as np
75

86
import pandas.json as _json
@@ -13,6 +11,7 @@
1311
from pandas.io.common import get_filepath_or_buffer, _get_handle
1412
from pandas.core.common import AbstractMethodError
1513
from pandas.formats.printing import pprint_thing
14+
from .normalize import _convert_to_line_delimits
1615

1716
loads = _json.loads
1817
dumps = _json.dumps
@@ -641,246 +640,3 @@ def is_ok(col):
641640
lambda col, c: self._try_convert_to_date(c),
642641
lambda col, c: ((self.keep_default_dates and is_ok(col)) or
643642
col in convert_dates))
644-
645-
# ---------------------------------------------------------------------
646-
# JSON normalization routines
647-
648-
649-
def _convert_to_line_delimits(s):
650-
"""Helper function that converts json lists to line delimited json."""
651-
652-
# Determine we have a JSON list to turn to lines otherwise just return the
653-
# json object, only lists can
654-
if not s[0] == '[' and s[-1] == ']':
655-
return s
656-
s = s[1:-1]
657-
658-
from pandas.lib import convert_json_to_lines
659-
return convert_json_to_lines(s)
660-
661-
662-
def nested_to_record(ds, prefix="", level=0):
    """a simplified json_normalize

    converts a nested dict into a flat dict ("record"), unlike json_normalize,
    it does not attempt to extract a subset of the data.

    Parameters
    ----------
    ds : dict or list of dicts
    prefix: the prefix, optional, default: ""
        Dotted path of the enclosing keys; it is prepended to every key
        when ``level`` is not 0.
    level: the current nesting depth in the JSON structure, optional,
        default: 0
        At level 0 keys are kept as they are; the function passes
        ``level + 1`` when it recurses into a nested dict.

    Returns
    -------
    d - dict or list of dicts, matching `ds`

    Examples
    --------

    IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
                                  nested=dict(e=dict(c=1,d=2),d=2)))
    Out[52]:
    {'dict1.c': 1,
     'dict1.d': 2,
     'flat1': 1,
     'nested.d': 2,
     'nested.e.c': 1,
     'nested.e.d': 2}
    """
    singleton = False
    if isinstance(ds, dict):
        ds = [ds]
        singleton = True

    new_ds = []
    for d in ds:

        # work on a copy so the caller's (possibly nested) input is untouched
        new_d = copy.deepcopy(d)
        for orig_key, v in d.items():
            # each key gets renamed with prefix; the original key object is
            # kept separately because it is what new_d is actually keyed by
            if isinstance(orig_key, compat.string_types):
                k = orig_key
            else:
                k = str(orig_key)
            if level == 0:
                newkey = k
            else:
                newkey = prefix + '.' + k

            # only dicts get recurse-flattened
            # only at level>1 do we rename the rest of the keys
            if not isinstance(v, dict):
                if level != 0:  # so we skip copying for top level, common case
                    # pop with the original key: popping the stringified
                    # key raised KeyError for non-string keys
                    new_d[newkey] = new_d.pop(orig_key)
                continue

            v = new_d.pop(orig_key)
            new_d.update(nested_to_record(v, newkey, level + 1))
        new_ds.append(new_d)

    if singleton:
        return new_ds[0]
    return new_ds
724-
725-
726-
def json_normalize(data, record_path=None, meta=None,
                   meta_prefix=None,
                   record_prefix=None,
                   errors='raise'):
    """
    "Normalize" semi-structured JSON data into a flat table

    Parameters
    ----------
    data : dict or list of dicts
        Unserialized JSON objects
    record_path : string or list of strings, default None
        Path in each object to list of records. If not passed, data will be
        assumed to be an array of records
    meta : list of paths (string or list of strings), default None
        Fields to use as metadata for each record in resulting table.
        The passed list is not modified.
    record_prefix : string, default None
        If given, this string is prepended to the name of every record
        column
    meta_prefix : string, default None
        If given, this string is prepended to the name of every metadata
        column
    errors : {'raise', 'ignore'}, default 'raise'

        * ignore : will ignore KeyError if keys listed in meta are not
          always present
        * raise : will raise KeyError if keys listed in meta are not
          always present

        .. versionadded:: 0.20.0

    Returns
    -------
    frame : DataFrame

    Raises
    ------
    KeyError
        If a key listed in `meta` is missing from an object and `errors`
        is not 'ignore'.
    ValueError
        If a (prefixed) metadata name collides with a record column.

    Examples
    --------

    >>> data = [{'state': 'Florida',
    ...          'shortname': 'FL',
    ...          'info': {
    ...               'governor': 'Rick Scott'
    ...          },
    ...          'counties': [{'name': 'Dade', 'population': 12345},
    ...                      {'name': 'Broward', 'population': 40000},
    ...                      {'name': 'Palm Beach', 'population': 60000}]},
    ...         {'state': 'Ohio',
    ...          'shortname': 'OH',
    ...          'info': {
    ...               'governor': 'John Kasich'
    ...          },
    ...          'counties': [{'name': 'Summit', 'population': 1234},
    ...                       {'name': 'Cuyahoga', 'population': 1337}]}]
    >>> from pandas.io.json import json_normalize
    >>> result = json_normalize(data, 'counties', ['state', 'shortname',
    ...                                           ['info', 'governor']])
    >>> result
             name  population info.governor    state shortname
    0        Dade       12345    Rick Scott  Florida        FL
    1     Broward       40000    Rick Scott  Florida        FL
    2  Palm Beach       60000    Rick Scott  Florida        FL
    3      Summit        1234   John Kasich     Ohio        OH
    4    Cuyahoga        1337   John Kasich     Ohio        OH

    """
    def _pull_field(js, spec):
        # follow a single key, or a list of keys one level at a time
        result = js
        if isinstance(spec, list):
            for field in spec:
                result = result[field]
        else:
            result = result[spec]

        return result

    # An empty list has no first record to inspect (data[0] below would
    # raise IndexError) and no records to extract
    if isinstance(data, list) and not data:
        return DataFrame()

    # A bit of a hackjob
    if isinstance(data, dict):
        data = [data]

    if record_path is None:
        if any(isinstance(x, dict) for x in compat.itervalues(data[0])):
            # naive normalization, this is idempotent for flat records
            # and potentially will inflate the data considerably for
            # deeply nested structures:
            #  {VeryLong: { b: 1,c:2}} -> {VeryLong.b:1 ,VeryLong.c:2}
            #
            # TODO: handle record value which are lists, at least error
            #       reasonably
            data = nested_to_record(data)
        return DataFrame(data)
    elif not isinstance(record_path, list):
        record_path = [record_path]

    if meta is None:
        meta = []
    elif not isinstance(meta, list):
        meta = [meta]

    # Normalize every meta entry to a list of keys. A new list is built so
    # the list object passed in by the caller is left as it was.
    meta = [m if isinstance(m, list) else [m] for m in meta]

    # Disastrously inefficient for now
    records = []
    lengths = []

    meta_vals = defaultdict(list)
    meta_keys = ['.'.join(val) for val in meta]

    def _recursive_extract(data, path, seen_meta, level=0):
        if len(path) > 1:
            for obj in data:
                # remember metadata that lives at this depth before
                # descending further along the record path
                for val, key in zip(meta, meta_keys):
                    if level + 1 == len(val):
                        seen_meta[key] = _pull_field(obj, val[-1])

                _recursive_extract(obj[path[0]], path[1:],
                                   seen_meta, level=level + 1)
        else:
            for obj in data:
                recs = _pull_field(obj, path[0])

                # For repeating the metadata later
                lengths.append(len(recs))

                for val, key in zip(meta, meta_keys):
                    if level + 1 > len(val):
                        meta_val = seen_meta[key]
                    else:
                        try:
                            meta_val = _pull_field(obj, val[level:])
                        except KeyError as e:
                            if errors == 'ignore':
                                meta_val = np.nan
                            else:
                                # format the key into the message; passing
                                # it as a second argument left a literal
                                # '%s' in the error text
                                raise KeyError(
                                    "Try running with "
                                    "errors='ignore' as key "
                                    "{err} is not always present"
                                    .format(err=e))
                    meta_vals[key].append(meta_val)

                records.extend(recs)

    _recursive_extract(data, record_path, {}, level=0)

    result = DataFrame(records)

    if record_prefix is not None:
        result.rename(columns=lambda x: record_prefix + x, inplace=True)

    # Data types, a problem
    for k, v in compat.iteritems(meta_vals):
        if meta_prefix is not None:
            k = meta_prefix + k

        if k in result:
            raise ValueError('Conflicting metadata name %s, '
                             'need distinguishing prefix ' % k)

        # one metadata value per parent object, repeated once per record
        result[k] = np.array(v).repeat(lengths)

    return result

0 commit comments

Comments
 (0)