|
| 1 | +""" feather-format compat """ |
| 2 | + |
| 3 | +from pandas import DataFrame, RangeIndex, MultiIndex, Int64Index |
| 4 | +from pandas.types.common import is_object_dtype |
| 5 | +from pandas.compat import range |
| 6 | +from pandas.lib import infer_dtype |
| 7 | + |
| 8 | + |
| 9 | +def _try_import(): |
| 10 | + # since pandas is a dependency of feather |
| 11 | + # we need to import on first use |
| 12 | + |
| 13 | + try: |
| 14 | + import feather |
| 15 | + except ImportError: |
| 16 | + |
| 17 | + # give a nice error message |
| 18 | + raise ImportError("the feather-format library is not installed\n" |
| 19 | + "you can install via conda\n" |
| 20 | + "conda install feather-format -c conda-forge") |
| 21 | + return feather |
| 22 | + |
| 23 | + |
def to_feather(df, path):
    """
    Write a DataFrame to the feather-format

    The frame is validated before writing; anything the checks below
    reject raises instead of being written.

    Parameters
    ----------
    df : DataFrame
    path : string
        File path

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame; if its index is not equal to the
        default ``0..len(df)-1`` range; if the column labels are
        duplicated, are a MultiIndex, or are not inferred as strings;
        or if any object-dtype column is inferred as something other
        than 'string' or 'unicode'.
    ImportError
        If the feather library cannot be imported.
    """
    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    feather = _try_import()
    valid_types = {'string', 'unicode'}

    # validate index
    # --------------

    # validate that we have only a default index
    # raise on anything else as we don't serialize the index

    if not isinstance(df.index, (RangeIndex, Int64Index)):
        raise ValueError("feather does not support serializing {} "
                         "for the index; you can .reset_index() "
                         "to make the index into column(s)".format(
                             type(df.index)))

    # an integer index is only acceptable when it is exactly 0..n-1
    if not df.index.equals(RangeIndex.from_range(range(len(df)))):
        raise ValueError("feather does not support serializing a "
                         "non-default index for the index; "
                         "you can .reset_index() "
                         "to make the index into column(s)")

    # validate columns
    # ----------------

    # must have unique column names
    if not df.columns.is_unique:
        raise ValueError("feather does not support duplicate columns")

    # must be a flat Index
    if isinstance(df.columns, MultiIndex):
        raise ValueError("feather does not support serializing a "
                         "MultiIndex for the columns")

    # must have valid column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("feather must have string column names")

    # validate dtypes
    # ---------------

    # validate that we do not have any non-string object dtypes
    # as these 'work', but will not properly de-serialize
    objects = [c for c, dtype in df.dtypes.items()
               if is_object_dtype(dtype)]
    dtypes = [infer_dtype(df[c]) for c in objects]
    if set(dtypes) - valid_types:
        # tabulate every object column, then keep only the offenders;
        # 'ncolumn' is the position within the object-dtype columns
        invalid = DataFrame([[i, c, dtype] for i, (c, dtype) in
                             enumerate(zip(objects, dtypes))])
        invalid.columns = ['ncolumn', 'column', 'inferred_dtype']
        invalid = invalid[~invalid.inferred_dtype.isin(list(valid_types))]

        msg = ("The following columns are not supported to serialize "
               "to the feather-format:\n\n"
               "{}".format(invalid.to_string()))
        raise ValueError(msg)

    feather.write_dataframe(df, path)
| 93 | + |
| 94 | + |
def read_feather(path):
    """
    Read the feather-format file at the given path

    Parameters
    ----------
    path : string
        File path

    Returns
    -------
    the object produced by ``feather.read_dataframe`` for the file
    """
    # feather is imported on first use rather than at module import time
    read_dataframe = _try_import().read_dataframe
    return read_dataframe(path)
0 commit comments