Skip to content

REF: eliminate method _write() in json writers #36218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 22, 2020
150 changes: 44 additions & 106 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
from abc import ABC, abstractmethod
from collections import abc
import functools
from io import BytesIO, StringIO
from itertools import islice
import os
from typing import IO, Any, Callable, List, Optional, Tuple, Type
from typing import IO, Any, Callable, List, Mapping, Optional, Tuple, Type, Union

import numpy as np

import pandas._libs.json as json
from pandas._libs.tslibs import iNaT
from pandas._typing import CompressionOptions, JSONSerializable, StorageOptions
from pandas._typing import (
CompressionOptions,
IndexLabel,
JSONSerializable,
StorageOptions,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import deprecate_kwarg, deprecate_nonkeyword_arguments

from pandas.core.dtypes.common import ensure_str, is_period_dtype

from pandas import DataFrame, MultiIndex, Series, isna, to_datetime
from pandas.core.construction import create_series_with_explicit_dtype
from pandas.core.generic import NDFrame
from pandas.core.reshape.concat import concat

from pandas.io.common import get_compression_method, get_filepath_or_buffer, get_handle
Expand All @@ -33,7 +40,7 @@
# interface to/from
def to_json(
path_or_buf,
obj,
obj: NDFrame,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be FrameOrSeries?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with FrameOrSeries is that it does not allow the variable to be rebound to a different type.
Inside the function to_json there is a statement that changes the object from a Series to a DataFrame, which is not allowed.

    if orient == "table" and isinstance(obj, Series):
        obj = obj.to_frame(name=obj.name or "values")

mypy error:

pandas/io/json/_json.py:78: error: Incompatible types in assignment (expression has type "DataFrame", variable has type "FrameOrSeries")  [assignment]

I tried to use FrameOrSeriesUnion, which allows changing the type from Series to DataFrame, but then I get another mypy error.
Apparently, for NDFrame only FrameOrSeries is an acceptable annotation.

pandas/core/generic.py:2159: error: The erased type of self "Union[pandas.core.frame.DataFrame, pandas.core.series.Series]" is not a supertype of its class "pandas.core.generic.NDFrame"  [misc]

So, I have no clue how to deal with typing here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@WillAyd I expected FrameOrSeriesUnion would work for this; am I missing something?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ivanovmg can you merge master and update the error message here? I don't think that line number is reflective of the latest code base

I think the error message stems from mixing a Union and a TypeVar (the latter being somewhere in generic). Ideally this should be cleaned up, but I don't think it needs to hold up this PR.

Copy link
Member Author

@ivanovmg ivanovmg Oct 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here we go.
I merged master, then modified pandas/core/generic.py.

line 2164:

    def to_json(
        self: FrameOrSeriesUnion,
        path_or_buf: Optional[FilePathOrBuffer] = None,

And replaced NDFrame with FrameOrSeriesUnion in _json.py.

The error would be:

pandas/core/generic.py:2164: error: The erased type of self "Union[pandas.core.frame.DataFrame, pandas.core.series.Series]" is not a supertype of its class "pandas.core.generic.NDFrame"  [misc]

However, if I do not touch generic.py, then the following error will be thrown.

pandas/core/generic.py:2437: error: Argument "obj" to "to_json" has incompatible type "NDFrame"; expected "Union[DataFrame, Series]"  [arg-type]

orient: Optional[str] = None,
date_format: str = "epoch",
double_precision: int = 10,
Expand Down Expand Up @@ -110,7 +117,7 @@ def to_json(
path_or_buf.close()


class Writer:
class Writer(ABC):
_default_orient: str

def __init__(
Expand Down Expand Up @@ -146,75 +153,52 @@ def _format_axes(self):
raise AbstractMethodError(self)

def write(self):
return self._write(
self.obj,
self.orient,
self.double_precision,
self.ensure_ascii,
self.date_unit,
self.date_format == "iso",
self.default_handler,
self.indent,
)

def _write(
self,
obj,
orient: Optional[str],
double_precision: int,
ensure_ascii: bool,
date_unit: str,
iso_dates: bool,
default_handler: Optional[Callable[[Any], JSONSerializable]],
indent: int,
):
iso_dates = self.date_format == "iso"
return dumps(
obj,
orient=orient,
double_precision=double_precision,
ensure_ascii=ensure_ascii,
date_unit=date_unit,
self.obj_to_write,
orient=self.orient,
double_precision=self.double_precision,
ensure_ascii=self.ensure_ascii,
date_unit=self.date_unit,
iso_dates=iso_dates,
default_handler=default_handler,
indent=indent,
default_handler=self.default_handler,
indent=self.indent,
)

@property
@abstractmethod
def obj_to_write(self) -> Union[NDFrame, Mapping[IndexLabel, Any]]:
"""Object to write in JSON format."""
pass

Copy link
Member

@jbrockmendel jbrockmendel Oct 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explicitly pass here?

Are the keys in the Dict necessarily str, or can they be any Index entries?

Copy link
Member Author

@ivanovmg ivanovmg Oct 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes, actually the keys in the dictionary can be of IndexLabel type.
In FrameWriter, the resulting dictionary keys can be tuples of strings in the case of MultiIndex columns.
In other cases, the keys seem to be only strings.


class SeriesWriter(Writer):
_default_orient = "index"

@property
def obj_to_write(self) -> Union[NDFrame, Mapping[IndexLabel, Any]]:
if not self.index and self.orient == "split":
return {"name": self.obj.name, "data": self.obj.values}
else:
return self.obj

def _format_axes(self):
if not self.obj.index.is_unique and self.orient == "index":
raise ValueError(f"Series index must be unique for orient='{self.orient}'")

def _write(
self,
obj,
orient: Optional[str],
double_precision: int,
ensure_ascii: bool,
date_unit: str,
iso_dates: bool,
default_handler: Optional[Callable[[Any], JSONSerializable]],
indent: int,
):
if not self.index and orient == "split":
obj = {"name": obj.name, "data": obj.values}
return super()._write(
obj,
orient,
double_precision,
ensure_ascii,
date_unit,
iso_dates,
default_handler,
indent,
)


class FrameWriter(Writer):
_default_orient = "columns"

@property
def obj_to_write(self) -> Union[NDFrame, Mapping[IndexLabel, Any]]:
if not self.index and self.orient == "split":
obj_to_write = self.obj.to_dict(orient="split")
del obj_to_write["index"]
else:
obj_to_write = self.obj
return obj_to_write

def _format_axes(self):
"""
Try to format axes if they are datelike.
Expand All @@ -232,31 +216,6 @@ def _format_axes(self):
f"DataFrame columns must be unique for orient='{self.orient}'."
)

def _write(
self,
obj,
orient: Optional[str],
double_precision: int,
ensure_ascii: bool,
date_unit: str,
iso_dates: bool,
default_handler: Optional[Callable[[Any], JSONSerializable]],
indent: int,
):
if not self.index and orient == "split":
obj = obj.to_dict(orient="split")
del obj["index"]
return super()._write(
obj,
orient,
double_precision,
ensure_ascii,
date_unit,
iso_dates,
default_handler,
indent,
)


class JSONTableWriter(FrameWriter):
_default_orient = "records"
Expand Down Expand Up @@ -331,30 +290,9 @@ def __init__(
self.orient = "records"
self.index = index

def _write(
self,
obj,
orient,
double_precision,
ensure_ascii,
date_unit,
iso_dates,
default_handler,
indent,
):
table_obj = {"schema": self.schema, "data": obj}
serialized = super()._write(
table_obj,
orient,
double_precision,
ensure_ascii,
date_unit,
iso_dates,
default_handler,
indent,
)

return serialized
@property
def obj_to_write(self) -> Union[NDFrame, Mapping[IndexLabel, Any]]:
return {"schema": self.schema, "data": self.obj}


@deprecate_kwarg(old_arg_name="numpy", new_arg_name=None)
Expand Down