From b4eadf5f1c390cea78187b9ca8f15978ae781758 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 22 Jul 2022 18:45:06 +0100 Subject: [PATCH 01/11] ENH: Improve io/pickle Make pickle consistent with upstream pandas Only include what is in the API --- pandas-stubs/_typing.pyi | 3 +++ pandas-stubs/io/pickle.pyi | 26 +++++++++++++++++--------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 4f43cbbc8..ef4b00bed 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -67,6 +67,9 @@ class BaseBuffer(Protocol): ... class ReadBuffer(BaseBuffer, Protocol[AnyStr_cov]): ... class WriteBuffer(BaseBuffer, Protocol[AnyStr_cov]): ... +class ReadPickleBuffer(ReadBuffer[bytes], Protocol): + def readline(self) -> bytes: ... + FilePath = Union[str, PathLike[str]] Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap] diff --git a/pandas-stubs/io/pickle.pyi b/pandas-stubs/io/pickle.pyi index 232916607..67ccd80c9 100644 --- a/pandas-stubs/io/pickle.pyi +++ b/pandas-stubs/io/pickle.pyi @@ -1,16 +1,24 @@ from __future__ import annotations -from typing import Literal +from typing import Any -from pandas._typing import FilePathOrBuffer +from pandas._typing import ( + CompressionOptions, + FilePath, + ReadPickleBuffer, + StorageOptions, + WriteBuffer, +) def to_pickle( - obj, - filepath_or_buffer: FilePathOrBuffer, - compression: str | None = ..., + obj: Any, + filepath_or_buffer: FilePath | WriteBuffer[bytes], + compression: CompressionOptions = ..., protocol: int = ..., -): ... + storage_options: StorageOptions = ..., +) -> None: ... def read_pickle( - filepath_or_buffer_or_reader: FilePathOrBuffer, - compression: str | Literal["infer", "gzip", "bz2", "zip", "xz"] | None = ..., -): ... + filepath_or_buffer: FilePath | ReadPickleBuffer, + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., +) -> Any: ... 
From 2e254677854095f42535f91c60b736393e190a9f Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 22 Jul 2022 18:58:41 +0100 Subject: [PATCH 02/11] ENH: Add read_table Fully add read table and supporting classes --- pandas-stubs/_libs/lib.pyi | 9 + pandas-stubs/_typing.pyi | 12 + pandas-stubs/io/parsers/readers.pyi | 503 ++++++++++++++++++++++++++++ 3 files changed, 524 insertions(+) create mode 100644 pandas-stubs/io/parsers/readers.pyi diff --git a/pandas-stubs/_libs/lib.pyi b/pandas-stubs/_libs/lib.pyi index abe06e602..f32ff2f07 100644 --- a/pandas-stubs/_libs/lib.pyi +++ b/pandas-stubs/_libs/lib.pyi @@ -1,5 +1,14 @@ from __future__ import annotations +from enum import Enum + no_default = None +from typing import Literal + +class _NoDefault(Enum): + no_default = ... + +NoDefault = Literal[_NoDefault.no_default] + def infer_dtype(value: object, skipna: bool = ...) -> str: ... diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index ef4b00bed..8268a732f 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -16,6 +16,7 @@ from typing import ( AnyStr, Callable, Hashable, + Iterator, Literal, Mapping, Optional, @@ -70,6 +71,15 @@ class WriteBuffer(BaseBuffer, Protocol[AnyStr_cov]): ... class ReadPickleBuffer(ReadBuffer[bytes], Protocol): def readline(self) -> bytes: ... +class WriteExcelBuffer(WriteBuffer[bytes], Protocol): + def truncate(self, size: int | None = ...) -> int: ... + +class ReadCsvBuffer(ReadBuffer[AnyStr_cov], Protocol[AnyStr_cov]): + def __iter__(self) -> Iterator[AnyStr_cov]: ... + def readline(self) -> AnyStr_cov: ... + @property + def closed(self) -> bool: ... 
+ FilePath = Union[str, PathLike[str]] Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap] @@ -211,4 +221,6 @@ GroupByObjectNonScalar = Union[ ] GroupByObject = Union[Scalar, GroupByObjectNonScalar] +CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"] + __all__ = ["npt", "type_t"] diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi new file mode 100644 index 000000000..59ff9acd1 --- /dev/null +++ b/pandas-stubs/io/parsers/readers.pyi @@ -0,0 +1,503 @@ +from collections import abc +import csv +from typing import ( + Any, + Hashable, + Literal, + NamedTuple, + Sequence, + overload, +) + +from pandas.core.frame import DataFrame + +import pandas._libs.lib as lib +from pandas._typing import ( + CompressionOptions, + CSVEngine, + DtypeArg, + FilePath, + IndexLabel, + ReadCsvBuffer, + StorageOptions, +) + +class _DeprecationConfig(NamedTuple): + default_value: Any + msg: str | None + +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: 
int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: ... +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: 
str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: ... +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + 
float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: ... +@overload +def read_csv( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace: bool = ..., + low_memory=..., + memory_map: bool = ..., + float_precision: Literal["high", "legacy"] | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame | TextFileReader: ... 
+@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[True], + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: ... 
+@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int, + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> TextFileReader: ... 
+@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: Literal[False] = ..., + chunksize: None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: ... 
+@overload +def read_table( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + *, + sep: str | None | lib.NoDefault = ..., + delimiter: str | None | lib.NoDefault = ..., + header: int | Sequence[int] | None | Literal["infer"] = ..., + names: Sequence[Hashable] | None | lib.NoDefault = ..., + index_col: IndexLabel | Literal[False] | None = ..., + usecols=..., + squeeze: bool | None = ..., + prefix: str | lib.NoDefault = ..., + mangle_dupe_cols: bool = ..., + dtype: DtypeArg | None = ..., + engine: CSVEngine | None = ..., + converters=..., + true_values=..., + false_values=..., + skipinitialspace: bool = ..., + skiprows=..., + skipfooter: int = ..., + nrows: int | None = ..., + na_values=..., + keep_default_na: bool = ..., + na_filter: bool = ..., + verbose: bool = ..., + skip_blank_lines: bool = ..., + parse_dates=..., + infer_datetime_format: bool = ..., + keep_date_col: bool = ..., + date_parser=..., + dayfirst: bool = ..., + cache_dates: bool = ..., + iterator: bool = ..., + chunksize: int | None = ..., + compression: CompressionOptions = ..., + thousands: str | None = ..., + decimal: str = ..., + lineterminator: str | None = ..., + quotechar: str = ..., + quoting: int = ..., + doublequote: bool = ..., + escapechar: str | None = ..., + comment: str | None = ..., + encoding: str | None = ..., + encoding_errors: str | None = ..., + dialect: str | csv.Dialect | None = ..., + error_bad_lines: bool | None = ..., + warn_bad_lines: bool | None = ..., + on_bad_lines=..., + delim_whitespace=..., + low_memory=..., + memory_map: bool = ..., + float_precision: str | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame | TextFileReader: ... +def read_fwf( + filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], + colspecs: Sequence[tuple[int, int]] | str | None = ..., + widths: Sequence[int] | None = ..., + infer_nrows: int = ..., + **kwds, +) -> DataFrame | TextFileReader: ... 
+ +class TextFileReader(abc.Iterator): + engine = ... # Incomplete + orig_options = ... # Incomplete + chunksize = ... # Incomplete + nrows = ... # Incomplete + squeeze = ... # Incomplete + handles = ... # Incomplete + def __init__( + self, + f: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str] | list, + engine: CSVEngine | None = ..., + **kwds, + ) -> None: ... + def close(self) -> None: ... + def __next__(self) -> DataFrame: ... + def read(self, nrows: int | None = ...) -> DataFrame: ... + def get_chunk(self, size: int | None = ...) -> DataFrame: ... + def __enter__(self) -> TextFileReader: ... + def __exit__(self, exc_type, exc_value, traceback) -> None: ... From 65120b7f54e8567c7962d919552d4a882d9f8e73 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Fri, 22 Jul 2022 23:17:34 +0100 Subject: [PATCH 03/11] ENH: Sync to/read gbq --- pandas-stubs/io/gbq.pyi | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas-stubs/io/gbq.pyi b/pandas-stubs/io/gbq.pyi index d09a3c958..6ffda6de5 100644 --- a/pandas-stubs/io/gbq.pyi +++ b/pandas-stubs/io/gbq.pyi @@ -14,10 +14,10 @@ def read_gbq( dialect: str | None = ..., location: str | None = ..., configuration: dict[str, Any] | None = ..., - credentials=..., + # Google type, not available + credentials: Any = ..., use_bqstorage_api: bool | None = ..., - private_key=..., - verbose=..., + max_results: int | None = ..., progress_bar_type: str | None = ..., ) -> DataFrame: ... def to_gbq( @@ -31,7 +31,6 @@ def to_gbq( table_schema: list[dict[str, str]] | None = ..., location: str | None = ..., progress_bar: bool = ..., - credentials=..., - verbose=..., - private_key=..., + # Google type, not available + credentials: Any = ..., ) -> None: ... 
From 3f268254e781b95079e1f1c87d35651f57e817a5 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 01:07:50 +0100 Subject: [PATCH 04/11] ENH: Add and update XML io interface --- pandas-stubs/core/frame.pyi | 24 ++++++++++++++++++++++++ pandas-stubs/io/pickle.pyi | 2 +- pandas-stubs/io/xml.pyi | 7 +++---- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 54ddd319e..fa08bc9a6 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -48,8 +48,10 @@ from pandas._typing import ( Axes, Axis, AxisType, + CompressionOptions, Dtype, DtypeNp, + FilePath, FilePathOrBuffer, FilePathOrBytesBuffer, GroupByObjectNonScalar, @@ -61,12 +63,15 @@ from pandas._typing import ( Label, Level, MaskType, + ReadBuffer, Renamer, Scalar, ScalarT, SeriesAxisType, + StorageOptions, StrLike, T as TType, + WriteBuffer, np_ndarray_bool, np_ndarray_str, num, @@ -332,6 +337,25 @@ class DataFrame(NDFrame, OpsMixin): render_links: _bool = ..., encoding: _str | None = ..., ) -> _str: ... + def to_xml( + self, + path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = ..., + index: bool = ..., + root_name: str | None = ..., + row_name: str | None = ..., + na_rep: str | None = ..., + attr_cols: list[str] | None = ..., + elem_cols: list[str] | None = ..., + namespaces: dict[str | None, str] | None = ..., + prefix: str | None = ..., + encoding: str = ..., + xml_declaration: bool | None = ..., + pretty_print: bool | None = ..., + parser: str | None = ..., + stylesheet: FilePath | ReadBuffer[str] | ReadBuffer[bytes] | None = ..., + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., + ) -> str | None: ... def info( self, verbose=..., buf=..., max_cols=..., memory_usage=..., null_counts=... ) -> None: ... 
diff --git a/pandas-stubs/io/pickle.pyi b/pandas-stubs/io/pickle.pyi index 67ccd80c9..3462fe871 100644 --- a/pandas-stubs/io/pickle.pyi +++ b/pandas-stubs/io/pickle.pyi @@ -11,7 +11,7 @@ from pandas._typing import ( ) def to_pickle( - obj: Any, + obj: object, filepath_or_buffer: FilePath | WriteBuffer[bytes], compression: CompressionOptions = ..., protocol: int = ..., diff --git a/pandas-stubs/io/xml.pyi b/pandas-stubs/io/xml.pyi index a2c6ff9ae..c200ec4e7 100644 --- a/pandas-stubs/io/xml.pyi +++ b/pandas-stubs/io/xml.pyi @@ -25,11 +25,10 @@ def read_xml( dtype: DtypeArg | None = ..., converters: ConvertersArg | None = ..., parse_dates: ParseDatesArg | None = ..., - # encoding can not be None for lxml and StringIO input encoding: str | None = ..., - parser: XMLParsers | None = ..., + parser: XMLParsers = ..., stylesheet: FilePath | ReadBuffer[bytes] | ReadBuffer[str] | None = ..., iterparse: dict[str, list[str]] | None = ..., - compression: CompressionOptions | None = ..., - storage_options: StorageOptions | None = ..., + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., ) -> DataFrame: ... From 79da23b0015ac957d23bfa6bccd9cc8a6506d62e Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 10:07:46 +0100 Subject: [PATCH 05/11] ENH: Improve and clean stata io functions --- pandas-stubs/core/frame.pyi | 18 +++++--- pandas-stubs/io/stata.pyi | 92 ++++++++++++++----------------------- 2 files changed, 46 insertions(+), 64 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index fa08bc9a6..53fc75f0b 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -245,15 +245,19 @@ class DataFrame(NDFrame, OpsMixin): ) -> np.recarray: ... 
def to_stata( self, - path: FilePathOrBuffer, - convert_dates: dict | None = ..., + path: FilePath | WriteBuffer[bytes], + convert_dates: dict[Hashable, str] | None = ..., write_index: _bool = ..., - byteorder: _str | Literal["<", ">", "little", "big"] | None = ..., - time_stamp=..., + byteorder: Literal["<", ">", "little", "big"] | None = ..., + time_stamp: _dt.datetime | None = ..., data_label: _str | None = ..., - variable_labels: dict | None = ..., - version: int = ..., - convert_strl: list[_str] | None = ..., + variable_labels: dict[Hashable, str] | None = ..., + version: int | None = ..., + convert_strl: list[HashableT] | None = ..., + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., + *, + value_labels: dict[Hashable, dict[float | int, str]] | None = ..., ) -> None: ... def to_feather(self, path: FilePathOrBuffer, **kwargs) -> None: ... @overload diff --git a/pandas-stubs/io/stata.pyi b/pandas-stubs/io/stata.pyi index 92837a96e..9e1837b54 100644 --- a/pandas-stubs/io/stata.pyi +++ b/pandas-stubs/io/stata.pyi @@ -8,6 +8,7 @@ from typing import ( Hashable, Literal, Sequence, + overload, ) import numpy as np @@ -24,6 +25,7 @@ from pandas._typing import ( WriteBuffer, ) +@overload def read_stata( path: FilePath | ReadBuffer[bytes], convert_dates: bool = ..., @@ -34,57 +36,46 @@ def read_stata( columns: list[HashableT] | None = ..., order_categoricals: bool = ..., chunksize: int | None = ..., - iterator: bool = ..., + iterator: Literal[False] = ..., compression: CompressionOptions = ..., storage_options: StorageOptions = ..., -) -> DataFrame | StataReader: ... - -stata_epoch: datetime.datetime = ... -excessive_string_length_error: str +) -> DataFrame: ... 
+@overload +def read_stata( + path: FilePath | ReadBuffer[bytes], + convert_dates: bool = ..., + convert_categoricals: bool = ..., + index_col: str | None = ..., + convert_missing: bool = ..., + preserve_dtypes: bool = ..., + columns: list[HashableT] | None = ..., + order_categoricals: bool = ..., + chunksize: int | None = ..., + *, + iterator: Literal[True], + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., +) -> StataReader: ... +@overload +def read_stata( + path: FilePath | ReadBuffer[bytes], + convert_dates: bool, + convert_categoricals: bool, + index_col: str | None, + convert_missing: bool, + preserve_dtypes: bool, + columns: list[HashableT] | None, + order_categoricals: bool, + chunksize: int | None, + iterator: Literal[True], + compression: CompressionOptions = ..., + storage_options: StorageOptions = ..., +) -> StataReader: ... class PossiblePrecisionLoss(Warning): ... - -precision_loss_doc: str - class ValueLabelTypeMismatch(Warning): ... - -value_label_mismatch_doc: str - class InvalidColumnName(Warning): ... -invalid_name_doc: str - -class StataValueLabel: - labname: Hashable = ... - value_labels: list[tuple[int | float, str]] = ... - text_len: int = ... - off: npt.NDArray[np.int32] = ... - val: npt.NDArray[np.int32] = ... - txt: list[bytes] = ... - n: int = ... - len: int = ... - def __init__( - self, catarray: pd.Series, encoding: Literal["latin-1", "utf-8"] = ... - ) -> None: ... - def generate_value_label(self, byteorder: str) -> bytes: ... - -class StataMissingValue: - MISSING_VALUES: dict[float, str] = ... - bases: tuple[int, int, int] = ... - float32_base: bytes = ... - increment: int = ... - int_value: int = ... - float64_base: bytes = ... - BASE_MISSING_VALUES: dict[str, int] = ... - def __init__(self, value: int | float) -> None: ... - def __eq__(self, other: object) -> bool: ... - @property - def string(self) -> str: ... - @property - def value(self) -> int | float: ... 
- @classmethod - def get_base_missing_value(cls, dtype): ... - class StataParser: DTYPE_MAP: dict[int, np.dtype] = ... DTYPE_MAP_XML: dict[int, np.dtype] = ... @@ -162,19 +153,6 @@ class StataWriter(StataParser): ) -> None: ... def write_file(self) -> None: ... -class StataStrLWriter: - df: DataFrame = ... - columns: Sequence[str] = ... - def __init__( - self, - df: DataFrame, - columns: Sequence[str], - version: int = ..., - byteorder: str | None = ..., - ) -> None: ... - def generate_table(self) -> tuple[dict[str, tuple[int, int]], DataFrame]: ... - def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes: ... - class StataWriter117(StataWriter): def __init__( self, From 8a78a322a25121981175f180ab0536b99727a68a Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 10:17:09 +0100 Subject: [PATCH 06/11] ENH: Improve and clean orc io functions --- pandas-stubs/io/orc.pyi | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/pandas-stubs/io/orc.pyi b/pandas-stubs/io/orc.pyi index d64b396d3..532e4b275 100644 --- a/pandas-stubs/io/orc.pyi +++ b/pandas-stubs/io/orc.pyi @@ -1,9 +1,28 @@ from __future__ import annotations -from pandas.core.frame import DataFrame +from typing import ( + Any, + Literal, +) -from pandas._typing import FilePathOrBuffer +from pandas import DataFrame + +from pandas._typing import ( + FilePath, + ReadBuffer, + WriteBuffer, +) def read_orc( - path: FilePathOrBuffer, columns: list[str] | None = ..., **kwargs + path: FilePath | ReadBuffer[bytes], + columns: list[str] | None = ..., + **kwargs: Any, ) -> DataFrame: ... +def to_orc( + df: DataFrame, + path: FilePath | WriteBuffer[bytes] | None = ..., + *, + engine: Literal["pyarrow"] = ..., + index: bool | None = ..., + engine_kwargs: dict[str, Any] | None = ..., +) -> bytes | None: ... 
From f60035ff825b911c1661a4f3a0e9ca6ca3c5e116 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 10:42:33 +0100 Subject: [PATCH 07/11] ENH: Improve and clean sql io functions --- pandas-stubs/core/frame.pyi | 12 -- pandas-stubs/core/generic.pyi | 8 +- pandas-stubs/io/sql.pyi | 218 ++++++++++------------------------ 3 files changed, 70 insertions(+), 168 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 53fc75f0b..ff3edc318 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -2144,18 +2144,6 @@ class DataFrame(NDFrame, OpsMixin): compression: _str | Literal["infer", "gzip", "bz2", "zip", "xz"] = ..., protocol: int = ..., ) -> None: ... - def to_sql( - self, - name: _str, - con, - schema: _str | None = ..., - if_exists: _str = ..., - index: _bool = ..., - index_label: _str | Sequence[_str] | None = ..., - chunksize: int | None = ..., - dtype: dict | Scalar | None = ..., - method: _str | Callable | None = ..., - ) -> None: ... @overload def to_string( self, diff --git a/pandas-stubs/core/generic.pyi b/pandas-stubs/core/generic.pyi index 6fcf3da0a..6f2060c90 100644 --- a/pandas-stubs/core/generic.pyi +++ b/pandas-stubs/core/generic.pyi @@ -21,12 +21,12 @@ from pandas._typing import ( ArrayLike, Axis, Dtype, + DtypeArg, FilePathOrBuffer, FrameOrSeries, FrameOrSeriesUnion, IgnoreRaise, Level, - Scalar, SeriesAxisType, T, ) @@ -152,13 +152,13 @@ class NDFrame(PandasObject, indexing.IndexingMixin): def to_sql( self, name: _str, - con, + con: str | Any, schema: _str | None = ..., if_exists: _str = ..., index: _bool = ..., - index_label: _str | Sequence[_str] | None = ..., + index_label: Hashable | Sequence[Hashable] | None = ..., chunksize: int | None = ..., - dtype: dict | Scalar | None = ..., + dtype: DtypeArg | None = ..., method: _str | Callable | None = ..., ) -> None: ... 
def to_pickle( diff --git a/pandas-stubs/io/sql.pyi b/pandas-stubs/io/sql.pyi index b588d6548..33fe52376 100644 --- a/pandas-stubs/io/sql.pyi +++ b/pandas-stubs/io/sql.pyi @@ -2,37 +2,68 @@ from __future__ import annotations from typing import ( Any, + Callable, + Hashable, + Iterator, Mapping, Sequence, + overload, ) -from pandas.core.base import PandasObject from pandas.core.frame import DataFrame +from pandas._typing import DtypeArg + class SQLAlchemyRequired(ImportError): ... class DatabaseError(IOError): ... -def execute(sql, con, cur=..., params=...): ... +@overload def read_sql_table( table_name: str, - con, + con: str | Any, schema: str | None = ..., index_col: str | Sequence[str] | None = ..., coerce_float: bool = ..., parse_dates: Sequence[str] | Mapping[str, str] | None = ..., columns: Sequence[str] | None = ..., - chunksize: int | None = ..., + chunksize: None = ..., ) -> DataFrame: ... -def read_sql_query( - sql, - con, +@overload +def read_sql_table( + table_name: str, + con: str | Any, schema: str | None = ..., index_col: str | Sequence[str] | None = ..., coerce_float: bool = ..., + parse_dates: Sequence[str] | Mapping[str, str] | None = ..., + columns: Sequence[str] | None = ..., + *, + chunksize: int, +) -> Iterator[DataFrame]: ... +@overload +def read_sql_query( + sql: str | Any, + con: str | Any, + index_col: str | Sequence[str] | None = ..., + coerce_float: bool = ..., params=..., parse_dates: Sequence[str] | Mapping[str, str] | None = ..., - chunksize: int | None = ..., + chunksize: None = ..., + dtype: DtypeArg | None = ..., ) -> DataFrame: ... +@overload +def read_sql_query( + sql: str | Any, + con: str | Any, + index_col: str | Sequence[str] | None = ..., + coerce_float: bool = ..., + params=..., + parse_dates: Sequence[str] | Mapping[str, str] | None = ..., + *, + chunksize: int, + dtype: DtypeArg | None = ..., +) -> Iterator[DataFrame]: ... 
+@overload def read_sql( sql: str | Any, con: str | Any = ..., @@ -44,151 +75,34 @@ def read_sql( | Mapping[str, Mapping[str, Any]] | None = ..., columns: Sequence[str] = ..., - chunksize: int = ..., + chunksize: None = ..., ) -> DataFrame: ... +@overload +def read_sql( + sql: str | Any, + con: str | Any = ..., + index_col: str | Sequence[str] | None = ..., + coerce_float: bool = ..., + params: Sequence[str] | tuple[str, ...] | Mapping[str, str] | None = ..., + parse_dates: Sequence[str] + | Mapping[str, str] + | Mapping[str, Mapping[str, Any]] + | None = ..., + columns: Sequence[str] = ..., + *, + chunksize: int, +) -> Iterator[DataFrame]: ... def to_sql( - frame, - name, - con, - schema=..., + frame: DataFrame, + name: str, + con: Any, + schema: str | None = ..., if_exists: str = ..., index: bool = ..., - index_label=..., - chunksize=..., - dtype=..., - method=..., -) -> None: ... -def has_table(table_name, con, schema=...): ... - -table_exists = has_table - -def pandasSQL_builder(con, schema=..., meta=..., is_cursor: bool = ...): ... - -class SQLTable(PandasObject): - name = ... - pd_sql = ... - prefix = ... - frame = ... - index = ... - schema = ... - if_exists = ... - keys = ... - dtype = ... - table = ... - def __init__( - self, - name, - pandas_sql_engine, - frame=..., - index: bool = ..., - if_exists: str = ..., - prefix: str = ..., - index_label=..., - schema=..., - keys=..., - dtype=..., - ) -> None: ... - def exists(self): ... - def sql_schema(self): ... - def create(self) -> None: ... - def insert_data(self): ... - def insert(self, chunksize=..., method=...) -> None: ... - def read( - self, coerce_float: bool = ..., parse_dates=..., columns=..., chunksize=... - ): ... - -class PandasSQL(PandasObject): - def read_sql(self, *args, **kwargs) -> None: ... - def to_sql( - self, - frame, - name, - if_exists: str = ..., - index: bool = ..., - index_label=..., - schema=..., - chunksize=..., - dtype=..., - method=..., - ) -> None: ... 
- -class SQLDatabase(PandasSQL): - connectable = ... - meta = ... - def __init__(self, engine, schema=..., meta=...) -> None: ... - def run_transaction(self) -> None: ... - def execute(self, *args, **kwargs): ... - def read_table( - self, - table_name, - index_col=..., - coerce_float: bool = ..., - parse_dates=..., - columns=..., - schema=..., - chunksize=..., - ): ... - def read_query( - self, - sql, - index_col=..., - coerce_float: bool = ..., - parse_dates=..., - params=..., - chunksize=..., - ): ... - def to_sql( - self, - frame, - name, - if_exists: str = ..., - index: bool = ..., - index_label=..., - schema=..., - chunksize=..., - dtype=..., - method=..., - ) -> None: ... - @property - def tables(self): ... - def has_table(self, name, schema=...): ... - def get_table(self, table_name, schema=...): ... - def drop_table(self, table_name, schema=...) -> None: ... - -class SQLiteTable(SQLTable): - def __init__(self, *args, **kwargs): ... - def sql_schema(self): ... - def insert_statement(self): ... - -class SQLiteDatabase(PandasSQL): - is_cursor = ... - con = ... - def __init__(self, con, is_cursor: bool = ...) -> None: ... - def run_transaction(self) -> None: ... - def execute(self, *args, **kwargs): ... - def read_query( - self, - sql, - index_col=..., - coerce_float: bool = ..., - params=..., - parse_dates=..., - chunksize=..., - ): ... - def to_sql( - self, - frame, - name, - if_exists: str = ..., - index: bool = ..., - index_label=..., - schema=..., - chunksize=..., - dtype=..., - method=..., - ) -> None: ... - def has_table(self, name, schema=...): ... - def get_table(self, table_name, schema=...) -> None: ... - def drop_table(self, name, schema=...) -> None: ... - -def get_schema(frame, name, keys=..., con=..., dtype=...): ... + index_label: Hashable | Sequence[Hashable] | None = ..., + chunksize: int | None = ..., + dtype: DtypeArg | None = ..., + method: str | Callable | None = ..., + engine: str = ..., + **engine_kwargs, +) -> int | None: ... 
From d089b8b234ad1c50aaee85dc27a1f66cfef31421 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 11:04:22 +0100 Subject: [PATCH 08/11] CLN: Remove non-public class --- pandas-stubs/io/parsers/readers.pyi | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas-stubs/io/parsers/readers.pyi b/pandas-stubs/io/parsers/readers.pyi index 59ff9acd1..fbff15998 100644 --- a/pandas-stubs/io/parsers/readers.pyi +++ b/pandas-stubs/io/parsers/readers.pyi @@ -1,10 +1,8 @@ from collections import abc import csv from typing import ( - Any, Hashable, Literal, - NamedTuple, Sequence, overload, ) @@ -22,10 +20,6 @@ from pandas._typing import ( StorageOptions, ) -class _DeprecationConfig(NamedTuple): - default_value: Any - msg: str | None - @overload def read_csv( filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], From d1b3b1c84b17bd1bf7906e4f72c349a4095dd061 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 25 Jul 2022 13:50:14 +0100 Subject: [PATCH 09/11] ENH: Verify clipboard io functions --- pandas-stubs/io/clipboards.pyi | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas-stubs/io/clipboards.pyi b/pandas-stubs/io/clipboards.pyi index 4cf04c28f..ad26ce8f9 100644 --- a/pandas-stubs/io/clipboards.pyi +++ b/pandas-stubs/io/clipboards.pyi @@ -1,6 +1,10 @@ from __future__ import annotations +from typing import Any + from pandas.core.frame import DataFrame -def read_clipboard(sep: str = ..., **kwargs) -> DataFrame: ... -def to_clipboard(obj, excel: bool = ..., sep=..., **kwargs) -> None: ... +def read_clipboard(sep: str = ..., **kwargs: Any) -> DataFrame: ... +def to_clipboard( + obj, excel: bool = ..., sep: str | None = ..., **kwargs: Any +) -> None: ... 
From 18cb4821ca68451da7636c241dae049d6c5960a0 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 26 Jul 2022 08:33:06 +0100 Subject: [PATCH 10/11] ENH: Verify json functions --- pandas-stubs/io/json/_json.pyi | 210 +++++++++------------------------ 1 file changed, 56 insertions(+), 154 deletions(-) diff --git a/pandas-stubs/io/json/_json.pyi b/pandas-stubs/io/json/_json.pyi index 6d50275a5..d6fd799d9 100644 --- a/pandas-stubs/io/json/_json.pyi +++ b/pandas-stubs/io/json/_json.pyi @@ -1,6 +1,5 @@ from __future__ import annotations -from collections import abc from typing import ( Any, Callable, @@ -11,196 +10,99 @@ from typing import ( from pandas.core.frame import DataFrame from pandas.core.series import Series +from pandas._libs.json import ( + dumps as dumps, + loads as loads, +) from pandas._typing import ( - FilePathOrBuffer, + CompressionOptions, + DtypeArg, + FilePath, JSONSerializable, + ReadBuffer, + StorageOptions, + WriteBuffer, ) -loads = ... -dumps = ... -TABLE_SCHEMA_VERSION: str = ... - def to_json( - path_or_buf, - obj, + path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes], + obj: DataFrame | Series, orient: str | None = ..., - date_format: str = ..., + date_format: Literal["epoch", "iso"] | None = ..., double_precision: int = ..., force_ascii: bool = ..., date_unit: str = ..., default_handler: Callable[[Any], JSONSerializable] | None = ..., lines: bool = ..., - compression: str | None = ..., + compression: CompressionOptions = ..., index: bool = ..., indent: int = ..., -): ... - -class Writer: - obj = ... - orient = ... - date_format = ... - double_precision = ... - ensure_ascii = ... - date_unit = ... - default_handler = ... - index = ... - indent = ... - is_copy = ... - def __init__( - self, - obj, - orient: str | None, - date_format: str, - double_precision: int, - ensure_ascii: bool, - date_unit: str, - index: bool, - default_handler: Callable[[Any], JSONSerializable] | None = ..., - indent: int = ..., - ) -> None: ... 
- def write(self): ... - -class SeriesWriter(Writer): ... -class FrameWriter(Writer): ... - -class JSONTableWriter(FrameWriter): - schema = ... - obj = ... - date_format = ... - orient = ... - index = ... - def __init__( - self, - obj, - orient: str | None, - date_format: str, - double_precision: int, - ensure_ascii: bool, - date_unit: str, - index: bool, - default_handler: Callable[[Any], JSONSerializable] | None = ..., - indent: int = ..., - ): ... - + storage_options: StorageOptions = ..., +) -> str | None: ... @overload def read_json( - path: FilePathOrBuffer, - orient: str | None = ..., - dtype=..., - convert_axes=..., + path: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + orient: str | None, + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes: bool | None = ..., convert_dates: bool = ..., keep_default_dates: bool = ..., - numpy: bool = ..., + # Removed since deprecated + # numpy: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., - encoding: str | None = ..., + encoding: Literal[ + "strict", "ignore", "replace", "backslashreplace", "surrogateescape" + ] = ..., lines: bool = ..., chunksize: int | None = ..., - compression: str | Literal["infer", "gzip", "bz2", "zip", "xz"] | None = ..., - *, - typ: Literal["series"], + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., ) -> Series: ... 
@overload def read_json( - path: FilePathOrBuffer, + path: FilePath | ReadBuffer[str] | ReadBuffer[bytes], + *, orient: str | None = ..., - dtype=..., - convert_axes=..., + typ: Literal["series"], + dtype: DtypeArg | None = ..., + convert_axes: bool | None = ..., convert_dates: bool = ..., keep_default_dates: bool = ..., - numpy: bool = ..., + # Removed since deprecated + # numpy: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., - encoding: str | None = ..., + encoding: Literal[ + "strict", "ignore", "replace", "backslashreplace", "surrogateescape" + ] = ..., lines: bool = ..., chunksize: int | None = ..., - compression: str | Literal["infer", "gzip", "bz2", "zip", "xz"] | None = ..., - *, - typ: Literal["frame"], -) -> DataFrame: ... + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> Series: ... @overload def read_json( - path: FilePathOrBuffer, + path: FilePath | ReadBuffer[str] | ReadBuffer[bytes], orient: str | None = ..., - typ: str | None = ..., - dtype=..., - convert_axes=..., + typ: Literal["frame"] = ..., + dtype: DtypeArg | None = ..., + convert_axes: bool | None = ..., convert_dates: bool = ..., keep_default_dates: bool = ..., - numpy: bool = ..., + # Removed since deprecated + # numpy: bool = ..., precise_float: bool = ..., date_unit: str | None = ..., - encoding: str | None = ..., + encoding: Literal[ + "strict", "ignore", "replace", "backslashreplace", "surrogateescape" + ] = ..., lines: bool = ..., chunksize: int | None = ..., - compression: str | Literal["infer", "gzip", "bz2", "zip", "xz"] | None = ..., -) -> Series | DataFrame: ... - -class JsonReader(abc.Iterator): - path_or_buf = ... - orient = ... - typ = ... - dtype = ... - convert_axes = ... - convert_dates = ... - keep_default_dates = ... - numpy = ... - precise_float = ... - date_unit = ... - encoding = ... - compression = ... - lines = ... - chunksize = ... - nrows_seen: int = ... - should_close: bool = ... 
- data = ... - def __init__( - self, - filepath_or_buffer, - orient, - typ, - dtype, - convert_axes, - convert_dates, - keep_default_dates, - numpy, - precise_float, - date_unit, - encoding, - lines, - chunksize, - compression, - ) -> None: ... - def read(self): ... - def close(self) -> None: ... - def __next__(self): ... - -class Parser: - json = ... - orient = ... - dtype = ... - min_stamp = ... - numpy = ... - precise_float = ... - convert_axes = ... - convert_dates = ... - date_unit = ... - keep_default_dates = ... - obj = ... - def __init__( - self, - json, - orient, - dtype=..., - convert_axes: bool = ..., - convert_dates: bool = ..., - keep_default_dates: bool = ..., - numpy: bool = ..., - precise_float: bool = ..., - date_unit=..., - ) -> None: ... - def check_keys_split(self, decoded) -> None: ... - def parse(self): ... - -class SeriesParser(Parser): ... -class FrameParser(Parser): ... + compression: CompressionOptions = ..., + nrows: int | None = ..., + storage_options: StorageOptions = ..., +) -> DataFrame: ... 
From 698190f73ac6afb9d0567795cd477c80324952a5 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 26 Jul 2022 13:28:33 +0100 Subject: [PATCH 11/11] ENH: Verify HDF functions --- pandas-stubs/io/pytables.pyi | 443 +++++++++++++---------------------- pyproject.toml | 1 + 2 files changed, 159 insertions(+), 285 deletions(-) diff --git a/pandas-stubs/io/pytables.pyi b/pandas-stubs/io/pytables.pyi index 28b229798..681884781 100644 --- a/pandas-stubs/io/pytables.pyi +++ b/pandas-stubs/io/pytables.pyi @@ -3,47 +3,41 @@ from __future__ import annotations from typing import ( Any, Hashable, + Literal, ) +from _typeshed import Incomplete import numpy as np -from pandas.core.computation.pytables import PyTablesExpr -from pandas.core.frame import DataFrame +from pandas import ( + DataFrame, + DatetimeIndex, + Index, + MultiIndex, + Series, +) from pandas.core.generic import NDFrame -from pandas.core.indexes.base import Index -from pandas.core.indexes.multi import MultiIndex -from pandas.core.series import Series +from tables import ( + Col, + Node, +) from pandas._typing import ( + AnyArrayLike, ArrayLike, + DtypeArg, FilePathOrBuffer, + npt, ) -from pandas.core.dtypes.generic import ABCExtensionArray - # from tables import Col, File, Node # pytables may not be installed so create them as dummy classes -class Col: ... -class Node: ... - -Term = PyTablesExpr class PossibleDataLossError(Exception): ... class ClosedFileError(Exception): ... class IncompatibilityWarning(Warning): ... - -incompatibility_doc: str - class AttributeConflictWarning(Warning): ... - -attribute_conflict_doc: str - class DuplicateWarning(Warning): ... 
-duplicate_doc: str -performance_doc: str -dropna_doc: str -format_doc: str - def to_hdf( path_or_buf: FilePathOrBuffer, key: str, @@ -55,9 +49,9 @@ def to_hdf( format: str | None = ..., index: bool = ..., min_itemsize: int | dict[str, int] | None = ..., - nan_rep=..., + nan_rep: str | None = ..., dropna: bool | None = ..., - data_columns: list[str] | None = ..., + data_columns: Literal[True] | list[str] | None = ..., errors: str = ..., encoding: str = ..., ): ... @@ -204,78 +198,52 @@ class HDFStore: ): ... def info(self) -> str: ... -class TableIterator: - chunksize: int | None - store: HDFStore - s: GenericFixed | Table - func = ... - where = ... - nrows = ... - start = ... - stop = ... - coordinates = ... - auto_close = ... - def __init__( - self, - store: HDFStore, - s: GenericFixed | Table, - func, - where, - nrows, - start=..., - stop=..., - iterator: bool = ..., - chunksize: int | None = ..., - auto_close: bool = ..., - ) -> None: ... - def __iter__(self): ... - def close(self) -> None: ... - def get_result(self, coordinates: bool = ...): ... - class IndexCol: - is_an_indexable: bool = ... - is_data_indexable: bool = ... + is_an_indexable: bool + is_data_indexable: bool name: str cname: str - values = ... - kind = ... - typ = ... - axis = ... - pos = ... - freq = ... - tz = ... - index_name = ... - ordered = ... - table = ... - meta = ... - metadata = ... + values = ... # Incomplete + kind = ... # Incomplete + typ = ... # Incomplete + axis = ... # Incomplete + pos = ... # Incomplete + freq = ... # Incomplete + tz = ... # Incomplete + index_name = ... # Incomplete + ordered = ... # Incomplete + table = ... # Incomplete + meta = ... # Incomplete + metadata = ... 
# Incomplete def __init__( self, name: str, - values=..., - kind=..., - typ=..., + values: Incomplete | None = ..., + kind: Incomplete | None = ..., + typ: Incomplete | None = ..., cname: str | None = ..., - axis=..., - pos=..., - freq=..., - tz=..., - index_name=..., - ordered=..., - table=..., - meta=..., - metadata=..., + axis: Incomplete | None = ..., + pos: Incomplete | None = ..., + freq: Incomplete | None = ..., + tz: Incomplete | None = ..., + index_name: Incomplete | None = ..., + ordered: Incomplete | None = ..., + table: Incomplete | None = ..., + meta: Incomplete | None = ..., + metadata: Incomplete | None = ..., ) -> None: ... @property def itemsize(self) -> int: ... @property def kind_attr(self) -> str: ... - def set_pos(self, pos: int): ... - def __eq__(self, other) -> bool: ... + def set_pos(self, pos: int) -> None: ... + def __eq__(self, other: object) -> bool: ... def __ne__(self, other) -> bool: ... @property def is_indexed(self) -> bool: ... - def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): ... + def convert( + self, values: np.ndarray, nan_rep, encoding: str, errors: str + ) -> tuple[np.ndarray, np.ndarray] | tuple[DatetimeIndex, DatetimeIndex]: ... def take_data(self): ... @property def attrs(self): ... @@ -286,51 +254,45 @@ class IndexCol: @property def cvalues(self): ... def __iter__(self): ... - def maybe_set_size(self, min_itemsize=...) -> None: ... + def maybe_set_size(self, min_itemsize: Incomplete | None = ...) -> None: ... def validate_names(self) -> None: ... - def validate_and_set(self, handler: AppendableTable, append: bool): ... - def validate_col(self, itemsize=...): ... - def validate_attr(self, append: bool): ... + def validate_and_set(self, handler: AppendableTable, append: bool) -> None: ... + def validate_col(self, itemsize: Incomplete | None = ...): ... + def validate_attr(self, append: bool) -> None: ... def update_info(self, info) -> None: ... def set_info(self, info) -> None: ... 
def set_attr(self) -> None: ... - def validate_metadata(self, handler: AppendableTable): ... - def write_metadata(self, handler: AppendableTable): ... - -class GenericIndexCol(IndexCol): - @property - def is_indexed(self) -> bool: ... - def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): ... - def set_attr(self) -> None: ... + def validate_metadata(self, handler: AppendableTable) -> None: ... + def write_metadata(self, handler: AppendableTable) -> None: ... class DataCol(IndexCol): - is_an_indexable: bool = ... - is_data_indexable: bool = ... - dtype = ... - data = ... + is_an_indexable: bool + is_data_indexable: bool + dtype = ... # Incomplete + data = ... # Incomplete def __init__( self, name: str, - values=..., - kind=..., - typ=..., - cname=..., - pos=..., - tz=..., - ordered=..., - table=..., - meta=..., - metadata=..., - dtype=..., - data=..., + values: Incomplete | None = ..., + kind: Incomplete | None = ..., + typ: Incomplete | None = ..., + cname: Incomplete | None = ..., + pos: Incomplete | None = ..., + tz: Incomplete | None = ..., + ordered: Incomplete | None = ..., + table: Incomplete | None = ..., + meta: Incomplete | None = ..., + metadata: Incomplete | None = ..., + dtype: DtypeArg | None = ..., + data: Incomplete | None = ..., ) -> None: ... @property def dtype_attr(self) -> str: ... @property def meta_attr(self) -> str: ... - def __eq__(self, other) -> bool: ... - kind = ... - def set_data(self, data: np.ndarray | ABCExtensionArray): ... + def __eq__(self, other: object) -> bool: ... + kind = ... # Incomplete + def set_data(self, data: ArrayLike) -> None: ... def take_data(self): ... @classmethod def get_atom_string(cls, shape, itemsize): ... @@ -350,30 +312,16 @@ class DataCol(IndexCol): def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str): ... def set_attr(self) -> None: ... -class DataIndexableCol(DataCol): - is_data_indexable: bool = ... - def validate_names(self) -> None: ... 
- @classmethod - def get_atom_string(cls, shape, itemsize): ... - @classmethod - def get_atom_data(cls, shape, kind: str) -> Col: ... - @classmethod - def get_atom_datetime64(cls, shape): ... - @classmethod - def get_atom_timedelta64(cls, shape): ... - -class GenericDataIndexableCol(DataIndexableCol): ... - class Fixed: pandas_kind: str - format_type: str = ... + format_type: str obj_type: type[DataFrame | Series] ndim: int encoding: str parent: HDFStore group: Node errors: str - is_table: bool = ... + is_table: bool def __init__( self, parent: HDFStore, group: Node, encoding: str = ..., errors: str = ... ) -> None: ... @@ -384,7 +332,7 @@ class Fixed: @property def pandas_type(self): ... def set_object_info(self) -> None: ... - def copy(self): ... + def copy(self) -> Fixed: ... @property def shape(self): ... @property @@ -399,96 +347,76 @@ class Fixed: def is_exists(self) -> bool: ... @property def nrows(self): ... - def validate(self, other): ... - def validate_version(self, where=...): ... - def infer_axes(self): ... + def validate(self, other) -> Literal[True] | None: ... + def validate_version(self, where: Incomplete | None = ...) -> None: ... + def infer_axes(self) -> bool: ... def read( self, - where=..., - columns=..., + where: Incomplete | None = ..., + columns: Incomplete | None = ..., start: int | None = ..., stop: int | None = ..., ): ... - def delete(self, where=..., start: int | None = ..., stop: int | None = ...): ... + def write(self, obj, **kwargs) -> None: ... + def delete( + self, + where: Incomplete | None = ..., + start: int | None = ..., + stop: int | None = ..., + ) -> None: ... class GenericFixed(Fixed): - attributes: list[str] = ... + attributes: list[str] def validate_read(self, columns, where) -> None: ... @property def is_exists(self) -> bool: ... def set_attrs(self) -> None: ... + encoding: str + errors: str def get_attrs(self) -> None: ... def write(self, obj, **kwargs) -> None: ... 
def read_array(self, key: str, start: int | None = ..., stop: int | None = ...): ... def read_index( self, key: str, start: int | None = ..., stop: int | None = ... ) -> Index: ... - def write_index(self, key: str, index: Index): ... - def write_multi_index(self, key: str, index: MultiIndex): ... + def write_index(self, key: str, index: Index) -> None: ... + def write_multi_index(self, key: str, index: MultiIndex) -> None: ... def read_multi_index( self, key: str, start: int | None = ..., stop: int | None = ... ) -> MultiIndex: ... def read_index_node( self, node: Node, start: int | None = ..., stop: int | None = ... ) -> Index: ... - def write_array_empty(self, key: str, value: ArrayLike): ... - def write_array(self, key: str, value: ArrayLike, items: Index | None = ...): ... - -class SeriesFixed(GenericFixed): - pandas_kind: str = ... - name: Hashable | None - @property - def shape(self): ... - def read( - self, - where=..., - columns=..., - start: int | None = ..., - stop: int | None = ..., - ): ... - def write(self, obj, **kwargs) -> None: ... - -class BlockManagerFixed(GenericFixed): - nblocks: int - @property - def shape(self): ... - def read( - self, - where=..., - columns=..., - start: int | None = ..., - stop: int | None = ..., - ): ... - def write(self, obj, **kwargs) -> None: ... - -class FrameFixed(BlockManagerFixed): - pandas_kind: str = ... + def write_array_empty(self, key: str, value: ArrayLike) -> None: ... + def write_array( + self, key: str, obj: AnyArrayLike, items: Index | None = ... + ) -> None: ... class Table(Fixed): - pandas_kind: str = ... - format_type: str = ... + pandas_kind: str + format_type: str table_type: str - levels: int = ... - is_table: bool = ... + levels: int | list[Hashable] + is_table: bool index_axes: list[IndexCol] non_index_axes: list[tuple[int, Any]] values_axes: list[DataCol] data_columns: list metadata: list info: dict - nan_rep = ... + nan_rep = ... 
# Incomplete def __init__( self, parent: HDFStore, group: Node, - encoding=..., + encoding: Incomplete | None = ..., errors: str = ..., - index_axes=..., - non_index_axes=..., - values_axes=..., - data_columns=..., - info=..., - nan_rep=..., + index_axes: Incomplete | None = ..., + non_index_axes: Incomplete | None = ..., + values_axes: Incomplete | None = ..., + data_columns: Incomplete | None = ..., + info: Incomplete | None = ..., + nan_rep: Incomplete | None = ..., ) -> None: ... @property def table_type_short(self) -> str: ... @@ -496,7 +424,9 @@ class Table(Fixed): def validate(self, other) -> None: ... @property def is_multi_index(self) -> bool: ... - def validate_multiindex(self, obj): ... + def validate_multiindex( + self, obj: DataFrame | Series + ) -> tuple[DataFrame, list[Hashable]]: ... @property def nrows_expected(self) -> int: ... @property @@ -516,22 +446,31 @@ class Table(Fixed): @property def is_transposed(self) -> bool: ... @property - def data_orientation(self): ... + def data_orientation(self) -> tuple[int, ...]: ... def queryables(self) -> dict[str, Any]: ... def index_cols(self): ... def values_cols(self) -> list[str]: ... - def write_metadata(self, key: str, values: np.ndarray): ... + def write_metadata(self, key: str, values: np.ndarray) -> None: ... def read_metadata(self, key: str): ... def set_attrs(self) -> None: ... + encoding: str + errors: str def get_attrs(self) -> None: ... - def validate_version(self, where=...) -> None: ... + def validate_version(self, where: Incomplete | None = ...) -> None: ... def validate_min_itemsize(self, min_itemsize) -> None: ... def indexables(self): ... - def create_index(self, columns=..., optlevel=..., kind: str | None = ...): ... + def create_index( + self, + columns: Incomplete | None = ..., + optlevel: Incomplete | None = ..., + kind: str | None = ..., + ) -> None: ... @classmethod def get_object(cls, obj, transposed: bool): ... 
def validate_data_columns(self, data_columns, min_itemsize, non_index_axes): ... - def process_axes(self, obj, selection: Selection, columns=...): ... + def process_axes( + self, obj, selection: Selection, columns: Incomplete | None = ... + ) -> DataFrame: ... def create_description( self, complib, @@ -540,132 +479,66 @@ class Table(Fixed): expectedrows: int | None, ) -> dict[str, Any]: ... def read_coordinates( - self, where=..., start: int | None = ..., stop: int | None = ... - ): ... - def read_column( self, - column: str, - where=..., + where: Incomplete | None = ..., start: int | None = ..., stop: int | None = ..., ): ... - -class WORMTable(Table): - table_type: str = ... - def read( + def read_column( self, - where=..., - columns=..., + column: str, + where: Incomplete | None = ..., start: int | None = ..., stop: int | None = ..., ): ... - def write(self, **kwargs) -> None: ... class AppendableTable(Table): - table_type: str = ... - def write( + table_type: str + # Class design makes this untypable + def write( # type: ignore[override] self, obj, - axes=..., + axes: Incomplete | None = ..., append: bool = ..., - complib=..., - complevel=..., - fletcher32=..., - min_itemsize=..., - chunksize=..., - expectedrows=..., + complib: Incomplete | None = ..., + complevel: Incomplete | None = ..., + fletcher32: Incomplete | None = ..., + min_itemsize: Incomplete | None = ..., + chunksize: Incomplete | None = ..., + expectedrows: Incomplete | None = ..., dropna: bool = ..., - nan_rep=..., - data_columns=..., + nan_rep: Incomplete | None = ..., + data_columns: Incomplete | None = ..., + track_times: bool = ..., ) -> None: ... - def write_data(self, chunksize: int | None, dropna: bool = ...): ... + def write_data(self, chunksize: int | None, dropna: bool = ...) -> None: ... def write_data_chunk( self, rows: np.ndarray, indexes: list[np.ndarray], - mask: np.ndarray | None, + mask: npt.NDArray[np.bool_] | None, values: list[np.ndarray], - ): ... 
- def delete(self, where=..., start: int | None = ..., stop: int | None = ...): ... - -class AppendableFrameTable(AppendableTable): - pandas_kind: str = ... - table_type: str = ... - ndim: int = ... - obj_type: type[DataFrame | Series] = ... - @property - def is_transposed(self) -> bool: ... - @classmethod - def get_object(cls, obj, transposed: bool): ... - def read( - self, - where=..., - columns=..., - start: int | None = ..., - stop: int | None = ..., - ): ... - -class AppendableSeriesTable(AppendableFrameTable): - pandas_kind: str = ... - table_type: str = ... - ndim: int = ... - @property - def is_transposed(self) -> bool: ... - @classmethod - def get_object(cls, obj, transposed: bool): ... - def write(self, obj, data_columns=..., **kwargs): ... - def read( - self, - where=..., - columns=..., - start: int | None = ..., - stop: int | None = ..., - ) -> Series: ... - -class AppendableMultiSeriesTable(AppendableSeriesTable): - pandas_kind: str = ... - table_type: str = ... - def write(self, obj, **kwargs): ... - -class GenericTable(AppendableFrameTable): - pandas_kind: str = ... - table_type: str = ... - ndim: int = ... - @property - def pandas_type(self) -> str: ... - @property - def storable(self): ... - nan_rep = ... - def get_attrs(self) -> None: ... - def indexables(self): ... - -class AppendableMultiFrameTable(AppendableFrameTable): - table_type: str = ... - ndim: int = ... - @property - def table_type_short(self) -> str: ... - def write(self, obj, data_columns=..., **kwargs): ... - def read( + ) -> None: ... + def delete( self, - where=..., - columns=..., + where: Incomplete | None = ..., start: int | None = ..., stop: int | None = ..., ): ... class Selection: - table = ... - where = ... - start = ... - stop = ... - condition = ... - filter = ... - terms = ... - coordinates = ... + table = ... # Incomplete + where = ... # Incomplete + start = ... # Incomplete + stop = ... # Incomplete + condition = ... # Incomplete + filter = ... # Incomplete + terms = ... 
# Incomplete + coordinates = ... # Incomplete def __init__( self, table: Table, - where=..., + where: Incomplete | None = ..., start: int | None = ..., stop: int | None = ..., ) -> None: ... diff --git a/pyproject.toml b/pyproject.toml index bf9187611..a3e262fc5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,7 @@ loguru = ">=0.6.0" pandas = "1.4.3" typing-extensions = ">=4.2.0" matplotlib = ">=3.3.2" +tables = ">=3.6.0" pre-commit = ">=2.19.0" black = ">=22.6.0" isort = ">=5.10.1"