Skip to content

DOC: validate_docstrings cleans up leftover files; doctest +SKIP file examples #44711

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 2, 2021
Merged
46 changes: 19 additions & 27 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2709,32 +2709,27 @@ def to_hdf(
Examples
--------
>>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
... index=['a', 'b', 'c'])
>>> df.to_hdf('data.h5', key='df', mode='w')
... index=['a', 'b', 'c']) # doctest: +SKIP
>>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP

We can add another object to the same file:

>>> s = pd.Series([1, 2, 3, 4])
>>> s.to_hdf('data.h5', key='s')
>>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP
>>> s.to_hdf('data.h5', key='s') # doctest: +SKIP

Reading from HDF file:

>>> pd.read_hdf('data.h5', 'df')
>>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP
A B
a 1 4
b 2 5
c 3 6
>>> pd.read_hdf('data.h5', 's')
>>> pd.read_hdf('data.h5', 's') # doctest: +SKIP
0 1
1 2
2 3
3 4
dtype: int64

Deleting file with data:

>>> import os
>>> os.remove('data.h5')
"""
from pandas.io import pytables

Expand Down Expand Up @@ -2970,28 +2965,25 @@ def to_pickle(

Examples
--------
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
>>> original_df
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
>>> original_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> original_df.to_pickle("./dummy.pkl")
>>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP

>>> unpickled_df = pd.read_pickle("./dummy.pkl")
>>> unpickled_df
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
>>> unpickled_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9

>>> import os
>>> os.remove("./dummy.pkl")
"""
""" # noqa: E501
from pandas.io.pickle import to_pickle

to_pickle(
Expand Down Expand Up @@ -3509,14 +3501,14 @@ def to_csv(
To write a csv file to a new folder or nested folder you will first
need to create it using either Pathlib or os:

>>> from pathlib import Path
>>> filepath = Path('folder/subfolder/out.csv')
>>> filepath.parent.mkdir(parents=True, exist_ok=True)
>>> df.to_csv(filepath)
>>> from pathlib import Path # doctest: +SKIP
>>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP
>>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP
>>> df.to_csv(filepath) # doctest: +SKIP

>>> import os
>>> os.makedirs('folder/subfolder', exist_ok=True)
>>> df.to_csv('folder/subfolder/out.csv')
>>> import os # doctest: +SKIP
>>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP
>>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP
"""
df = self if isinstance(self, ABCDataFrame) else self.to_frame()

Expand Down
19 changes: 8 additions & 11 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,32 +95,29 @@ class DtypeWarning(Warning):

>>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
... ['1'] * 100000),
... 'b': ['b'] * 300000})
>>> df.to_csv('test.csv', index=False)
>>> df2 = pd.read_csv('test.csv')
... 'b': ['b'] * 300000}) # doctest: +SKIP
>>> df.to_csv('test.csv', index=False) # doctest: +SKIP
>>> df2 = pd.read_csv('test.csv') # doctest: +SKIP
... # DtypeWarning: Columns (0) have mixed types

It is important to notice that ``df2`` will contain both `str` and `int` for the
same input, '1'.

>>> df2.iloc[262140, 0]
>>> df2.iloc[262140, 0] # doctest: +SKIP
'1'
>>> type(df2.iloc[262140, 0])
>>> type(df2.iloc[262140, 0]) # doctest: +SKIP
<class 'str'>
>>> df2.iloc[262150, 0]
>>> df2.iloc[262150, 0] # doctest: +SKIP
1
>>> type(df2.iloc[262150, 0])
>>> type(df2.iloc[262150, 0]) # doctest: +SKIP
<class 'int'>

One way to solve this issue is by using the `dtype` parameter in the
`read_csv` and `read_table` functions to make the conversion explicit:

>>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str})
>>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP

No warning was issued.

>>> import os
>>> os.remove('test.csv')
"""


Expand Down
34 changes: 17 additions & 17 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,40 +756,40 @@ class ExcelWriter(metaclass=abc.ABCMeta):
--------
Default usage:

>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
>>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
... df.to_excel(writer)
... df.to_excel(writer) # doctest: +SKIP

To write to separate sheets in a single file:

>>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"])
>>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
>>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"]) # doctest: +SKIP
>>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
>>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
... df1.to_excel(writer, sheet_name="Sheet1")
... df2.to_excel(writer, sheet_name="Sheet2")
... df1.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP
... df2.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP

You can set the date format or datetime format:

>>> from datetime import date, datetime
>>> from datetime import date, datetime # doctest: +SKIP
>>> df = pd.DataFrame(
... [
... [date(2014, 1, 31), date(1999, 9, 24)],
... [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
... ],
... index=["Date", "Datetime"],
... columns=["X", "Y"],
... )
... ) # doctest: +SKIP
>>> with pd.ExcelWriter(
... "path_to_file.xlsx",
... date_format="YYYY-MM-DD",
... datetime_format="YYYY-MM-DD HH:MM:SS"
... ) as writer:
... df.to_excel(writer)
... df.to_excel(writer) # doctest: +SKIP

You can also append to an existing Excel file:

>>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:
... df.to_excel(writer, sheet_name="Sheet3")
... df.to_excel(writer, sheet_name="Sheet3") # doctest: +SKIP

Here, the `if_sheet_exists` parameter can be set to replace a sheet if it
already exists:
Expand All @@ -800,7 +800,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... engine="openpyxl",
... if_sheet_exists="replace",
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet1")
... df.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP

You can also write multiple DataFrames to a single sheet. Note that the
``if_sheet_exists`` parameter needs to be set to ``overlay``:
Expand All @@ -811,7 +811,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... if_sheet_exists="overlay",
... ) as writer:
... df1.to_excel(writer, sheet_name="Sheet1")
... df2.to_excel(writer, sheet_name="Sheet1", startcol=3)
... df2.to_excel(writer, sheet_name="Sheet1", startcol=3) # doctest: +SKIP

You can store Excel file in RAM:

Expand All @@ -823,12 +823,12 @@ class ExcelWriter(metaclass=abc.ABCMeta):

You can pack Excel file into zip archive:

>>> import zipfile
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
>>> import zipfile # doctest: +SKIP
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
>>> with zipfile.ZipFile("path_to_file.zip", "w") as zf:
... with zf.open("filename.xlsx", "w") as buffer:
... with pd.ExcelWriter(buffer) as writer:
... df.to_excel(writer)
... df.to_excel(writer) # doctest: +SKIP

You can specify additional arguments to the underlying engine:

Expand All @@ -837,7 +837,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... engine="xlsxwriter",
... engine_kwargs={"options": {"nan_inf_to_errors": True}}
... ) as writer:
... df.to_excel(writer)
... df.to_excel(writer) # doctest: +SKIP

In append mode, ``engine_kwargs`` are passed through to
openpyxl's ``load_workbook``:
Expand All @@ -848,7 +848,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
... mode="a",
... engine_kwargs={"keep_vba": True}
... ) as writer:
... df.to_excel(writer, sheet_name="Sheet2")
... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP
"""

# Defining an ExcelWriter implementation (see abstract methods for more...)
Expand Down
30 changes: 12 additions & 18 deletions pandas/io/pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,28 +71,25 @@ def to_pickle(

Examples
--------
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
>>> original_df
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
>>> original_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> pd.to_pickle(original_df, "./dummy.pkl")
>>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP

>>> unpickled_df = pd.read_pickle("./dummy.pkl")
>>> unpickled_df
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
>>> unpickled_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9

>>> import os
>>> os.remove("./dummy.pkl")
"""
""" # noqa: E501
if protocol < 0:
protocol = pickle.HIGHEST_PROTOCOL

Expand Down Expand Up @@ -165,28 +162,25 @@ def read_pickle(

Examples
--------
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
>>> original_df
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
>>> original_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9
>>> pd.to_pickle(original_df, "./dummy.pkl")
>>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP

>>> unpickled_df = pd.read_pickle("./dummy.pkl")
>>> unpickled_df
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
>>> unpickled_df # doctest: +SKIP
foo bar
0 0 5
1 1 6
2 2 7
3 3 8
4 4 9

>>> import os
>>> os.remove("./dummy.pkl")
"""
""" # noqa: E501
excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
with get_handle(
filepath_or_buffer,
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,9 @@ def read_hdf(

Examples
--------
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z'])
>>> df.to_hdf('./store.h5', 'data')
>>> reread = pd.read_hdf('./store.h5')
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP
>>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP
>>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP
"""
if mode not in ["r", "r+", "a"]:
raise ValueError(
Expand Down
20 changes: 8 additions & 12 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,26 +172,22 @@
Creating a dummy Stata file for this example:
>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon',
... 'parrot'],
... 'speed': [350, 18, 361, 15]}})
>>> df.to_stata('animals.dta')
... 'speed': [350, 18, 361, 15]}}) # doctest: +SKIP
>>> df.to_stata('animals.dta') # doctest: +SKIP

Read a Stata dta file:

>>> df = pd.read_stata('animals.dta')
>>> df = pd.read_stata('animals.dta') # doctest: +SKIP

Read a Stata dta file in 10,000 line chunks:
>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8")
>>> df = pd.DataFrame(values, columns=["i"])
>>> df.to_stata('filename.dta')
>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP
>>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP
>>> df.to_stata('filename.dta') # doctest: +SKIP

>>> itr = pd.read_stata('filename.dta', chunksize=10000)
>>> itr = pd.read_stata('filename.dta', chunksize=10000) # doctest: +SKIP
>>> for chunk in itr:
... # Operate on a single chunk, e.g., chunk.mean()
... pass

>>> import os
>>> os.remove("./filename.dta")
>>> os.remove("./animals.dta")
... pass # doctest: +SKIP
"""

_read_method_doc = f"""\
Expand Down
15 changes: 15 additions & 0 deletions scripts/tests/test_validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ def write_array_like_with_hyphen_not_underscore(self):
"""
pass

def leftover_files(self):
"""
Examples
--------
>>> import pathlib
>>> pathlib.Path("foo.txt").touch()
"""
pass


class TestValidator:
def _import_path(self, klass=None, func=None):
Expand Down Expand Up @@ -192,6 +201,12 @@ def test_bad_docstrings(self, capsys, klass, func, msgs):
for msg in msgs:
assert msg in " ".join([err[1] for err in result["errors"]])

def test_leftover_files_raises(self):
with pytest.raises(Exception, match="The following files"):
validate_docstrings.pandas_validate(
self._import_path(klass="BadDocstrings", func="leftover_files")
)

def test_validate_all_ignore_deprecated(self, monkeypatch):
monkeypatch.setattr(
validate_docstrings,
Expand Down
Loading