Skip to content

Commit 54e9988

Browse files
authored [author name missing in page extraction]
DOC: validate_docstrings cleans up leftover files; doctest +SKIP file examples (#44711)
1 parent 633ff18 commit 54e9988

File tree

8 files changed

+96
-88
lines changed

8 files changed

+96
-88
lines changed

pandas/core/generic.py

+19-27
Original file line numberDiff line numberDiff line change
@@ -2709,32 +2709,27 @@ def to_hdf(
27092709
Examples
27102710
--------
27112711
>>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]},
2712-
... index=['a', 'b', 'c'])
2713-
>>> df.to_hdf('data.h5', key='df', mode='w')
2712+
... index=['a', 'b', 'c']) # doctest: +SKIP
2713+
>>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP
27142714
27152715
We can add another object to the same file:
27162716
2717-
>>> s = pd.Series([1, 2, 3, 4])
2718-
>>> s.to_hdf('data.h5', key='s')
2717+
>>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP
2718+
>>> s.to_hdf('data.h5', key='s') # doctest: +SKIP
27192719
27202720
Reading from HDF file:
27212721
2722-
>>> pd.read_hdf('data.h5', 'df')
2722+
>>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP
27232723
A B
27242724
a 1 4
27252725
b 2 5
27262726
c 3 6
2727-
>>> pd.read_hdf('data.h5', 's')
2727+
>>> pd.read_hdf('data.h5', 's') # doctest: +SKIP
27282728
0 1
27292729
1 2
27302730
2 3
27312731
3 4
27322732
dtype: int64
2733-
2734-
Deleting file with data:
2735-
2736-
>>> import os
2737-
>>> os.remove('data.h5')
27382733
"""
27392734
from pandas.io import pytables
27402735

@@ -2970,28 +2965,25 @@ def to_pickle(
29702965
29712966
Examples
29722967
--------
2973-
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
2974-
>>> original_df
2968+
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
2969+
>>> original_df # doctest: +SKIP
29752970
foo bar
29762971
0 0 5
29772972
1 1 6
29782973
2 2 7
29792974
3 3 8
29802975
4 4 9
2981-
>>> original_df.to_pickle("./dummy.pkl")
2976+
>>> original_df.to_pickle("./dummy.pkl") # doctest: +SKIP
29822977
2983-
>>> unpickled_df = pd.read_pickle("./dummy.pkl")
2984-
>>> unpickled_df
2978+
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
2979+
>>> unpickled_df # doctest: +SKIP
29852980
foo bar
29862981
0 0 5
29872982
1 1 6
29882983
2 2 7
29892984
3 3 8
29902985
4 4 9
2991-
2992-
>>> import os
2993-
>>> os.remove("./dummy.pkl")
2994-
"""
2986+
""" # noqa: E501
29952987
from pandas.io.pickle import to_pickle
29962988

29972989
to_pickle(
@@ -3509,14 +3501,14 @@ def to_csv(
35093501
To write a csv file to a new folder or nested folder you will first
35103502
need to create it using either Pathlib or os:
35113503
3512-
>>> from pathlib import Path
3513-
>>> filepath = Path('folder/subfolder/out.csv')
3514-
>>> filepath.parent.mkdir(parents=True, exist_ok=True)
3515-
>>> df.to_csv(filepath)
3504+
>>> from pathlib import Path # doctest: +SKIP
3505+
>>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP
3506+
>>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP
3507+
>>> df.to_csv(filepath) # doctest: +SKIP
35163508
3517-
>>> import os
3518-
>>> os.makedirs('folder/subfolder', exist_ok=True)
3519-
>>> df.to_csv('folder/subfolder/out.csv')
3509+
>>> import os # doctest: +SKIP
3510+
>>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP
3511+
>>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP
35203512
"""
35213513
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
35223514

pandas/errors/__init__.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -95,32 +95,29 @@ class DtypeWarning(Warning):
9595
9696
>>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
9797
... ['1'] * 100000),
98-
... 'b': ['b'] * 300000})
99-
>>> df.to_csv('test.csv', index=False)
100-
>>> df2 = pd.read_csv('test.csv')
98+
... 'b': ['b'] * 300000}) # doctest: +SKIP
99+
>>> df.to_csv('test.csv', index=False) # doctest: +SKIP
100+
>>> df2 = pd.read_csv('test.csv') # doctest: +SKIP
101101
... # DtypeWarning: Columns (0) have mixed types
102102
103103
Important to notice that ``df2`` will contain both `str` and `int` for the
104104
same input, '1'.
105105
106-
>>> df2.iloc[262140, 0]
106+
>>> df2.iloc[262140, 0] # doctest: +SKIP
107107
'1'
108-
>>> type(df2.iloc[262140, 0])
108+
>>> type(df2.iloc[262140, 0]) # doctest: +SKIP
109109
<class 'str'>
110-
>>> df2.iloc[262150, 0]
110+
>>> df2.iloc[262150, 0] # doctest: +SKIP
111111
1
112-
>>> type(df2.iloc[262150, 0])
112+
>>> type(df2.iloc[262150, 0]) # doctest: +SKIP
113113
<class 'int'>
114114
115115
One way to solve this issue is using the `dtype` parameter in the
116116
`read_csv` and `read_table` functions to explicit the conversion:
117117
118-
>>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str})
118+
>>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP
119119
120120
No warning was issued.
121-
122-
>>> import os
123-
>>> os.remove('test.csv')
124121
"""
125122

126123

pandas/io/excel/_base.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -756,40 +756,40 @@ class ExcelWriter(metaclass=abc.ABCMeta):
756756
--------
757757
Default usage:
758758
759-
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
759+
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
760760
>>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
761-
... df.to_excel(writer)
761+
... df.to_excel(writer) # doctest: +SKIP
762762
763763
To write to separate sheets in a single file:
764764
765-
>>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"])
766-
>>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
765+
>>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"]) # doctest: +SKIP
766+
>>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
767767
>>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
768-
... df1.to_excel(writer, sheet_name="Sheet1")
769-
... df2.to_excel(writer, sheet_name="Sheet2")
768+
... df1.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP
769+
... df2.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP
770770
771771
You can set the date format or datetime format:
772772
773-
>>> from datetime import date, datetime
773+
>>> from datetime import date, datetime # doctest: +SKIP
774774
>>> df = pd.DataFrame(
775775
... [
776776
... [date(2014, 1, 31), date(1999, 9, 24)],
777777
... [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
778778
... ],
779779
... index=["Date", "Datetime"],
780780
... columns=["X", "Y"],
781-
... )
781+
... ) # doctest: +SKIP
782782
>>> with pd.ExcelWriter(
783783
... "path_to_file.xlsx",
784784
... date_format="YYYY-MM-DD",
785785
... datetime_format="YYYY-MM-DD HH:MM:SS"
786786
... ) as writer:
787-
... df.to_excel(writer)
787+
... df.to_excel(writer) # doctest: +SKIP
788788
789789
You can also append to an existing Excel file:
790790
791791
>>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:
792-
... df.to_excel(writer, sheet_name="Sheet3")
792+
... df.to_excel(writer, sheet_name="Sheet3") # doctest: +SKIP
793793
794794
Here, the `if_sheet_exists` parameter can be set to replace a sheet if it
795795
already exists:
@@ -800,7 +800,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
800800
... engine="openpyxl",
801801
... if_sheet_exists="replace",
802802
... ) as writer:
803-
... df.to_excel(writer, sheet_name="Sheet1")
803+
... df.to_excel(writer, sheet_name="Sheet1") # doctest: +SKIP
804804
805805
You can also write multiple DataFrames to a single sheet. Note that the
806806
``if_sheet_exists`` parameter needs to be set to ``overlay``:
@@ -811,7 +811,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
811811
... if_sheet_exists="overlay",
812812
... ) as writer:
813813
... df1.to_excel(writer, sheet_name="Sheet1")
814-
... df2.to_excel(writer, sheet_name="Sheet1", startcol=3)
814+
... df2.to_excel(writer, sheet_name="Sheet1", startcol=3) # doctest: +SKIP
815815
816816
You can store Excel file in RAM:
817817
@@ -823,12 +823,12 @@ class ExcelWriter(metaclass=abc.ABCMeta):
823823
824824
You can pack Excel file into zip archive:
825825
826-
>>> import zipfile
827-
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
826+
>>> import zipfile # doctest: +SKIP
827+
>>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"]) # doctest: +SKIP
828828
>>> with zipfile.ZipFile("path_to_file.zip", "w") as zf:
829829
... with zf.open("filename.xlsx", "w") as buffer:
830830
... with pd.ExcelWriter(buffer) as writer:
831-
... df.to_excel(writer)
831+
... df.to_excel(writer) # doctest: +SKIP
832832
833833
You can specify additional arguments to the underlying engine:
834834
@@ -837,7 +837,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
837837
... engine="xlsxwriter",
838838
... engine_kwargs={"options": {"nan_inf_to_errors": True}}
839839
... ) as writer:
840-
... df.to_excel(writer)
840+
... df.to_excel(writer) # doctest: +SKIP
841841
842842
In append mode, ``engine_kwargs`` are passed through to
843843
openpyxl's ``load_workbook``:
@@ -848,7 +848,7 @@ class ExcelWriter(metaclass=abc.ABCMeta):
848848
... mode="a",
849849
... engine_kwargs={"keep_vba": True}
850850
... ) as writer:
851-
... df.to_excel(writer, sheet_name="Sheet2")
851+
... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP
852852
"""
853853

854854
# Defining an ExcelWriter implementation (see abstract methods for more...)

pandas/io/pickle.py

+12-18
Original file line numberDiff line numberDiff line change
@@ -71,28 +71,25 @@ def to_pickle(
7171
7272
Examples
7373
--------
74-
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
75-
>>> original_df
74+
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
75+
>>> original_df # doctest: +SKIP
7676
foo bar
7777
0 0 5
7878
1 1 6
7979
2 2 7
8080
3 3 8
8181
4 4 9
82-
>>> pd.to_pickle(original_df, "./dummy.pkl")
82+
>>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
8383
84-
>>> unpickled_df = pd.read_pickle("./dummy.pkl")
85-
>>> unpickled_df
84+
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
85+
>>> unpickled_df # doctest: +SKIP
8686
foo bar
8787
0 0 5
8888
1 1 6
8989
2 2 7
9090
3 3 8
9191
4 4 9
92-
93-
>>> import os
94-
>>> os.remove("./dummy.pkl")
95-
"""
92+
""" # noqa: E501
9693
if protocol < 0:
9794
protocol = pickle.HIGHEST_PROTOCOL
9895

@@ -165,28 +162,25 @@ def read_pickle(
165162
166163
Examples
167164
--------
168-
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})
169-
>>> original_df
165+
>>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP
166+
>>> original_df # doctest: +SKIP
170167
foo bar
171168
0 0 5
172169
1 1 6
173170
2 2 7
174171
3 3 8
175172
4 4 9
176-
>>> pd.to_pickle(original_df, "./dummy.pkl")
173+
>>> pd.to_pickle(original_df, "./dummy.pkl") # doctest: +SKIP
177174
178-
>>> unpickled_df = pd.read_pickle("./dummy.pkl")
179-
>>> unpickled_df
175+
>>> unpickled_df = pd.read_pickle("./dummy.pkl") # doctest: +SKIP
176+
>>> unpickled_df # doctest: +SKIP
180177
foo bar
181178
0 0 5
182179
1 1 6
183180
2 2 7
184181
3 3 8
185182
4 4 9
186-
187-
>>> import os
188-
>>> os.remove("./dummy.pkl")
189-
"""
183+
""" # noqa: E501
190184
excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
191185
with get_handle(
192186
filepath_or_buffer,

pandas/io/pytables.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -390,9 +390,9 @@ def read_hdf(
390390
391391
Examples
392392
--------
393-
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z'])
394-
>>> df.to_hdf('./store.h5', 'data')
395-
>>> reread = pd.read_hdf('./store.h5')
393+
>>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP
394+
>>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP
395+
>>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP
396396
"""
397397
if mode not in ["r", "r+", "a"]:
398398
raise ValueError(

pandas/io/stata.py

+8-12
Original file line numberDiff line numberDiff line change
@@ -172,26 +172,22 @@
172172
Creating a dummy stata for this example
173173
>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon',
174174
... 'parrot'],
175-
... 'speed': [350, 18, 361, 15]}})
176-
>>> df.to_stata('animals.dta')
175+
... 'speed': [350, 18, 361, 15]}}) # doctest: +SKIP
176+
>>> df.to_stata('animals.dta') # doctest: +SKIP
177177
178178
Read a Stata dta file:
179179
180-
>>> df = pd.read_stata('animals.dta')
180+
>>> df = pd.read_stata('animals.dta') # doctest: +SKIP
181181
182182
Read a Stata dta file in 10,000 line chunks:
183-
>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8")
184-
>>> df = pd.DataFrame(values, columns=["i"])
185-
>>> df.to_stata('filename.dta')
183+
>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") # doctest: +SKIP
184+
>>> df = pd.DataFrame(values, columns=["i"]) # doctest: +SKIP
185+
>>> df.to_stata('filename.dta') # doctest: +SKIP
186186
187-
>>> itr = pd.read_stata('filename.dta', chunksize=10000)
187+
>>> itr = pd.read_stata('filename.dta', chunksize=10000) # doctest: +SKIP
188188
>>> for chunk in itr:
189189
... # Operate on a single chunk, e.g., chunk.mean()
190-
... pass
191-
192-
>>> import os
193-
>>> os.remove("./filename.dta")
194-
>>> os.remove("./animals.dta")
190+
... pass # doctest: +SKIP
195191
"""
196192

197193
_read_method_doc = f"""\

scripts/tests/test_validate_docstrings.py

+15
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,15 @@ def write_array_like_with_hyphen_not_underscore(self):
8888
"""
8989
pass
9090

91+
def leftover_files(self):
92+
"""
93+
Examples
94+
--------
95+
>>> import pathlib
96+
>>> pathlib.Path("foo.txt").touch()
97+
"""
98+
pass
99+
91100

92101
class TestValidator:
93102
def _import_path(self, klass=None, func=None):
@@ -192,6 +201,12 @@ def test_bad_docstrings(self, capsys, klass, func, msgs):
192201
for msg in msgs:
193202
assert msg in " ".join([err[1] for err in result["errors"]])
194203

204+
def test_leftover_files_raises(self):
205+
with pytest.raises(Exception, match="The following files"):
206+
validate_docstrings.pandas_validate(
207+
self._import_path(klass="BadDocstrings", func="leftover_files")
208+
)
209+
195210
def test_validate_all_ignore_deprecated(self, monkeypatch):
196211
monkeypatch.setattr(
197212
validate_docstrings,

0 commit comments

Comments (0)