diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d04d0eaee6ec4..46c03458e32c4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -123,6 +123,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/sql.py \ pandas/io/formats/format.py \ pandas/io/formats/style.py \ + pandas/io/stata.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 705f5c82be915..1f87a017f31e4 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -160,15 +160,30 @@ Examples -------- + +Create a dummy Stata dta file for this example: +>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', +... 'parrot'], +... 'speed': [350, 18, 361, 15]}}) +>>> df.to_stata('animals.dta') + Read a Stata dta file: ->>> df = pd.read_stata('filename.dta') +>>> df = pd.read_stata('animals.dta') Read a Stata dta file in 10,000 line chunks: +>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") +>>> df = pd.DataFrame(values, columns=["i"]) +>>> df.to_stata('filename.dta') >>> itr = pd.read_stata('filename.dta', chunksize=10000) >>> for chunk in itr: -... do_something(chunk) +... # Operate on a single chunk, e.g., chunk.mean() +... pass + +>>> import os +>>> os.remove("./filename.dta") +>>> os.remove("./animals.dta") """ _read_method_doc = f"""\