From c79e7ee3dffbad77f44c15effd53dd06826bd84c Mon Sep 17 00:00:00 2001 From: Krishna Chivukula <63070026+KrishnaSai2020@users.noreply.github.com> Date: Sat, 31 Jul 2021 18:46:14 +0100 Subject: [PATCH 1/2] TST: fix read_stata doctest #42670 (#42701) * doctest fix for #42670 * added read_stata docstring into the doctests Co-authored-by: KrishnaSai2020 --- ci/code_checks.sh | 1 + pandas/io/stata.py | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d04d0eaee6ec4..46c03458e32c4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -123,6 +123,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/sql.py \ pandas/io/formats/format.py \ pandas/io/formats/style.py \ + pandas/io/stata.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 705f5c82be915..1f87a017f31e4 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -160,15 +160,30 @@ Examples -------- + +Creating a dummy stata for this example +>>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', +... 'parrot'], +... 'speed': [350, 18, 361, 15]}}) +>>> df.to_stata('animals.dta') + Read a Stata dta file: ->>> df = pd.read_stata('filename.dta') +>>> df = pd.read_stata('animals.dta') Read a Stata dta file in 10,000 line chunks: +>>> values = np.random.randint(0, 10, size=(20_000, 1), dtype="uint8") +>>> df = pd.DataFrame(values, columns=["i"]) +>>> df.to_stata('filename.dta') >>> itr = pd.read_stata('filename.dta', chunksize=10000) >>> for chunk in itr: -... do_something(chunk) +... # Operate on a single chunk, e.g., chunk.mean() +... pass + +>>> import os +>>> os.remove("./filename.dta") +>>> os.remove("./animals.dta") """ _read_method_doc = f"""\ From a5f8c9a0760d6ca40081e295c4c2e93aef144e9a Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Sat, 31 Jul 2021 16:54:59 -0400 Subject: [PATCH 2/2] TST: Catch warnings due to division by 0 (#42836) --- pandas/tests/test_expressions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 6ac85f9d36fdc..1348e62148cb1 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -1,5 +1,6 @@ import operator import re +import warnings import numpy as np import pytest @@ -167,8 +168,12 @@ def testit(): op = getattr(operator, opname) - result = expr.evaluate(op, left, left, use_numexpr=True) - expected = expr.evaluate(op, left, left, use_numexpr=False) + with warnings.catch_warnings(): + # array has 0s + msg = "invalid value encountered in true_divide" + warnings.filterwarnings("ignore", msg, RuntimeWarning) + result = expr.evaluate(op, left, left, use_numexpr=True) + expected = expr.evaluate(op, left, left, use_numexpr=False) tm.assert_numpy_array_equal(result, expected) result = expr._can_use_numexpr(op, op_str, right, right, "evaluate")