From 99d5e5b223c20c05281606c9d37590485cb40541 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Mon, 14 Dec 2020 22:47:53 +0100 Subject: [PATCH 1/4] DOC: Add doc-string examples for pd.read_sql using custom parse_dates arg values --- pandas/io/sql.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b7efb4a8d6947..1d9ef1c5c7ed1 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -478,6 +478,41 @@ def read_sql( ------- DataFrame or Iterator[DataFrame] + Examples + -------- + Read data from SQL via either a SQL tablename or a SQL query + + >>> pd.read_sql('table_name', 'postgres:///db_name') # doctest:+SKIP + + >>> pd.read_sql('SELECT * FROM table_name', 'postgres:///db_name') # doctest:+SKIP + + Apply dateparsing to columns through the "parse_dates" argument + + >>> pd.read_sql('table_name', + ... 'postgres:///db_name', + ... parse_dates=["date_column"]) # doctest:+SKIP + + The "parse_dates" argument calls pd.to_datetime on the provided columns. Custom + argument values for applying pd.to_datetime on a column are specified via a + dictionary format: + 1. Ignore errors while parsing the values of "date_column" + + >>> pd.read_sql('table_name', + ... 'postgres:///db_name', + ... parse_dates={"date_column": {"errors": "ignore"}) # doctest:+SKIP + + 2. Apply a dayfirst dateparsing order on the values of "date_column" + + >>> pd.read_sql('table_name', + ... 'postgres:///db_name', + ... parse_dates={"date_column": {"dayfirst": True}) # doctest:+SKIP + + 3. Apply custom formatting when dateparsing the values of "date_column" + + >>> pd.read_sql('table_name', + ... 'postgres:///db_name', + ... parse_dates={"date_column": {"format": "%d/%m/%Y"}) # doctest:+SKIP + See Also -------- read_sql_table : Read SQL database table into a DataFrame. 
From 46ffa213724ca18faa4a58d347f892d49d0c80d3 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Mon, 14 Dec 2020 23:51:03 +0100 Subject: [PATCH 2/4] Update doctests --- pandas/io/sql.py | 79 ++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 1d9ef1c5c7ed1..a71c4cf0ea808 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -478,45 +478,66 @@ def read_sql( ------- DataFrame or Iterator[DataFrame] - Examples + See Also -------- - Read data from SQL via either a SQL tablename or a SQL query - - >>> pd.read_sql('table_name', 'postgres:///db_name') # doctest:+SKIP - - >>> pd.read_sql('SELECT * FROM table_name', 'postgres:///db_name') # doctest:+SKIP - - Apply dateparsing to columns through the "parse_dates" argument - - >>> pd.read_sql('table_name', - ... 'postgres:///db_name', - ... parse_dates=["date_column"]) # doctest:+SKIP + read_sql_table : Read SQL database table into a DataFrame. + read_sql_query : Read SQL query into a DataFrame. - The "parse_dates" argument calls pd.to_datetime on the provided columns. Custom - argument values for applying pd.to_datetime on a column are specified via a - dictionary format: + Examples + -------- + Read data from SQL via either a SQL query or a SQL tablename (latter not + possible for SQLite tables) + >>> from sqlite3 import connect + >>> conn = connect('file.db') + >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], + ... columns=['int_column', 'date_column']) + >>> df.to_sql('test_data', conn) + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) + int_column date_column + 0 0 10/11/12 + 1 1 12/11/10 + + >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP + + Apply dateparsing to columns through the ``parse_dates`` argument + + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... 
parse_dates=["date_column"]) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 + + The ``parse_dates`` argument calls ``pd.to_datetime`` on the provided columns. + Custom argument values for applying ``pd.to_datetime`` on a column are specified + via a dictionary format: 1. Ignore errors while parsing the values of "date_column" - >>> pd.read_sql('table_name', - ... 'postgres:///db_name', - ... parse_dates={"date_column": {"errors": "ignore"}) # doctest:+SKIP + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"errors": "ignore"}}) + int_column date_column + 0 0 2012-10-11 + 1 1 2010-12-11 2. Apply a dayfirst dateparsing order on the values of "date_column" - >>> pd.read_sql('table_name', - ... 'postgres:///db_name', - ... parse_dates={"date_column": {"dayfirst": True}) # doctest:+SKIP + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... parse_dates={"date_column": {"dayfirst": True}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 3. Apply custom formatting when dateparsing the values of "date_column" - >>> pd.read_sql('table_name', - ... 'postgres:///db_name', - ... parse_dates={"date_column": {"format": "%d/%m/%Y"}) # doctest:+SKIP - - See Also - -------- - read_sql_table : Read SQL database table into a DataFrame. - read_sql_query : Read SQL query into a DataFrame. + >>> pd.read_sql('SELECT int_column, date_column FROM test_data', + ... conn, + ... 
parse_dates={"date_column": {"format": "%d/%m/%y"}}) + int_column date_column + 0 0 2012-11-10 + 1 1 2010-11-12 """ pandas_sql = pandasSQL_builder(con) From 98eecb1fefaed0644941d747d0ab9fa9c0e11f67 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Tue, 15 Dec 2020 21:11:29 +0100 Subject: [PATCH 3/4] Run docstring test for sql.py in CI --- ci/code_checks.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3eeee61f62a7e..d2f20a91cc654 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -178,6 +178,10 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pytest -q --doctest-modules pandas/core/strings/ RET=$(($RET + $?)) ; echo $MSG "DONE" + MSG='Doctests sql.py' ; echo $MSG + pytest -q --doctest-modules pandas/io/sql.py + RET=$(($RET + $?)) ; echo $MSG "DONE" + # Directories MSG='Doctests arrays'; echo $MSG From 82ff53399919c5b7a86e9a344d3114d09c627e51 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Wed, 16 Dec 2020 21:17:06 +0100 Subject: [PATCH 4/4] Address comments --- pandas/io/sql.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index a71c4cf0ea808..23f992ceb009a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -485,10 +485,12 @@ def read_sql( Examples -------- - Read data from SQL via either a SQL query or a SQL tablename (latter not - possible for SQLite tables) + Read data from SQL via either a SQL query or a SQL tablename. + When using a SQLite database, only SQL queries are accepted; + providing only the SQL tablename will result in an error. + >>> from sqlite3 import connect - >>> conn = connect('file.db') + >>> conn = connect(':memory:') >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], ... 
columns=['int_column', 'date_column']) >>> df.to_sql('test_data', conn) @@ -500,12 +502,12 @@ def read_sql( >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP - Apply dateparsing to columns through the ``parse_dates`` argument + Apply date parsing to columns through the ``parse_dates`` argument >>> pd.read_sql('SELECT int_column, date_column FROM test_data', ... conn, ... parse_dates=["date_column"]) - int_column date_column + int_column date_column 0 0 2012-10-11 1 1 2010-12-11 @@ -517,11 +519,11 @@ def read_sql( >>> pd.read_sql('SELECT int_column, date_column FROM test_data', ... conn, ... parse_dates={"date_column": {"errors": "ignore"}}) - int_column date_column + int_column date_column 0 0 2012-10-11 1 1 2010-12-11 - 2. Apply a dayfirst dateparsing order on the values of "date_column" + 2. Apply a dayfirst date parsing order on the values of "date_column" >>> pd.read_sql('SELECT int_column, date_column FROM test_data', ... conn, @@ -530,12 +532,12 @@ def read_sql( 0 0 2012-11-10 1 1 2010-11-12 - 3. Apply custom formatting when dateparsing the values of "date_column" + 3. Apply custom formatting when date parsing the values of "date_column" >>> pd.read_sql('SELECT int_column, date_column FROM test_data', ... conn, ... parse_dates={"date_column": {"format": "%d/%m/%y"}}) - int_column date_column + int_column date_column 0 0 2012-11-10 1 1 2010-11-12 """