Skip to content

Commit 1d65bc8

Browse files
Merge pull request #8340 from jorisvandenbossche/sql-pep8
CLN: pep8 clean up of sql.py
2 parents 0005e0f + 3a98bc3 commit 1d65bc8

File tree

1 file changed

+70
-60
lines changed

1 file changed

+70
-60
lines changed

pandas/io/sql.py

+70-60
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,28 @@
1+
# -*- coding: utf-8 -*-
12
"""
23
Collection of query wrappers / abstractions to both facilitate data
34
retrieval and to reduce dependency on DB-specific API.
45
"""
6+
57
from __future__ import print_function, division
6-
from datetime import datetime, date, timedelta
8+
from datetime import datetime, date
79

810
import warnings
911
import traceback
10-
import itertools
1112
import re
1213
import numpy as np
1314

1415
import pandas.lib as lib
1516
import pandas.core.common as com
1617
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
1718
from pandas.core.api import DataFrame, Series
18-
from pandas.core.common import notnull, isnull
19+
from pandas.core.common import isnull
1920
from pandas.core.base import PandasObject
2021
from pandas.tseries.tools import to_datetime
2122

2223
from contextlib import contextmanager
2324

25+
2426
class SQLAlchemyRequired(ImportError):
2527
pass
2628

@@ -34,6 +36,7 @@ class DatabaseError(IOError):
3436

3537
_SQLALCHEMY_INSTALLED = None
3638

39+
3740
def _is_sqlalchemy_engine(con):
3841
global _SQLALCHEMY_INSTALLED
3942
if _SQLALCHEMY_INSTALLED is None:
@@ -80,7 +83,8 @@ def _handle_date_column(col, format=None):
8083
else:
8184
if format in ['D', 's', 'ms', 'us', 'ns']:
8285
return to_datetime(col, coerce=True, unit=format)
83-
elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer):
86+
elif (issubclass(col.dtype.type, np.floating)
87+
or issubclass(col.dtype.type, np.integer)):
8488
# parse dates as timestamp
8589
format = 's' if format is None else format
8690
return to_datetime(col, coerce=True, unit=format)
@@ -89,8 +93,9 @@ def _handle_date_column(col, format=None):
8993

9094

9195
def _parse_date_columns(data_frame, parse_dates):
92-
""" Force non-datetime columns to be read as such.
93-
Supports both string formatted and integer timestamp columns
96+
"""
97+
Force non-datetime columns to be read as such.
98+
Supports both string formatted and integer timestamp columns
9499
"""
95100
# handle non-list entries for parse_dates gracefully
96101
if parse_dates is True or parse_dates is None or parse_dates is False:
@@ -152,6 +157,7 @@ def _safe_fetch(cur):
152157
if excName == 'OperationalError':
153158
return []
154159

160+
155161
def tquery(sql, con=None, cur=None, retry=True):
156162
"""
157163
DEPRECATED. Returns list of tuples corresponding to each row in given sql
@@ -209,8 +215,8 @@ def tquery(sql, con=None, cur=None, retry=True):
209215

210216
def uquery(sql, con=None, cur=None, retry=True, params=None):
211217
"""
212-
DEPRECATED. Does the same thing as tquery, but instead of returning results, it
213-
returns the number of rows affected. Good for update queries.
218+
DEPRECATED. Does the same thing as tquery, but instead of returning
219+
results, it returns the number of rows affected. Good for update queries.
214220
215221
To obtain the same result in the future, you can use the following:
216222
@@ -269,8 +275,8 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
269275
con : SQLAlchemy engine
270276
Sqlite DBAPI connection mode not supported
271277
schema : string, default None
272-
Name of SQL schema in database to query (if database flavor supports this).
273-
If None, use default schema (default).
278+
Name of SQL schema in database to query (if database flavor
279+
supports this). If None, use default schema (default).
274280
index_col : string, optional
275281
Column to set as index
276282
coerce_float : boolean, default True
@@ -343,7 +349,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
343349
decimal.Decimal) to floating point, useful for SQL result sets
344350
params : list, tuple or dict, optional
345351
List of parameters to pass to execute method. The syntax used
346-
to pass parameters is database driver dependent. Check your
352+
to pass parameters is database driver dependent. Check your
347353
database driver documentation for which of the five syntax styles,
348354
described in PEP 249's paramstyle, is supported.
349355
Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
@@ -393,7 +399,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
393399
decimal.Decimal) to floating point, useful for SQL result sets
394400
params : list, tuple or dict, optional
395401
List of parameters to pass to execute method. The syntax used
396-
to pass parameters is database driver dependent. Check your
402+
to pass parameters is database driver dependent. Check your
397403
database driver documentation for which of the five syntax styles,
398404
described in PEP 249's paramstyle, is supported.
399405
Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
@@ -469,8 +475,8 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
469475
'mysql' is deprecated and will be removed in future versions, but it
470476
will be further supported through SQLAlchemy engines.
471477
schema : string, default None
472-
Name of SQL schema in database to write to (if database flavor supports
473-
this). If None, use default schema (default).
478+
Name of SQL schema in database to write to (if database flavor
479+
supports this). If None, use default schema (default).
474480
if_exists : {'fail', 'replace', 'append'}, default 'fail'
475481
- fail: If table exists, do nothing.
476482
- replace: If table exists, drop it, recreate it, and insert data.
@@ -482,7 +488,7 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
482488
`index` is True, then the index names are used.
483489
A sequence should be given if the DataFrame uses MultiIndex.
484490
chunksize : int, default None
485-
If not None, then rows will be written in batches of this size at a
491+
If not None, then rows will be written in batches of this size at a
486492
time. If None, all rows will be written at once.
487493
488494
"""
@@ -535,7 +541,9 @@ def has_table(table_name, con, flavor='sqlite', schema=None):
535541
"and will be removed in future versions. "
536542
"MySQL will be further supported with SQLAlchemy engines.")
537543

538-
def pandasSQL_builder(con, flavor=None, schema=None, meta=None, is_cursor=False):
544+
545+
def pandasSQL_builder(con, flavor=None, schema=None, meta=None,
546+
is_cursor=False):
539547
"""
540548
Convenience function to return the correct PandasSQL subclass based on the
541549
provided parameters
@@ -622,7 +630,7 @@ def insert_data(self):
622630
"duplicate name in index/columns: {0}".format(err))
623631
else:
624632
temp = self.frame
625-
633+
626634
column_names = list(map(str, temp.columns))
627635
ncols = len(column_names)
628636
data_list = [None] * ncols
@@ -631,7 +639,8 @@ def insert_data(self):
631639
for i in range(len(blocks)):
632640
b = blocks[i]
633641
if b.is_datetime:
634-
# convert to microsecond resolution so this yields datetime.datetime
642+
# convert to microsecond resolution so this yields
643+
# datetime.datetime
635644
d = b.values.astype('M8[us]').astype(object)
636645
else:
637646
d = np.array(b.values, dtype=object)
@@ -647,7 +656,7 @@ def insert_data(self):
647656
return column_names, data_list
648657

649658
def _execute_insert(self, conn, keys, data_iter):
650-
data = [dict( (k, v) for k, v in zip(keys, row) ) for row in data_iter]
659+
data = [dict((k, v) for k, v in zip(keys, row)) for row in data_iter]
651660
conn.execute(self.insert_statement(), data)
652661

653662
def insert(self, chunksize=None):
@@ -658,11 +667,11 @@ def insert(self, chunksize=None):
658667
if nrows == 0:
659668
return
660669

661-
if chunksize is None:
670+
if chunksize is None:
662671
chunksize = nrows
663672
elif chunksize == 0:
664673
raise ValueError('chunksize argument should be non-zero')
665-
674+
666675
chunks = int(nrows / chunksize) + 1
667676

668677
with self.pd_sql.run_transaction() as conn:
@@ -715,7 +724,8 @@ def _index_name(self, index, index_label):
715724
else:
716725
return index_label
717726
# return the used column labels for the index columns
718-
if nlevels == 1 and 'index' not in self.frame.columns and self.frame.index.name is None:
727+
if (nlevels == 1 and 'index' not in self.frame.columns
728+
and self.frame.index.name is None):
719729
return ['index']
720730
else:
721731
return [l if l is not None else "level_{0}".format(i)
@@ -739,7 +749,7 @@ def _get_column_names_and_types(self, dtype_mapper):
739749

740750
column_names_and_types += [
741751
(str(self.frame.columns[i]),
742-
dtype_mapper(self.frame.iloc[:,i]),
752+
dtype_mapper(self.frame.iloc[:, i]),
743753
False)
744754
for i in range(len(self.frame.columns))
745755
]
@@ -756,9 +766,8 @@ def _create_table_setup(self):
756766
for name, typ, is_index in column_names_and_types]
757767

758768
if self.keys is not None:
759-
columns.append(PrimaryKeyConstraint(self.keys,
760-
name=self.name+'_pk'))
761-
769+
pkc = PrimaryKeyConstraint(self.keys, name=self.name + '_pk')
770+
columns.append(pkc)
762771

763772
schema = self.schema or self.pd_sql.meta.schema
764773

@@ -770,17 +779,16 @@ def _create_table_setup(self):
770779
return Table(self.name, meta, *columns, schema=schema)
771780

772781
def _harmonize_columns(self, parse_dates=None):
773-
""" Make a data_frame's column type align with an sql_table
774-
column types
775-
Need to work around limited NA value support.
776-
Floats are always fine, ints must always
777-
be floats if there are Null values.
778-
Booleans are hard because converting bool column with None replaces
779-
all Nones with false. Therefore only convert bool if there are no
780-
NA values.
781-
Datetimes should already be converted
782-
to np.datetime if supported, but here we also force conversion
783-
if required
782+
"""
783+
Make the DataFrame's column types align with the SQL table
784+
column types.
785+
Need to work around limited NA value support. Floats are always
786+
fine, ints must always be floats if there are Null values.
787+
Booleans are hard because converting bool column with None replaces
788+
all Nones with false. Therefore only convert bool if there are no
789+
NA values.
790+
Datetimes should already be converted to np.datetime64 if supported,
791+
but here we also force conversion if required
784792
"""
785793
# handle non-list entries for parse_dates gracefully
786794
if parse_dates is True or parse_dates is None or parse_dates is False:
@@ -823,7 +831,7 @@ def _harmonize_columns(self, parse_dates=None):
823831

824832
def _sqlalchemy_type(self, col):
825833
from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
826-
DateTime, Date, Time, Interval)
834+
DateTime, Date, Time)
827835

828836
if com.is_datetime64_dtype(col):
829837
try:
@@ -874,12 +882,12 @@ class PandasSQL(PandasObject):
874882
"""
875883

876884
def read_sql(self, *args, **kwargs):
877-
raise ValueError(
878-
"PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor")
885+
raise ValueError("PandasSQL must be created with an SQLAlchemy engine"
886+
" or connection+sql flavor")
879887

880888
def to_sql(self, *args, **kwargs):
881-
raise ValueError(
882-
"PandasSQL must be created with an SQLAlchemy engine or connection+sql flavor")
889+
raise ValueError("PandasSQL must be created with an SQLAlchemy engine"
890+
" or connection+sql flavor")
883891

884892

885893
class PandasSQLAlchemy(PandasSQL):
@@ -897,7 +905,7 @@ def __init__(self, engine, schema=None, meta=None):
897905
self.meta = meta
898906

899907
def run_transaction(self):
900-
return self.engine.begin()
908+
return self.engine.begin()
901909

902910
def execute(self, *args, **kwargs):
903911
"""Simple passthrough to SQLAlchemy engine"""
@@ -964,8 +972,8 @@ def drop_table(self, table_name, schema=None):
964972
self.meta.clear()
965973

966974
def _create_sql_schema(self, frame, table_name, keys=None):
967-
table = PandasSQLTable(table_name, self, frame=frame, index=False,
968-
keys=keys)
975+
table = PandasSQLTable(table_name, self, frame=frame, index=False,
976+
keys=keys)
969977
return str(table.sql_schema())
970978

971979

@@ -1025,9 +1033,11 @@ def _create_sql_schema(self, frame, table_name, keys=None):
10251033

10261034

10271035
class PandasSQLTableLegacy(PandasSQLTable):
1028-
"""Patch the PandasSQLTable for legacy support.
1029-
Instead of a table variable just use the Create Table
1030-
statement"""
1036+
"""
1037+
Patch the PandasSQLTable for legacy support.
1038+
Instead of a table variable just use the Create Table statement.
1039+
"""
1040+
10311041
def sql_schema(self):
10321042
return str(";\n".join(self.table))
10331043

@@ -1058,11 +1068,11 @@ def _execute_insert(self, conn, keys, data_iter):
10581068
conn.executemany(self.insert_statement(), data_list)
10591069

10601070
def _create_table_setup(self):
1061-
"""Return a list of SQL statements that create a table reflecting the
1071+
"""
1072+
Return a list of SQL statements that create a table reflecting the
10621073
structure of a DataFrame. The first entry will be a CREATE TABLE
10631074
statement while the rest will be CREATE INDEX statements
10641075
"""
1065-
10661076
column_names_and_types = \
10671077
self._get_column_names_and_types(self._sql_type_name)
10681078

@@ -1159,15 +1169,15 @@ def execute(self, *args, **kwargs):
11591169
else:
11601170
cur.execute(*args)
11611171
return cur
1162-
except Exception as e:
1172+
except Exception as exc:
11631173
try:
11641174
self.con.rollback()
11651175
except Exception: # pragma: no cover
1166-
ex = DatabaseError(
1167-
"Execution failed on sql: %s\n%s\nunable to rollback" % (args[0], e))
1176+
ex = DatabaseError("Execution failed on sql: %s\n%s\nunable"
1177+
" to rollback" % (args[0], exc))
11681178
raise_with_traceback(ex)
11691179

1170-
ex = DatabaseError("Execution failed on sql '%s': %s" % (args[0], e))
1180+
ex = DatabaseError("Execution failed on sql '%s': %s" % (args[0], exc))
11711181
raise_with_traceback(ex)
11721182

11731183
def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
@@ -1213,11 +1223,11 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
12131223
`index` is True, then the index names are used.
12141224
A sequence should be given if the DataFrame uses MultiIndex.
12151225
schema : string, default None
1216-
Ignored parameter included for compatibility with SQLAlchemy version
1217-
of `to_sql`.
1226+
Ignored parameter included for compatibility with SQLAlchemy
1227+
version of ``to_sql``.
12181228
chunksize : int, default None
1219-
If not None, then rows will be written in batches of this size at a
1220-
time. If None, all rows will be written at once.
1229+
If not None, then rows will be written in batches of this
1230+
size at a time. If None, all rows will be written at once.
12211231
12221232
"""
12231233
table = PandasSQLTableLegacy(
@@ -1243,8 +1253,8 @@ def drop_table(self, name, schema=None):
12431253
self.execute(drop_sql)
12441254

12451255
def _create_sql_schema(self, frame, table_name, keys=None):
1246-
table = PandasSQLTableLegacy(table_name, self, frame=frame, index=False,
1247-
keys=keys)
1256+
table = PandasSQLTableLegacy(table_name, self, frame=frame,
1257+
index=False, keys=keys)
12481258
return str(table.sql_schema())
12491259

12501260

0 commit comments

Comments
 (0)