Skip to content

Commit 6c36769

Browse files
Merge pull request #6902 from danielballan/remove-safe-col-names
API: Stop modifying SQL column names, and warn when pertinent.
2 parents d206f83 + 09f377e commit 6c36769

File tree

2 files changed

+32
-16
lines changed

2 files changed

+32
-16
lines changed

pandas/io/sql.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import warnings
99
import itertools
10+
import re
1011
import numpy as np
1112

1213
import pandas.core.common as com
@@ -38,11 +39,6 @@ def _convert_params(sql, params):
3839
return args
3940

4041

41-
def _safe_col_name(col_name):
42-
#TODO: probably want to forbid database reserved names, such as "database"
43-
return col_name.strip().replace(' ', '_')
44-
45-
4642
def _handle_date_column(col, format=None):
4743
if isinstance(format, dict):
4844
return to_datetime(col, **format)
@@ -587,11 +583,11 @@ def _index_name(self, index, index_label):
587583
def _create_table_statement(self):
588584
from sqlalchemy import Table, Column
589585

590-
safe_columns = map(_safe_col_name, self.frame.dtypes.index)
586+
columns = list(map(str, self.frame.columns))
591587
column_types = map(self._sqlalchemy_type, self.frame.dtypes)
592588

593589
columns = [Column(name, typ)
594-
for name, typ in zip(safe_columns, column_types)]
590+
for name, typ in zip(columns, column_types)]
595591

596592
if self.index is not None:
597593
for i, idx_label in enumerate(self.index[::-1]):
@@ -836,6 +832,11 @@ def _create_sql_schema(self, frame, table_name):
836832
}
837833

838834

835+
_SAFE_NAMES_WARNING = ("The spaces in these column names will not be changed. "
836+
"In pandas versions < 0.14, spaces were converted to "
837+
"underscores.")
838+
839+
839840
class PandasSQLTableLegacy(PandasSQLTable):
840841
"""Patch the PandasSQLTable for legacy support.
841842
Instead of a table variable just use the Create Table
@@ -847,19 +848,18 @@ def create(self):
847848
self.pd_sql.execute(self.table)
848849

849850
def insert_statement(self):
850-
# Replace spaces in DataFrame column names with _.
851-
safe_names = [_safe_col_name(n) for n in self.frame.dtypes.index]
851+
names = list(map(str, self.frame.columns))
852852
flv = self.pd_sql.flavor
853853
br_l = _SQL_SYMB[flv]['br_l'] # left val quote char
854854
br_r = _SQL_SYMB[flv]['br_r'] # right val quote char
855855
wld = _SQL_SYMB[flv]['wld'] # wildcard char
856856

857857
if self.index is not None:
858-
[safe_names.insert(0, idx) for idx in self.index[::-1]]
858+
[names.insert(0, idx) for idx in self.index[::-1]]
859859

860-
bracketed_names = [br_l + column + br_r for column in safe_names]
860+
bracketed_names = [br_l + column + br_r for column in names]
861861
col_names = ','.join(bracketed_names)
862-
wildcards = ','.join([wld] * len(safe_names))
862+
wildcards = ','.join([wld] * len(names))
863863
insert_statement = 'INSERT INTO %s (%s) VALUES (%s)' % (
864864
self.name, col_names, wildcards)
865865
return insert_statement
@@ -881,13 +881,15 @@ def insert(self):
881881
def _create_table_statement(self):
882882
"Return a CREATE TABLE statement to suit the contents of a DataFrame."
883883

884-
# Replace spaces in DataFrame column names with _.
885-
safe_columns = [_safe_col_name(n) for n in self.frame.dtypes.index]
884+
columns = list(map(str, self.frame.columns))
885+
pat = re.compile('\s+')
886+
if any(map(pat.search, columns)):
887+
warnings.warn(_SAFE_NAMES_WARNING)
886888
column_types = [self._sql_type_name(typ) for typ in self.frame.dtypes]
887889

888890
if self.index is not None:
889891
for i, idx_label in enumerate(self.index[::-1]):
890-
safe_columns.insert(0, idx_label)
892+
columns.insert(0, idx_label)
891893
column_types.insert(0, self._sql_type_name(self.frame.index.get_level_values(i).dtype))
892894

893895
flv = self.pd_sql.flavor
@@ -898,7 +900,7 @@ def _create_table_statement(self):
898900
col_template = br_l + '%s' + br_r + ' %s'
899901

900902
columns = ',\n '.join(col_template %
901-
x for x in zip(safe_columns, column_types))
903+
x for x in zip(columns, column_types))
902904
template = """CREATE TABLE %(name)s (
903905
%(columns)s
904906
)"""

pandas/io/tests/test_sql.py

+14
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,11 @@ def test_to_sql_index_label_multiindex(self):
550550
'test_index_label', self.conn, if_exists='replace',
551551
index_label='C')
552552

553+
def test_integer_col_names(self):
554+
df = DataFrame([[1, 2], [3, 4]], columns=[0, 1])
555+
sql.to_sql(df, "test_frame_integer_col_names", self.conn,
556+
if_exists='replace')
557+
553558

554559
class TestSQLApi(_TestSQLApi):
555560
"""
@@ -661,10 +666,19 @@ def test_read_sql_delegate(self):
661666
self.assertRaises(ValueError, sql.read_sql, 'iris', self.conn,
662667
flavor=self.flavor)
663668

669+
def test_safe_names_warning(self):
670+
# GH 6798
671+
df = DataFrame([[1, 2], [3, 4]], columns=['a', 'b ']) # has a space
672+
# warns on create table with spaces in names
673+
with tm.assert_produces_warning():
674+
sql.to_sql(df, "test_frame3_legacy", self.conn,
675+
flavor="sqlite", index=False)
676+
664677

665678
#------------------------------------------------------------------------------
666679
#--- Database flavor specific tests
667680

681+
668682
class _TestSQLAlchemy(PandasSQLTest):
669683
"""
670684
Base class for testing the sqlalchemy backend.

0 commit comments

Comments
 (0)