Skip to content

Commit 019a932

Browse files
ENH: SQL support for writing NaN + datetime64 values (GH2754, GH7103)
Casting values to object dtype converts them to native Python types. For datetime64 columns, the values are converted to datetime.datetime, which also fixes the datetime64 issue (supersedes PR GH8205). The NaN issue is solved by converting all NaN values to None.
1 parent 41cc8cc commit 019a932

File tree

3 files changed

+16
-37
lines changed

3 files changed

+16
-37
lines changed

doc/source/v0.15.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,9 @@ Enhancements
494494
df.to_sql('table', engine, schema='other_schema')
495495
pd.read_sql_table('table', engine, schema='other_schema')
496496

497+
- Added support for writing ``NaN`` values with ``to_sql`` (:issue:`2754`).
498+
- Added support for writing datetime64 columns with ``to_sql`` for all database flavors (:issue:`7103`).
499+
497500
- Added support for bool, uint8, uint16 and uint32 datatypes in ``to_stata`` (:issue:`7097`, :issue:`7365`)
498501

499502
- Added ``layout`` keyword to ``DataFrame.plot`` (:issue:`6667`)

pandas/io/sql.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import pandas.core.common as com
1616
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
1717
from pandas.core.api import DataFrame, Series
18+
from pandas.core.common import notnull
1819
from pandas.core.base import PandasObject
1920
from pandas.tseries.tools import to_datetime
2021

@@ -615,7 +616,9 @@ def insert_data(self):
615616
"duplicate name in index/columns: {0}".format(err))
616617
else:
617618
temp = self.frame
618-
619+
620+
temp = temp.astype(object)
621+
temp = temp.where(notnull(temp), None)
619622
return temp
620623

621624
def insert(self, chunksize=None):
@@ -758,12 +761,12 @@ def _harmonize_columns(self, parse_dates=None):
758761

759762
elif col_type is float:
760763
# floats support NA, can always convert!
761-
self.frame[col_name].astype(col_type, copy=False)
764+
self.frame[col_name] = df_col.astype(col_type, copy=False)
762765

763766
elif len(df_col) == df_col.count():
764767
# No NA values, can convert ints and bools
765-
if col_type is int or col_type is bool:
766-
self.frame[col_name].astype(col_type, copy=False)
768+
if col_type is np.dtype('int64') or col_type is bool:
769+
self.frame[col_name] = df_col.astype(col_type, copy=False)
767770

768771
# Handle date parsing
769772
if col_name in parse_dates:
@@ -813,7 +816,7 @@ def _numpy_type(self, sqltype):
813816
return float
814817
if isinstance(sqltype, Integer):
815818
# TODO: Refine integer size.
816-
return int
819+
return np.dtype('int64')
817820
if isinstance(sqltype, DateTime):
818821
# Caution: np.datetime64 is also a subclass of np.number.
819822
return datetime

pandas/io/tests/test_sql.py

+5-32
Original file line numberDiff line numberDiff line change
@@ -952,9 +952,6 @@ def test_date_parsing(self):
952952
"IntDateCol loaded with incorrect type")
953953

954954
def test_datetime(self):
955-
if self.driver == 'pymysql':
956-
raise nose.SkipTest('writing datetime not working with pymysql')
957-
958955
df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
959956
'B': np.arange(3.0)})
960957
df.to_sql('test_datetime', self.conn)
@@ -975,17 +972,6 @@ def test_datetime(self):
975972
tm.assert_frame_equal(result, df)
976973

977974
def test_datetime_NaT(self):
978-
# status:
979-
# - postgresql: gives error on inserting "0001-255-255T00:00:00"
980-
# - sqlite3: works, but reading it with query returns '-001--1--1 -1:-1:-1.-00001'
981-
982-
if self.driver == 'pymysql':
983-
raise nose.SkipTest('writing datetime not working with pymysql')
984-
if self.driver == 'psycopg2':
985-
raise nose.SkipTest('writing datetime NaT not working with psycopg2')
986-
if self.flavor == 'sqlite':
987-
raise nose.SkipTest('reading datetime NaT not working with sqlite')
988-
989975
df = DataFrame({'A': date_range('2013-01-01 09:00:00', periods=3),
990976
'B': np.arange(3.0)})
991977
df.loc[1, 'A'] = np.nan
@@ -1032,9 +1018,6 @@ def test_mixed_dtype_insert(self):
10321018
tm.assert_frame_equal(df, df2, check_dtype=False, check_exact=True)
10331019

10341020
def test_nan_numeric(self):
1035-
if self.driver == 'pymysql':
1036-
raise nose.SkipTest('writing NaNs not working with pymysql')
1037-
10381021
# NaNs in numeric float column
10391022
df = DataFrame({'A':[0, 1, 2], 'B':[0.2, np.nan, 5.6]})
10401023
df.to_sql('test_nan', self.conn, index=False)
@@ -1048,37 +1031,27 @@ def test_nan_numeric(self):
10481031
tm.assert_frame_equal(result, df)
10491032

10501033
def test_nan_fullcolumn(self):
1051-
if self.driver == 'pymysql':
1052-
raise nose.SkipTest('writing NaNs not working with pymysql')
1053-
10541034
# full NaN column (numeric float column)
10551035
df = DataFrame({'A':[0, 1, 2], 'B':[np.nan, np.nan, np.nan]})
10561036
df.to_sql('test_nan', self.conn, index=False)
10571037

1058-
if self.flavor == 'sqlite':
1059-
df['B'] = df['B'].astype('object')
1060-
df['B'] = None
1061-
10621038
# with read_table
10631039
result = sql.read_sql_table('test_nan', self.conn)
10641040
tm.assert_frame_equal(result, df)
10651041

1066-
# with read_sql
1042+
# with read_sql -> no type info from table -> stays None
1043+
df['B'] = df['B'].astype('object')
1044+
df['B'] = None
10671045
result = sql.read_sql_query('SELECT * FROM test_nan', self.conn)
10681046
tm.assert_frame_equal(result, df)
10691047

10701048
def test_nan_string(self):
1071-
if self.driver == 'pymysql':
1072-
raise nose.SkipTest('writing NaNs not working with pymysql')
1073-
10741049
# NaNs in string column
10751050
df = DataFrame({'A':[0, 1, 2], 'B':['a', 'b', np.nan]})
10761051
df.to_sql('test_nan', self.conn, index=False)
10771052

1078-
if self.flavor == 'sqlite':
1079-
df.loc[2, 'B'] = None
1080-
elif self.flavor == 'postgresql':
1081-
df = df.fillna('NaN')
1053+
# NaNs are coming back as None
1054+
df.loc[2, 'B'] = None
10821055

10831056
# with read_table
10841057
result = sql.read_sql_table('test_nan', self.conn)

0 commit comments

Comments
 (0)