Skip to content

Commit 24774a1

Browse files
committed
Merge branch 'to_csv_date_format' of https://github.com/qwhelan/pandas into qwhelan-to_csv_date_format
Conflicts: doc/source/v0.13.0.txt
2 parents b7de580 + ce669d6 commit 24774a1

File tree

9 files changed

+143
-31
lines changed

9 files changed

+143
-31
lines changed

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ New features
6060
- Clipboard functionality now works with PySide (:issue:`4282`)
6161
- New ``extract`` string method returns regex matches more conveniently (:issue:`4685`)
6262
- Auto-detect field widths in read_fwf when unspecified (:issue:`4488`)
63+
- ``to_csv()`` now outputs datetime objects according to a specified format string
64+
via the ``date_format`` keyword (:issue:`4313`)
65+
6366

6467
Experimental Features
6568
~~~~~~~~~~~~~~~~~~~~~

doc/source/v0.13.0.txt

+4
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,10 @@ Enhancements
534534
.. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
535535
.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
536536

537+
- ``to_csv`` now takes a ``date_format`` keyword argument that specifies how
538+
output datetime objects should be formatted. Datetimes encountered in the
539+
index, columns, and values will all have this formatting applied. (:issue:`4313`)
540+
537541
.. _whatsnew_0130.experimental:
538542

539543
Experimental

pandas/core/format.py

+44-15
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import itertools
1919
import csv
2020

21-
from pandas.tseries.period import PeriodIndex
21+
from pandas.tseries.period import PeriodIndex, DatetimeIndex
2222

2323
docstring_to_string = """
2424
Parameters
@@ -850,7 +850,7 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
850850
cols=None, header=True, index=True, index_label=None,
851851
mode='w', nanRep=None, encoding=None, quoting=None,
852852
line_terminator='\n', chunksize=None, engine=None,
853-
tupleize_cols=False, quotechar='"'):
853+
tupleize_cols=False, quotechar='"', date_format=None):
854854

855855
self.engine = engine # remove for 0.13
856856
self.obj = obj
@@ -877,6 +877,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
877877

878878
self.line_terminator = line_terminator
879879

880+
self.date_format = date_format
881+
880882
#GH3457
881883
if not self.obj.columns.is_unique and engine == 'python':
882884
msg= "columns.is_unique == False not supported with engine='python'"
@@ -893,7 +895,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
893895

894896
if cols is not None:
895897
if isinstance(cols,Index):
896-
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
898+
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format,
899+
date_format=date_format)
897900
else:
898901
cols=list(cols)
899902
self.obj = self.obj.loc[:,cols]
@@ -902,7 +905,8 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
902905
# and make sure cols is just a list of labels
903906
cols = self.obj.columns
904907
if isinstance(cols,Index):
905-
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format)
908+
cols = cols.to_native_types(na_rep=na_rep,float_format=float_format,
909+
date_format=date_format)
906910
else:
907911
cols=list(cols)
908912

@@ -923,6 +927,9 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
923927
if isinstance(obj.index, PeriodIndex):
924928
self.data_index = obj.index.to_timestamp()
925929

930+
if isinstance(self.data_index, DatetimeIndex) and date_format is not None:
931+
self.data_index = Index([x.strftime(date_format) if notnull(x) else '' for x in self.data_index])
932+
926933
self.nlevels = getattr(self.data_index, 'nlevels', 1)
927934
if not index:
928935
self.nlevels = 0
@@ -931,15 +938,10 @@ def __init__(self, obj, path_or_buf, sep=",", na_rep='', float_format=None,
931938
# invoked by df.to_csv(engine=python)
932939
def _helper_csv(self, writer, na_rep=None, cols=None,
933940
header=True, index=True,
934-
index_label=None, float_format=None):
941+
index_label=None, float_format=None, date_format=None):
935942
if cols is None:
936943
cols = self.columns
937944

938-
series = {}
939-
for k, v in compat.iteritems(self.obj._series):
940-
series[k] = v.values
941-
942-
943945
has_aliases = isinstance(header, (tuple, list, np.ndarray))
944946
if has_aliases or header:
945947
if index:
@@ -981,10 +983,34 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
981983
encoded_cols = list(cols)
982984
writer.writerow(encoded_cols)
983985

986+
if date_format is None:
987+
date_formatter = lambda x: lib.Timestamp(x)._repr_base
988+
else:
989+
def strftime_with_nulls(x):
990+
x = lib.Timestamp(x)
991+
if notnull(x):
992+
return x.strftime(date_format)
993+
994+
date_formatter = lambda x: strftime_with_nulls(x)
995+
984996
data_index = self.obj.index
997+
985998
if isinstance(self.obj.index, PeriodIndex):
986999
data_index = self.obj.index.to_timestamp()
9871000

1001+
if isinstance(data_index, DatetimeIndex) and date_format is not None:
1002+
data_index = Index([date_formatter(x) for x in data_index])
1003+
1004+
values = self.obj.copy()
1005+
values.index = data_index
1006+
values.columns = values.columns.to_native_types(na_rep=na_rep,float_format=float_format,
1007+
date_format=date_format)
1008+
values = values[cols]
1009+
1010+
series = {}
1011+
for k, v in compat.iteritems(values._series):
1012+
series[k] = v.values
1013+
9881014
nlevels = getattr(data_index, 'nlevels', 1)
9891015
for j, idx in enumerate(data_index):
9901016
row_fields = []
@@ -1000,8 +1026,8 @@ def _helper_csv(self, writer, na_rep=None, cols=None,
10001026

10011027
if float_format is not None and com.is_float(val):
10021028
val = float_format % val
1003-
elif isinstance(val, np.datetime64):
1004-
val = lib.Timestamp(val)._repr_base
1029+
elif isinstance(val, (np.datetime64, lib.Timestamp)):
1030+
val = date_formatter(val)
10051031

10061032
row_fields.append(val)
10071033

@@ -1031,7 +1057,7 @@ def save(self):
10311057
self._helper_csv(self.writer, na_rep=self.na_rep,
10321058
float_format=self.float_format, cols=self.cols,
10331059
header=self.header, index=self.index,
1034-
index_label=self.index_label)
1060+
index_label=self.index_label, date_format=self.date_format)
10351061

10361062
else:
10371063
self._save()
@@ -1150,13 +1176,16 @@ def _save_chunk(self, start_i, end_i):
11501176
slicer = slice(start_i,end_i)
11511177
for i in range(len(self.blocks)):
11521178
b = self.blocks[i]
1153-
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1179+
d = b.to_native_types(slicer=slicer, na_rep=self.na_rep,
1180+
float_format=self.float_format, date_format=self.date_format)
1181+
11541182
for i, item in enumerate(b.items):
11551183

11561184
# self.data is a preallocated list
11571185
self.data[self.column_map[b][i]] = d[i]
11581186

1159-
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep, float_format=self.float_format)
1187+
ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
1188+
float_format=self.float_format, date_format=self.date_format)
11601189

11611190
lib.write_csv_rows(self.data, ix, self.nlevels, self.cols, self.writer)
11621191

pandas/core/frame.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1030,7 +1030,7 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10301030
cols=None, header=True, index=True, index_label=None,
10311031
mode='w', nanRep=None, encoding=None, quoting=None,
10321032
line_terminator='\n', chunksize=None,
1033-
tupleize_cols=False, **kwds):
1033+
tupleize_cols=False, date_format=None, **kwds):
10341034
r"""Write DataFrame to a comma-separated values (csv) file
10351035
10361036
Parameters
@@ -1073,6 +1073,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10731073
tupleize_cols : boolean, default False
10741074
write multi_index columns as a list of tuples (if True)
10751075
or in the new, expanded format (if False)
1076+
date_format : string, default None
1077+
Format string for datetime objects.
10761078
"""
10771079
if nanRep is not None: # pragma: no cover
10781080
warnings.warn("nanRep is deprecated, use na_rep",
@@ -1088,7 +1090,8 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None,
10881090
index_label=index_label, mode=mode,
10891091
chunksize=chunksize, engine=kwds.get(
10901092
"engine"),
1091-
tupleize_cols=tupleize_cols)
1093+
tupleize_cols=tupleize_cols,
1094+
date_format=date_format)
10921095
formatter.save()
10931096

10941097
def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='',

pandas/core/internals.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
from pandas.tslib import Timestamp
2424
from pandas import compat
25-
from pandas.compat import range, lrange, lmap, callable, map, zip
25+
from pandas.compat import range, lrange, lmap, callable, map, zip, u
2626
from pandas.tseries.timedeltas import _coerce_scalar_to_timedelta_type
2727

2828
class Block(PandasObject):
@@ -1471,7 +1471,7 @@ def fillna(self, value, inplace=False, downcast=None):
14711471
return [self if inplace else make_block(values, self.items,
14721472
self.ref_items, fastpath=True)]
14731473

1474-
def to_native_types(self, slicer=None, na_rep=None, **kwargs):
1474+
def to_native_types(self, slicer=None, na_rep=None, date_format=None, **kwargs):
14751475
""" convert to our native types format, slicing if desired """
14761476

14771477
values = self.values
@@ -1484,8 +1484,14 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
14841484
na_rep = 'NaT'
14851485
rvalues[mask] = na_rep
14861486
imask = (-mask).ravel()
1487-
rvalues.flat[imask] = np.array(
1488-
[Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
1487+
1488+
if date_format is None:
1489+
date_formatter = lambda x: Timestamp(x)._repr_base
1490+
else:
1491+
date_formatter = lambda x: Timestamp(x).strftime(date_format)
1492+
1493+
rvalues.flat[imask] = np.array([date_formatter(val) for val in
1494+
values.ravel()[imask]], dtype=object)
14891495

14901496
return rvalues.tolist()
14911497

pandas/core/series.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -2145,7 +2145,8 @@ def from_csv(cls, path, sep=',', parse_dates=True, header=None,
21452145

21462146
def to_csv(self, path, index=True, sep=",", na_rep='',
21472147
float_format=None, header=False,
2148-
index_label=None, mode='w', nanRep=None, encoding=None):
2148+
index_label=None, mode='w', nanRep=None, encoding=None,
2149+
date_format=None):
21492150
"""
21502151
Write Series to a comma-separated values (csv) file
21512152
@@ -2170,13 +2171,15 @@ def to_csv(self, path, index=True, sep=",", na_rep='',
21702171
encoding : string, optional
21712172
a string representing the encoding to use if the contents are
21722173
non-ascii, for python versions prior to 3
2174+
date_format : string, default None
2175+
Format string for datetime objects.
21732176
"""
21742177
from pandas.core.frame import DataFrame
21752178
df = DataFrame(self)
21762179
df.to_csv(path, index=index, sep=sep, na_rep=na_rep,
21772180
float_format=float_format, header=header,
21782181
index_label=index_label, mode=mode, nanRep=nanRep,
2179-
encoding=encoding)
2182+
encoding=encoding, date_format=date_format)
21802183

21812184
def dropna(self):
21822185
"""

pandas/tests/test_frame.py

+47
Original file line numberDiff line numberDiff line change
@@ -11415,6 +11415,53 @@ def test_isin_with_string_scalar(self):
1141511415
with tm.assertRaises(TypeError):
1141611416
df.isin('aaa')
1141711417

11418+
def test_to_csv_date_format(self):
11419+
from pandas import to_datetime
11420+
pname = '__tmp_to_csv_date_format__'
11421+
with ensure_clean(pname) as path:
11422+
for engine in [None, 'python']:
11423+
dt_index = self.tsframe.index
11424+
datetime_frame = DataFrame({'A': dt_index, 'B': dt_index.shift(1)}, index=dt_index)
11425+
11426+
datetime_frame.to_csv(path, date_format='%Y%m%d', engine=engine)
11427+
# Check that the data was put in the specified format
11428+
test = read_csv(path, index_col=0)
11429+
11430+
datetime_frame_int = datetime_frame.applymap(lambda x: int(x.strftime('%Y%m%d')))
11431+
datetime_frame_int.index = datetime_frame_int.index.map(lambda x: int(x.strftime('%Y%m%d')))
11432+
11433+
assert_frame_equal(test, datetime_frame_int)
11434+
11435+
datetime_frame.to_csv(path, date_format='%Y-%m-%d', engine=engine)
11436+
# Check that the data was put in the specified format
11437+
test = read_csv(path, index_col=0)
11438+
datetime_frame_str = datetime_frame.applymap(lambda x: x.strftime('%Y-%m-%d'))
11439+
datetime_frame_str.index = datetime_frame_str.index.map(lambda x: x.strftime('%Y-%m-%d'))
11440+
11441+
assert_frame_equal(test, datetime_frame_str)
11442+
11443+
# Check that columns get converted
11444+
datetime_frame_columns = datetime_frame.T
11445+
11446+
datetime_frame_columns.to_csv(path, date_format='%Y%m%d', engine=engine)
11447+
11448+
test = read_csv(path, index_col=0)
11449+
11450+
datetime_frame_columns = datetime_frame_columns.applymap(lambda x: int(x.strftime('%Y%m%d')))
11451+
# Columns don't get converted to ints by read_csv
11452+
datetime_frame_columns.columns = datetime_frame_columns.columns.map(lambda x: x.strftime('%Y%m%d'))
11453+
11454+
assert_frame_equal(test, datetime_frame_columns)
11455+
11456+
# test NaTs
11457+
nat_index = to_datetime(['NaT'] * 10 + ['2000-01-01', '1/1/2000', '1-1-2000'])
11458+
nat_frame = DataFrame({'A': nat_index}, index=nat_index)
11459+
11460+
nat_frame.to_csv(path, date_format='%Y-%m-%d', engine=engine)
11461+
11462+
test = read_csv(path, parse_dates=[0, 1], index_col=0)
11463+
11464+
assert_frame_equal(test, nat_frame)
1141811465

1141911466
def skip_if_no_ne(engine='numexpr'):
1142011467
if engine == 'numexpr':

pandas/tseries/index.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import numpy as np
88

99
from pandas.core.common import (isnull, _NS_DTYPE, _INT64_DTYPE,
10-
is_list_like,_values_from_object, _maybe_box)
10+
is_list_like,_values_from_object, _maybe_box,
11+
notnull)
1112
from pandas.core.index import Index, Int64Index, _Identity
1213
import pandas.compat as compat
1314
from pandas.compat import u
@@ -599,23 +600,29 @@ def __contains__(self, key):
599600
def _format_with_header(self, header, **kwargs):
600601
return header + self._format_native_types(**kwargs)
601602

602-
def _format_native_types(self, na_rep=u('NaT'), **kwargs):
603+
def _format_native_types(self, na_rep=u('NaT'), date_format=None, **kwargs):
603604
data = list(self)
604605

605606
# tz formatter or time formatter
606607
zero_time = time(0, 0)
607-
for d in data:
608-
if d.time() != zero_time or d.tzinfo is not None:
609-
return [u('%s') % x for x in data]
608+
if date_format is None:
609+
for d in data:
610+
if d.time() != zero_time or d.tzinfo is not None:
611+
return [u('%s') % x for x in data]
610612

611613
values = np.array(data, dtype=object)
612614
mask = isnull(self.values)
613615
values[mask] = na_rep
614616

615617
imask = -mask
616-
values[imask] = np.array([u('%d-%.2d-%.2d') % (dt.year, dt.month,
617-
dt.day)
618-
for dt in values[imask]])
618+
619+
if date_format is None:
620+
date_formatter = lambda x: u('%d-%.2d-%.2d' % (x.year, x.month, x.day))
621+
else:
622+
date_formatter = lambda x: u(x.strftime(date_format))
623+
624+
values[imask] = np.array([date_formatter(dt) for dt in values[imask]])
625+
619626
return values.tolist()
620627

621628
def isin(self, values):

vb_suite/io_bench.py

+10
Original file line numberDiff line numberDiff line change
@@ -88,3 +88,13 @@ def create_cols(name):
8888
" parse_dates=['foo'])")
8989
read_parse_dates_iso8601 = Benchmark(stmt, setup,
9090
start_date=datetime(2012, 3, 1))
91+
92+
setup = common_setup + """
93+
rng = date_range('1/1/2000', periods=1000)
94+
data = DataFrame(rng, index=rng)
95+
"""
96+
97+
stmt = ("data.to_csv('__test__.csv', date_format='%Y%m%d')")
98+
99+
frame_to_csv_date_formatting = Benchmark(stmt, setup,
100+
start_date=datetime(2013, 9, 1))

0 commit comments

Comments
 (0)