Skip to content

Commit c0c7402

Browse files
committed
ENH: change default header names in read_* functions from X.1, X.2, ... to X0, X1, ...; closes #2000
1 parent de6cce5 commit c0c7402

File tree

4 files changed

+44
-39
lines changed

4 files changed

+44
-39
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ pandas 0.9.0
6363

6464
**API Changes**
6565

66+
- Change default header names in read_* functions to more Pythonic X0, X1,
67+
etc. instead of X.1, X.2. (#2000)
6668
- Deprecated ``day_of_year`` API removed from PeriodIndex, use ``dayofyear``
6769
(#1723)
6870
- Don't modify NumPy suppress printoption at import time

pandas/io/parsers.py

+14-13
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ def _infer_columns(self):
613613

614614
ncols = len(line)
615615
if not names:
616-
columns = ['X.%d' % (i + 1) for i in range(ncols)]
616+
columns = ['X%d' % i for i in range(ncols)]
617617
else:
618618
columns = names
619619

@@ -747,7 +747,7 @@ def _explicit_index_names(self, columns):
747747
else:
748748
index_name = columns[self.index_col]
749749

750-
if index_name is not None and 'Unnamed' in index_name:
750+
if index_name is not None and 'Unnamed' in str(index_name):
751751
index_name = None
752752

753753
elif self.index_col is not None:
@@ -1160,19 +1160,9 @@ def _convert_types(values, na_values):
11601160

11611161
return result, na_count
11621162

1163-
def _get_col_names(colspec, columns):
1164-
colset = set(columns)
1165-
colnames = []
1166-
for c in colspec:
1167-
if c in colset:
1168-
colnames.append(str(c))
1169-
elif isinstance(c, int):
1170-
colnames.append(str(columns[c]))
1171-
return colnames
1172-
11731163
def _try_convert_dates(parser, colspec, data_dict, columns):
11741164
colspec = _get_col_names(colspec, columns)
1175-
new_name = '_'.join(colspec)
1165+
new_name = '_'.join([str(x) for x in colspec])
11761166

11771167
to_parse = [data_dict[c] for c in colspec if c in data_dict]
11781168
try:
@@ -1181,6 +1171,17 @@ def _try_convert_dates(parser, colspec, data_dict, columns):
11811171
new_col = parser(_concat_date_cols(to_parse))
11821172
return new_name, new_col, colspec
11831173

1174+
def _get_col_names(colspec, columns):
1175+
colset = set(columns)
1176+
colnames = []
1177+
for c in colspec:
1178+
if c in colset:
1179+
colnames.append(c)
1180+
elif isinstance(c, int):
1181+
colnames.append(columns[c])
1182+
return colnames
1183+
1184+
11841185
def _concat_date_cols(date_cols):
11851186
if len(date_cols) == 1:
11861187
return np.array([str(x) for x in date_cols[0]], dtype=object)

pandas/io/tests/test_parsers.py

+27-25
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,10 @@ def func(*date_cols):
215215
'actual' : [1,3]})
216216
self.assert_('nominal' in df)
217217
self.assert_('actual' in df)
218-
self.assert_('X.2' not in df)
219-
self.assert_('X.3' not in df)
220-
self.assert_('X.4' not in df)
221-
from datetime import datetime
218+
self.assert_('X1' not in df)
219+
self.assert_('X2' not in df)
220+
self.assert_('X3' not in df)
221+
222222
d = datetime(1999, 1, 27, 19, 0)
223223
self.assert_(df.ix[0, 'nominal'] == d)
224224

@@ -229,9 +229,10 @@ def func(*date_cols):
229229
keep_date_col=True)
230230
self.assert_('nominal' in df)
231231
self.assert_('actual' in df)
232-
self.assert_('X.2' in df)
233-
self.assert_('X.3' in df)
234-
self.assert_('X.4' in df)
232+
233+
self.assert_('X1' in df)
234+
self.assert_('X2' in df)
235+
self.assert_('X3' in df)
235236

236237
data = """\
237238
KORD,19990127, 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
@@ -243,22 +244,24 @@ def func(*date_cols):
243244
"""
244245
df = read_csv(StringIO(data), header=None,
245246
parse_dates=[[1, 2], [1,3]])
246-
self.assert_('X.2_X.3' in df)
247-
self.assert_('X.2_X.4' in df)
248-
self.assert_('X.2' not in df)
249-
self.assert_('X.3' not in df)
250-
self.assert_('X.4' not in df)
251-
from datetime import datetime
247+
248+
self.assert_('X1_X2' in df)
249+
self.assert_('X1_X3' in df)
250+
self.assert_('X1' not in df)
251+
self.assert_('X2' not in df)
252+
self.assert_('X3' not in df)
253+
252254
d = datetime(1999, 1, 27, 19, 0)
253-
self.assert_(df.ix[0, 'X.2_X.3'] == d)
255+
self.assert_(df.ix[0, 'X1_X2'] == d)
254256

255257
df = read_csv(StringIO(data), header=None,
256258
parse_dates=[[1, 2], [1,3]], keep_date_col=True)
257-
self.assert_('X.2_X.3' in df)
258-
self.assert_('X.2_X.4' in df)
259-
self.assert_('X.2' in df)
260-
self.assert_('X.3' in df)
261-
self.assert_('X.4' in df)
259+
260+
self.assert_('X1_X2' in df)
261+
self.assert_('X1_X3' in df)
262+
self.assert_('X1' in df)
263+
self.assert_('X2' in df)
264+
self.assert_('X3' in df)
262265

263266
data = '''\
264267
KORD,19990127 19:00:00, 18:56:00, 0.8100, 2.8100, 7.2000, 0.0000, 280.0000
@@ -269,7 +272,6 @@ def func(*date_cols):
269272
'''
270273
df = read_csv(StringIO(data), sep=',', header=None,
271274
parse_dates=[1], index_col=1)
272-
from datetime import datetime
273275
d = datetime(1999, 1, 27, 19, 0)
274276
self.assert_(df.index[0] == d)
275277

@@ -561,7 +563,7 @@ def test_skiprows_bug(self):
561563
index_col=0, parse_dates=True)
562564

563565
expected = DataFrame(np.arange(1., 10.).reshape((3,3)),
564-
columns=['X.2', 'X.3', 'X.4'],
566+
columns=['X1', 'X2', 'X3'],
565567
index=[datetime(2000, 1, 1), datetime(2000, 1, 2),
566568
datetime(2000, 1, 3)])
567569
assert_frame_equal(data, expected)
@@ -706,7 +708,7 @@ def test_no_header(self):
706708
assert_almost_equal(df.values, expected)
707709
assert_almost_equal(df.values, df2.values)
708710
self.assert_(np.array_equal(df.columns,
709-
['X.1', 'X.2', 'X.3', 'X.4', 'X.5']))
711+
['X0', 'X1', 'X2', 'X3', 'X4']))
710712
self.assert_(np.array_equal(df2.columns, names))
711713

712714
def test_header_with_index_col(self):
@@ -848,7 +850,7 @@ def test_parse_cols_list(self):
848850
def test_read_table_unicode(self):
849851
fin = StringIO('\u0141aski, Jan;1')
850852
df1 = read_table(fin, sep=";", encoding="utf-8", header=None)
851-
self.assert_(isinstance(df1['X.1'].values[0], unicode))
853+
self.assert_(isinstance(df1['X0'].values[0], unicode))
852854

853855
def test_read_table_wrong_num_columns(self):
854856
data = """A,B,C,D,E,F
@@ -1286,8 +1288,8 @@ def test_read_csv_parse_simple_list(self):
12861288
foo
12871289
bar"""
12881290
df = read_csv(StringIO(text), header=None)
1289-
expected = DataFrame({'X.1' : ['foo', 'bar baz', 'qux foo',
1290-
'foo', 'bar']})
1291+
expected = DataFrame({'X0' : ['foo', 'bar baz', 'qux foo',
1292+
'foo', 'bar']})
12911293
assert_frame_equal(df, expected)
12921294

12931295
def test_parse_dates_custom_euroformat(self):

pandas/tests/test_format.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ def test_string_repr_encoding(self):
237237
filepath = os.path.join(pth, 'data', 'unicode_series.csv')
238238
df = pandas.read_csv(filepath, header=None)
239239
repr(df)
240-
repr(df['X.2'])
240+
repr(df['X1'])
241241

242242
def test_repr_corner(self):
243243
# representing infs poses no problems

0 commit comments

Comments
 (0)