Skip to content

Commit 6a0452b

Browse files
committed
BUG: print DataFrame columns in the right order, also convert NAs in string columns, GH #325
1 parent fc7315d commit 6a0452b

File tree

5 files changed: +47 -12 lines changed

5 files changed: +47 -12 lines changed

Diff for: pandas/core/frame.py

+9 -8
Original file line number | Diff line number | Diff line change
@@ -2890,7 +2890,11 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
28902890
self.formatters = formatters
28912891
self.na_rep = na_rep
28922892
self.col_space = col_space
2893-
self.column_filter = frame.columns if columns is None else set(columns)
2893+
2894+
if columns is not None:
2895+
self.columns = _ensure_index(columns)
2896+
else:
2897+
self.columns = frame.columns
28942898

28952899
self._write_to_buffer()
28962900

@@ -2909,8 +2913,7 @@ def _write_to_buffer(self):
29092913
str_columns = self._get_formatted_column_labels()
29102914

29112915
stringified = [str_columns[i] + format_col(c)
2912-
for i, c in enumerate(frame.columns)
2913-
if c in self.column_filter]
2916+
for i, c in enumerate(self.columns)]
29142917

29152918
to_write.append(adjoin(1, str_index, *stringified))
29162919

@@ -2946,18 +2949,16 @@ def _format_col(col):
29462949
def _get_formatted_column_labels(self):
29472950
from pandas.core.index import _sparsify
29482951

2949-
columns = self.frame.columns
2950-
2951-
if isinstance(columns, MultiIndex):
2952-
fmt_columns = columns.format(sparsify=False, adjoin=False)
2952+
if isinstance(self.columns, MultiIndex):
2953+
fmt_columns = self.columns.format(sparsify=False, adjoin=False)
29532954
str_columns = zip(*[[' %s' % y for y in x]
29542955
for x in zip(*fmt_columns)])
29552956
if self.sparsify:
29562957
str_columns = _sparsify(str_columns)
29572958

29582959
str_columns = [list(x) for x in zip(*str_columns)]
29592960
else:
2960-
str_columns = [[' %s' % x] for x in columns.format()]
2961+
str_columns = [[' %s' % x] for x in self.columns.format()]
29612962

29622963
if self.show_index_names and self.has_index_names:
29632964
for x in str_columns:

Diff for: pandas/io/parsers.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,7 @@ def _convert_types(values, na_values):
424424
try:
425425
values = lib.maybe_convert_numeric(values, na_values)
426426
except Exception:
427-
lib.sanitize_objects(values)
427+
lib.sanitize_objects(values, na_values)
428428

429429
if values.dtype == np.object_:
430430
return lib.maybe_convert_bool(values)

Diff for: pandas/io/tests/test_parsers.py

+13
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,19 @@ def test_custom_na_values(self):
5151
skiprows=[1])
5252
assert_almost_equal(df2.values, expected)
5353

54+
def test_detect_string_na(self):
55+
data = """A,B
56+
foo,bar
57+
NA,baz
58+
NaN,nan
59+
"""
60+
expected = [['foo', 'bar'],
61+
[nan, 'baz'],
62+
[nan, nan]]
63+
64+
df = read_csv(StringIO(data))
65+
assert_almost_equal(df.values, expected)
66+
5467
def test_unnamed_columns(self):
5568
data = """A,B,C,,
5669
1,2,3,4,5

Diff for: pandas/src/parsing.pyx

+2 -2
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@ def try_parse_dates(ndarray[object] values, parser=None):
190190

191191
return result
192192

193-
def sanitize_objects(ndarray[object] values):
193+
def sanitize_objects(ndarray[object] values, set na_values):
194194
cdef:
195195
Py_ssize_t i, n
196196
object val, onan
@@ -200,7 +200,7 @@ def sanitize_objects(ndarray[object] values):
200200

201201
for i from 0 <= i < n:
202202
val = values[i]
203-
if val == '':
203+
if val == '' or val in na_values:
204204
values[i] = onan
205205

206206
def maybe_convert_bool(ndarray[object] arr):

Diff for: pandas/tests/test_frame.py

+22 -1
Original file line number | Diff line number | Diff line change
@@ -1324,6 +1324,9 @@ def test_repr_corner(self):
13241324
foo = repr(df)
13251325

13261326
def test_to_string(self):
1327+
from pandas import read_table
1328+
import re
1329+
13271330
# big mixed
13281331
biggie = DataFrame({'A' : randn(1000),
13291332
'B' : tm.makeStringIndex(1000)},
@@ -1340,7 +1343,25 @@ def test_to_string(self):
13401343

13411344
self.assert_(isinstance(s, basestring))
13421345

1343-
biggie.to_string(columns=['B', 'A'], colSpace=17)
1346+
# print in right order
1347+
result = biggie.to_string(columns=['B', 'A'], colSpace=17,
1348+
float_format='%.6f'.__mod__)
1349+
lines = result.split('\n')
1350+
header = lines[0].strip().split()
1351+
joined = '\n'.join([re.sub('\s+', ' ', x).strip() for x in lines[1:]])
1352+
recons = read_table(StringIO(joined), names=header, sep=' ')
1353+
assert_series_equal(recons['B'], biggie['B'])
1354+
assert_series_equal(np.round(recons['A'], 2),
1355+
np.round(biggie['A'], 2))
1356+
1357+
# expected = ['B', 'A']
1358+
# self.assertEqual(header, expected)
1359+
1360+
result = biggie.to_string(columns=['A'], colSpace=17)
1361+
header = result.split('\n')[0].strip().split()
1362+
expected = ['A']
1363+
self.assertEqual(header, expected)
1364+
13441365
biggie.to_string(columns=['B', 'A'],
13451366
formatters={'A' : lambda x: '%.1f' % x})
13461367

0 commit comments

Comments
 (0)