Skip to content

Commit 85bf8b3

Browse files
committed
addressing comments
1 parent 4a4f309 commit 85bf8b3

File tree

3 files changed

+22
-19
lines changed

3 files changed

+22
-19
lines changed

pandas/core/reshape/reshape.py

+9 -9
Original file line number | Diff line number | Diff line change
@@ -825,47 +825,47 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
825825
# determine columns being encoded
826826

827827
if columns is None:
828-
columns_to_encode = data.select_dtypes(
828+
data_to_encode = data.select_dtypes(
829829
include=['object', 'category'])
830830
else:
831-
columns_to_encode = data[columns]
831+
data_to_encode = data[columns]
832832

833833
# validate prefixes and separator to avoid silently dropping cols
834834
def check_len(item, name):
835835
len_msg = ("Length of '{name}' ({len_item}) did not match the "
836836
"length of the columns being encoded ({len_enc}).")
837837

838838
if is_list_like(item):
839-
if not len(item) == columns_to_encode.shape[1]:
839+
if not len(item) == data_to_encode.shape[1]:
840840
len_msg = \
841841
len_msg.format(name=name, len_item=len(item),
842-
len_enc=columns_to_encode.shape[1])
842+
len_enc=data_to_encode.shape[1])
843843
raise ValueError(len_msg)
844844

845845
check_len(prefix, 'prefix')
846846
check_len(prefix_sep, 'prefix_sep')
847847
if isinstance(prefix, compat.string_types):
848848
prefix = cycle([prefix])
849849
if isinstance(prefix, dict):
850-
prefix = [prefix[col] for col in columns_to_encode.columns]
850+
prefix = [prefix[col] for col in data_to_encode.columns]
851851

852852
if prefix is None:
853-
prefix = columns_to_encode.columns
853+
prefix = data_to_encode.columns
854854

855855
# validate separators
856856
if isinstance(prefix_sep, compat.string_types):
857857
prefix_sep = cycle([prefix_sep])
858858
elif isinstance(prefix_sep, dict):
859-
prefix_sep = [prefix_sep[col] for col in columns_to_encode.columns]
859+
prefix_sep = [prefix_sep[col] for col in data_to_encode.columns]
860860

861-
if columns_to_encode.shape == data.shape:
861+
if data_to_encode.shape == data.shape:
862862
with_dummies = []
863863
elif columns is not None:
864864
with_dummies = [data.drop(columns, axis=1)]
865865
else:
866866
with_dummies = [data.select_dtypes(exclude=['object', 'category'])]
867867

868-
for (col, pre, sep) in zip(columns_to_encode.iteritems(), prefix,
868+
for (col, pre, sep) in zip(data_to_encode.iteritems(), prefix,
869869
prefix_sep):
870870

871871
dummy = _get_dummies_1d(col[1], prefix=pre, prefix_sep=sep,

pandas/tests/frame/test_dtypes.py

+12 -10
Original file line number | Diff line number | Diff line change
@@ -288,19 +288,21 @@ def test_select_dtypes_include_exclude_mixed_scalars_lists(self):
288288
assert_frame_equal(ri, ei)
289289

290290
def test_select_dtypes_duplicate_columns(self):
291-
df = DataFrame({'a': list('abc'),
292-
'b': list(range(1, 4)),
293-
'c': np.arange(3, 6).astype('u1'),
294-
'd': np.arange(4.0, 7.0, dtype='float64'),
295-
'e': [True, False, True],
296-
'f': pd.date_range('now', periods=3).values})
291+
# GH20839
292+
odict = compat.OrderedDict
293+
df = DataFrame(odict([('a', list('abc')),
294+
('b', list(range(1, 4))),
295+
('c', np.arange(3, 6).astype('u1')),
296+
('d', np.arange(4.0, 7.0, dtype='float64')),
297+
('e', [True, False, True]),
298+
('f', pd.date_range('now', periods=3).values)]))
297299
df.columns = ['a', 'a', 'b', 'b', 'b', 'c']
298300

299-
e = DataFrame({'a': list(range(1, 4)),
300-
'b': np.arange(3, 6).astype('u1')})
301+
expected = DataFrame({'a': list(range(1, 4)),
302+
'b': np.arange(3, 6).astype('u1')})
301303

302-
r = df.select_dtypes(include=[np.number], exclude=['floating'])
303-
assert_frame_equal(r, e)
304+
result = df.select_dtypes(include=[np.number], exclude=['floating'])
305+
assert_frame_equal(result, expected)
304306

305307
def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
306308
df = DataFrame({'a': list('abc'),

pandas/tests/reshape/test_reshape.py

+1
Original file line number | Diff line number | Diff line change
@@ -466,6 +466,7 @@ def test_get_dummies_dont_sparsify_all_columns(self, sparse):
466466
tm.assert_frame_equal(df[['GDP']], df2)
467467

468468
def test_get_dummies_duplicate_columns(self, df):
469+
# GH20839
469470
df.columns = ["A", "A", "A"]
470471
result = get_dummies(df).sort_index(axis=1)
471472

0 commit comments

Comments
 (0)