Skip to content

Commit e96df8e

Browse files
committed
Merge pull request #5341 from jtratner/tweak-sanitize-column
BUG/CLN: Cleanup sanitize column and abstract broadcasting
2 parents be7c4c0 + bd088ec commit e96df8e

File tree

2 files changed

+47
-56
lines changed

2 files changed

+47
-56
lines changed

pandas/core/frame.py

+36 -56
Original file line number | Diff line number | Diff line change
@@ -1915,71 +1915,51 @@ def _sanitize_column(self, key, value):
19151915
# Need to make sure new columns (which go into the BlockManager as new
19161916
# blocks) are always copied
19171917

1918-
# dont' need further processing on an equal index
1919-
if isinstance(value, Index) and (not len(self.index) or value.equals(self.index)):
1920-
value = value.values.copy()
1921-
elif isinstance(value, Series) or _is_sequence(value):
1918+
if isinstance(value, (Series, DataFrame)):
19221919
is_frame = isinstance(value, DataFrame)
1923-
if isinstance(value, Series) or is_frame:
1924-
if value.index.equals(self.index) or not len(self.index):
1925-
# copy the values
1926-
value = value.values.copy()
1927-
else:
1928-
1929-
# GH 4107
1930-
try:
1931-
value = value.reindex(self.index).values
1932-
except:
1933-
raise TypeError('incompatible index of inserted column '
1934-
'with frame index')
1935-
1936-
if is_frame:
1937-
value = value.T
1920+
if value.index.equals(self.index) or not len(self.index):
1921+
# copy the values
1922+
value = value.values.copy()
19381923
else:
1939-
if len(value) != len(self.index):
1940-
raise ValueError('Length of values does not match '
1941-
'length of index')
19421924

1943-
if not isinstance(value, np.ndarray):
1944-
if isinstance(value, list) and len(value) > 0:
1945-
value = com._possibly_convert_platform(value)
1946-
else:
1947-
value = com._asarray_tuplesafe(value)
1948-
elif isinstance(value, PeriodIndex):
1949-
value = value.asobject
1950-
elif value.ndim == 2:
1951-
value = value.copy().T
1925+
# GH 4107
1926+
try:
1927+
value = value.reindex(self.index).values
1928+
except:
1929+
raise TypeError('incompatible index of inserted column '
1930+
'with frame index')
1931+
1932+
if is_frame:
1933+
value = value.T
1934+
elif isinstance(value, Index) or _is_sequence(value):
1935+
if len(value) != len(self.index):
1936+
raise ValueError('Length of values does not match '
1937+
'length of index')
1938+
1939+
if not isinstance(value, (np.ndarray, Index)):
1940+
if isinstance(value, list) and len(value) > 0:
1941+
value = com._possibly_convert_platform(value)
19521942
else:
1953-
value = value.copy()
1943+
value = com._asarray_tuplesafe(value)
1944+
elif isinstance(value, PeriodIndex):
1945+
value = value.asobject
1946+
elif value.ndim == 2:
1947+
value = value.copy().T
1948+
else:
1949+
value = value.copy()
1950+
else:
1951+
# upcast the scalar
1952+
dtype, value = _infer_dtype_from_scalar(value)
1953+
value = np.repeat(value, len(self.index)).astype(dtype)
1954+
value = com._possibly_cast_to_datetime(value, dtype)
19541955

1955-
# Broadcasting funtimes
1956-
if key in self.columns and value.ndim == 1:
1956+
# broadcast across multiple columns if necessary
1957+
if key in self.columns and value.ndim == 1:
1958+
if not self.columns.is_unique or isinstance(self.columns, MultiIndex):
19571959
existing_piece = self[key]
19581960
if isinstance(existing_piece, DataFrame):
19591961
value = np.tile(value, (len(existing_piece.columns), 1))
1960-
else:
1961-
if key in self.columns:
1962-
existing_piece = self[key]
19631962

1964-
# upcast the scalar
1965-
dtype, value = _infer_dtype_from_scalar(value)
1966-
1967-
# transpose hack
1968-
if isinstance(existing_piece, DataFrame):
1969-
shape = (len(existing_piece.columns), len(self.index))
1970-
value = np.repeat(value, np.prod(shape)).reshape(shape)
1971-
else:
1972-
value = np.repeat(value, len(self.index))
1973-
1974-
value = value.astype(dtype)
1975-
1976-
else:
1977-
# upcast the scalar
1978-
dtype, value = _infer_dtype_from_scalar(value)
1979-
value = np.array(
1980-
np.repeat(value, len(self.index)), dtype=dtype)
1981-
1982-
value = com._possibly_cast_to_datetime(value, dtype)
19831963
return np.atleast_2d(np.asarray(value))
19841964

19851965
@property

pandas/tests/test_frame.py

+11
Original file line number | Diff line number | Diff line change
@@ -3205,6 +3205,17 @@ def check(result, expected=None):
32053205
expected = pd.concat([ s, s, s],keys=df.columns,axis=1)
32063206
check(result,expected)
32073207

3208+
# check column dups with index equal and not equal to df's index
3209+
df = DataFrame(np.random.randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
3210+
columns=['A', 'B', 'A'])
3211+
for index in [df.index, pd.Index(list('edcba'))]:
3212+
this_df = df.copy()
3213+
expected_ser = pd.Series(index.values, index=this_df.index)
3214+
expected_df = DataFrame.from_items([('A', expected_ser),
3215+
('B', this_df['B']),
3216+
('A', expected_ser)])
3217+
this_df['A'] = index
3218+
check(this_df, expected_df)
32083219

32093220
def test_column_dups_indexing(self):
32103221

0 commit comments

Comments
 (0)