Skip to content

Commit 5fd98a5

Browse files
committed
BUG: do not cast ints to floats if inputs to crosstab are not aligned
closes pandas-dev#17005
1 parent fcb0263 commit 5fd98a5

File tree

3 files changed

+35
-11
lines changed

3 files changed

+35
-11
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,4 @@ Categorical
204204
Other
205205
^^^^^
206206
- Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
207+
- Bug in :func:`crosstab` where non-aligned series of integers were cast to float (:issue:`17005`)

pandas/core/reshape/pivot.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pandas import Series, DataFrame, MultiIndex, Index
77
from pandas.core.groupby import Grouper
88
from pandas.core.reshape.util import cartesian_product
9+
from pandas.core.index import _get_combined_index
910
from pandas.compat import range, lrange, zip
1011
from pandas import compat
1112
import pandas.core.common as com
@@ -493,6 +494,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
493494
rownames = _get_names(index, rownames, prefix='row')
494495
colnames = _get_names(columns, colnames, prefix='col')
495496

497+
obs_idxes = [obj.index for objs in (index, columns) for obj in objs
498+
if hasattr(obj, 'index')]
499+
if obs_idxes:
500+
common_idx = _get_combined_index(obs_idxes, intersect=True)
501+
else:
502+
common_idx = None
503+
496504
data = {}
497505
data.update(zip(rownames, index))
498506
data.update(zip(colnames, columns))
@@ -503,20 +511,19 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
503511
if values is not None and aggfunc is None:
504512
raise ValueError("values cannot be used without an aggfunc.")
505513

514+
df = DataFrame(data, index=common_idx)
506515
if values is None:
507-
df = DataFrame(data)
508516
df['__dummy__'] = 0
509-
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
510-
aggfunc=len, margins=margins,
511-
margins_name=margins_name, dropna=dropna)
512-
table = table.fillna(0).astype(np.int64)
513-
517+
aggfunc = len
514518
else:
515-
data['__dummy__'] = values
516-
df = DataFrame(data)
517-
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
518-
aggfunc=aggfunc, margins=margins,
519-
margins_name=margins_name, dropna=dropna)
519+
df['__dummy__'] = values
520+
521+
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
522+
aggfunc=aggfunc, margins=margins,
523+
margins_name=margins_name, dropna=dropna)
524+
525+
if values is None:
526+
table = table.fillna(0).astype(np.int64)
520527

521528
# Post-process
522529
if normalize is not False:

pandas/tests/reshape/test_pivot.py

+16
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,22 @@ def test_crosstab_ndarray(self):
10581058
assert result.index.name == 'row_0'
10591059
assert result.columns.name == 'col_0'
10601060

1061+
def test_crosstab_non_aligned(self):
1062+
# GH 17005
1063+
a = pd.Series(list('ABB'), index=range(3))
1064+
b = pd.Series(list('DEDED'), index=[0, 1, 2, 3, 5])
1065+
c = np.array(list('DED'))
1066+
1067+
expected = pd.DataFrame([[1, 0], [1, 1]],
1068+
index=Index(['A', 'B'], name='row_0'),
1069+
columns=Index(['D', 'E'], name='col_0'))
1070+
1071+
result = crosstab(a, b)
1072+
tm.assert_frame_equal(result, expected)
1073+
1074+
result = crosstab(a, c)
1075+
tm.assert_frame_equal(result, expected)
1076+
10611077
def test_crosstab_margins(self):
10621078
a = np.random.randint(0, 7, size=100)
10631079
b = np.random.randint(0, 3, size=100)

0 commit comments

Comments
 (0)