Skip to content

Commit a2d03d4

Browse files
toobaz authored and jreback committed
BUG: do not cast ints to floats if inputs of crosstab are not aligned (#17011)
closes #17005
1 parent 7d9d6d3 commit a2d03d4

File tree

3 files changed

+37
-12
lines changed

3 files changed

+37
-12
lines changed

doc/source/whatsnew/v0.21.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,10 +225,10 @@ Sparse
225225
Reshaping
226226
^^^^^^^^^
227227
- Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`)
228+
- Bug in :func:`crosstab` where non-aligned series of integers were cast to float (:issue:`17005`)
228229
- Bug when using :func:`isin` on a large object series and large comparison array (:issue:`16012`)
229230
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
230231

231-
232232
Numeric
233233
^^^^^^^
234234
- Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`)

pandas/core/reshape/pivot.py

+20-11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pandas import Series, DataFrame, MultiIndex, Index
77
from pandas.core.groupby import Grouper
88
from pandas.core.reshape.util import cartesian_product
9+
from pandas.core.index import _get_combined_index
910
from pandas.compat import range, lrange, zip
1011
from pandas import compat
1112
import pandas.core.common as com
@@ -493,6 +494,13 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
493494
rownames = _get_names(index, rownames, prefix='row')
494495
colnames = _get_names(columns, colnames, prefix='col')
495496

497+
obs_idxes = [obj.index for objs in (index, columns) for obj in objs
498+
if hasattr(obj, 'index')]
499+
if obs_idxes:
500+
common_idx = _get_combined_index(obs_idxes, intersect=True)
501+
else:
502+
common_idx = None
503+
496504
data = {}
497505
data.update(zip(rownames, index))
498506
data.update(zip(colnames, columns))
@@ -503,20 +511,21 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
503511
if values is not None and aggfunc is None:
504512
raise ValueError("values cannot be used without an aggfunc.")
505513

514+
df = DataFrame(data, index=common_idx)
506515
if values is None:
507-
df = DataFrame(data)
508516
df['__dummy__'] = 0
509-
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
510-
aggfunc=len, margins=margins,
511-
margins_name=margins_name, dropna=dropna)
512-
table = table.fillna(0).astype(np.int64)
513-
517+
kwargs = {'aggfunc': len, 'fill_value': 0}
514518
else:
515-
data['__dummy__'] = values
516-
df = DataFrame(data)
517-
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
518-
aggfunc=aggfunc, margins=margins,
519-
margins_name=margins_name, dropna=dropna)
519+
df['__dummy__'] = values
520+
kwargs = {'aggfunc': aggfunc}
521+
522+
table = df.pivot_table('__dummy__', index=rownames, columns=colnames,
523+
margins=margins, margins_name=margins_name,
524+
dropna=dropna, **kwargs)
525+
526+
# GH 17013:
527+
if values is None and margins:
528+
table = table.fillna(0).astype(np.int64)
520529

521530
# Post-process
522531
if normalize is not False:

pandas/tests/reshape/test_pivot.py

+16
Original file line numberDiff line numberDiff line change
@@ -1058,6 +1058,22 @@ def test_crosstab_ndarray(self):
10581058
assert result.index.name == 'row_0'
10591059
assert result.columns.name == 'col_0'
10601060

1061+
def test_crosstab_non_aligned(self):
1062+
# GH 17005
1063+
a = pd.Series([0, 1, 1], index=['a', 'b', 'c'])
1064+
b = pd.Series([3, 4, 3, 4, 3], index=['a', 'b', 'c', 'd', 'f'])
1065+
c = np.array([3, 4, 3])
1066+
1067+
expected = pd.DataFrame([[1, 0], [1, 1]],
1068+
index=Index([0, 1], name='row_0'),
1069+
columns=Index([3, 4], name='col_0'))
1070+
1071+
result = crosstab(a, b)
1072+
tm.assert_frame_equal(result, expected)
1073+
1074+
result = crosstab(a, c)
1075+
tm.assert_frame_equal(result, expected)
1076+
10611077
def test_crosstab_margins(self):
10621078
a = np.random.randint(0, 7, size=100)
10631079
b = np.random.randint(0, 3, size=100)

0 commit comments

Comments
 (0)