
Commit 52db43a

Merge branch 'master' into TST--split-tests-for-windows-to-sub-modules-pandas-dev#19228
2 parents 9cb7a4f + 9a4469e

File tree: 6 files changed, +187 -180 lines


doc/source/whatsnew/v0.23.0.txt

+1

@@ -822,6 +822,7 @@ Sparse
 - Bug in which creating a ``SparseDataFrame`` from a dense ``Series`` or an unsupported type raised an uncontrolled exception (:issue:`19374`)
 - Bug in :class:`SparseDataFrame.to_csv` causing exception (:issue:`19384`)
 - Bug in :class:`SparseSeries.memory_usage` which caused segfault by accessing non sparse elements (:issue:`19368`)
+- Bug in constructing a ``SparseArray``: if ``data`` is a scalar and ``index`` is defined it will coerce to ``float64`` regardless of scalar's dtype. (:issue:`19163`)

 Reshaping
 ^^^^^^^^^
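
For context (not part of the commit), a minimal sketch of the user-facing behavior this changelog entry describes, assuming a pandas build that includes the fix; the scalar's dtype is preserved instead of being coerced to ``float64``:

    import pandas as pd

    # Before the fix, a scalar plus an index always produced a float64 array;
    # with the fix, the scalar's own dtype (here int64) is kept.
    arr = pd.SparseArray(1, index=[0, 1, 2], fill_value=1)
    print(arr.dtype)  # expected: int64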

pandas/core/sparse/array.py

+5 -4

@@ -26,7 +26,8 @@
     is_scalar, is_dtype_equal)
 from pandas.core.dtypes.cast import (
     maybe_convert_platform, maybe_promote,
-    astype_nansafe, find_common_type)
+    astype_nansafe, find_common_type, infer_dtype_from_scalar,
+    construct_1d_arraylike_from_scalar)
 from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype

 import pandas._libs.sparse as splib
@@ -162,9 +163,9 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
                 data = np.nan
             if not is_scalar(data):
                 raise Exception("must only pass scalars with an index ")
-            values = np.empty(len(index), dtype='float64')
-            values.fill(data)
-            data = values
+            dtype = infer_dtype_from_scalar(data)[0]
+            data = construct_1d_arraylike_from_scalar(
+                data, len(index), dtype)

         if isinstance(data, ABCSparseSeries):
             data = data.values
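
Illustrative only (not part of the diff): a rough sketch of what the new construction path does, using the two pandas-internal helpers added to the import block above; their exact behavior is an assumption based on how the diff calls them:

    from pandas.core.dtypes.cast import (
        infer_dtype_from_scalar, construct_1d_arraylike_from_scalar)

    scalar = True
    # infer_dtype_from_scalar returns (dtype, value); only the dtype is used here
    dtype = infer_dtype_from_scalar(scalar)[0]
    # build a length-3 1-d array filled with the scalar, keeping the inferred dtype
    data = construct_1d_arraylike_from_scalar(scalar, 3, dtype)
    print(data, data.dtype)  # expected: [ True  True  True] bool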

pandas/tests/indexes/datetimes/test_tools.py

+10 -176

@@ -8,7 +8,7 @@
 import dateutil
 import numpy as np
 from dateutil.parser import parse
-from datetime import datetime, date, time, timedelta
+from datetime import datetime, date, time
 from distutils.version import LooseVersion

 import pandas as pd
@@ -19,7 +19,6 @@

 from pandas.errors import OutOfBoundsDatetime
 from pandas.compat import lmap, PY3
-from pandas.compat.numpy import np_array_datetime64_compat
 from pandas.core.dtypes.common import is_datetime64_ns_dtype
 from pandas.util import testing as tm
 import pandas.util._test_decorators as td
@@ -803,6 +802,15 @@ def test_dataframe_dtypes(self, cache):


 class TestToDatetimeMisc(object):
+    def test_to_datetime_barely_out_of_bounds(self):
+        # GH#19529
+        # GH#19382 close enough to bounds that dropping nanos would result
+        # in an in-bounds datetime
+        arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
+
+        with pytest.raises(OutOfBoundsDatetime):
+            to_datetime(arr)
+
     @pytest.mark.parametrize('cache', [True, False])
     def test_to_datetime_iso8601(self, cache):
         result = to_datetime(["2012-01-01 00:00:00"], cache=cache)
@@ -1464,180 +1472,6 @@ def test_parsers_timezone_minute_offsets_roundtrip(self, cache):
             converted_time = dt_time.tz_localize('UTC').tz_convert(tz)
             assert dt_string_repr == repr(converted_time)

-    def test_parsers_iso8601(self):
-        # GH 12060
-        # test only the iso parser - flexibility to different
-        # separators and leadings 0s
-        # Timestamp construction falls back to dateutil
-        cases = {'2011-01-02': datetime(2011, 1, 2),
-                 '2011-1-2': datetime(2011, 1, 2),
-                 '2011-01': datetime(2011, 1, 1),
-                 '2011-1': datetime(2011, 1, 1),
-                 '2011 01 02': datetime(2011, 1, 2),
-                 '2011.01.02': datetime(2011, 1, 2),
-                 '2011/01/02': datetime(2011, 1, 2),
-                 '2011\\01\\02': datetime(2011, 1, 2),
-                 '2013-01-01 05:30:00': datetime(2013, 1, 1, 5, 30),
-                 '2013-1-1 5:30:00': datetime(2013, 1, 1, 5, 30)}
-        for date_str, exp in compat.iteritems(cases):
-            actual = tslib._test_parse_iso8601(date_str)
-            assert actual == exp
-
-        # separators must all match - YYYYMM not valid
-        invalid_cases = ['2011-01/02', '2011^11^11',
-                         '201401', '201111', '200101',
-                         # mixed separated and unseparated
-                         '2005-0101', '200501-01',
-                         '20010101 12:3456', '20010101 1234:56',
-                         # HHMMSS must have two digits in each component
-                         # if unseparated
-                         '20010101 1', '20010101 123', '20010101 12345',
-                         '20010101 12345Z',
-                         # wrong separator for HHMMSS
-                         '2001-01-01 12-34-56']
-        for date_str in invalid_cases:
-            with pytest.raises(ValueError):
-                tslib._test_parse_iso8601(date_str)
-                # If no ValueError raised, let me know which case failed.
-                raise Exception(date_str)
-
-
-class TestArrayToDatetime(object):
-    def test_coerce_out_of_bounds_utc(self):
-        # GH#19612
-        ts = Timestamp('1900-01-01', tz='US/Pacific')
-        dt = ts.to_pydatetime() - timedelta(days=365 * 300)  # ~1600AD
-        arr = np.array([dt])
-        result = tslib.array_to_datetime(arr, utc=True, errors='coerce')
-        expected = np.array(['NaT'], dtype='datetime64[ns]')
-        tm.assert_numpy_array_equal(result, expected)
-
-    def test_parsing_valid_dates(self):
-        arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr),
-            np_array_datetime64_compat(
-                [
-                    '2013-01-01T00:00:00.000000000-0000',
-                    '2013-01-02T00:00:00.000000000-0000'
-                ],
-                dtype='M8[ns]'
-            )
-        )
-
-        arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object)
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr),
-            np_array_datetime64_compat(
-                [
-                    '2013-09-16T00:00:00.000000000-0000',
-                    '2013-09-17T00:00:00.000000000-0000'
-                ],
-                dtype='M8[ns]'
-            )
-        )
-
-    def test_parsing_timezone_offsets(self):
-        # All of these datetime strings with offsets are equivalent
-        # to the same datetime after the timezone offset is added
-        dt_strings = [
-            '01-01-2013 08:00:00+08:00',
-            '2013-01-01T08:00:00.000000000+0800',
-            '2012-12-31T16:00:00.000000000-0800',
-            '12-31-2012 23:00:00-01:00'
-        ]
-
-        expected_output = tslib.array_to_datetime(np.array(
-            ['01-01-2013 00:00:00'], dtype=object))
-
-        for dt_string in dt_strings:
-            tm.assert_numpy_array_equal(
-                tslib.array_to_datetime(
-                    np.array([dt_string], dtype=object)
-                ),
-                expected_output
-            )
-
-    def test_number_looking_strings_not_into_datetime(self):
-        # #4601
-        # These strings don't look like datetimes so they shouldn't be
-        # attempted to be converted
-        arr = np.array(['-352.737091', '183.575577'], dtype=object)
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr, errors='ignore'), arr)
-
-        arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr, errors='ignore'), arr)
-
-    def test_coercing_dates_outside_of_datetime64_ns_bounds(self):
-        invalid_dates = [
-            date(1000, 1, 1),
-            datetime(1000, 1, 1),
-            '1000-01-01',
-            'Jan 1, 1000',
-            np.datetime64('1000-01-01'),
-        ]
-
-        for invalid_date in invalid_dates:
-            pytest.raises(ValueError,
-                          tslib.array_to_datetime,
-                          np.array([invalid_date], dtype='object'),
-                          errors='raise', )
-            tm.assert_numpy_array_equal(
-                tslib.array_to_datetime(
-                    np.array([invalid_date], dtype='object'),
-                    errors='coerce'),
-                np.array([tslib.iNaT], dtype='M8[ns]')
-            )
-
-        arr = np.array(['1/1/1000', '1/1/2000'], dtype=object)
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr, errors='coerce'),
-            np_array_datetime64_compat(
-                [
-                    tslib.iNaT,
-                    '2000-01-01T00:00:00.000000000-0000'
-                ],
-                dtype='M8[ns]'
-            )
-        )
-
-    def test_coerce_of_invalid_datetimes(self):
-        arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)
-
-        # Without coercing, the presence of any invalid dates prevents
-        # any values from being converted
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr, errors='ignore'), arr)
-
-        # With coercing, the invalid dates becomes iNaT
-        tm.assert_numpy_array_equal(
-            tslib.array_to_datetime(arr, errors='coerce'),
-            np_array_datetime64_compat(
-                [
-                    '2013-01-01T00:00:00.000000000-0000',
-                    tslib.iNaT,
-                    tslib.iNaT
-                ],
-                dtype='M8[ns]'
-            )
-        )
-
-    def test_to_datetime_barely_out_of_bounds(self):
-        # GH#19529
-        # GH#19382 close enough to bounds that dropping nanos would result
-        # in an in-bounds datetime
-        arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
-
-        with pytest.raises(OutOfBoundsDatetime):
-            to_datetime(arr)
-
-        with pytest.raises(OutOfBoundsDatetime):
-            # Essentially the same as above, but more directly calling
-            # the relevant function
-            tslib.array_to_datetime(arr)
-

 def test_normalize_date():
     value = date(2012, 9, 7)
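
As a quick usage sketch (not part of the diff) of the behavior the surviving test checks: the string below is one nanosecond past ``Timestamp.max``, so dropping nanoseconds would wrongly make it in-bounds, and ``to_datetime`` must still raise:

    import numpy as np
    import pandas as pd
    from pandas.errors import OutOfBoundsDatetime

    arr = np.array(['2262-04-11 23:47:16.854775808'], dtype=object)
    try:
        pd.to_datetime(arr)
    except OutOfBoundsDatetime as err:
        print('raised as expected:', err)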

pandas/tests/sparse/frame/test_frame.py

+11

@@ -1257,3 +1257,14 @@ def test_quantile_multi(self):

         tm.assert_frame_equal(result, dense_expected)
         tm.assert_sp_frame_equal(result, sparse_expected)
+
+    def test_assign_with_sparse_frame(self):
+        # GH 19163
+        df = pd.DataFrame({"a": [1, 2, 3]})
+        res = df.to_sparse(fill_value=False).assign(newcol=False)
+        exp = df.assign(newcol=False).to_sparse(fill_value=False)
+
+        tm.assert_sp_frame_equal(res, exp)
+
+        for column in res.columns:
+            assert type(res[column]) is SparseSeries
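
For context (not in the commit), a short interactive sketch of what the new test exercises, assuming the 0.23-era API where ``DataFrame.to_sparse`` still exists:

    import pandas as pd
    from pandas.util import testing as tm

    df = pd.DataFrame({"a": [1, 2, 3]})
    # assign() on a SparseDataFrame should keep the result (and the new
    # bool column) sparse rather than densifying it
    res = df.to_sparse(fill_value=False).assign(newcol=False)
    exp = df.assign(newcol=False).to_sparse(fill_value=False)
    tm.assert_sp_frame_equal(res, exp)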

pandas/tests/sparse/test_array.py

+15

@@ -113,6 +113,21 @@ def test_constructor_spindex_dtype(self):
         assert arr.dtype == np.int64
         assert arr.fill_value == 0

+    @pytest.mark.parametrize('scalar,dtype', [
+        (False, bool),
+        (0.0, 'float64'),
+        (1, 'int64'),
+        ('z', 'object')])
+    def test_scalar_with_index_infer_dtype(self, scalar, dtype):
+        # GH 19163
+        arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
+        exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
+
+        tm.assert_sp_array_equal(arr, exp)
+
+        assert arr.dtype == dtype
+        assert exp.dtype == dtype
+
     def test_sparseseries_roundtrip(self):
         # GH 13999
         for kind in ['integer', 'block']:
