Skip to content

Commit 934fc81

Browse files
authored
SAS validate null dates (#39726)
1 parent eda7592 commit 934fc81

File tree

4 files changed

+38
-14
lines changed

4 files changed

+38
-14
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ I/O
390390
- Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`)
391391
- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
392392
- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
393+
- Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
393394

394395
Period
395396
^^^^^^

pandas/io/sas/sas7bdat.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,28 @@
2323
from pandas.errors import EmptyDataError, OutOfBoundsDatetime
2424

2525
import pandas as pd
26+
from pandas import isna
2627

2728
from pandas.io.common import get_handle
2829
from pandas.io.sas._sas import Parser
2930
import pandas.io.sas.sas_constants as const
3031
from pandas.io.sas.sasreader import ReaderBase
3132

3233

34+
def _parse_datetime(sas_datetime: float, unit: str):
35+
if isna(sas_datetime):
36+
return pd.NaT
37+
38+
if unit == "s":
39+
return datetime(1960, 1, 1) + timedelta(seconds=sas_datetime)
40+
41+
elif unit == "d":
42+
return datetime(1960, 1, 1) + timedelta(days=sas_datetime)
43+
44+
else:
45+
raise ValueError("unit must be 'd' or 's'")
46+
47+
3348
def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
3449
"""
3550
Convert to Timestamp if possible, otherwise to datetime.datetime.
@@ -51,20 +66,9 @@ def _convert_datetimes(sas_datetimes: pd.Series, unit: str) -> pd.Series:
5166
try:
5267
return pd.to_datetime(sas_datetimes, unit=unit, origin="1960-01-01")
5368
except OutOfBoundsDatetime:
54-
if unit == "s":
55-
s_series = sas_datetimes.apply(
56-
lambda sas_float: datetime(1960, 1, 1) + timedelta(seconds=sas_float)
57-
)
58-
s_series = cast(pd.Series, s_series)
59-
return s_series
60-
elif unit == "d":
61-
d_series = sas_datetimes.apply(
62-
lambda sas_float: datetime(1960, 1, 1) + timedelta(days=sas_float)
63-
)
64-
d_series = cast(pd.Series, d_series)
65-
return d_series
66-
else:
67-
raise ValueError("unit must be 'd' or 's'")
69+
s_series = sas_datetimes.apply(_parse_datetime, unit=unit)
70+
s_series = cast(pd.Series, s_series)
71+
return s_series
6872

6973

7074
class _SubheaderPointer:
128 KB
Binary file not shown.

pandas/tests/io/sas/test_sas7bdat.py

+19
Original file line numberDiff line numberDiff line change
@@ -315,3 +315,22 @@ def test_max_sas_date_iterator(datapath):
315315
]
316316
for result, expected in zip(results, expected):
317317
tm.assert_frame_equal(result, expected)
318+
319+
320+
def test_null_date(datapath):
    # Reading the "dates_null" fixture is expected to yield NaT for the null
    # entry in both the date and the datetime column.
    sas_path = datapath("io", "sas", "data", "dates_null.sas7bdat")
    result = pd.read_sas(sas_path, encoding="utf-8")

    date_values = [datetime(9999, 12, 29), pd.NaT]
    datetime_values = [datetime(9999, 12, 29, 23, 59, 59, 998993), pd.NaT]
    expected = pd.DataFrame(
        {
            "datecol": date_values,
            "datetimecol": datetime_values,
        },
    )

    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)