Skip to content

Commit 0da3339

Browse files
authored
fix: feature group should ignore nan values (#2169)
1 parent d28d478 commit 0da3339

File tree

2 files changed

+6
-0
lines changed

2 files changed

+6
-0
lines changed

src/sagemaker/feature_store/feature_group.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ def _ingest_single_batch(
185185
feature_name=data_frame.columns[index], value_as_string=str(row[index])
186186
)
187187
for index in range(len(row))
188+
if pd.notna(row[index])
188189
]
189190
sagemaker_session.put_record(
190191
feature_group_name=feature_group_name, record=[value.to_dict() for value in record]

tests/integ/test_feature_store.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def pandas_data_frame():
100100
"feature1": pd.Series(np.arange(10.0), dtype="float64"),
101101
"feature2": pd.Series(np.arange(10), dtype="int64"),
102102
"feature3": pd.Series(["2020-10-30T03:43:21Z"] * 10, dtype="string"),
103+
"feature4": pd.Series(np.arange(5.0), dtype="float64"), # contains nan
103104
}
104105
)
105106
return df
@@ -132,6 +133,7 @@ def create_table_ddl():
132133
" feature1 FLOAT\n"
133134
" feature2 INT\n"
134135
" feature3 STRING\n"
136+
" feature4 FLOAT\n"
135137
" write_time TIMESTAMP\n"
136138
" event_time TIMESTAMP\n"
137139
" is_deleted BOOLEAN\n"
@@ -214,6 +216,9 @@ def test_create_feature_store(
214216
time.sleep(60)
215217

216218
assert df.shape[0] == 11
219+
nans = pd.isna(df.loc[df["feature1"].isin([5, 6, 7, 8, 9])]["feature4"])
220+
for is_na in nans.items():
221+
assert is_na
217222
assert (
218223
create_table_ddl.format(
219224
feature_group_name=feature_group_name,

0 commit comments

Comments
 (0)