Skip to content

Commit 6f412cc

Browse files
Fixed issue #412: ordinal encoder converting timestamps
1 parent 21c64e2 commit 6f412cc

File tree

2 files changed

+28
-1
lines changed

2 files changed

+28
-1
lines changed

category_encoders/ordinal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', hand
224224
if pd.isna(categories).any():
225225
categories = [c for c in categories if not pd.isna(c)] + [nan_identity]
226226
else:
227-
categories = categories.tolist()
227+
categories = list(categories)
228228
if util.is_category(X[col].dtype):
229229
# Avoid using pandas category dtype meta-data if possible, see #235, #238.
230230
if X[col].dtype.ordered:

tests/test_ordinal.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,33 @@ def test_HaveNaNInTrain_ExpectCodedAsOne(self):
177177

178178
self.assertEqual(expected, result)
179179

180+
def test_Timestamp(self):
181+
df = pd.DataFrame(
182+
{
183+
"timestamps": {
184+
0: pd.Timestamp("1997-09-03 00:00:00"),
185+
1: pd.Timestamp("1997-09-03 00:00:00"),
186+
2: pd.Timestamp("2000-09-03 00:00:00"),
187+
3: pd.Timestamp("1997-09-03 00:00:00"),
188+
4: pd.Timestamp("1999-09-04 00:00:00"),
189+
5: pd.Timestamp("2001-09-03 00:00:00"),
190+
},
191+
}
192+
)
193+
enc = encoders.OrdinalEncoder(cols=["timestamps"])
194+
encoded_df = enc.fit_transform(df)
195+
expected_index = [pd.Timestamp("1997-09-03 00:00:00"),
196+
pd.Timestamp("2000-09-03 00:00:00"),
197+
pd.Timestamp("1999-09-04 00:00:00"),
198+
pd.Timestamp("2001-09-03 00:00:00"),
199+
pd.NaT
200+
]
201+
expected_mapping = pd.Series([1, 2, 3, 4, -2], index=expected_index)
202+
expected_values = [1, 1, 2, 1, 3, 4]
203+
204+
pd.testing.assert_series_equal(expected_mapping, enc.mapping[0]["mapping"])
205+
self.assertListEqual(expected_values, encoded_df["timestamps"].tolist())
206+
180207
def test_NoGaps(self):
181208
train = pd.DataFrame({"city": ["New York", np.nan, "Rio", None, "Rosenheim"]})
182209
expected_mapping_value = pd.Series([1, 2, 3, 4], index=["New York", "Rio", "Rosenheim", np.nan])

0 commit comments

Comments
 (0)