Skip to content

Commit d73479e

Browse files
Merge pull request #415 from PaulWestenthanner/fix/issue_414_412
Fix/issue 414 412
2 parents eeff8e3 + 6f412cc commit d73479e

File tree

8 files changed

+38
-12
lines changed

8 files changed

+38
-12
lines changed

category_encoders/backward_difference.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ class BackwardDifferenceEncoder(BaseContrastEncoder):
7171
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
7272
7373
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
74-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
74+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
7575
7676
"""
7777

category_encoders/base_contrast_encoder.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ class BaseContrastEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
4141
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
4242
4343
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
44-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
44+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
4545
4646
"""
4747
prefit_ordinal = True

category_encoders/helmert.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,8 @@ class HelmertEncoder(BaseContrastEncoder):
7272
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
7373
7474
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
75-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
76-
75+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
76+
7777
"""
7878
def get_contrast_matrix(self, values_to_encode: np.array) -> ContrastMatrix:
7979
return Helmert().code_without_intercept(values_to_encode)

category_encoders/one_hot.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,8 @@ class OneHotEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
8686
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
8787
8888
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
89-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
90-
89+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
90+
9191
"""
9292
prefit_ordinal = True
9393
encoding_relation = util.EncodingRelation.ONE_TO_N_UNIQUE

category_encoders/ordinal.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -77,8 +77,7 @@ class OrdinalEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin):
7777
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
7878
7979
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
80-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
81-
80+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
8281
"""
8382
prefit_ordinal = False
8483
encoding_relation = util.EncodingRelation.ONE_TO_ONE
@@ -225,7 +224,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', hand
225224
if pd.isna(categories).any():
226225
categories = [c for c in categories if not pd.isna(c)] + [nan_identity]
227226
else:
228-
categories = categories.tolist()
227+
categories = list(categories)
229228
if util.is_category(X[col].dtype):
230229
# Avoid using pandas category dtype meta-data if possible, see #235, #238.
231230
if X[col].dtype.ordered:

category_encoders/polynomial.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -71,8 +71,8 @@ class PolynomialEncoder(BaseContrastEncoder):
7171
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
7272
7373
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
74-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
75-
74+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
75+
7676
"""
7777
def get_contrast_matrix(self, values_to_encode: np.array) -> ContrastMatrix:
7878
return Poly().code_without_intercept(values_to_encode)

category_encoders/sum_coding.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ class SumEncoder(BaseContrastEncoder):
7171
https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/
7272
7373
.. [2] Gregory Carey (2003). Coding Categorical Variables, from
74-
http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf
74+
http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf
7575
7676
"""
7777

tests/test_ordinal.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,33 @@ def test_HaveNaNInTrain_ExpectCodedAsOne(self):
177177

178178
self.assertEqual(expected, result)
179179

180+
def test_Timestamp(self):
181+
df = pd.DataFrame(
182+
{
183+
"timestamps": {
184+
0: pd.Timestamp("1997-09-03 00:00:00"),
185+
1: pd.Timestamp("1997-09-03 00:00:00"),
186+
2: pd.Timestamp("2000-09-03 00:00:00"),
187+
3: pd.Timestamp("1997-09-03 00:00:00"),
188+
4: pd.Timestamp("1999-09-04 00:00:00"),
189+
5: pd.Timestamp("2001-09-03 00:00:00"),
190+
},
191+
}
192+
)
193+
enc = encoders.OrdinalEncoder(cols=["timestamps"])
194+
encoded_df = enc.fit_transform(df)
195+
expected_index = [pd.Timestamp("1997-09-03 00:00:00"),
196+
pd.Timestamp("2000-09-03 00:00:00"),
197+
pd.Timestamp("1999-09-04 00:00:00"),
198+
pd.Timestamp("2001-09-03 00:00:00"),
199+
pd.NaT
200+
]
201+
expected_mapping = pd.Series([1, 2, 3, 4, -2], index=expected_index)
202+
expected_values = [1, 1, 2, 1, 3, 4]
203+
204+
pd.testing.assert_series_equal(expected_mapping, enc.mapping[0]["mapping"])
205+
self.assertListEqual(expected_values, encoded_df["timestamps"].tolist())
206+
180207
def test_NoGaps(self):
181208
train = pd.DataFrame({"city": ["New York", np.nan, "Rio", None, "Rosenheim"]})
182209
expected_mapping_value = pd.Series([1, 2, 3, 4], index=["New York", "Rio", "Rosenheim", np.nan])

0 commit comments

Comments
 (0)