File tree 2 files changed +12
-5
lines changed 2 files changed +12
-5
lines changed Original file line number Diff line number Diff line change @@ -175,12 +175,13 @@ def simplify_category(data, catname):
175
175
def clean_categories(data):
    """Get data categories ready for one-hot-encoding.

    Steps, in order:

    1. Cast every categorical column that is present in ``data`` to
       ``object`` so the replacements below operate on plain values.
    2. Apply the ``CLEAN_CAT_VALUES`` replacements and the
       ``CAT_FILL_NA_VALUES`` fills.
    3. Drop rows that still have a missing value in any present column
       named by ``CLEAN_CAT_VALUES``.
    4. Drop the ``MODEL_IGNORE_COLS`` columns (absent ones are ignored).
    5. Cast the surviving categorical columns to ``category``.
    6. Run ``simplify_category`` for every name in ``REPLACE_REVERSE_DICT``.

    Columns listed in the module-level mappings but absent from ``data``
    are skipped rather than raising ``KeyError``.

    Args:
        data: pandas DataFrame to clean. It is not modified in place.

    Returns:
        A new DataFrame with cleaned, category-typed columns.
    """
    present = set(data.columns)
    cat_cols = (set(CLEAN_CAT_VALUES) | set(CAT_FILL_NA_VALUES)) & present

    # errors='ignore' is kept from the original so a failed conversion
    # leaves the frame as-is instead of raising.
    data = (
        data.astype({cat: 'object' for cat in cat_cols}, errors='ignore')
        .replace(CLEAN_CAT_VALUES)
        .fillna(CAT_FILL_NA_VALUES)
    )

    # dropna raises KeyError for unknown labels in `subset`, so restrict it
    # to the columns this frame actually has.
    required = [col for col in CLEAN_CAT_VALUES if col in present]
    if required:
        data = data.dropna(subset=required)

    data = data.drop(columns=MODEL_IGNORE_COLS, errors='ignore')

    # A categorical column may have just been dropped; dict-form astype
    # rejects keys that are not columns, so re-check against what is left.
    remaining = cat_cols.intersection(data.columns)
    data = data.astype({cat: 'category' for cat in remaining}, errors='ignore')

    for catname in REPLACE_REVERSE_DICT:
        data = simplify_category(data, catname)
    return data
Original file line number Diff line number Diff line change 10
10
},
11
11
{
12
12
"cell_type" : " code" ,
13
- "execution_count" : 81 ,
13
+ "execution_count" : 103 ,
14
14
"metadata" : {},
15
15
"outputs" : [
16
16
{
19
19
" <module 'data_modeler' from '/Users/brad/Documents/Scripts/flatiron/stop-and-frisk/data_modeler.py'>"
20
20
]
21
21
},
22
- "execution_count" : 81 ,
22
+ "execution_count" : 103 ,
23
23
"metadata" : {},
24
24
"output_type" : " execute_result"
25
25
}
53
53
"text" : [
54
54
" Loading 2003...\n " ,
55
55
" Loading 2004...\n " ,
56
- " Loading 2005...\n "
56
+ " Loading 2005...\n " ,
57
+ " Loading 2006...\n " ,
58
+ " Loading 2007...\n " ,
59
+ " Loading 2008...\n " ,
60
+ " Loading 2009...\n " ,
61
+ " Loading 2010...\n " ,
62
+ " Loading 2011...\n "
57
63
]
58
64
}
59
65
],
You can’t perform that action at this time.
0 commit comments