Skip to content

Commit d755084

Browse files
committed
category cleaning works
1 parent fe8f01e commit d755084

File tree

2 files changed, with 12 additions and 5 deletions.

data_cleaner.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,13 @@ def simplify_category(data, catname):
 def clean_categories(data):
     """get data categories ready for one-hot-encoding"""
     cat_set = set(CLEAN_CAT_VALUES) | set(CAT_FILL_NA_VALUES)
-    data = data.astype({cat : 'object' for cat in cat_set}) \
+    cat_set = cat_set.intersection(set(data.columns))
+    data = data.astype({cat : 'object' for cat in cat_set}, errors='ignore') \
         .replace(CLEAN_CAT_VALUES) \
         .fillna(CAT_FILL_NA_VALUES) \
         .dropna(subset=CLEAN_CAT_VALUES.keys()) \
         .drop(columns=MODEL_IGNORE_COLS, errors='ignore') \
-        .astype({cat : 'category' for cat in cat_set})
+        .astype({cat : 'category' for cat in cat_set}, errors='ignore')
     for catname in REPLACE_REVERSE_DICT:
         data = simplify_category(data, catname)
     return data

fullmodel.ipynb

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 81,
+"execution_count": 103,
 "metadata": {},
 "outputs": [
 {
@@ -19,7 +19,7 @@
 "<module 'data_modeler' from '/Users/brad/Documents/Scripts/flatiron/stop-and-frisk/data_modeler.py'>"
 ]
 },
-"execution_count": 81,
+"execution_count": 103,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -53,7 +53,13 @@
 "text": [
 "Loading 2003...\n",
 "Loading 2004...\n",
-"Loading 2005...\n"
+"Loading 2005...\n",
+"Loading 2006...\n",
+"Loading 2007...\n",
+"Loading 2008...\n",
+"Loading 2009...\n",
+"Loading 2010...\n",
+"Loading 2011...\n"
 ]
 }
 ],

0 commit comments

Comments
 (0)