|
| 1 | +import pickle |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +from sklearn.preprocessing import LabelEncoder |
| 6 | + |
| 7 | + |
class EncoderExt(object):
    """Label encoder that tolerates values it did not see during ``fit``.

    Wraps ``LabelEncoder``. ``fit`` always registers an extra ``'Unknown'``
    class, and ``transform`` substitutes ``'Unknown'`` for every value that is
    not among the fitted classes instead of letting the wrapped encoder raise.
    """

    def __init__(self):
        self.label_encoder = LabelEncoder()

    def fit(self, data_list):
        """Fit the wrapped encoder on *data_list* plus the ``'Unknown'`` class.

        Args:
            data_list: iterable of values to learn.

        Returns:
            This instance, so calls can be chained.
        """
        self.label_encoder = self.label_encoder.fit(list(data_list) + ['Unknown'])
        # Mirror the fitted classes on the wrapper itself.
        self.classes_ = self.label_encoder.classes_
        return self

    def transform(self, data_list):
        """Encode *data_list*, mapping unseen values to ``'Unknown'``.

        Args:
            data_list: iterable of values to encode.

        Returns:
            Whatever the wrapped encoder's ``transform`` returns for the
            cleaned values.
        """
        # Build the lookup once and substitute in a single pass, rather than
        # rebuilding the whole list for every distinct unseen value.  This
        # also avoids sorting the input, which fails on mixed-type data.
        known = set(self.label_encoder.classes_)
        cleaned = [item if item in known else 'Unknown' for item in data_list]
        return self.label_encoder.transform(cleaned)
| 23 | + |
| 24 | + |
# Columns parsed with pandas.to_datetime before any feature is derived.
_DATE_COLUMNS = ("clear_date", "posting_date", "due_in_date", "baseline_create_date")

# (source date column, suffix used in the day_of_/month_of_/year_of_ names).
_DATE_FEATURE_SOURCES = (
    ("posting_date", "postingdate"),
    ("due_in_date", "due"),
    ("baseline_create_date", "createdate"),
)

# Columns handed to the model, in this order.
_MODEL_FEATURES = [
    'cust_number', 'buisness_year', 'doc_id', 'converted_usd',
    'business_code_enc', 'name_customer_enc', 'cust_payment_terms_enc',
    'day_of_postingdate', 'month_of_postingdate', 'year_of_postingdate',
    'day_of_createdate', 'month_of_createdate', 'year_of_createdate',
    'day_of_due', 'month_of_due', 'year_of_due',
]

# The model output is treated as a delay in seconds.
_SECONDS_PER_DAY = 24 * 3600

# Right-closed edges so every whole-day delay lands in the bucket its label
# names: 15 -> '0-15', 60 -> '46-60', anything above 60 -> last bucket.
_AGING_EDGES = [0, 15, 30, 45, 60, float("inf")]
_AGING_LABELS = ['0-15', '16-30', '31-45', '46-60', 'Greatar than 60']


def _prepare_features(invoices):
    """Return the model feature matrix derived from *invoices*.

    *invoices* must already have its date columns parsed; it is not modified.
    """
    features = invoices.copy()

    # Only these customer codes are translated; any other non-numeric
    # cust_number makes astype(int) raise.
    features['cust_number'] = (
        features['cust_number']
        .replace('CCCA02', "1")
        .replace(['CCU001', 'CCU013', 'CCU002'], "2")
        .replace(['CC3411', 'CC6000'], "3")
        .astype(int)
    )

    for column, suffix in _DATE_FEATURE_SOURCES:
        parts = features[column].dt
        features['day_of_' + suffix] = parts.day
        features['month_of_' + suffix] = parts.month
        features['year_of_' + suffix] = parts.year

    # NOTE(review): the previous code fitted one encoder on
    # cust_payment_terms and immediately refitted it on business_code, so
    # payment terms were effectively encoded against the business_code
    # classes (anything else becomes 'Unknown').  That behaviour is kept
    # because changing it alters the model inputs -- confirm against the
    # training pipeline before giving cust_payment_terms its own encoder.
    code_encoder = EncoderExt().fit(features['business_code'])
    name_encoder = EncoderExt().fit(features['name_customer'])
    features['cust_payment_terms_enc'] = code_encoder.transform(features['cust_payment_terms'])
    features['business_code_enc'] = code_encoder.transform(features['business_code'])
    features['name_customer_enc'] = name_encoder.transform(features['name_customer'])

    return features[_MODEL_FEATURES]


def _load_model():
    """Load the pickled estimator from ``model.sav`` in the working directory."""
    # NOTE: unpickling executes arbitrary code; model.sav must be a trusted
    # artifact.  The context manager closes the file handle after loading.
    with open("model.sav", 'rb') as handle:
        return pickle.load(handle)


def _score_invoices(invoices):
    """Predict delays for *invoices* and return ``(doc_id, bucket)`` pairs.

    Works on a copy, so the caller's frame is left untouched.  Side effect:
    writes the scored frame (input columns, predicted ``clear_date`` and
    ``Aging Bucket``) to ``Final.csv``.
    """
    parsed = invoices.copy()
    for column in _DATE_COLUMNS:
        parsed[column] = pd.to_datetime(parsed[column])

    feature_matrix = _prepare_features(parsed)
    delay_seconds = pd.Series(_load_model().predict(feature_matrix), name='avg_delay')

    # Predictions line up with the input rows by position, so attach them on
    # a fresh 0..n-1 index instead of merging on the original index values
    # (which multiplies rows when those values repeat).
    scored = parsed.reset_index(drop=True)
    scored['clear_date'] = scored['due_in_date'] + pd.to_timedelta(delay_seconds, unit='s')

    delay_days = delay_seconds // _SECONDS_PER_DAY
    # Negative delays fall outside every bucket and stay NaN.
    scored['Aging Bucket'] = pd.cut(delay_days, bins=_AGING_EDGES, labels=_AGING_LABELS,
                                    right=True, include_lowest=True)

    scored.to_csv("Final.csv")
    return list(zip(scored["doc_id"].tolist(), scored["Aging Bucket"].tolist()))


def predict(nulldata):
    """Predict the aging bucket for every invoice in *nulldata*.

    Args:
        nulldata: DataFrame containing the date columns, ``cust_number``,
            ``business_code``, ``cust_payment_terms``, ``name_customer`` and
            the numeric columns the model consumes.  It is not modified.

    Returns:
        A list of ``{"doc_id": ..., "aging_bucket": ...}`` dicts, one per
        row; ``aging_bucket`` is the bucket label as a string (``"nan"``
        when the predicted delay falls outside every bucket).
    """
    print(" Predict")
    return [{"doc_id": doc_id, "aging_bucket": str(bucket)}
            for doc_id, bucket in _score_invoices(nulldata)]


def doc_id_bucket(doc_list):
    """Predict aging buckets for the invoices of ``nulldata.csv`` in *doc_list*.

    Args:
        doc_list: container of ``doc_id`` values to keep; membership is
            tested with ``in`` against the ids as read from the CSV.

    Returns:
        A list of ``{"doc_id": ..., "aging_bucket": ...}`` dicts for the
        matching rows, with ``doc_id`` converted to a string.
    """
    print(" Predict")
    pairs = _score_invoices(pd.read_csv("nulldata.csv", index_col=0))
    return [{"doc_id": str(doc_id), "aging_bucket": str(bucket)}
            for doc_id, bucket in pairs if doc_id in doc_list]


def alll():
    """Predict the aging bucket for every invoice in ``nulldata.csv``.

    Returns:
        A list of ``{"doc_id": ..., "aging_bucket": ...}`` dicts, one per
        row of the CSV.
    """
    print(" Predict")
    pairs = _score_invoices(pd.read_csv("nulldata.csv", index_col=0))
    return [{"doc_id": doc_id, "aging_bucket": str(bucket)}
            for doc_id, bucket in pairs]
0 commit comments