@@ -72,6 +72,7 @@ def __init__(
72
72
y_train = None ,
73
73
y_test = None ,
74
74
y_valid = None ,
75
+ max_len = None ,
75
76
digits_word = "$NUM$" ,
76
77
ukn_words = "out-of-vocabulary" ,
77
78
padding_style = "pre" ,
@@ -126,7 +127,7 @@ def __init__(
126
127
self .X_validation = list ()
127
128
self .X_testing = list ()
128
129
129
- self .max_len = int ()
130
+ self .max_len = max_len
130
131
self .max_char = int ()
131
132
self .max_words = int ()
132
133
@@ -156,7 +157,7 @@ def prepare_data(self, save=False):
156
157
Save(bool): If True, then data objects will be saved to
157
158
`self.output_path`.
158
159
"""
159
- self .max_len = max ([len (xx ) for xx in self .X_train ])
160
+ # self.max_len = max([len(xx) for xx in self.X_train])
160
161
161
162
self .X_train_merged , self .X_test_merged , self .X_valid_merged = merge_digits (
162
163
[self .X_train , self .X_test , self .X_valid ], self .digits_word
@@ -253,7 +254,7 @@ def prepare_data(self, save=False):
253
254
self .X_train_char = character_data (
254
255
self .X_train ,
255
256
self .char2ind ,
256
- self .max_words ,
257
+ self .max_len ,
257
258
self .max_char ,
258
259
self .digits_word ,
259
260
self .padding_style ,
@@ -262,7 +263,7 @@ def prepare_data(self, save=False):
262
263
self .X_test_char = character_data (
263
264
self .X_test ,
264
265
self .char2ind ,
265
- self .max_words ,
266
+ self .max_len ,
266
267
self .max_char ,
267
268
self .digits_word ,
268
269
self .padding_style ,
@@ -271,7 +272,7 @@ def prepare_data(self, save=False):
271
272
self .X_valid_char = character_data (
272
273
self .X_valid ,
273
274
self .char2ind ,
274
- self .max_words ,
275
+ self .max_len ,
275
276
self .max_char ,
276
277
self .digits_word ,
277
278
self .padding_style ,
@@ -370,7 +371,7 @@ def build_model(
370
371
371
372
if word_embeddings :
372
373
373
- word_input = Input ((self .max_words ,))
374
+ word_input = Input ((self .max_len ,))
374
375
inputs .append (word_input )
375
376
376
377
# TODO: More sensible handling of options for pretrained embedding.
@@ -406,7 +407,7 @@ def build_model(
406
407
407
408
if self .max_char != 0 :
408
409
409
- character_input = Input ((self .max_words , self .max_char ,))
410
+ character_input = Input ((self .max_len , self .max_char ,))
410
411
411
412
char_embedding = self .character_embedding_layer (
412
413
char_embedding_type = char_embedding_type ,
@@ -474,7 +475,7 @@ def build_model(
474
475
475
476
self .model = model
476
477
477
- logger .debug (self .model .summary (line_length = 150 ))
478
+ # logger.debug(self.model.summary(line_length=150))
478
479
479
480
def train_model (
480
481
self , epochs = 25 , batch_size = 100 , early_stopping_patience = 5 , metric = "val_f1"
@@ -970,7 +971,7 @@ def prepare_X_data(self, X):
970
971
X_char = character_data (
971
972
X ,
972
973
self .char2ind ,
973
- self .max_words ,
974
+ self .max_len ,
974
975
self .max_char ,
975
976
self .digits_word ,
976
977
self .padding_style ,
0 commit comments