@@ -72,6 +72,7 @@ def __init__(
72
72
y_train = None ,
73
73
y_test = None ,
74
74
y_valid = None ,
75
+ max_len = 250 ,
75
76
digits_word = "$NUM$" ,
76
77
ukn_words = "out-of-vocabulary" ,
77
78
padding_style = "pre" ,
@@ -126,9 +127,8 @@ def __init__(
126
127
self .X_validation = list ()
127
128
self .X_testing = list ()
128
129
129
- self .max_len = int ()
130
+ self .max_len = max_len
130
131
self .max_char = int ()
131
- self .max_words = int ()
132
132
133
133
# Defined in prepare_data
134
134
@@ -156,7 +156,7 @@ def prepare_data(self, save=False):
156
156
Save(bool): If True, then data objects will be saved to
157
157
`self.output_path`.
158
158
"""
159
- self .max_len = max ([len (xx ) for xx in self .X_train ])
159
+ # self.max_len = max([len(xx) for xx in self.X_train])
160
160
161
161
self .X_train_merged , self .X_test_merged , self .X_valid_merged = merge_digits (
162
162
[self .X_train , self .X_test , self .X_valid ], self .digits_word
@@ -246,14 +246,14 @@ def prepare_data(self, save=False):
246
246
# Create character level data
247
247
248
248
# Create the character level data
249
- self .char2ind , self .max_words , self . max_char = character_index (
249
+ self .char2ind , self .max_char = character_index (
250
250
self .X_train , self .digits_word
251
251
)
252
252
253
253
self .X_train_char = character_data (
254
254
self .X_train ,
255
255
self .char2ind ,
256
- self .max_words ,
256
+ self .max_len ,
257
257
self .max_char ,
258
258
self .digits_word ,
259
259
self .padding_style ,
@@ -262,7 +262,7 @@ def prepare_data(self, save=False):
262
262
self .X_test_char = character_data (
263
263
self .X_test ,
264
264
self .char2ind ,
265
- self .max_words ,
265
+ self .max_len ,
266
266
self .max_char ,
267
267
self .digits_word ,
268
268
self .padding_style ,
@@ -271,7 +271,7 @@ def prepare_data(self, save=False):
271
271
self .X_valid_char = character_data (
272
272
self .X_valid ,
273
273
self .char2ind ,
274
- self .max_words ,
274
+ self .max_len ,
275
275
self .max_char ,
276
276
self .digits_word ,
277
277
self .padding_style ,
@@ -292,7 +292,6 @@ def prepare_data(self, save=False):
292
292
write_pickle (self .char2ind , "char2ind.pickle" , path = self .output_path )
293
293
294
294
maxes = {
295
- "max_words" : self .max_words ,
296
295
"max_char" : self .max_char ,
297
296
"max_len" : self .max_len ,
298
297
}
@@ -317,11 +316,9 @@ def load_data(self, out_path):
317
316
318
317
self .max_len = maxes ["max_len" ]
319
318
self .max_char = maxes ["max_char" ]
320
- self .max_words = maxes ["max_words" ]
321
319
322
320
logger .debug ("Setting max_len to %s" , self .max_len )
323
321
logger .debug ("Setting max_char to %s" , self .max_char )
324
- logger .debug ("Setting max_words to %s" , self .max_words )
325
322
326
323
def build_model (
327
324
self ,
@@ -370,7 +367,7 @@ def build_model(
370
367
371
368
if word_embeddings :
372
369
373
- word_input = Input ((self .max_words ,))
370
+ word_input = Input ((self .max_len ,))
374
371
inputs .append (word_input )
375
372
376
373
# TODO: More sensible handling of options for pretrained embedding.
@@ -406,7 +403,7 @@ def build_model(
406
403
407
404
if self .max_char != 0 :
408
405
409
- character_input = Input ((self .max_words , self .max_char ,))
406
+ character_input = Input ((self .max_len , self .max_char ,))
410
407
411
408
char_embedding = self .character_embedding_layer (
412
409
char_embedding_type = char_embedding_type ,
@@ -474,7 +471,7 @@ def build_model(
474
471
475
472
self .model = model
476
473
477
- logger .debug (self .model .summary (line_length = 150 ))
474
+ # logger.debug(self.model.summary(line_length=150))
478
475
479
476
def train_model (
480
477
self , epochs = 25 , batch_size = 100 , early_stopping_patience = 5 , metric = "val_f1"
@@ -613,10 +610,6 @@ def evaluate(
613
610
614
611
# Compute classification report
615
612
616
- # Initialise list for storing predictions which will be written
617
- # to tsv file.
618
-
619
-
620
613
for i , y_target in enumerate (self .y_valid_encoded ):
621
614
622
615
# Compute predictions, flatten
@@ -970,7 +963,7 @@ def prepare_X_data(self, X):
970
963
X_char = character_data (
971
964
X ,
972
965
self .char2ind ,
973
- self .max_words ,
966
+ self .max_len ,
974
967
self .max_char ,
975
968
self .digits_word ,
976
969
self .padding_style ,
0 commit comments