20
20
import java .util .Arrays ;
21
21
import java .util .Collections ;
22
22
import java .util .List ;
23
+ import java .util .function .Predicate ;
23
24
import java .util .regex .Matcher ;
24
25
import java .util .regex .Pattern ;
25
26
import java .util .stream .Collectors ;
34
35
35
36
import ru .mystamps .web .service .dto .RawParsedDataDto ;
36
37
import ru .mystamps .web .service .dto .SeriesExtractedInfo ;
38
+ import ru .mystamps .web .validation .ValidationRules ;
37
39
38
40
@ RequiredArgsConstructor
41
+ @ SuppressWarnings ({
42
+ // predicate names in camel case are more readable than in uppercase
43
+ "PMD.VariableNamingConventions" , "checkstyle:constantname" ,
44
+ // the "||" operators are kept at the end of the line because it's more readable
45
+ "checkstyle:operatorwrap"
46
+ })
39
47
public class SeriesInfoExtractorServiceImpl implements SeriesInfoExtractorService {
40
48
41
49
// Related to RELEASE_YEAR_REGEXP and used in unit tests.
@@ -51,6 +59,28 @@ public class SeriesInfoExtractorServiceImpl implements SeriesInfoExtractorServic
51
59
Pattern .CASE_INSENSITIVE | Pattern .UNICODE_CASE
52
60
);
53
61
62
// Pre-compiled name patterns (built from the ValidationRules regexps) and length checks that
// extractCategory()/extractCountry() pass to Stream.filter() on the candidate names.
// NOTE(review): the tooShort*/tooLong* predicates are named after what they are meant to
// reject, but each one returns true when the name is WITHIN the limit
// (length >= *_MIN_LENGTH, length <= *_MAX_LENGTH), so filter() keeps acceptable names.
// This comment is placed above the CheckStyle directive on purpose: the directive counts
// the lines that follow it.
+ // CheckStyle: ignore LineLength for next 9 lines
63
+ private static final Pattern VALID_CATEGORY_NAME_EN = Pattern .compile (ValidationRules .CATEGORY_NAME_EN_REGEXP );
64
+ private static final Pattern VALID_CATEGORY_NAME_RU = Pattern .compile (ValidationRules .CATEGORY_NAME_RU_REGEXP );
65
+ private static final Pattern VALID_COUNTRY_NAME_EN = Pattern .compile (ValidationRules .COUNTRY_NAME_EN_REGEXP );
66
+ private static final Pattern VALID_COUNTRY_NAME_RU = Pattern .compile (ValidationRules .COUNTRY_NAME_RU_REGEXP );
67
+
68
+ private static final Predicate <String > tooShortCategoryName = name -> name .length () >= ValidationRules .CATEGORY_NAME_MIN_LENGTH ;
69
+ private static final Predicate <String > tooLongCategoryName = name -> name .length () <= ValidationRules .CATEGORY_NAME_MAX_LENGTH ;
70
+ private static final Predicate <String > tooShortCountryName = name -> name .length () >= ValidationRules .COUNTRY_NAME_MIN_LENGTH ;
71
+ private static final Predicate <String > tooLongCountryName = name -> name .length () <= ValidationRules .COUNTRY_NAME_MAX_LENGTH ;
72
+
73
// NOTE(review): despite the "invalid" prefix, this predicate is true when the whole name
// matches either the EN or the RU category name pattern, i.e. for VALID names.
// extractCategory() passes it to Stream.filter(), which therefore keeps the valid names.
+ private static final Predicate <String > invalidCategoryName = name ->
74
+ VALID_CATEGORY_NAME_EN .matcher (name ).matches () ||
75
+ VALID_CATEGORY_NAME_RU .matcher (name ).matches ();
76
+
77
// NOTE(review): despite the "invalid" prefix, this predicate is true when the whole name
// matches either the EN or the RU country name pattern, i.e. for VALID names.
// extractCountry() passes it to Stream.filter(), which therefore keeps the valid names.
+ private static final Predicate <String > invalidCountryName = name ->
78
+ VALID_COUNTRY_NAME_EN .matcher (name ).matches () ||
79
+ VALID_COUNTRY_NAME_RU .matcher (name ).matches ();
80
+
81
+ // Max number of candidates that will be used in the SQL query within IN() statement.
// Applied with Stream.limit() after the name filters and distinct() in
// extractCategory()/extractCountry(), so it caps the unique candidates that passed filtering.
82
+ private static final long MAX_CANDIDATES_FOR_LOOKUP = 50 ;
83
+
54
84
private final Logger log ;
55
85
private final CategoryService categoryService ;
56
86
private final CountryService countryService ;
@@ -74,6 +104,8 @@ public SeriesExtractedInfo extract(RawParsedDataDto data) {
74
104
);
75
105
}
76
106
107
+ // CheckStyle: ignore LineLength for next 1 line
108
+ // @todo #821 SeriesInfoExtractorServiceImpl.extractCategory(): add unit tests for filtering invalid names
77
109
protected List <Integer > extractCategory (String fragment ) {
78
110
if (StringUtils .isBlank (fragment )) {
79
111
return Collections .emptyList ();
@@ -83,7 +115,11 @@ protected List<Integer> extractCategory(String fragment) {
83
115
84
116
String [] candidates = StringUtils .split (fragment , "\n \t ," );
85
117
List <String > uniqueCandidates = Arrays .stream (candidates )
118
+ .filter (tooShortCategoryName )
119
+ .filter (tooLongCategoryName )
120
+ .filter (invalidCategoryName )
86
121
.distinct ()
122
+ .limit (MAX_CANDIDATES_FOR_LOOKUP )
87
123
.collect (Collectors .toList ());
88
124
89
125
log .debug ("Possible candidates: {}" , uniqueCandidates );
@@ -108,6 +144,8 @@ protected List<Integer> extractCategory(String fragment) {
108
144
return Collections .emptyList ();
109
145
}
110
146
147
+ // CheckStyle: ignore LineLength for next 1 line
148
+ // @todo #821 SeriesInfoExtractorServiceImpl.extractCountry(): add unit tests for filtering invalid names
111
149
protected List <Integer > extractCountry (String fragment ) {
112
150
if (StringUtils .isBlank (fragment )) {
113
151
return Collections .emptyList ();
@@ -117,7 +155,11 @@ protected List<Integer> extractCountry(String fragment) {
117
155
118
156
String [] candidates = StringUtils .split (fragment , "\n \t ," );
119
157
List <String > uniqueCandidates = Arrays .stream (candidates )
158
+ .filter (tooShortCountryName )
159
+ .filter (tooLongCountryName )
160
+ .filter (invalidCountryName )
120
161
.distinct ()
162
+ .limit (MAX_CANDIDATES_FOR_LOOKUP )
121
163
.collect (Collectors .toList ());
122
164
123
165
log .debug ("Possible candidates: {}" , uniqueCandidates );
0 commit comments