20
20
import java .math .BigDecimal ;
21
21
import java .util .Arrays ;
22
22
import java .util .Collections ;
23
+ import java .util .LinkedHashSet ;
23
24
import java .util .List ;
25
+ import java .util .Set ;
24
26
import java .util .regex .Matcher ;
25
27
import java .util .regex .Pattern ;
26
28
import java .util .stream .Collectors ;
29
+ import java .util .stream .IntStream ;
27
30
import java .util .stream .Stream ;
28
31
29
32
import org .apache .commons .lang3 .StringUtils ;
40
43
import ru .mystamps .web .validation .ValidationRules ;
41
44
42
45
@ RequiredArgsConstructor
43
- @ SuppressWarnings ("PMD.TooManyMethods" )
46
+ @ SuppressWarnings ({ "PMD.TooManyMethods" , "PMD.GodClass" } )
44
47
public class SeriesInfoExtractorServiceImpl implements SeriesInfoExtractorService {
45
48
46
49
// Related to RELEASE_YEAR_REGEXP and used in unit tests.
@@ -56,6 +59,10 @@ public class SeriesInfoExtractorServiceImpl implements SeriesInfoExtractorServic
56
59
Pattern .CASE_INSENSITIVE | Pattern .UNICODE_CASE
57
60
);
58
61
62
+ // Regular expression matches range of Michel catalog numbers (from 1 to 9999).
63
+ private static final Pattern MICHEL_NUMBERS_REGEXP =
64
+ Pattern .compile ("#[ ]?([1-9][0-9]{0,3})-([1-9][0-9]{0,3})" );
65
+
59
66
// CheckStyle: ignore LineLength for next 4 lines
60
67
private static final Pattern VALID_CATEGORY_NAME_EN = Pattern .compile (ValidationRules .CATEGORY_NAME_EN_REGEXP );
61
68
private static final Pattern VALID_CATEGORY_NAME_RU = Pattern .compile (ValidationRules .CATEGORY_NAME_RU_REGEXP );
@@ -79,6 +86,7 @@ public SeriesExtractedInfo extract(RawParsedDataDto data) {
79
86
Integer releaseYear = extractReleaseYear (data .getReleaseYear ());
80
87
Integer quantity = extractQuantity (data .getQuantity ());
81
88
Boolean perforated = extractPerforated (data .getPerforated ());
89
+ Set <String > michelNumbers = extractMichelNumbers (data .getMichelNumbers ());
82
90
Integer sellerId = extractSeller (data .getSellerName (), data .getSellerUrl ());
83
91
String sellerName = extractSellerName (sellerId , data .getSellerName ());
84
92
String sellerUrl = extractSellerUrl (sellerId , data .getSellerUrl ());
@@ -91,6 +99,7 @@ public SeriesExtractedInfo extract(RawParsedDataDto data) {
91
99
releaseYear ,
92
100
quantity ,
93
101
perforated ,
102
+ michelNumbers ,
94
103
sellerId ,
95
104
sellerName ,
96
105
sellerUrl ,
@@ -254,6 +263,34 @@ protected Boolean extractPerforated(String fragment) {
254
263
return null ;
255
264
}
256
265
266
+ // @todo #694 SeriesInfoExtractorServiceImpl.extractMichelNumbers(): add unit tests
267
+ // @todo #694 SeriesInfoExtractorServiceImpl: support for a single Michel number
268
+ // @todo #694 SeriesInfoExtractorServiceImpl: support for a comma separated Michel numbers
269
+ protected Set <String > extractMichelNumbers (String fragment ) {
270
+ if (StringUtils .isBlank (fragment )) {
271
+ return Collections .emptySet ();
272
+ }
273
+
274
+ log .debug ("Determining michel numbers from a fragment: '{}'" , fragment );
275
+
276
+ Matcher matcher = MICHEL_NUMBERS_REGEXP .matcher (fragment );
277
+ if (matcher .find ()) {
278
+ Integer begin = Integer .valueOf (matcher .group (1 ));
279
+ Integer end = Integer .valueOf (matcher .group (2 ));
280
+ if (begin < end ) {
281
+ Set <String > numbers = IntStream .rangeClosed (begin , end )
282
+ .mapToObj (String ::valueOf )
283
+ .collect (Collectors .toCollection (LinkedHashSet ::new ));
284
+ log .debug ("Extracted michel numbers: {}" , numbers );
285
+ return numbers ;
286
+ }
287
+ }
288
+
289
+ log .debug ("Could not extract michel numbers from a fragment" );
290
+
291
+ return Collections .emptySet ();
292
+ }
293
+
257
294
public Integer extractSeller (String name , String url ) {
258
295
if (StringUtils .isBlank (name ) || StringUtils .isBlank (url )) {
259
296
return null ;
0 commit comments