Skip to content

Commit 0e85423

Browse files
committed
Import series: add support for extracting issue year.
Addressed to #693
1 parent ffc0acd commit 0e85423

22 files changed

+143
-20
lines changed

src/main/java/ru/mystamps/web/controller/SeriesController.java

+5
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ public void showForm(
131131
@Category @RequestParam(name = "category", required = false) LinkEntityDto category,
132132
@Country @RequestParam(name = "country", required = false) LinkEntityDto country,
133133
@RequestParam(name = "image_url", required = false) String imageUrl,
134+
@RequestParam(name = "year", required = false) Integer year,
134135
Model model,
135136
Locale userLocale) {
136137

@@ -161,6 +162,10 @@ public void showForm(
161162
addSeriesForm.setImageUrl(imageUrl);
162163
}
163164

165+
if (year != null) {
166+
addSeriesForm.setYear(year);
167+
}
168+
164169
model.addAttribute("addSeriesForm", addSeriesForm);
165170
}
166171

src/main/java/ru/mystamps/web/controller/event/DownloadingSucceededEventListener.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,8 @@ public void onApplicationEvent(DownloadingSucceeded event) {
8989
RawParsedDataDto data = new RawParsedDataDto(
9090
info.getCategoryName(),
9191
info.getCountryName(),
92-
info.getImageUrl()
92+
info.getImageUrl(),
93+
info.getIssueDate()
9394
);
9495
importService.saveParsedData(requestId, data);
9596
}

src/main/java/ru/mystamps/web/dao/dto/ParsedDataDto.java

+1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,5 @@ public class ParsedDataDto {
2626
private final EntityWithSlugDto category;
2727
private final EntityWithSlugDto country;
2828
private final String imageUrl;
29+
private final Integer issueYear;
2930
}

src/main/java/ru/mystamps/web/dao/dto/SaveParsedDataDbDto.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@ public class SaveParsedDataDbDto {
3030
private Integer categoryId;
3131
private Integer countryId;
3232
private String imageUrl;
33+
private Integer releaseYear;
3334
private Date createdAt;
3435
private Date updatedAt;
3536

3637
public boolean hasAtLeastOneFieldFilled() {
3738
return categoryId != null
3839
|| countryId != null
39-
|| imageUrl != null;
40+
|| imageUrl != null
41+
|| releaseYear != null;
4042
}
4143

4244
}

src/main/java/ru/mystamps/web/dao/impl/JdbcSeriesImportDao.java

+1
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ public void addParsedContent(Integer requestId, SaveParsedDataDbDto data) {
170170
params.put("category_id", data.getCategoryId());
171171
params.put("country_id", data.getCountryId());
172172
params.put("image_url", data.getImageUrl());
173+
params.put("release_year", data.getReleaseYear());
173174
params.put("created_at", data.getCreatedAt());
174175
params.put("updated_at", data.getUpdatedAt());
175176

src/main/java/ru/mystamps/web/dao/impl/RowMappers.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,9 @@ public static ParsedDataDto forParsedDataDto(ResultSet rs, int i) throws SQLExce
288288
}
289289

290290
String imageUrl = rs.getString("image_url");
291+
Integer releaseYear = JdbcUtils.getInteger(rs, "release_year");
291292

292-
return new ParsedDataDto(category, country, imageUrl);
293+
return new ParsedDataDto(category, country, imageUrl, releaseYear);
293294
}
294295

295296
}

src/main/java/ru/mystamps/web/service/SeriesImportServiceImpl.java

+5
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ public void saveParsedData(Integer requestId, RawParsedDataDto data) {
147147
processedData.setCountryId(countryIds.get(0));
148148
}
149149

150+
Integer releaseYear = extractorService.extractReleaseYear(data.getReleaseYear());
151+
if (releaseYear != null) {
152+
processedData.setReleaseYear(releaseYear);
153+
}
154+
150155
// TODO: handle it gracefully by publishing ParsingFailed event
151156
Validate.validState(processedData.hasAtLeastOneFieldFilled(), "");
152157

src/main/java/ru/mystamps/web/service/SeriesInfoExtractorService.java

+1
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@
2222
/**
 * Extracts pieces of series information from free-form text fragments.
 */
public interface SeriesInfoExtractorService {
2323
/**
 * Extracts category candidates from a fragment.
 *
 * NOTE(review): presumably returns identifiers of the matched categories; confirm against the implementation.
 */
List<Integer> extractCategory(String fragment);
2424
/**
 * Extracts country candidates from a fragment.
 *
 * NOTE(review): presumably returns identifiers of the matched countries (the caller takes
 * the first element as a country id); confirm against the implementation.
 */
List<Integer> extractCountry(String fragment);
25+
/**
 * Extracts a release year from a fragment.
 *
 * @param fragment text that may contain a release year
 * @return extracted year or {@code null} when the fragment does not contain one
 */
Integer extractReleaseYear(String fragment);
2526
}

src/main/java/ru/mystamps/web/service/SeriesInfoExtractorServiceImpl.java

+33
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.HashSet;
2222
import java.util.List;
2323
import java.util.Set;
24+
import java.util.regex.Pattern;
2425

2526
import org.apache.commons.lang3.StringUtils;
2627

@@ -33,6 +34,9 @@
3334
@RequiredArgsConstructor
3435
public class SeriesInfoExtractorServiceImpl implements SeriesInfoExtractorService {
3536

37+
// Regular expression matches release year of the stamps (from 1840 till 2099).
38+
private static final Pattern RELEASE_YEAR_REGEXP = Pattern.compile("18[4-9][0-9]|19[0-9]{2}|20[0-9]{2}");
39+
3640
private final Logger log;
3741
private final CategoryService categoryService;
3842
private final CountryService countryService;
@@ -107,4 +111,33 @@ public List<Integer> extractCountry(String fragment) {
107111
return Collections.emptyList();
108112
}
109113

114+
@Override
115+
public Integer extractReleaseYear(String fragment) {
116+
if (StringUtils.isBlank(fragment)) {
117+
return null;
118+
}
119+
120+
log.debug("Determining release year from a fragment: '{}'", fragment);
121+
122+
String[] candidates = StringUtils.split(fragment);
123+
for (String candidate : candidates) {
124+
if (!RELEASE_YEAR_REGEXP.matcher(candidate).matches()) {
125+
continue;
126+
}
127+
128+
try {
129+
Integer year = Integer.valueOf(candidate);
130+
log.debug("Release year is {}", year);
131+
return year;
132+
133+
} catch (NumberFormatException ignored) {
134+
// continue with the next element
135+
}
136+
}
137+
138+
log.debug("Could not extract release year from a fragment");
139+
140+
return null;
141+
}
142+
110143
}

src/main/java/ru/mystamps/web/service/dto/RawParsedDataDto.java

+1
Original file line numberDiff line numberDiff line change
@@ -26,4 +26,5 @@ public class RawParsedDataDto {
2626
private final String categoryName;
2727
private final String countryName;
2828
private final String imageUrl;
29+
private final String releaseYear;
2930
}

src/main/java/ru/mystamps/web/util/extractor/SeriesInfo.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,16 @@ public class SeriesInfo {
2929
private String categoryName;
3030
private String countryName;
3131
private String imageUrl;
32+
private String issueDate;
3233

3334
/**
3435
* Check whether any info about a series is available.
3536
*/
3637
public boolean isEmpty() {
3738
return categoryName == null
3839
&& countryName == null
39-
&& imageUrl == null;
40+
&& imageUrl == null
41+
&& issueDate == null;
4042
}
4143

4244
}

src/main/java/ru/mystamps/web/util/extractor/SiteParser.java

+24
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public class SiteParser {
4141
private String countryLocator;
4242
private String shortDescriptionLocator;
4343
private String imageUrlLocator;
44+
private String issueDateLocator;
4445

4546
public boolean setField(String name, String value) {
4647
Validate.validState(StringUtils.isNotBlank(name), "Field name must be non-null");
@@ -80,6 +81,11 @@ public boolean setField(String name, String value) {
8081
valid = true;
8182
break;
8283

84+
case "issue-date-locator":
85+
setIssueDateLocator(value);
86+
valid = true;
87+
break;
88+
8389
default:
8490
break;
8591
}
@@ -95,6 +101,7 @@ public boolean isFullyInitialized() {
95101
|| countryLocator != null
96102
|| shortDescriptionLocator != null
97103
|| imageUrlLocator != null
104+
|| issueDateLocator != null
98105
);
99106
}
100107

@@ -122,6 +129,7 @@ public SeriesInfo parse(String htmlPage) {
122129
info.setCategoryName(extractCategory(body));
123130
info.setCountryName(extractCountry(body));
124131
info.setImageUrl(extractImageUrl(body));
132+
info.setIssueDate(extractIssueDate(body));
125133

126134
return info;
127135
}
@@ -178,4 +186,20 @@ private String extractImageUrl(Element body) {
178186
return StringUtils.trimToNull(url);
179187
}
180188

189+
private String extractIssueDate(Element body) {
190+
String locator = ObjectUtils.firstNonNull(issueDateLocator, shortDescriptionLocator);
191+
if (locator == null) {
192+
return null;
193+
}
194+
195+
Elements elements = body.select(locator);
196+
if (elements.isEmpty()) {
197+
return null;
198+
}
199+
200+
String date = elements.first().text();
201+
LOG.debug("Extracted issue date: '{}'", date);
202+
return date;
203+
}
204+
181205
}

src/main/resources/application-test.properties

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ app.site-parser[0].matched-url = http://127.0.0.1:8080
4848
app.site-parser[0].category-locator = #category_name
4949
app.site-parser[0].country-locator = #country_name
5050
app.site-parser[0].image-url-locator = #series-image-link-1
51+
app.site-parser[0].issue-date-locator = #issue_date
5152

5253
app.site-parser[1].name = localhost-by-name
5354
app.site-parser[1].matched-url = http://localhost:8080

src/main/resources/application-travis.properties

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ app.site-parser[0].matched-url = http://127.0.0.1:8080
4545
app.site-parser[0].category-locator = #category_name
4646
app.site-parser[0].country-locator = #country_name
4747
app.site-parser[0].image-url-locator = #series-image-link-1
48+
app.site-parser[0].issue-date-locator = #issue_date
4849

4950
app.site-parser[1].name = localhost-by-name
5051
app.site-parser[1].matched-url = http://localhost:8080

src/main/resources/liquibase/version/0.4.xml

+1
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,6 @@
3434
<include file="0.4/2017-10-31--non_nullable_catalog_codes.xml" relativeToChangelogFile="true" />
3535
<include file="0.4/2017-11-08--import_series.xml" relativeToChangelogFile="true" />
3636
<include file="0.4/2017-11-09--series_import_requests_url_length.xml" relativeToChangelogFile="true" />
37+
<include file="0.4/2017-11-09--series_import_parsed_data_release_year_field.xml" relativeToChangelogFile="true" />
3738

3839
</databaseChangeLog>

src/main/resources/liquibase/version/0.4/2017-11-08--import_series.xml

+1
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@
139139
<column name="perforated" valueBoolean="false" />
140140
<column name="category_id" valueNumeric="1" />
141141
<column name="country_id" valueNumeric="1" />
142+
<column name="release_year" valueNumeric="2000" />
142143
<column name="created_at" valueComputed="${NOW}" />
143144
<column name="created_by" valueComputed="(SELECT id FROM users WHERE role = 'USER' ORDER by id LIMIT 1)" />
144145
<column name="updated_at" valueComputed="${NOW}" />
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<databaseChangeLog
3+
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
4+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
5+
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
6+
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-3.0.xsd">
7+
8+
<changeSet id="add-release_year-column-to-series_import_parsed_data-table" author="php-coder" context="scheme">
9+
10+
<addColumn tableName="series_import_parsed_data">
11+
<column name="release_year" type="INTEGER" />
12+
</addColumn>
13+
14+
</changeSet>
15+
16+
</databaseChangeLog>

src/main/resources/sql/series_import_request_dao_queries.properties

+3
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ INSERT \
6767
, category_id \
6868
, country_id \
6969
, image_url \
70+
, release_year \
7071
, created_at \
7172
, updated_at \
7273
) \
@@ -75,6 +76,7 @@ VALUES \
7576
, :category_id \
7677
, :country_id \
7778
, :image_url \
79+
, :release_year \
7880
, :created_at \
7981
, :updated_at \
8082
)
@@ -85,6 +87,7 @@ series_import_requests.find_parsed_data_by_request_id = \
8587
, CASE WHEN 'ru' = :lang THEN COALESCE(cat.name_ru, cat.name) ELSE cat.name END AS category_name \
8688
, count.slug AS country_slug \
8789
, CASE WHEN 'ru' = :lang THEN COALESCE(count.name_ru, count.name) ELSE count.name END AS country_name \
90+
, pd.release_year \
8891
FROM series_import_parsed_data pd \
8992
LEFT JOIN categories cat \
9093
ON cat.id = pd.category_id \

src/main/webapp/WEB-INF/views/series/import/info.html

+13-2
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ <h3 th:text="#{t_gathered_data}">
9999
<form id="start-import-series-form" method="get" action="../add.html" th:action="@{${ADD_SERIES_PAGE}}">
100100
<div class="table-responsive">
101101
<table class="table"
102-
th:with="hasCategory=${parsedData.category != null},hasCountry=${parsedData.country != null},hasImageUrl=${parsedData.imageUrl != null}">
102+
th:with="hasCategory=${parsedData.category != null},hasCountry=${parsedData.country != null},hasImageUrl=${parsedData.imageUrl != null},hasIssueYear=${parsedData.issueYear != null}">
103103

104104
<tr th:if="${hasCategory}">
105105
<th th:text="#{t_category}">Category</th>
@@ -118,7 +118,12 @@ <h3 th:text="#{t_gathered_data}">
118118
<td id="parsed-image-url" th:text="${parsedData.imageUrl}">
119119
http://localhost:8080/image/1
120120
</td>
121-
121+
</tr>
122+
<tr th:if="${hasIssueYear}">
123+
<th th:text="#{t_year}">Year</th>
124+
<td id="parsed-issue-year" th:text="${parsedData.issueYear}">
125+
1960
126+
</td>
122127
</tr>
123128
<tr>
124129
<td></td>
@@ -141,6 +146,12 @@ <h3 th:text="#{t_gathered_data}">
141146
value="http://localhost:8080/image/1"
142147
th:if="${hasImageUrl}"
143148
th:value="${parsedData.imageUrl}" />
149+
<input
150+
type="hidden"
151+
name="year"
152+
value="1960"
153+
th:if="${hasIssueYear}"
154+
th:value="${parsedData.issueYear}" />
144155
<input
145156
type="submit"
146157
class="btn btn-primary"

src/test/java/ru/mystamps/web/service/TestObjects.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,8 @@ public static ParsedDataDto createParsedDataDto() {
196196
return new ParsedDataDto(
197197
new EntityWithSlugDto(categoryName, categorySlug),
198198
new EntityWithSlugDto(countryName, countrySlug),
199-
Random.url()
199+
Random.url(),
200+
Random.issueYear()
200201
);
201202
}
202203

src/test/java/ru/mystamps/web/tests/Random.java

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*/
1818
package ru.mystamps.web.tests;
1919

20+
import java.time.Year;
2021
import java.util.HashSet;
2122
import java.util.List;
2223
import java.util.Set;
@@ -84,6 +85,10 @@ public static String countryName() {
8485
.english();
8586
}
8687

88+
public static Integer issueYear() {
89+
return between(ValidationRules.MIN_RELEASE_YEAR, Year.now().getValue()).integer();
90+
}
91+
8792
public static String importRequestStatus() {
8893
return sample(STATUSES);
8994
}

0 commit comments

Comments
 (0)