Skip to content

Commit e15196c

Browse files
committed
Add additional email address formats
1 parent b20b405 commit e15196c

File tree

4 files changed

+299
-20
lines changed

4 files changed

+299
-20
lines changed

CHANGELOG.md

+26
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,32 @@ Thanks, @Sprokof, for contributing (introducing `removeDots` and `adjustCase` op
105105
| removeSubAddress | Remove any sub-addressing (or tagged-addressing) from the local-part of the address. For example, `[email protected]` will become `[email protected]` | `removeSubAddress()` or `removeSubAddress(String)` |
106106
| performUnicodeNormalization | Perform unicode normalization on the local-part of the email address | `performUnicodeNormalization()` or `performUnicodeNormalization(Normalizer.Form)` |
107107

108+
109+
### Additional Address Formats
110+
111+
Version 2.0.0 introduces additional email address formats that can be obtained from
112+
the `Email` object (similar to the `normalized()` method).
113+
114+
- `Email#reference()` returns an MD5 hash of the normalized email address.
115+
116+
```
117+
"test@gmail.com" => "1aedb8d9dc4751e229a335e371db8058"
118+
```
119+
120+
- `Email#redacted()` returns a version of the normalized email address where the local-part
121+
is replaced with the SHA-1 hash of the local-part.
122+
123+
```
124+
"test@gmail.com" => "{a94a8fe5ccb19ba61c4c0873d391e987982fbbd3}@gmail.com"
125+
```
126+
127+
- `Email#munged()` returns a version of the normalized email address where the local-part
128+
and domain are obfuscated with five asterisk characters.
129+
130+
```
131+
"test@gmail.com" => "te*****@gm*****"
132+
```
133+
108134
---
109135
## 1.6.3
110136

src/main/java/com/sanctionco/jmail/Email.java

+179-20
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
import com.sanctionco.jmail.normalization.CaseOption;
44
import com.sanctionco.jmail.normalization.NormalizationOptions;
5-
import com.sanctionco.jmail.normalization.NormalizationOptionsBuilder;
65

6+
import java.nio.charset.StandardCharsets;
7+
import java.security.MessageDigest;
8+
import java.security.NoSuchAlgorithmException;
79
import java.text.Normalizer;
810
import java.util.Collections;
911
import java.util.List;
@@ -235,40 +237,178 @@ public TopLevelDomain topLevelDomain() {
235237
}
236238

237239
/**
238-
* <p>Return a "normalized" version of this email address. The actual result of normalization
239-
* depends on the configured normalization options (see {@link NormalizationOptions}),
240-
* but in general this method returns a version of the email address that is the same as
241-
* the original email address, except that all comments and optional parts
242-
* (identifiers, source routing) are removed. For example, the address
243-
* {@code "test@(comment)example.com"} will return {@code "test@example.com"}.</p>
240+
* <p>Return a "normalized" version of this email address. This method returns a version of the
241+
* email address that is the same as the original email address, except:</p>
242+
*
243+
* <ul>
244+
* <li>All comments are removed</li>
245+
* <li>All identifiers or source routing are removed</li>
246+
* <li>Any unnecessary quotes within the local-part are removed</li>
247+
* <li>The entire address is lowercased</li>
248+
* </ul>
244249
*
245-
* <p>This method uses the default set of {@link NormalizationOptions}. This default set
246-
* of options can be adjusted using system properties. See {@link NormalizationOptions}
247-
* for more details on which properties to set to adjust the defaults.</p>
250+
* <p>For example, the address {@code "tEST@(comment)example.com"} will return
251+
* {@code "test@example.com"}.</p>
248252
*
249-
* <p>Alternatively, one can use the {@link Email#normalized(NormalizationOptions)} method
250-
* and pass in a custom set of options to adjust the behavior.</p>
253+
* <p>This method uses the default set of {@link NormalizationOptions} when performing
254+
* normalization. Use {@link #normalized(NormalizationOptions)} instead of this method
255+
* to further customize how normalization behaves.</p>
251256
*
252257
* @return the normalized version of this email address
253258
*/
254259
public String normalized() {
255-
return normalized(NormalizationOptions.builder().build());
260+
return normalized(NormalizationOptions.DEFAULT_OPTIONS);
256261
}
257262

258263
/**
259264
* <p>Return a "normalized" version of this email address. The actual result of normalization
260265
* depends on the configured normalization options, but in general this method returns
261-
* a version of the email address that is the same as the original email address, except
262-
* that all comments and optional parts (identifiers, source routing) are removed.
263-
* For example, the address {@code "test@(comment)example.com"} will return
266+
* a version of the email address that is the same as the original email address, except:</p>
267+
*
268+
* <ul>
269+
* <li>All comments are removed</li>
270+
* <li>All identifiers or source routing are removed</li>
271+
* <li>Any unnecessary quotes within the local-part are removed</li>
272+
* <li>The entire address is lowercased</li>
273+
* </ul>
274+
*
275+
* <p>For example, the address {@code "tEST@(comment)example.com"} will return
264276
* {@code "test@example.com"}.</p>
265277
*
266278
* <p>See {@link NormalizationOptions} for more details on all of the configurable options.</p>
267279
*
268280
* @param options the {@link NormalizationOptions} to use when normalizing
269281
* @return the normalized version of this email address
282+
* @see NormalizationOptions
270283
*/
271284
public String normalized(NormalizationOptions options) {
285+
return normalizedLocalPart(options) + "@" + normalizedDomain(options);
286+
}
287+
288+
/**
289+
* <p>Returns an MD5 reference to the email address. This format can be useful to share references
290+
* to the email address without sharing the actual address.</p>
291+
*
292+
* <p>The reference is calculated by first performing normalization on the address, and then
293+
* taking the MD5 hash of the normalized address. See {@link #normalized()} for more details
294+
* on how normalization works.</p>
295+
*
296+
* <p>This method uses the default {@link NormalizationOptions}. If you wish to customize how the
297+
* normalization happens, use {@link #reference(NormalizationOptions)} instead.</p>
298+
*
299+
* @return the MD5 reference string for the address
300+
* @throws NoSuchAlgorithmException if the MD5 algorithm is unable to be loaded
301+
*/
302+
public String reference() throws NoSuchAlgorithmException {
303+
return reference(NormalizationOptions.DEFAULT_OPTIONS);
304+
}
305+
306+
/**
307+
* <p>Returns an MD5 reference to the email address. This format can be useful to share references
308+
* to the email address without sharing the actual address.</p>
309+
*
310+
* <p>The reference is calculated by first performing normalization on the address, and then
311+
* taking the MD5 hash of the normalized address. See {@link #normalized(NormalizationOptions)}
312+
* for more details on how normalization works.</p>
313+
*
314+
* @param options the {@link NormalizationOptions} to use when normalizing
315+
* @return the MD5 reference string for the address
316+
* @throws NoSuchAlgorithmException if the MD5 algorithm is unable to be loaded
317+
*/
318+
public String reference(NormalizationOptions options) throws NoSuchAlgorithmException {
319+
MessageDigest md = MessageDigest.getInstance("MD5");
320+
321+
byte[] normalized = normalized(options).getBytes(StandardCharsets.UTF_8);
322+
byte[] digest = md.digest(normalized);
323+
324+
return toHexString(digest);
325+
}
326+
327+
/**
328+
* <p>Returns a redacted version of the email address in the format {@code "{local-part}@domain"}.
329+
* This format can be useful when storing addresses in a data store (to avoid storing the original
330+
* address).</p>
331+
*
332+
* <p>The redacted address is calculated by first performing normalization on the address, and
333+
* then taking the SHA-1 hash of the local-part of the normalized address to construct the final
334+
* redacted version of the address. See {@link #normalized()} for more details on how
335+
* normalization works.</p>
336+
*
337+
* <p>This method uses the default {@link NormalizationOptions}. If you wish to customize how the
338+
* normalization happens, use {@link #redacted(NormalizationOptions)} instead.</p>
339+
*
340+
* @return the redacted version of the email address
341+
* @throws NoSuchAlgorithmException if the SHA-1 algorithm is unable to be loaded
342+
*/
343+
public String redacted() throws NoSuchAlgorithmException {
344+
return redacted(NormalizationOptions.DEFAULT_OPTIONS);
345+
}
346+
347+
/**
348+
* <p>Returns a redacted version of the email address in the format {@code "{local-part}@domain"}.
349+
* This format can be useful when storing addresses in a data store (to avoid storing the original
350+
* address).</p>
351+
*
352+
* <p>The redacted address is calculated by first performing normalization on the address, and
353+
* then taking the SHA-1 hash of the local-part of the normalized address to construct the final
354+
* redacted version of the address. See {@link #normalized(NormalizationOptions)} for more
355+
* details on how normalization works.</p>
356+
*
357+
* @param options the {@link NormalizationOptions} to use when normalizing
358+
* @return the redacted version of the email address
359+
* @throws NoSuchAlgorithmException if the SHA-1 algorithm is unable to be loaded
360+
*/
361+
public String redacted(NormalizationOptions options) throws NoSuchAlgorithmException {
362+
MessageDigest md = MessageDigest.getInstance("SHA1");
363+
364+
byte[] normalizedLocalPart = normalizedLocalPart(options).getBytes(StandardCharsets.UTF_8);
365+
byte[] digest = md.digest(normalizedLocalPart);
366+
367+
return "{" + toHexString(digest) + "}@" + normalizedDomain(options);
368+
}
369+
370+
/**
371+
* <p>Returns a munged version of the email address in the format {@code "lo*****@do*****"}.
372+
* This format can be useful when displaying addresses on a user account page.</p>
373+
*
374+
* <p>The munged address is calculated by first performing normalization on the address, and
375+
* then taking the first two characters of both the local-part and the domain, and adding
376+
* five {@code *} characters to each. See {@link #normalized()} for more
377+
* details on how normalization works.</p>
378+
*
379+
* <p>This method uses the default {@link NormalizationOptions}. If you wish to customize how the
380+
* normalization happens, use {@link #munged(NormalizationOptions)} instead.</p>
381+
*
382+
* @return the munged version of the email address
383+
*/
384+
public String munged() {
385+
return munged(NormalizationOptions.DEFAULT_OPTIONS);
386+
}
387+
388+
389+
/**
390+
* <p>Returns a munged version of the email address in the format {@code "lo*****@do*****"}.
391+
* This format can be useful when displaying addresses on a user account page.</p>
392+
*
393+
* <p>The munged address is calculated by first performing normalization on the address, and
394+
* then taking the first two characters of both the local-part and the domain, and adding
395+
* five {@code *} characters to each. See {@link #normalized(NormalizationOptions)} for more
396+
* details on how normalization works.</p>
397+
*
398+
* @param options the {@link NormalizationOptions} to use when normalizing
399+
* @return the munged version of the email address
400+
*/
401+
public String munged(NormalizationOptions options) {
402+
String localPart = normalizedLocalPart(options);
403+
localPart = localPart.length() < 2 ? localPart : localPart.substring(0, 2);
404+
405+
String domain = normalizedDomain(options);
406+
domain = domain.length() < 2 ? domain : domain.substring(0, 2);
407+
408+
return localPart + "*****@" + domain + "*****";
409+
}
410+
411+
private String normalizedLocalPart(NormalizationOptions options) {
272412
String localPart = options.shouldStripQuotes()
273413
? localPartWithoutQuotes
274414
: localPartWithoutComments;
@@ -287,15 +427,34 @@ public String normalized(NormalizationOptions options) {
287427
? caseOption.adjustLocalPart(localPart.replace(".", ""))
288428
: caseOption.adjustLocalPart(localPart);
289429

290-
String domain = isIpAddress
430+
if (options.shouldPerformUnicodeNormalization()) {
431+
localPart = Normalizer.normalize(localPart, options.getUnicodeNormalizationForm());
432+
}
433+
434+
return localPart;
435+
}
436+
437+
private String normalizedDomain(NormalizationOptions options) {
438+
CaseOption caseOption = options.getCaseOption();
439+
440+
return isIpAddress
291441
? "[" + this.domainWithoutComments + "]"
292442
: caseOption.adjustDomain(this.domainWithoutComments);
443+
}
293444

294-
if (options.shouldPerformUnicodeNormalization()) {
295-
localPart = Normalizer.normalize(localPart, options.getUnicodeNormalizationForm());
445+
private String toHexString(byte[] bytes) {
446+
char[] hexArray = {
447+
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
448+
'a', 'b', 'c', 'd', 'e', 'f'};
449+
char[] hexChars = new char[bytes.length * 2];
450+
451+
for (int j = 0; j < bytes.length; j++) {
452+
int v = bytes[j] & 0xFF;
453+
hexChars[j * 2] = hexArray[v / 16];
454+
hexChars[j * 2 + 1] = hexArray[v % 16];
296455
}
297456

298-
return localPart + "@" + domain;
457+
return new String(hexChars);
299458
}
300459

301460
/**

src/main/java/com/sanctionco/jmail/normalization/NormalizationOptions.java

+5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
* {@link #builder()}</p>
1414
*/
1515
public class NormalizationOptions {
16+
/**
17+
* A default {@code NormalizationOptions} object.
18+
*/
19+
public static final NormalizationOptions DEFAULT_OPTIONS = builder().build();
20+
1621
private final CaseOption caseOption;
1722
private final boolean removeDots;
1823
private final boolean removeSubAddress;

src/test/java/com/sanctionco/jmail/EmailTest.java

+89
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import com.sanctionco.jmail.normalization.CaseOption;
44
import com.sanctionco.jmail.normalization.NormalizationOptions;
55

6+
import java.security.NoSuchAlgorithmException;
67
import java.text.Normalizer;
78
import java.util.stream.Stream;
89

@@ -215,6 +216,94 @@ void ensureNormalizedDoesNotStripQuotesIfInvalid(String address) {
215216
.build()));
216217
}
217218

219+
@Test
220+
void ensureReferenceFormat() {
221+
String address = "test@gmail.com";
222+
String md5 = "1aedb8d9dc4751e229a335e371db8058";
223+
224+
assertThat(Email.of(address))
225+
.isPresent().get()
226+
.returns(md5, e -> {
227+
try {
228+
return e.reference();
229+
} catch (NoSuchAlgorithmException ex) {
230+
return "NoSuchAlgorithmException";
231+
}
232+
});
233+
234+
// With custom options
235+
String capitalizedMD5 = "e307f5ddc2a641dc63ace209e17b4f80";
236+
237+
assertThat(Email.of(address))
238+
.isPresent().get()
239+
.returns(capitalizedMD5, e -> {
240+
try {
241+
return e.reference(NormalizationOptions.builder()
242+
.adjustCase(CaseOption.UPPERCASE)
243+
.build());
244+
} catch (NoSuchAlgorithmException ex) {
245+
return "NoSuchAlgorithmException";
246+
}
247+
});
248+
}
249+
250+
@Test
251+
void ensureRedactedFormat() {
252+
String address = "test@gmail.com";
253+
String redacted = "{a94a8fe5ccb19ba61c4c0873d391e987982fbbd3}@gmail.com";
254+
255+
assertThat(Email.of(address))
256+
.isPresent().get()
257+
.returns(redacted, e -> {
258+
try {
259+
return e.redacted();
260+
} catch (NoSuchAlgorithmException ex) {
261+
return "NoSuchAlgorithmException";
262+
}
263+
});
264+
265+
// With custom options
266+
String capitalizedRedacted = "{984816fd329622876e14907634264e6f332e9fb3}@GMAIL.COM";
267+
268+
assertThat(Email.of(address))
269+
.isPresent().get()
270+
.returns(capitalizedRedacted, e -> {
271+
try {
272+
return e.redacted(NormalizationOptions.builder()
273+
.adjustCase(CaseOption.UPPERCASE)
274+
.build());
275+
} catch (NoSuchAlgorithmException ex) {
276+
return "NoSuchAlgorithmException";
277+
}
278+
});
279+
}
280+
281+
@Test
282+
void ensureMungedFormat() {
283+
String address = "test@gmail.com";
284+
String munged = "te*****@gm*****";
285+
286+
assertThat(Email.of(address))
287+
.isPresent().get()
288+
.returns(munged, Email::munged);
289+
290+
// With custom options
291+
String capitalizedMunged = "TE*****@GM*****";
292+
293+
assertThat(Email.of(address))
294+
.isPresent().get()
295+
.returns(capitalizedMunged, e -> e.munged(NormalizationOptions.builder()
296+
.adjustCase(CaseOption.UPPERCASE)
297+
.build()));
298+
299+
// With a very short address
300+
String shortAddress = "t@r";
301+
302+
assertThat(Email.of(shortAddress))
303+
.isPresent().get()
304+
.returns("t*****@r*****", Email::munged);
305+
}
306+
218307
static Stream<Arguments> provideValidForStripQuotes() {
219308
return Stream.of(
220309
Arguments.of("\"test.1\"@example.org", "test.1@example.org"),

0 commit comments

Comments
 (0)