Skip to content

Commit 5ad75b3

Browse files
Added support for custom tokenizer for diffWords
1 parent 322c95e commit 5ad75b3

File tree

2 files changed

+25
-1
lines changed

2 files changed

+25
-1
lines changed

src/diff/word.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ wordDiff.equals = function(left, right) {
3232
return left === right || (this.options.ignoreWhitespace && !reWhitespace.test(left) && !reWhitespace.test(right));
3333
};
3434
wordDiff.tokenize = function(value) {
35-
let tokens = value.split(/(\s+|[()[\]{}'"]|\b)/);
35+
const tokenizer = this.options.tokenizer || /(\s+|[()[\]{}'"]|\b)/; // Use the tokenizer regex in the options or use the default regex
36+
const tokens = value.split(tokenizer);
3637

3738
// Join the boundary splits that we do not consider to be boundaries. This is primarily the extended Latin character set.
3839
for (let i = 0; i < tokens.length - 1; i++) {

test/diff/word.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,29 @@ describe('WordDiff', function() {
171171
done();
172172
});
173173
});
174+
175+
// With custom tokenizer
176+
it('should utilize a custom tokenizer', function() {
177+
178+
const diff = diffWords('foo_bar', 'something_bar', {
179+
tokenizer: /(\s+|[()[\]{}_'"]|\b)/
180+
});
181+
182+
expect(diff).to.eql([{
183+
count: 1,
184+
added: undefined,
185+
removed: true,
186+
value: 'foo'
187+
}, {
188+
count: 1,
189+
added: true,
190+
removed: undefined,
191+
value: 'something'
192+
}, {
193+
count: 2,
194+
value: '_bar'
195+
}]);
196+
});
174197
});
175198

176199
describe('#diffWordsWithSpace', function() {

0 commit comments

Comments
 (0)