Commit 817c988

Tweak example of using a Chinese word segmenter to one where the segmenter segments in a way that seems more correct (or at least more self-consistent) to a native Chinese speaker
See discussion at #539 (comment) for explanation
Parent: a5cf0db

File tree: 1 file changed (+5, −7)

test/diff/word.js

Lines changed: 5 additions & 7 deletions
@@ -240,18 +240,16 @@ describe('WordDiff', function() {
 
   it('supports tokenizing with an Intl.Segmenter', () => {
     // Example 1: Diffing Chinese text with no spaces.
-    // I am not a Chinese speaker but I believe these sentences to mean:
-    // 1. "I have (我有) many (很多) tables (桌子)"
-    // 2. "Mei (梅) has (有) many (很多) sons (儿子)"
+    // a. "He (他) has (有) many (很多) tables (桌子)"
+    // b. "Mei (梅) has (有) many (很多) sons (儿子)"
     // We want to see that diffWords will get the word counts right and won't try to treat the
     // trailing 子 as common to both texts (since it's part of a different word each time).
-    // TODO: Check with a Chinese speaker that this example is correct Chinese.
     const chineseSegmenter = new Intl.Segmenter('zh', {granularity: 'word'});
-    const diffResult = diffWords('我有很多桌子。', '梅有很多儿子。', {intlSegmenter: chineseSegmenter});
+    const diffResult = diffWords('他有很多桌子。', '梅有很多儿子。', {intlSegmenter: chineseSegmenter});
     expect(diffResult).to.deep.equal([
-      { count: 1, added: false, removed: true, value: '我有' },
-      { count: 2, added: true, removed: false, value: '梅有' },
-      { count: 1, added: false, removed: false, value: '很多' },
+      { count: 1, added: false, removed: true, value: '他' },
+      { count: 1, added: true, removed: false, value: '梅' },
+      { count: 2, added: false, removed: false, value: '有很多' },
       { count: 1, added: false, removed: true, value: '桌子' },
       { count: 1, added: true, removed: false, value: '儿子' },
       { count: 1, added: false, removed: false, value: '。' }
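The updated expectations encode a particular tokenization of the two strings (他 | 有 | 很多 | 桌子 | 。 and 梅 | 有 | 很多 | 儿子 | 。), which you can inspect directly with `Intl.Segmenter`. A minimal sketch — the `words` helper is illustrative and not part of jsdiff, and exact word boundaries depend on the ICU data shipped with your JS engine, so output may differ between environments:

```javascript
// Inspect how an Intl.Segmenter with word granularity tokenizes the
// two strings used in the test. Boundaries come from the engine's ICU
// data and may vary across engines/versions.
const segmenter = new Intl.Segmenter('zh', {granularity: 'word'});

// Illustrative helper: collect the segment strings for a piece of text.
function words(text) {
  return Array.from(segmenter.segment(text), s => s.segment);
}

console.log(words('他有很多桌子。')); // segmentation assumed by the test: 他 | 有 | 很多 | 桌子 | 。
console.log(words('梅有很多儿子。')); // segmentation assumed by the test: 梅 | 有 | 很多 | 儿子 | 。
```

If both sentences segment as pronoun/name + 有 + 很多 + noun, diffWords can report 有很多 as common and treat 桌子/儿子 as whole-word changes rather than sharing the trailing 子, which is what the new expectations assert.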
