mysql
diff --git a/‎include/mysys_err.h
+2-1 b/‎include/mysys_err.h
+2-1
diff --git a/‎mysql-test/r/ctype_ldml.result
+1 b/‎mysql-test/r/ctype_ldml.result
+1
diff --git a/‎mysql-test/r/ctype_unicode900_as_cs.result
+61 b/‎mysql-test/r/ctype_unicode900_as_cs.result
+61
diff --git a/‎mysql-test/suite/collations/r/chinese.result
+24,877 b/‎mysql-test/suite/collations/r/chinese.result
+24,877
diff --git a/‎mysql-test/suite/collations/t/chinese.test
+8 b/‎mysql-test/suite/collations/t/chinese.test
+8
diff --git a/‎mysql-test/suite/engines/funcs/r/db_alter_collate_ascii.result
+1 b/‎mysql-test/suite/engines/funcs/r/db_alter_collate_ascii.result
+1
diff --git a/‎mysql-test/suite/engines/funcs/r/db_alter_collate_utf8.result
+1 b/‎mysql-test/suite/engines/funcs/r/db_alter_collate_utf8.result
+1
diff --git a/‎mysql-test/suite/innodb/r/innodb-2byte-collation.result
+1 b/‎mysql-test/suite/innodb/r/innodb-2byte-collation.result
+1
diff --git a/‎mysql-test/suite/innodb/r/innodb_ctype_ldml.result
+1 b/‎mysql-test/suite/innodb/r/innodb_ctype_ldml.result
+1
diff --git a/‎mysql-test/t/ctype_unicode900_as_cs.test
+45 b/‎mysql-test/t/ctype_unicode900_as_cs.test
+45
diff --git a/‎mysys/charset-def.cc
+3-1 b/‎mysys/charset-def.cc
+3-1
diff --git a/‎mysys/errors.cc
+3-2 b/‎mysys/errors.cc
+3-2
diff --git a/‎strings/CMakeLists.txt
+24-3 b/‎strings/CMakeLists.txt
+24-3
@@ -132,7 +132,8 @@ extern const char *globerrs[]; /* my_error_messages is here */
 #define EE_SHIFT_CHAR_OUT_OF_RANGE 88
 #define EE_RESET_CHAR_OUT_OF_RANGE 89
 #define EE_UNKNOWN_LDML_TAG 90
-#define EE_ERROR_LAST 90 /* Copy last error nr */
+#define EE_FAILED_TO_RESET_BEFORE_SECONDARY_IGNORABLE_CHAR 91
+#define EE_ERROR_LAST 91 /* Copy last error nr */
 /* Add error numbers before EE_ERROR_LAST and change it accordingly. */
 
 /* Exit codes for option processing. When exiting from server use the
 
@@ -529,6 +529,7 @@ utf8mb4_ja_0900_as_cs_ks	utf8mb4	304
 utf8mb4_0900_as_ci	utf8mb4	305
 utf8mb4_ru_0900_ai_ci	utf8mb4	306
 utf8mb4_ru_0900_as_cs	utf8mb4	307
+utf8mb4_zh_0900_as_cs	utf8mb4	308
 utf8mb4_test_ci	utf8mb4	326
 utf16_test_ci	utf16	327
 utf8mb4_test_400_ci	utf8mb4	328
 
@@ -2014,3 +2014,64 @@ DROP TABLE t1;
 #
 # End of 5.8 tests
 #
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x2E87), (_utf16 0x2E8D), (_utf16 0x2F17),
+(_utf16 0x3038), (_utf16 0x24B6), (_utf32 0x1F150), (_utf16 0x4E2D),
+(_utf16 0x3197), (_utf32 0x1F22D), ('A'), ('a'), ('Z'), ('z'),
+(_utf16 0x3082), (_utf16 0x30E2), (_utf16 0x2E31), (_utf16 0x33E8),
+(_utf32 0x1F229), (_utf32 0x1F241), (_utf16 0xFA56);
+SELECT HEX(CONVERT(a USING utf32)), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, HEX(a);
+HEX(CONVERT(a USING utf32))	HEX(WEIGHT_STRING(a))
+00002E31	028C0000002000000002
+0001F241	0379815D037A000000200020002000000002000200020021
+000033E8	1C467F7E0000002000200000000200020021
+00002E87	4CDF000000200110000000040004
+0000FA56	51CD0000002000000002
+00002F17	857A00000020000000020021
+00003038	857A00000020000000020022
+00002E8D	9C310000002000000002
+0001F229	A63E00000020000000020024
+00004E2D	B8200000002000000002
+00003197	B82000000020000000020021
+0001F22D	B82000000020000000020023
+00000061	BDC40000002000000002
+00000041	BDC40000002000000008
+000024B6	BDC4000000200000000C
+0001F150	BDC4000000200000000C
+0000007A	C09E0000002000000002
+0000005A	C09E0000002000000008
+00003082	DEFA000000200000000E
+000030E2	DEFA0000002000000011
+DROP TABLE t1;
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x6C88), (_utf16 0x5F1E), (_utf16 0x9633),
+(_utf16 0x6C889633), (_utf16 0x5F1E9633);
+SELECT HEX(CONVERT(a USING utf32)), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, HEX(a);
+HEX(CONVERT(a USING utf32))	HEX(WEIGHT_STRING(a))
+00006C88	289C0000002000000002
+00005F1E	848C0000002000000002
+00005F1E00009633	848CA41B000000200020000000020002
+00006C8800009633	848CA41BF645000000200020000000020002
+00009633	A41B0000002000000002
+DROP TABLE t1;
+CREATE TABLE t1(a VARCHAR(10), b VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0xF902, _utf16 0x2F9E), (_utf16 0xF907, _utf16 0x2FD4),
+(_utf16 0xF908, _utf16 0x2FD4), (_utf16 0xF9D1, _utf16 0x3285);
+SELECT HEX(CONVERT(a USING utf16)) AS a_u16, HEX(CONVERT(b USING utf16)) AS b_u16, a = b FROM t1;
+a_u16	b_u16	a = b
+F902	2F9E	0
+F907	2FD4	0
+F908	2FD4	0
+F9D1	3285	0
+DROP TABLE t1;
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x1EC2), (_utf16 0x1EC3), (_utf16 0x1EC5), (_utf16 0x1EC0), (_utf16 0x1EC7), (_Utf16 0x1EBF);
+SELECT HEX(CONVERT(a USING utf16)) FROM t1 ORDER BY a;
+HEX(CONVERT(a USING utf16))
+1EC5
+1EC3
+1EC2
+1EC7
+1EBF
+1EC0
+DROP TABLE t1;
@@ -0,0 +1,8 @@
+--source suite/collations/include/unicode.inc
+select hex(convert(uc using utf32)),
+       hex(weight_string(convert(uc using utf8mb4) collate utf8mb4_zh_0900_as_cs)),
+       name from unicode
+where category in ('Lu','Ll','Lt','Lm','Lo','So')
+order by uc  collate utf8mb4_zh_0900_as_cs, cp;
+
+drop table if exists unicode;
@@ -243,6 +243,7 @@ utf8mb4_unicode_ci	utf8mb4	224		#	#	PAD SPACE
 utf8mb4_vietnamese_ci	utf8mb4	247		#	#	PAD SPACE
 utf8mb4_vi_0900_ai_ci	utf8mb4	277		#	#	NO PAD
 utf8mb4_vi_0900_as_cs	utf8mb4	300		#	#	NO PAD
+utf8mb4_zh_0900_as_cs	utf8mb4	308		#	#	NO PAD
 utf8_bin	utf8	83		#	#	PAD SPACE
 utf8_croatian_ci	utf8	213		#	#	PAD SPACE
 utf8_czech_ci	utf8	202		#	#	PAD SPACE
 
@@ -243,6 +243,7 @@ utf8mb4_unicode_ci	utf8mb4	224		#	#	PAD SPACE
 utf8mb4_vietnamese_ci	utf8mb4	247		#	#	PAD SPACE
 utf8mb4_vi_0900_ai_ci	utf8mb4	277		#	#	NO PAD
 utf8mb4_vi_0900_as_cs	utf8mb4	300		#	#	NO PAD
+utf8mb4_zh_0900_as_cs	utf8mb4	308		#	#	NO PAD
 utf8_bin	utf8	83		#	#	PAD SPACE
 utf8_croatian_ci	utf8	213		#	#	PAD SPACE
 utf8_czech_ci	utf8	202		#	#	PAD SPACE
 
@@ -51,6 +51,7 @@ utf8mb4_ja_0900_as_cs_ks	utf8mb4	304
 utf8mb4_0900_as_ci	utf8mb4	305
 utf8mb4_ru_0900_ai_ci	utf8mb4	306
 utf8mb4_ru_0900_as_cs	utf8mb4	307
+utf8mb4_zh_0900_as_cs	utf8mb4	308
 utf8mb4_test_ci	utf8mb4	326
 utf16_test_ci	utf16	327
 utf8mb4_test_400_ci	utf8mb4	328
 
@@ -505,6 +505,7 @@ utf8mb4_ja_0900_as_cs_ks	utf8mb4	304
 utf8mb4_0900_as_ci	utf8mb4	305
 utf8mb4_ru_0900_ai_ci	utf8mb4	306
 utf8mb4_ru_0900_as_cs	utf8mb4	307
+utf8mb4_zh_0900_as_cs	utf8mb4	308
 utf8mb4_test_ci	utf8mb4	326
 utf16_test_ci	utf16	327
 utf8mb4_test_400_ci	utf8mb4	328
 
@@ -264,3 +264,48 @@ DROP TABLE t1;
 --echo #
 --echo # End of 5.8 tests
 --echo #
+
+# Test that characters in different groups are reordered correctly. For example,
+# U+33E8 is in the core group, and U+2F17 is in the Han group, and 'A' is in
+# the latin group. According to the reorder rule defined by the CLDR for the
+# Chinese collation, we should get U+33E8 < U+2F17 < 'A'. This also tests how
+# different glyphs of one Han character sort according to the weight shift rule
+# defined by CLDR. For example, U+3197 (IDEOGRAPHIC ANNOTATION MIDDLE MARK) and
+# U+4E2D (CJK UNIFIED IDEOGRAPH-4E2D) are different glyphs of a Chinese
+# character which means 'middle' and the CLDR defines "U+4E2D <<< U+3197".
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x2E87), (_utf16 0x2E8D), (_utf16 0x2F17),
+(_utf16 0x3038), (_utf16 0x24B6), (_utf32 0x1F150), (_utf16 0x4E2D),
+(_utf16 0x3197), (_utf32 0x1F22D), ('A'), ('a'), ('Z'), ('z'),
+(_utf16 0x3082), (_utf16 0x30E2), (_utf16 0x2E31), (_utf16 0x33E8),
+(_utf32 0x1F229), (_utf32 0x1F241), (_utf16 0xFA56);
+SELECT HEX(CONVERT(a USING utf32)), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, HEX(a);
+DROP TABLE t1;
+
+# Test how contractions of Han characters sort. For example, U+6C88 and
+# U+5F1E are different characters, and U+6C88 < U+5F1E. But the strings
+# U+6C88U+9633 and U+5F1EU+9633 mean the same thing. In such a contraction case,
+# U+5F1EU+9633 < U+6C88U+9633.
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x6C88), (_utf16 0x5F1E), (_utf16 0x9633),
+(_utf16 0x6C889633), (_utf16 0x5F1E9633);
+SELECT HEX(CONVERT(a USING utf32)), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a, HEX(a);
+DROP TABLE t1;
+
+# This tests how different glyphs of one Han character sort. For example,
+# U+2F9E (KANGXI RADICAL CART) and U+F902 (CJK COMPATIBILITY IDEOGRAPH-F902)
+# are different glyphs of the Chinese character which means 'cart'.
+CREATE TABLE t1(a VARCHAR(10), b VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0xF902, _utf16 0x2F9E), (_utf16 0xF907, _utf16 0x2FD4),
+(_utf16 0xF908, _utf16 0x2FD4), (_utf16 0xF9D1, _utf16 0x3285);
+SELECT HEX(CONVERT(a USING utf16)) AS a_u16, HEX(CONVERT(b USING utf16)) AS b_u16, a = b FROM t1;
+DROP TABLE t1;
+
+# CLDR defines some weight shift rules for Chinese Bopomofo characters.
+# Bopomofo is a group of latin characters used to illustrate how a Han character
+# is pronounced. For example, 'e' is one of the Bopomofo characters. This tests
+# how accented latin characters which are not in the Bopomofo group are sorted.
+CREATE TABLE t1(a VARCHAR(10)) COLLATE utf8mb4_zh_0900_as_cs;
+INSERT INTO t1 VALUES(_utf16 0x1EC2), (_utf16 0x1EC3), (_utf16 0x1EC5), (_utf16 0x1EC0), (_utf16 0x1EC7), (_Utf16 0x1EBF);
+SELECT HEX(CONVERT(a USING utf16)) FROM t1 ORDER BY a;
+DROP TABLE t1;
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License, version 2.0,
@@ -248,6 +248,7 @@ extern CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs;
 extern CHARSET_INFO my_charset_utf8mb4_ja_0900_as_cs_ks;
 extern CHARSET_INFO my_charset_utf8mb4_0900_as_ci;
 extern CHARSET_INFO my_charset_utf8mb4_ru_0900_as_cs;
+extern CHARSET_INFO my_charset_utf8mb4_zh_0900_as_cs;
 
 extern CHARSET_INFO my_charset_gb18030_unicode_520_ci;
 
@@ -425,6 +426,7 @@ bool init_compiled_charsets(myf flags MY_ATTRIBUTE((unused))) {
   add_compiled_collation(&my_charset_utf8mb4_ja_0900_as_cs_ks);
   add_compiled_collation(&my_charset_utf8mb4_0900_as_ci);
   add_compiled_collation(&my_charset_utf8mb4_ru_0900_as_cs);
+  add_compiled_collation(&my_charset_utf8mb4_zh_0900_as_cs);
 
   add_compiled_collation(&my_charset_utf16_general_ci);
   add_compiled_collation(&my_charset_utf16_bin);
 
@@ -134,10 +134,11 @@ const char *globerrs[GLOBERRS] = {
     "Invalid decimal value for option '%s'.",
     "%s.",
     "Failed to reset before a primary ignorable character %s.",
-    "Failed to reset before a territory ignorable character %s.",
+    "Failed to reset before a tertiary ignorable character %s.",
     "Shift character out of range: %s.",
     "Reset character out of range: %s.",
-    "Unknown LDML tag: '%.*s'."};
+    "Unknown LDML tag: '%.*s'.",
+    "Failed to reset before a secondary ignorable character %s."};
 
 /*
  We cannot call my_error/my_printf_error here in this function.
 
@@ -1,4 +1,4 @@
-# Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2018, Oracle and/or its affiliates. All rights reserved.
 # 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License, version 2.0,
@@ -67,11 +67,32 @@ IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND
   ADD_COMPILE_FLAGS(dtoa.cc COMPILE_FLAGS "-fno-strict-aliasing")
 ENDIF()
 
+MYSQL_ADD_EXECUTABLE(uca9dump uca9-dump.cc SKIP_INSTALL)
+
+SET(ZH_HANS_SRC_FILE ${CMAKE_SOURCE_DIR}/strings/lang_data/zh_hans.txt)
+SET(ZH_HANS_DST_FILE ${CMAKE_BINARY_DIR}/strings/uca900_zh_tbls.cc)
+SET(JA_HANS_SRC_FILE ${CMAKE_SOURCE_DIR}/strings/lang_data/ja_hans.txt)
+SET(JA_HANS_DST_FILE ${CMAKE_BINARY_DIR}/strings/uca900_ja_tbls.cc)
+ADD_CUSTOM_COMMAND(OUTPUT ${ZH_HANS_DST_FILE}
+                          ${JA_HANS_DST_FILE}
+                   COMMAND uca9dump zh
+                     --in_file=${ZH_HANS_SRC_FILE}
+                     --out_file=${ZH_HANS_DST_FILE}
+                   COMMAND uca9dump ja
+                     --in_file=${JA_HANS_SRC_FILE}
+                     --out_file=${JA_HANS_DST_FILE}
+                   DEPENDS uca9dump ${ZH_HANS_SRC_FILE} ${JA_HANS_SRC_FILE}
+                  )
+
+SET_SOURCE_FILES_PROPERTIES(
+  ${JA_HANS_DST_FILE} ${ZH_HANS_DST_FILE}
+  PROPERTIES GENERATED TRUE
+  )
+
+LIST(APPEND STRINGS_SOURCES ${JA_HANS_DST_FILE} ${ZH_HANS_DST_FILE})
 # Avoid dependencies on perschema data defined in mysys
 ADD_DEFINITIONS(-DDISABLE_MYSQL_THREAD_H)
 ADD_CONVENIENCE_LIBRARY(strings ${STRINGS_SOURCES})
 
 MYSQL_ADD_EXECUTABLE(conf_to_src conf_to_src.cc SKIP_INSTALL)
 TARGET_LINK_LIBRARIES(conf_to_src strings)
-
-MYSQL_ADD_EXECUTABLE(uca9dump uca9-dump.cc SKIP_INSTALL)