From fbac30429bebc79973bcaab34870da1fddc64edd Mon Sep 17 00:00:00 2001 From: Keerthan Vasist Date: Thu, 21 Jul 2022 09:14:26 -0700 Subject: [PATCH] fix: Two letter language code must be supported --- src/sagemaker/clarify.py | 67 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/src/sagemaker/clarify.py b/src/sagemaker/clarify.py index 873a87ca57..6590d30514 100644 --- a/src/sagemaker/clarify.py +++ b/src/sagemaker/clarify.py @@ -512,68 +512,131 @@ class TextConfig: _SUPPORTED_GRANULARITIES = ["token", "sentence", "paragraph"] _SUPPORTED_LANGUAGES = [ "chinese", + "zh", "danish", + "da", "dutch", + "nl", "english", + "en", "french", + "fr", "german", + "de", "greek", + "el", "italian", + "it", "japanese", + "ja", "lithuanian", + "lt", "multi-language", + "xx", "norwegian bokmål", + "nb", "polish", + "pl", "portuguese", + "pt", "romanian", + "ro", "russian", + "ru", "spanish", + "es", "afrikaans", + "af", "albanian", + "sq", "arabic", + "ar", "armenian", + "hy", "basque", + "eu", "bengali", + "bn", "bulgarian", + "bg", "catalan", + "ca", "croatian", + "hr", "czech", + "cs", "estonian", + "et", "finnish", + "fi", "gujarati", + "gu", "hebrew", + "he", "hindi", + "hi", "hungarian", + "hu", "icelandic", + "is", "indonesian", + "id", "irish", + "ga", "kannada", + "kn", "kyrgyz", + "ky", "latvian", + "lv", "ligurian", + "lij", "luxembourgish", + "lb", "macedonian", + "mk", "malayalam", + "ml", "marathi", + "mr", "nepali", + "ne", "persian", + "fa", "sanskrit", + "sa", "serbian", + "sr", "setswana", + "tn", "sinhala", + "si", "slovak", + "sk", "slovenian", + "sl", "swedish", + "sv", "tagalog", + "tl", "tamil", + "ta", "tatar", + "tt", "telugu", + "te", "thai", + "th", "turkish", + "tr", "ukrainian", + "uk", "urdu", + "ur", "vietnamese", + "vi", "yoruba", + "yo", ] def __init__( @@ -602,8 +665,8 @@ def __init__( ``"persian"``, ``"sanskrit"``, ``"serbian"``, ``"setswana"``, ``"sinhala"``, ``"slovak"``, ``"slovenian"``, 
``"swedish"``, ``"tagalog"``, ``"tamil"``, ``"tatar"``, ``"telugu"``, ``"thai"``, ``"turkish"``, ``"ukrainian"``, ``"urdu"``, - ``"vietnamese"``, ``"yoruba"``. - Use ``"multi-language"`` for a mix of multiple languages. + ``"vietnamese"``, ``"yoruba"``. Use ``"multi-language"`` for a mix of multiple + languages. The corresponding language codes (e.g. ``"en"``) are also accepted. Raises: ValueError: when ``granularity`` is not in list of supported values