Skip to content

Commit 60562db

Browse files
fix: Two letter language code must be supported (#3258)
1 parent 7d30d8c commit 60562db

File tree

1 file changed

+65
-2
lines changed

1 file changed

+65
-2
lines changed

src/sagemaker/clarify.py

+65-2
Original file line numberDiff line numberDiff line change
@@ -512,68 +512,131 @@ class TextConfig:
512512
_SUPPORTED_GRANULARITIES = ["token", "sentence", "paragraph"]
513513
_SUPPORTED_LANGUAGES = [
514514
"chinese",
515+
"zh",
515516
"danish",
517+
"da",
516518
"dutch",
519+
"nl",
517520
"english",
521+
"en",
518522
"french",
523+
"fr",
519524
"german",
525+
"de",
520526
"greek",
527+
"el",
521528
"italian",
529+
"it",
522530
"japanese",
531+
"ja",
523532
"lithuanian",
533+
"lt",
524534
"multi-language",
535+
"xx",
525536
"norwegian bokmål",
537+
"nb",
526538
"polish",
539+
"pl",
527540
"portuguese",
541+
"pt",
528542
"romanian",
543+
"ro",
529544
"russian",
545+
"ru",
530546
"spanish",
547+
"es",
531548
"afrikaans",
549+
"af",
532550
"albanian",
551+
"sq",
533552
"arabic",
553+
"ar",
534554
"armenian",
555+
"hy",
535556
"basque",
557+
"eu",
536558
"bengali",
559+
"bn",
537560
"bulgarian",
561+
"bg",
538562
"catalan",
563+
"ca",
539564
"croatian",
565+
"hr",
540566
"czech",
567+
"cs",
541568
"estonian",
569+
"et",
542570
"finnish",
571+
"fi",
543572
"gujarati",
573+
"gu",
544574
"hebrew",
575+
"he",
545576
"hindi",
577+
"hi",
546578
"hungarian",
579+
"hu",
547580
"icelandic",
581+
"is",
548582
"indonesian",
583+
"id",
549584
"irish",
585+
"ga",
550586
"kannada",
587+
"kn",
551588
"kyrgyz",
589+
"ky",
552590
"latvian",
591+
"lv",
553592
"ligurian",
593+
"lij",
554594
"luxembourgish",
595+
"lb",
555596
"macedonian",
597+
"mk",
556598
"malayalam",
599+
"ml",
557600
"marathi",
601+
"mr",
558602
"nepali",
603+
"ne",
559604
"persian",
605+
"fa",
560606
"sanskrit",
607+
"sa",
561608
"serbian",
609+
"sr",
562610
"setswana",
611+
"tn",
563612
"sinhala",
613+
"si",
564614
"slovak",
615+
"sk",
565616
"slovenian",
617+
"sl",
566618
"swedish",
619+
"sv",
567620
"tagalog",
621+
"tl",
568622
"tamil",
623+
"ta",
569624
"tatar",
625+
"tt",
570626
"telugu",
627+
"te",
571628
"thai",
629+
"th",
572630
"turkish",
631+
"tr",
573632
"ukrainian",
633+
"uk",
574634
"urdu",
635+
"ur",
575636
"vietnamese",
637+
"vi",
576638
"yoruba",
639+
"yo",
577640
]
578641

579642
def __init__(
@@ -602,8 +665,8 @@ def __init__(
602665
``"persian"``, ``"sanskrit"``, ``"serbian"``, ``"setswana"``, ``"sinhala"``,
603666
``"slovak"``, ``"slovenian"``, ``"swedish"``, ``"tagalog"``, ``"tamil"``,
604667
``"tatar"``, ``"telugu"``, ``"thai"``, ``"turkish"``, ``"ukrainian"``, ``"urdu"``,
605-
``"vietnamese"``, ``"yoruba"``.
606-
Use ``"multi-language"`` for a mix of multiple languages.
668+
``"vietnamese"``, ``"yoruba"``. Use ``"multi-language"`` for a mix of multiple
669+
languages. The corresponding short language codes (e.g. ``"en"``, ``"lij"``, or ``"xx"`` for multi-language) are also accepted.
607670
608671
Raises:
609672
ValueError: when ``granularity`` is not in list of supported values

0 commit comments

Comments
 (0)