# strings/camel_case_to_snake_case.py
#
# Module contents as they stand after both commits of the patch series
# (the regex-based first version followed by the regex-free rewrite).


def camel_to_snake_case(input_str: str) -> str:
    """
    Transforms a camelCase (or PascalCase) string to snake_case

    An underscore is inserted before every uppercase letter (which is then
    lowercased), between a digit and a following lowercase letter, and
    between a letter and a following digit. Every non-alphanumeric character
    is replaced by an underscore. A single leading underscore is removed
    from the result.

    Raises ValueError if ``input_str`` is not a string.

    >>> camel_to_snake_case("someRandomString")
    'some_random_string'

    >>> camel_to_snake_case("SomeRandomStr#ng")
    'some_random_str_ng'

    >>> camel_to_snake_case("123someRandom123String123")
    '123_some_random_123_string_123'

    >>> camel_to_snake_case("123SomeRandom123String123")
    '123_some_random_123_string_123'

    >>> camel_to_snake_case("")
    ''

    >>> camel_to_snake_case(123)
    Traceback (most recent call last):
        ...
    ValueError: Expected string as input, found <class 'int'>

    """

    # check for invalid input type
    if not isinstance(input_str, str):
        msg = f"Expected string as input, found {type(input_str)}"
        raise ValueError(msg)

    pieces = []

    # The first character has no predecessor. An empty string answers False
    # to both isdigit() and isalpha(), so neither "preceded by" rule can fire
    # at index 0 (indexing with [index - 1] would wrap to the last character).
    previous = ""

    for char in input_str:
        if char.isupper():
            pieces.append("_" + char.lower())

        # if char is lowercase but preceded by a digit:
        elif previous.isdigit() and char.islower():
            pieces.append("_" + char)

        # if char is a digit preceded by a letter:
        elif previous.isalpha() and char.isnumeric():
            pieces.append("_" + char.lower())

        # if char is not alphanumeric:
        elif not char.isalnum():
            pieces.append("_")

        else:
            pieces.append(char)

        previous = char

    snake_str = "".join(pieces)

    # remove leading underscore; startswith() is also safe for an empty result
    if snake_str.startswith("_"):
        snake_str = snake_str[1:]

    return snake_str


if __name__ == "__main__":
    from doctest import testmod

    testmod()