From 4040079df1e3c73566386dc3ad8413d17469740b Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Sun, 22 Nov 2020 12:26:24 +0100 Subject: [PATCH 1/8] rename base64_cipher.py to base64_encoding.py --- ciphers/{base64_cipher.py => base64_encoding.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename ciphers/{base64_cipher.py => base64_encoding.py} (100%) diff --git a/ciphers/base64_cipher.py b/ciphers/base64_encoding.py similarity index 100% rename from ciphers/base64_cipher.py rename to ciphers/base64_encoding.py From 2f9c0fc83273b99f8bb12cdc2cd12105fb2acf5e Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Sun, 22 Nov 2020 12:27:02 +0100 Subject: [PATCH 2/8] edit base64_encoding.py --- ciphers/base64_encoding.py | 173 +++++++++++++++++++++---------------- 1 file changed, 99 insertions(+), 74 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 1dbe74a20fe7..46464a76c7eb 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -1,89 +1,114 @@ -def encode_base64(text: str) -> str: - r""" - >>> encode_base64('WELCOME to base64 encoding 😁') - 'V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ==' - >>> encode_base64('AĆ…įƒš€šŸ¤“') - 'QcOF4ZCD8JCAj/CfpJM=' - >>> encode_base64('A'*60) - 'QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB' +# Import the official implementation to check if ours is correct +from base64 import b64encode, b64decode + +B64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + + +def base64_encode(data: bytes) -> bytes: + """Encodes data according to RFC4648. + + The data is first transformed to binary and appended with binary digits so that its + length becomes a multiple of 6, then each 6 binary digits will match a character in + the B64_CHARSET string. The number of appended binary digits would later determine + how many "=" sign should be added, the padding. + For every 2 binary digits added, a "=" sign is added in the output. + We can add any binary digits to make it a multiple of 6, for instance, consider the + following example: + "AA" -> 0010100100101001 -> 001010 010010 1001 + As can be seen above, 2 more binary digits should be added, so there's 4 + possibilities here: 00, 01, 10 or 11. + That being said, Base64 encoding can be used in Steganography to hide data in these + appended digits. + + >>> a = b"This pull request is part of Hacktoberfest20!" + >>> b = b"https://tools.ietf.org/html/rfc4648" + >>> c = b"A" + >>> base64_encode(a) == b64encode(a) + True + >>> base64_encode(b) == b64encode(b) + True + >>> base64_encode(c) == b64encode(c) + True """ - base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" + binary_stream = "".join(bin(char)[2:].zfill(8) for char in data) - byte_text = bytes(text, "utf-8") # put text in bytes for unicode support - r = "" # the result - c = -len(byte_text) % 3 # the length of padding - p = "=" * c # the padding - s = byte_text + b"\x00" * c # the text to encode + padding_needed = len(binary_stream) % 6 != 0 - i = 0 - while i < len(s): - if i > 0 and ((i / 3 * 4) % 76) == 0: - r = r + "\r\n" # for unix newline, put "\n" + if padding_needed: + # The padding that will be added later + padding = b"=" * ((6 - len(binary_stream) % 6) // 2) - n = (s[i] << 16) + (s[i + 1] << 8) + s[i + 2] - - n1 = (n >> 18) & 63 - n2 = (n >> 12) & 63 - n3 = (n >> 6) & 63 - n4 = n & 63 + # Append binary_stream with arbitrary binary digits (0's by default) to make its + # length a multiple of 6. + binary_stream += "0" * (6 - len(binary_stream) % 6) + else: + padding = b"" + + # Encode every 6 binary digits to their corresponding Base64 character + return ( + "".join( + B64_CHARSET[int(binary_stream[index : index + 6], 2)] + for index in range(0, len(binary_stream), 6) + ).encode() + + padding + ) + + +def base64_decode(encoded_data: str) -> bytes: + """Decodes data according to RFC4648. + + This does the reverse operation of base64_encode. + We first transform the encoded data back to a binary stream, take off the + previously appended binary digits according to the padding, at this point we + would have a binary stream whose length is multiple of 8, the last step is + to convert every 8 bits to a byte. + + >>> a = "VGhpcyBwdWxsIHJlcXVlc3QgaXMgcGFydCBvZiBIYWNrdG9iZXJmZXN0MjAh" + >>> b = "aHR0cHM6Ly90b29scy5pZXRmLm9yZy9odG1sL3JmYzQ2NDg=" + >>> c = "QQ==" + >>> base64_decode(a) == b64decode(a) + True + >>> base64_decode(b) == b64decode(b) + True + >>> base64_decode(c) == b64decode(c) + True + """ + padding = encoded_data.count("=") - r += base64_chars[n1] + base64_chars[n2] + base64_chars[n3] + base64_chars[n4] - i += 3 + # Check if the encoded string contains non base64 characters + if padding: + assert all( + char in B64_CHARSET for char in encoded_data[:-padding] + ), "Invalid base64 character(s) found." + else: + assert all( + char in B64_CHARSET for char in encoded_data + ), "Invalid base64 character(s) found." - return r[0 : len(r) - len(p)] + p + # Check the padding + assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding." + if padding: + # Remove padding if there is one + encoded_data = encoded_data[:-padding] -def decode_base64(text: str) -> str: - r""" - >>> decode_base64('V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ==') - 'WELCOME to base64 encoding 😁' - >>> decode_base64('QcOF4ZCD8JCAj/CfpJM=') - 'AĆ…įƒš€šŸ¤“' - >>> decode_base64("QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUF" - ... "BQUFBQUFBQUFB\r\nQUFB") - 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' - """ - base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - s = "" - - for i in text: - if i in base64_chars: - s += i - c = "" - else: - if i == "=": - c += "=" - - p = "" - if c == "=": - p = "A" + binary_stream = "".join( + bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data + )[: -padding * 2] else: - if c == "==": - p = "AA" - - r = b"" - s = s + p - - i = 0 - while i < len(s): - n = ( - (base64_chars.index(s[i]) << 18) - + (base64_chars.index(s[i + 1]) << 12) - + (base64_chars.index(s[i + 2]) << 6) - + base64_chars.index(s[i + 3]) + binary_stream = "".join( + bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data ) - r += bytes([(n >> 16) & 255]) + bytes([(n >> 8) & 255]) + bytes([n & 255]) + data = [ + int(binary_stream[index : index + 8], 2) + for index in range(0, len(binary_stream), 8) + ] - i += 4 - - return str(r[0 : len(r) - len(p)], "utf-8") - - -def main(): - print(encode_base64("WELCOME to base64 encoding 😁")) - print(decode_base64(encode_base64("WELCOME to base64 encoding 😁"))) + return bytes(data) if __name__ == "__main__": - main() + import doctest + + doctest.testmod() From f25693a0b113501c61b2ccb91eb1e4f07e3286dd Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Sun, 22 Nov 2020 12:59:41 +0100 Subject: [PATCH 3/8] import necessary modules inside doctests --- ciphers/base64_encoding.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 46464a76c7eb..7e80668fa449 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -1,6 +1,3 @@ -# Import the official implementation to check if ours is correct -from base64 import b64encode, b64decode - B64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" @@ -20,6 +17,7 @@ def base64_encode(data: bytes) -> bytes: That being said, Base64 encoding can be used in Steganography to hide data in these appended digits. + >>> from base64 import b64encode >>> a = b"This pull request is part of Hacktoberfest20!" >>> b = b"https://tools.ietf.org/html/rfc4648" >>> c = b"A" @@ -63,6 +61,7 @@ def base64_decode(encoded_data: str) -> bytes: would have a binary stream whose length is multiple of 8, the last step is to convert every 8 bits to a byte. + >>> from base64 import b64decode >>> a = "VGhpcyBwdWxsIHJlcXVlc3QgaXMgcGFydCBvZiBIYWNrdG9iZXJmZXN0MjAh" >>> b = "aHR0cHM6Ly90b29scy5pZXRmLm9yZy9odG1sL3JmYzQ2NDg=" >>> c = "QQ==" From 62f053d3cf6b6884fb18ce79d308e0a7e30d686c Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Wed, 25 Nov 2020 11:37:34 +0100 Subject: [PATCH 4/8] make it behave like the official implementation --- ciphers/base64_encoding.py | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 7e80668fa449..fb6ea3d85925 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -27,8 +27,18 @@ def base64_encode(data: bytes) -> bytes: True >>> base64_encode(c) == b64encode(c) True + >>> base64_encode("abc") + Traceback (most recent call last): + ... + TypeError: a bytes-like object is required, not 'str' """ - binary_stream = "".join(bin(char)[2:].zfill(8) for char in data) + # Make sure the supplied data is a bytes-like object + if not isinstance(data, bytes): + raise TypeError( + "a bytes-like object is required, not '{}'".format(data.__class__.__name__) + ) + + binary_stream = "".join(bin(byte)[2:].zfill(8) for byte in data) padding_needed = len(binary_stream) % 6 != 0 @@ -71,7 +81,27 @@ def base64_decode(encoded_data: str) -> bytes: True >>> base64_decode(c) == b64decode(c) True + >>> base64_decode("abc") + Traceback (most recent call last): + ... + AssertionError: Incorrect padding """ + # Make sure encoded_data is either a string or a bytes-like object + if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str): + raise TypeError( + "argument should be a bytes-like object or ASCII string, not '{}'".format( + encoded_data.__class__.__name__ + ) + ) + + # In case encoded_data is a bytes-like object, make sure it contains only + # ASCII characters so we convert it to a string object + if isinstance(encoded_data, bytes): + try: + encoded_data = encoded_data.decode("utf-8") + except UnicodeDecodeError: + raise ValueError("base64 encoded data should only contain ASCII characters") + padding = encoded_data.count("=") # Check if the encoded string contains non base64 characters @@ -85,7 +115,7 @@ def base64_decode(encoded_data: str) -> bytes: ), "Invalid base64 character(s) found." # Check the padding - assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding." + assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding" if padding: # Remove padding if there is one From 21cc76ab71a5ce6aa387b42a0c38c11f46282e05 Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Wed, 25 Nov 2020 11:47:11 +0100 Subject: [PATCH 5/8] replace format with f-string where possible --- ciphers/base64_encoding.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index fb6ea3d85925..9feaacfdc082 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -35,7 +35,7 @@ def base64_encode(data: bytes) -> bytes: # Make sure the supplied data is a bytes-like object if not isinstance(data, bytes): raise TypeError( - "a bytes-like object is required, not '{}'".format(data.__class__.__name__) + f"a bytes-like object is required, not '{data.__class__.__name__}'" ) binary_stream = "".join(bin(byte)[2:].zfill(8) for byte in data) From 1862289beae09df129e42058f4dcc3d148c7c316 Mon Sep 17 00:00:00 2001 From: Hafidh <32499116+hfz1337@users.noreply.github.com> Date: Wed, 25 Nov 2020 12:43:15 +0100 Subject: [PATCH 6/8] replace format with f-string Co-authored-by: Christian Clauss --- ciphers/base64_encoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 9feaacfdc082..d5f185c928de 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -89,8 +89,8 @@ def base64_decode(encoded_data: str) -> bytes: # Make sure encoded_data is either a string or a bytes-like object if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str): raise TypeError( - "argument should be a bytes-like object or ASCII string, not '{}'".format( - encoded_data.__class__.__name__ + "argument should be a bytes-like object or ASCII string, not " + f"'{encoded_data.__class__.__name__}'" ) ) From 6c1b5177c2e61f7efa321d331fbcc65b811c5cd2 Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Wed, 25 Nov 2020 12:56:25 +0100 Subject: [PATCH 7/8] fix: syntax error due to closing parenthese --- ciphers/base64_encoding.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 9feaacfdc082..32cf16786a6a 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -89,10 +89,9 @@ def base64_decode(encoded_data: str) -> bytes: # Make sure encoded_data is either a string or a bytes-like object if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str): raise TypeError( - "argument should be a bytes-like object or ASCII string, not '{}'".format( - encoded_data.__class__.__name__ + "argument should be a bytes-like object or ASCII string, not " + f"'{encoded_data.__class__.__name__}'" ) - ) # In case encoded_data is a bytes-like object, make sure it contains only # ASCII characters so we convert it to a string object @@ -140,4 +139,4 @@ def base64_decode(encoded_data: str) -> bytes: if __name__ == "__main__": import doctest - doctest.testmod() + doctest.testmod() \ No newline at end of file From 13d988173b5a8dd25e7f2aa3ada37f4e951212fd Mon Sep 17 00:00:00 2001 From: hfz1337 Date: Wed, 25 Nov 2020 13:00:42 +0100 Subject: [PATCH 8/8] reformat code --- ciphers/base64_encoding.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py index 32cf16786a6a..634afcb89873 100644 --- a/ciphers/base64_encoding.py +++ b/ciphers/base64_encoding.py @@ -91,7 +91,7 @@ def base64_decode(encoded_data: str) -> bytes: raise TypeError( "argument should be a bytes-like object or ASCII string, not " f"'{encoded_data.__class__.__name__}'" - ) + ) # In case encoded_data is a bytes-like object, make sure it contains only # ASCII characters so we convert it to a string object @@ -139,4 +139,4 @@ def base64_decode(encoded_data: str) -> bytes: if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod()