From 4040079df1e3c73566386dc3ad8413d17469740b Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Sun, 22 Nov 2020 12:26:24 +0100
Subject: [PATCH 1/8] rename base64_cipher.py to base64_encoding.py

---
 ciphers/{base64_cipher.py => base64_encoding.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename ciphers/{base64_cipher.py => base64_encoding.py} (100%)

diff --git a/ciphers/base64_cipher.py b/ciphers/base64_encoding.py
similarity index 100%
rename from ciphers/base64_cipher.py
rename to ciphers/base64_encoding.py

From 2f9c0fc83273b99f8bb12cdc2cd12105fb2acf5e Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Sun, 22 Nov 2020 12:27:02 +0100
Subject: [PATCH 2/8] edit base64_encoding.py

---
 ciphers/base64_encoding.py | 173 +++++++++++++++++++++----------------
 1 file changed, 99 insertions(+), 74 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 1dbe74a20fe7..46464a76c7eb 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -1,89 +1,114 @@
-def encode_base64(text: str) -> str:
-    r"""
-    >>> encode_base64('WELCOME to base64 encoding 😁')
-    'V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ=='
-    >>> encode_base64('AÅᐃ𐀏🤓')
-    'QcOF4ZCD8JCAj/CfpJM='
-    >>> encode_base64('A'*60)
-    'QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFB\r\nQUFB'
+# Import the official implementation to check if ours is correct
+from base64 import b64encode, b64decode
+
+B64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+
+
+def base64_encode(data: bytes) -> bytes:
+    """Encodes data according to RFC4648.
+
+    The data is first transformed to binary and appended with binary digits so that its
+    length becomes a multiple of 6, then each 6 binary digits will match a character in
+    the B64_CHARSET string. The number of appended binary digits later determines
+    how many "=" signs should be added as padding.
+    For every 2 binary digits added, one "=" sign is added to the output.
+    We can add any binary digits to make it a multiple of 6. For instance, consider the
+    following example:
+    "AA" -> 0100000101000001 -> 010000 010100 0001
+    As can be seen above, 2 more binary digits should be added, so there are 4
+    possibilities here: 00, 01, 10 or 11.
+    That being said, Base64 encoding can be used in Steganography to hide data in these
+    appended digits.
+
+    >>> a = b"This pull request is part of Hacktoberfest20!"
+    >>> b = b"https://tools.ietf.org/html/rfc4648"
+    >>> c = b"A"
+    >>> base64_encode(a) == b64encode(a)
+    True
+    >>> base64_encode(b) == b64encode(b)
+    True
+    >>> base64_encode(c) == b64encode(c)
+    True
     """
-    base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+    binary_stream = "".join(bin(char)[2:].zfill(8) for char in data)
 
-    byte_text = bytes(text, "utf-8")  # put text in bytes for unicode support
-    r = ""  # the result
-    c = -len(byte_text) % 3  # the length of padding
-    p = "=" * c  # the padding
-    s = byte_text + b"\x00" * c  # the text to encode
+    padding_needed = len(binary_stream) % 6 != 0
 
-    i = 0
-    while i < len(s):
-        if i > 0 and ((i / 3 * 4) % 76) == 0:
-            r = r + "\r\n"  # for unix newline, put "\n"
+    if padding_needed:
+        # The padding that will be added later
+        padding = b"=" * ((6 - len(binary_stream) % 6) // 2)
 
-        n = (s[i] << 16) + (s[i + 1] << 8) + s[i + 2]
-
-        n1 = (n >> 18) & 63
-        n2 = (n >> 12) & 63
-        n3 = (n >> 6) & 63
-        n4 = n & 63
+        # Append binary_stream with arbitrary binary digits (0's by default) to make its
+        # length a multiple of 6.
+        binary_stream += "0" * (6 - len(binary_stream) % 6)
+    else:
+        padding = b""
+
+    # Encode every 6 binary digits to their corresponding Base64 character
+    return (
+        "".join(
+            B64_CHARSET[int(binary_stream[index : index + 6], 2)]
+            for index in range(0, len(binary_stream), 6)
+        ).encode()
+        + padding
+    )
+
+
+def base64_decode(encoded_data: str) -> bytes:
+    """Decodes data according to RFC4648.
+
+    This does the reverse operation of base64_encode.
+    We first transform the encoded data back to a binary stream, take off the
+    previously appended binary digits according to the padding, at this point we
+    would have a binary stream whose length is multiple of 8, the last step is
+    to convert every 8 bits to a byte.
+
+    >>> a = "VGhpcyBwdWxsIHJlcXVlc3QgaXMgcGFydCBvZiBIYWNrdG9iZXJmZXN0MjAh"
+    >>> b = "aHR0cHM6Ly90b29scy5pZXRmLm9yZy9odG1sL3JmYzQ2NDg="
+    >>> c = "QQ=="
+    >>> base64_decode(a) == b64decode(a)
+    True
+    >>> base64_decode(b) == b64decode(b)
+    True
+    >>> base64_decode(c) == b64decode(c)
+    True
+    """
+    padding = encoded_data.count("=")
 
-        r += base64_chars[n1] + base64_chars[n2] + base64_chars[n3] + base64_chars[n4]
-        i += 3
+    # Check if the encoded string contains non base64 characters
+    if padding:
+        assert all(
+            char in B64_CHARSET for char in encoded_data[:-padding]
+        ), "Invalid base64 character(s) found."
+    else:
+        assert all(
+            char in B64_CHARSET for char in encoded_data
+        ), "Invalid base64 character(s) found."
 
-    return r[0 : len(r) - len(p)] + p
+    # Check the padding
+    assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding."
 
+    if padding:
+        # Remove padding if there is one
+        encoded_data = encoded_data[:-padding]
 
-def decode_base64(text: str) -> str:
-    r"""
-    >>> decode_base64('V0VMQ09NRSB0byBiYXNlNjQgZW5jb2Rpbmcg8J+YgQ==')
-    'WELCOME to base64 encoding 😁'
-    >>> decode_base64('QcOF4ZCD8JCAj/CfpJM=')
-    'AÅᐃ𐀏🤓'
-    >>> decode_base64("QUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUF"
-    ...               "BQUFBQUFBQUFB\r\nQUFB")
-    'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
-    """
-    base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
-    s = ""
-
-    for i in text:
-        if i in base64_chars:
-            s += i
-            c = ""
-        else:
-            if i == "=":
-                c += "="
-
-    p = ""
-    if c == "=":
-        p = "A"
+        binary_stream = "".join(
+            bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data
+        )[: -padding * 2]
     else:
-        if c == "==":
-            p = "AA"
-
-    r = b""
-    s = s + p
-
-    i = 0
-    while i < len(s):
-        n = (
-            (base64_chars.index(s[i]) << 18)
-            + (base64_chars.index(s[i + 1]) << 12)
-            + (base64_chars.index(s[i + 2]) << 6)
-            + base64_chars.index(s[i + 3])
+        binary_stream = "".join(
+            bin(B64_CHARSET.index(char))[2:].zfill(6) for char in encoded_data
         )
 
-        r += bytes([(n >> 16) & 255]) + bytes([(n >> 8) & 255]) + bytes([n & 255])
+    data = [
+        int(binary_stream[index : index + 8], 2)
+        for index in range(0, len(binary_stream), 8)
+    ]
 
-        i += 4
-
-    return str(r[0 : len(r) - len(p)], "utf-8")
-
-
-def main():
-    print(encode_base64("WELCOME to base64 encoding 😁"))
-    print(decode_base64(encode_base64("WELCOME to base64 encoding 😁")))
+    return bytes(data)
 
 
 if __name__ == "__main__":
-    main()
+    import doctest
+
+    doctest.testmod()

From f25693a0b113501c61b2ccb91eb1e4f07e3286dd Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Sun, 22 Nov 2020 12:59:41 +0100
Subject: [PATCH 3/8] import necessary modules inside doctests

---
 ciphers/base64_encoding.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 46464a76c7eb..7e80668fa449 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -1,6 +1,3 @@
-# Import the official implementation to check if ours is correct
-from base64 import b64encode, b64decode
-
 B64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
 
 
@@ -20,6 +17,7 @@ def base64_encode(data: bytes) -> bytes:
     That being said, Base64 encoding can be used in Steganography to hide data in these
     appended digits.
 
+    >>> from base64 import b64encode
     >>> a = b"This pull request is part of Hacktoberfest20!"
     >>> b = b"https://tools.ietf.org/html/rfc4648"
     >>> c = b"A"
@@ -63,6 +61,7 @@ def base64_decode(encoded_data: str) -> bytes:
     would have a binary stream whose length is multiple of 8, the last step is
     to convert every 8 bits to a byte.
 
+    >>> from base64 import b64decode
     >>> a = "VGhpcyBwdWxsIHJlcXVlc3QgaXMgcGFydCBvZiBIYWNrdG9iZXJmZXN0MjAh"
     >>> b = "aHR0cHM6Ly90b29scy5pZXRmLm9yZy9odG1sL3JmYzQ2NDg="
     >>> c = "QQ=="

From 62f053d3cf6b6884fb18ce79d308e0a7e30d686c Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Wed, 25 Nov 2020 11:37:34 +0100
Subject: [PATCH 4/8] make it behave like the official implementation

---
 ciphers/base64_encoding.py | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 7e80668fa449..fb6ea3d85925 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -27,8 +27,18 @@ def base64_encode(data: bytes) -> bytes:
     True
     >>> base64_encode(c) == b64encode(c)
     True
+    >>> base64_encode("abc")
+    Traceback (most recent call last):
+      ...
+    TypeError: a bytes-like object is required, not 'str'
     """
-    binary_stream = "".join(bin(char)[2:].zfill(8) for char in data)
+    # Make sure the supplied data is a bytes-like object
+    if not isinstance(data, bytes):
+        raise TypeError(
+            "a bytes-like object is required, not '{}'".format(data.__class__.__name__)
+        )
+
+    binary_stream = "".join(bin(byte)[2:].zfill(8) for byte in data)
 
     padding_needed = len(binary_stream) % 6 != 0
 
@@ -71,7 +81,27 @@ def base64_decode(encoded_data: str) -> bytes:
     True
     >>> base64_decode(c) == b64decode(c)
     True
+    >>> base64_decode("abc")
+    Traceback (most recent call last):
+      ...
+    AssertionError: Incorrect padding
     """
+    # Make sure encoded_data is either a string or a bytes-like object
+    if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str):
+        raise TypeError(
+            "argument should be a bytes-like object or ASCII string, not '{}'".format(
+                encoded_data.__class__.__name__
+            )
+        )
+
+    # In case encoded_data is a bytes-like object, make sure it contains only
+    # ASCII characters so we convert it to a string object
+    if isinstance(encoded_data, bytes):
+        try:
+            encoded_data = encoded_data.decode("utf-8")
+        except UnicodeDecodeError:
+            raise ValueError("base64 encoded data should only contain ASCII characters")
+
     padding = encoded_data.count("=")
 
     # Check if the encoded string contains non base64 characters
@@ -85,7 +115,7 @@ def base64_decode(encoded_data: str) -> bytes:
         ), "Invalid base64 character(s) found."
 
     # Check the padding
-    assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding."
+    assert len(encoded_data) % 4 == 0 and padding < 3, "Incorrect padding"
 
     if padding:
         # Remove padding if there is one

From 21cc76ab71a5ce6aa387b42a0c38c11f46282e05 Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Wed, 25 Nov 2020 11:47:11 +0100
Subject: [PATCH 5/8] replace format with f-string where possible

---
 ciphers/base64_encoding.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index fb6ea3d85925..9feaacfdc082 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -35,7 +35,7 @@ def base64_encode(data: bytes) -> bytes:
     # Make sure the supplied data is a bytes-like object
     if not isinstance(data, bytes):
         raise TypeError(
-            "a bytes-like object is required, not '{}'".format(data.__class__.__name__)
+            f"a bytes-like object is required, not '{data.__class__.__name__}'"
         )
 
     binary_stream = "".join(bin(byte)[2:].zfill(8) for byte in data)

From 1862289beae09df129e42058f4dcc3d148c7c316 Mon Sep 17 00:00:00 2001
From: Hafidh <32499116+hfz1337@users.noreply.github.com>
Date: Wed, 25 Nov 2020 12:43:15 +0100
Subject: [PATCH 6/8] replace format with f-string

Co-authored-by: Christian Clauss <cclauss@me.com>
---
 ciphers/base64_encoding.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 9feaacfdc082..d5f185c928de 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -89,8 +89,8 @@ def base64_decode(encoded_data: str) -> bytes:
     # Make sure encoded_data is either a string or a bytes-like object
     if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str):
         raise TypeError(
-            "argument should be a bytes-like object or ASCII string, not '{}'".format(
-                encoded_data.__class__.__name__
+            "argument should be a bytes-like object or ASCII string, not "
+            f"'{encoded_data.__class__.__name__}'"
             )
         )
 

From 6c1b5177c2e61f7efa321d331fbcc65b811c5cd2 Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Wed, 25 Nov 2020 12:56:25 +0100
Subject: [PATCH 7/8] fix: syntax error due to closing parenthesis

---
 ciphers/base64_encoding.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 9feaacfdc082..32cf16786a6a 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -89,10 +89,9 @@ def base64_decode(encoded_data: str) -> bytes:
     # Make sure encoded_data is either a string or a bytes-like object
     if not isinstance(encoded_data, bytes) and not isinstance(encoded_data, str):
         raise TypeError(
-            "argument should be a bytes-like object or ASCII string, not '{}'".format(
-                encoded_data.__class__.__name__
+            "argument should be a bytes-like object or ASCII string, not "
+            f"'{encoded_data.__class__.__name__}'"
             )
-        )
 
     # In case encoded_data is a bytes-like object, make sure it contains only
     # ASCII characters so we convert it to a string object
@@ -140,4 +139,4 @@ def base64_decode(encoded_data: str) -> bytes:
 if __name__ == "__main__":
     import doctest
 
-    doctest.testmod()
+    doctest.testmod()
\ No newline at end of file

From 13d988173b5a8dd25e7f2aa3ada37f4e951212fd Mon Sep 17 00:00:00 2001
From: hfz1337 <gh_zouahi@esi.dz>
Date: Wed, 25 Nov 2020 13:00:42 +0100
Subject: [PATCH 8/8] reformat code

---
 ciphers/base64_encoding.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ciphers/base64_encoding.py b/ciphers/base64_encoding.py
index 32cf16786a6a..634afcb89873 100644
--- a/ciphers/base64_encoding.py
+++ b/ciphers/base64_encoding.py
@@ -91,7 +91,7 @@ def base64_decode(encoded_data: str) -> bytes:
         raise TypeError(
             "argument should be a bytes-like object or ASCII string, not "
             f"'{encoded_data.__class__.__name__}'"
-            )
+        )
 
     # In case encoded_data is a bytes-like object, make sure it contains only
     # ASCII characters so we convert it to a string object
@@ -139,4 +139,4 @@ def base64_decode(encoded_data: str) -> bytes:
 if __name__ == "__main__":
     import doctest
 
-    doctest.testmod()
\ No newline at end of file
+    doctest.testmod()