Skip to content

Commit 046c958

Browse files
Totktonadaligurio
andcommitted
python3: decouple bytes and strings
In Python 2 default string type (`<str>`) is a binary string, non-unicode. We receive data from a socket, from a Popen stream, from a file as a string and operate on those strings without any conversions. Python 3 draws a line here. We usually operate on unicode strings in the code (because this is the default string type, `<str>`), but receive bytes from a socket and a Popen stream. We can use unicode or binary streams for files (unicode by default[^1]). This commit decouples bytes and strings. In most cases it means that we convert data from bytes to a string after receiving from a socket / Popen stream and convert it back from a string to bytes before writting to a socket. Those operations are no-op on Python 2. So, the general rule for our APIs is to accept and return `<str>` disregarding Python version. Not `<bytes>`, not `<unicode>`. The only non-trivial change is around `FilteredStream` and writes into `sys.stdout`. The `FilteredStream` instance replaces `sys.stdout` during execution of a test, so it should follow the usual convention and accept `<str>` in the `write()` method. This is both intuitive and necessary, because `*.py` tests rely on `print('bla bla')` to write into a result file. However the stream should also accept `<bytes>`, because we have a unit test (`unit/json.test`), which produces a binary output, which does not conform UTF-8 encoding. The separate `write_bytes()` method was introduced for this sake. UnittestServer and AppServer write tests output as bytes directly, TarantoolServer rely on the usual string output. We also use bytes directly, when write from one stream to another one: in `app_server.py` for stderr (writting to a log file), in `tarantool_server.py` for log destination property (because it is the destination for Popen). [^1]: Technically it depends on a system locale, but, hey, does anyone see a non UTF-8 locale after the millennium? Part of #20 Co-authored-by: Sergey Bronnikov <[email protected]>
1 parent ca551e2 commit 046c958

7 files changed

+82
-35
lines changed

lib/admin_connection.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
import re
2828
import sys
2929

30+
from .utils import bytes_to_str
31+
from .utils import str_to_bytes
32+
3033
from .tarantool_connection import TarantoolConnection
3134
from .tarantool_connection import TarantoolPool
3235
from .tarantool_connection import TarantoolAsyncConnection
@@ -39,13 +42,13 @@ def get_handshake(sock, length=128, max_try=100):
3942
"""
4043
Correct way to get tarantool handshake
4144
"""
42-
result = ""
45+
result = b""
4346
i = 0
4447
while len(result) != length and i < max_try:
45-
result = "%s%s" % (result, sock.recv(length-len(result)))
48+
result = b"%s%s" % (result, sock.recv(length-len(result)))
4649
# max_try counter for tarantool/gh-1362
4750
i += 1
48-
return result
51+
return bytes_to_str(result)
4952

5053

5154
class AdminPool(TarantoolPool):
@@ -64,12 +67,12 @@ def _new_connection(self):
6467

6568
class ExecMixIn(object):
6669
def cmd(self, socket, cmd, silent):
67-
socket.sendall(cmd)
70+
socket.sendall(str_to_bytes(cmd))
6871

6972
bufsiz = 4096
7073
res = ""
7174
while True:
72-
buf = socket.recv(bufsiz)
75+
buf = bytes_to_str(socket.recv(bufsiz))
7376
if not buf:
7477
break
7578
res = res + buf

lib/app_server.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ def run_server(execs, cwd, server, logfile, retval):
4040
timer.start()
4141
stdout, stderr = server.process.communicate()
4242
timer.cancel()
43-
sys.stdout.write(stdout)
44-
with open(logfile, 'a') as f:
43+
sys.stdout.write_bytes(stdout)
44+
with open(logfile, 'ab') as f:
4545
f.write(stderr)
4646
retval['returncode'] = server.process.wait()
4747
server.process = None

lib/inspector.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
from .utils import find_port
1212
from .utils import prefix_each_line
13+
from .utils import bytes_to_str
14+
from .utils import str_to_bytes
1315
from .colorer import color_stdout
1416
from .colorer import color_log
1517
from .colorer import qa_notice
@@ -79,7 +81,7 @@ def readline(socket, delimiter='\n', size=4096):
7981

8082
while data:
8183
try:
82-
data = socket.recv(size)
84+
data = bytes_to_str(socket.recv(size))
8385
except IOError:
8486
# catch instance halt connection refused errors
8587
data = ''
@@ -121,7 +123,7 @@ def handle(self, socket, addr):
121123
color_log("DEBUG: test-run's response for [{}]\n{}\n".format(
122124
line, prefix_each_line(' | ', result)),
123125
schema='test-run command')
124-
socket.sendall(result)
126+
socket.sendall(str_to_bytes(result))
125127

126128
self.sem.release()
127129

lib/tarantool_server.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
from .utils import signame
4646
from .utils import warn_unix_socket
4747
from .utils import prefix_each_line
48+
from .utils import bytes_to_str
4849
from .test import TestRunGreenlet, TestExecutionError
4950

5051

@@ -97,8 +98,8 @@ def result_file_version(self):
9798
if not os.path.isfile(self.result):
9899
return self.RESULT_FILE_VERSION_DEFAULT
99100

100-
with open(self.result, 'r') as f:
101-
line = f.readline().rstrip('\n')
101+
with open(self.result, 'rb') as f:
102+
line = bytes_to_str(f.readline()).rstrip('\n')
102103

103104
# An empty line or EOF.
104105
if not line:
@@ -580,7 +581,7 @@ def _iproto(self, port):
580581
@property
581582
def log_des(self):
582583
if not hasattr(self, '_log_des'):
583-
self._log_des = open(self.logfile, 'a')
584+
self._log_des = open(self.logfile, 'ab')
584585
return self._log_des
585586

586587
@log_des.deleter
@@ -666,7 +667,7 @@ def __del__(self):
666667
@classmethod
667668
def version(cls):
668669
p = subprocess.Popen([cls.binary, "--version"], stdout=subprocess.PIPE)
669-
version = p.stdout.read().rstrip()
670+
version = bytes_to_str(p.stdout.read()).rstrip()
670671
p.wait()
671672
return version
672673

@@ -1165,7 +1166,7 @@ def test_option_get(self, option_list_str, silent=False):
11651166
cwd=self.vardir,
11661167
stdout=subprocess.PIPE,
11671168
stderr=subprocess.STDOUT).stdout.read()
1168-
return output
1169+
return bytes_to_str(output)
11691170

11701171
def test_option(self, option_list_str):
11711172
print(self.test_option_get(option_list_str))

lib/test.py

+22-20
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,14 @@
1010
import traceback
1111
from functools import partial
1212
from hashlib import md5
13-
14-
try:
15-
# Python 2
16-
from StringIO import StringIO
17-
except ImportError:
18-
# Python 3
19-
from io import StringIO
20-
2113
from . import Options
2214
from .colorer import color_stdout
2315
from .utils import non_empty_valgrind_logs
2416
from .utils import print_tail_n
2517
from .utils import print_unidiff as utils_print_unidiff
2618
from .utils import safe_makedirs
19+
from .utils import assert_bytes
20+
from .utils import str_to_bytes
2721

2822
from . import pytap13
2923

@@ -51,21 +45,17 @@ def __repr__(self):
5145
class FilteredStream:
5246
"""Helper class to filter .result file output"""
5347
def __init__(self, filename):
54-
#
55-
# always open the output stream in line-buffered mode,
56-
# to see partial results of a failed test
57-
#
58-
self.stream = open(filename, "w+", 1)
48+
self.stream = open(filename, "wb+")
5949
self.filters = []
6050
self.inspector = None
6151

62-
def write(self, fragment):
63-
"""Apply all filters, then write result to the undelrying stream.
64-
Do line-oriented filtering: the fragment doesn't have to represent
65-
just one line."""
66-
fragment_stream = StringIO(fragment)
52+
def write_bytes(self, fragment):
53+
""" The same as ``write()``, but accepts ``<bytes>`` as
54+
input.
55+
"""
56+
assert_bytes(fragment)
6757
skipped = False
68-
for line in fragment_stream:
58+
for line in fragment.splitlines(True):
6959
original_len = len(line.strip())
7060
for pattern, replacement in self.filters:
7161
line = re.sub(pattern, replacement, line)
@@ -76,8 +66,20 @@ def write(self, fragment):
7666
if not skipped:
7767
self.stream.write(line)
7868

69+
def write(self, fragment):
70+
""" Apply all filters, then write result to the underlying
71+
stream.
72+
73+
Do line-oriented filtering: the fragment doesn't have
74+
to represent just one line.
75+
76+
Accepts ``<str>`` as input, just like the standard
77+
``sys.stdout.write()``.
78+
"""
79+
self.write_bytes(str_to_bytes(fragment))
80+
7981
def push_filter(self, pattern, replacement):
80-
self.filters.append([pattern, replacement])
82+
self.filters.append([str_to_bytes(pattern), str_to_bytes(replacement)])
8183

8284
def pop_filter(self):
8385
self.filters.pop()

lib/unittest_server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def execute(self, server):
1818
server.current_test = self
1919
execs = server.prepare_args()
2020
proc = Popen(execs, cwd=server.vardir, stdout=PIPE, stderr=STDOUT)
21-
sys.stdout.write(proc.communicate()[0])
21+
sys.stdout.write_bytes(proc.communicate()[0])
2222

2323

2424
class UnittestServer(Server):

lib/utils.py

+39
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
# Useful for very coarse version differentiation.
2727
PY3 = sys.version_info[0] == 3
28+
PY2 = sys.version_info[0] == 2
2829

2930
if PY3:
3031
string_types = str,
@@ -315,3 +316,41 @@ def extract_schema_from_snapshot(snapshot_path):
315316
if res[0] == 'version':
316317
return res
317318
return None
319+
320+
321+
def assert_bytes(b):
322+
""" Ensure given value is <bytes>.
323+
"""
324+
if type(b) != bytes:
325+
raise ValueError('Internal error: expected {}, got {}: {}'.format(
326+
str(bytes), str(type(b)), repr(b)))
327+
328+
329+
def assert_str(s):
330+
""" Ensure given value is <str>.
331+
"""
332+
if type(s) != str:
333+
raise ValueError('Internal error: expected {}, got {}: {}'.format(
334+
str(str), str(type(s)), repr(s)))
335+
336+
337+
def bytes_to_str(b):
338+
""" Convert <bytes> to <str>.
339+
340+
No-op on Python 2.
341+
"""
342+
assert_bytes(b)
343+
if PY2:
344+
return b
345+
return b.decode('utf-8')
346+
347+
348+
def str_to_bytes(s):
349+
""" Convert <str> to <bytes>.
350+
351+
No-op on Python 2.
352+
"""
353+
assert_str(s)
354+
if PY2:
355+
return s
356+
return s.encode('utf-8')

0 commit comments

Comments
 (0)