Skip to content

Commit 65e4791

Browse files
Totktonadaligurio
andcommitted
python3: decouple bytes and strings
In Python 2 default string type (`<str>`) is a binary string, non-unicode. We receive data from a socket, from a Popen stream, from a file as a string and operate on those strings without any conversions. Python 3 draws a line here. We usually operate on unicode strings in the code (because this is the default string type, `<str>`), but receive bytes from a socket and a Popen stream. We can use unicode or binary streams for files (unicode by default[^1]). This commit decouples bytes and strings. In most cases it means that we convert data from bytes to a string after receiving from a socket / Popen stream and convert it back from a string to bytes before writting to a socket. Those operations are no-op on Python 2. So, the general rule for our APIs is to accept and return `<str>` disregarding Python version. Not `<bytes>`, not `<unicode>`. The only non-trivial change is around `FilteredStream` and writes into `sys.stdout`. The `FilteredStream` instance replaces `sys.stdout` during execution of a test, so it should follow the usual convention and accept `<str>` in the `write()` method. This is both intuitive and necessary, because `*.py` tests rely on `print('bla bla')` to write into a result file. However the stream should also accept `<bytes>`, because we have a unit test (`unit/json.test`), which produces a binary output, which does not conform UTF-8 encoding. The separate `write_bytes()` method was introduced for this sake. UnittestServer and AppServer write tests output as bytes directly, TarantoolServer rely on the usual string output. We also use bytes directly, when write from one stream to another one: in `app_server.py` for stderr (writting to a log file), in `tarantool_server.py` for log destination property (because it is the destination for Popen). [^1]: Technically it depends on a system locale, but, hey, does anyone see a non UTF-8 locale after the millennium? Part of #20 Co-authored-by: Sergey Bronnikov <[email protected]>
1 parent f4fc259 commit 65e4791

7 files changed

+81
-34
lines changed

lib/admin_connection.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
from lib.tarantool_connection import TarantoolPool
2929
from lib.tarantool_connection import TarantoolAsyncConnection
3030

31+
from lib.utils import bytes_to_str
32+
from lib.utils import str_to_bytes
3133

3234
ADMIN_SEPARATOR = '\n'
3335

@@ -36,13 +38,13 @@ def get_handshake(sock, length=128, max_try=100):
3638
"""
3739
Correct way to get tarantool handshake
3840
"""
39-
result = ""
41+
result = b""
4042
i = 0
4143
while len(result) != length and i < max_try:
42-
result = "%s%s" % (result, sock.recv(length-len(result)))
44+
result = b"%s%s" % (result, sock.recv(length-len(result)))
4345
# max_try counter for tarantool/gh-1362
4446
i += 1
45-
return result
47+
return bytes_to_str(result)
4648

4749

4850
class AdminPool(TarantoolPool):
@@ -61,12 +63,12 @@ def _new_connection(self):
6163

6264
class ExecMixIn(object):
6365
def cmd(self, socket, cmd, silent):
64-
socket.sendall(cmd)
66+
socket.sendall(str_to_bytes(cmd))
6567

6668
bufsiz = 4096
6769
res = ""
6870
while True:
69-
buf = socket.recv(bufsiz)
71+
buf = bytes_to_str(socket.recv(bufsiz))
7072
if not buf:
7173
break
7274
res = res + buf

lib/app_server.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def run_server(execs, cwd, server, logfile, retval):
3838
timer.start()
3939
stdout, stderr = server.process.communicate()
4040
timer.cancel()
41-
sys.stdout.write(stdout)
42-
with open(logfile, 'a') as f:
41+
sys.stdout.write_bytes(stdout)
42+
with open(logfile, 'ab') as f:
4343
f.write(stderr)
4444
retval['returncode'] = server.process.wait()
4545
server.process = None

lib/inspector.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
from gevent.lock import Semaphore
77
from gevent.server import StreamServer
88

9+
from lib.utils import bytes_to_str
910
from lib.utils import find_port
1011
from lib.utils import prefix_each_line
12+
from lib.utils import str_to_bytes
1113
from lib.colorer import color_stdout
1214
from lib.colorer import color_log
1315
from lib.colorer import qa_notice
@@ -77,7 +79,7 @@ def readline(socket, delimiter='\n', size=4096):
7779

7880
while data:
7981
try:
80-
data = socket.recv(size)
82+
data = bytes_to_str(socket.recv(size))
8183
except IOError:
8284
# catch instance halt connection refused errors
8385
data = ''
@@ -119,7 +121,7 @@ def handle(self, socket, addr):
119121
color_log("DEBUG: test-run's response for [{}]\n{}\n".format(
120122
line, prefix_each_line(' | ', result)),
121123
schema='test-run command')
122-
socket.sendall(result)
124+
socket.sendall(str_to_bytes(result))
123125

124126
self.sem.release()
125127

lib/tarantool_server.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
from lib.server import Server
3636
from lib.server import DEFAULT_SNAPSHOT_NAME
3737
from lib.test import Test
38+
from lib.utils import bytes_to_str
3839
from lib.utils import find_port
3940
from lib.utils import extract_schema_from_snapshot
4041
from lib.utils import format_process
@@ -94,8 +95,8 @@ def result_file_version(self):
9495
if not os.path.isfile(self.result):
9596
return self.RESULT_FILE_VERSION_DEFAULT
9697

97-
with open(self.result, 'r') as f:
98-
line = f.readline().rstrip('\n')
98+
with open(self.result, 'rb') as f:
99+
line = bytes_to_str(f.readline()).rstrip('\n')
99100

100101
# An empty line or EOF.
101102
if not line:
@@ -577,7 +578,7 @@ def _iproto(self, port):
577578
@property
578579
def log_des(self):
579580
if not hasattr(self, '_log_des'):
580-
self._log_des = open(self.logfile, 'a')
581+
self._log_des = open(self.logfile, 'ab')
581582
return self._log_des
582583

583584
@log_des.deleter
@@ -663,7 +664,7 @@ def __del__(self):
663664
@classmethod
664665
def version(cls):
665666
p = subprocess.Popen([cls.binary, "--version"], stdout=subprocess.PIPE)
666-
version = p.stdout.read().rstrip()
667+
version = bytes_to_str(p.stdout.read()).rstrip()
667668
p.wait()
668669
return version
669670

@@ -1162,7 +1163,7 @@ def test_option_get(self, option_list_str, silent=False):
11621163
cwd=self.vardir,
11631164
stdout=subprocess.PIPE,
11641165
stderr=subprocess.STDOUT).stdout.read()
1165-
return output
1166+
return bytes_to_str(output)
11661167

11671168
def test_option(self, option_list_str):
11681169
print(self.test_option_get(option_list_str))

lib/test.py

+22-19
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,14 @@
99
from functools import partial
1010
from hashlib import md5
1111

12-
try:
13-
# Python 2
14-
from StringIO import StringIO
15-
except ImportError:
16-
# Python 3
17-
from io import StringIO
18-
1912
from lib import Options
2013
from lib.colorer import color_stdout
14+
from lib.utils import assert_bytes
2115
from lib.utils import non_empty_valgrind_logs
2216
from lib.utils import print_tail_n
2317
from lib.utils import print_unidiff as utils_print_unidiff
2418
from lib.utils import safe_makedirs
19+
from lib.utils import str_to_bytes
2520
from lib import pytap13
2621

2722

@@ -48,21 +43,17 @@ def __repr__(self):
4843
class FilteredStream:
4944
"""Helper class to filter .result file output"""
5045
def __init__(self, filename):
51-
#
52-
# always open the output stream in line-buffered mode,
53-
# to see partial results of a failed test
54-
#
55-
self.stream = open(filename, "w+", 1)
46+
self.stream = open(filename, "wb+")
5647
self.filters = []
5748
self.inspector = None
5849

59-
def write(self, fragment):
60-
"""Apply all filters, then write result to the undelrying stream.
61-
Do line-oriented filtering: the fragment doesn't have to represent
62-
just one line."""
63-
fragment_stream = StringIO(fragment)
50+
def write_bytes(self, fragment):
51+
""" The same as ``write()``, but accepts ``<bytes>`` as
52+
input.
53+
"""
54+
assert_bytes(fragment)
6455
skipped = False
65-
for line in fragment_stream:
56+
for line in fragment.splitlines(True):
6657
original_len = len(line.strip())
6758
for pattern, replacement in self.filters:
6859
line = re.sub(pattern, replacement, line)
@@ -73,8 +64,20 @@ def write(self, fragment):
7364
if not skipped:
7465
self.stream.write(line)
7566

67+
def write(self, fragment):
68+
""" Apply all filters, then write result to the underlying
69+
stream.
70+
71+
Do line-oriented filtering: the fragment doesn't have
72+
to represent just one line.
73+
74+
Accepts ``<str>`` as input, just like the standard
75+
``sys.stdout.write()``.
76+
"""
77+
self.write_bytes(str_to_bytes(fragment))
78+
7679
def push_filter(self, pattern, replacement):
77-
self.filters.append([pattern, replacement])
80+
self.filters.append([str_to_bytes(pattern), str_to_bytes(replacement)])
7881

7982
def pop_filter(self):
8083
self.filters.pop()

lib/unittest_server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def execute(self, server):
1616
server.current_test = self
1717
execs = server.prepare_args()
1818
proc = Popen(execs, cwd=server.vardir, stdout=PIPE, stderr=STDOUT)
19-
sys.stdout.write(proc.communicate()[0])
19+
sys.stdout.write_bytes(proc.communicate()[0])
2020

2121

2222
class UnittestServer(Server):

lib/utils.py

+39
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
# Useful for very coarse version differentiation.
2424
PY3 = sys.version_info[0] == 3
25+
PY2 = sys.version_info[0] == 2
2526

2627
if PY3:
2728
string_types = str,
@@ -312,3 +313,41 @@ def extract_schema_from_snapshot(snapshot_path):
312313
if res[0] == 'version':
313314
return res
314315
return None
316+
317+
318+
def assert_bytes(b):
319+
""" Ensure given value is <bytes>.
320+
"""
321+
if type(b) != bytes:
322+
raise ValueError('Internal error: expected {}, got {}: {}'.format(
323+
str(bytes), str(type(b)), repr(b)))
324+
325+
326+
def assert_str(s):
327+
""" Ensure given value is <str>.
328+
"""
329+
if type(s) != str:
330+
raise ValueError('Internal error: expected {}, got {}: {}'.format(
331+
str(str), str(type(s)), repr(s)))
332+
333+
334+
def bytes_to_str(b):
335+
""" Convert <bytes> to <str>.
336+
337+
No-op on Python 2.
338+
"""
339+
assert_bytes(b)
340+
if PY2:
341+
return b
342+
return b.decode('utf-8')
343+
344+
345+
def str_to_bytes(s):
346+
""" Convert <str> to <bytes>.
347+
348+
No-op on Python 2.
349+
"""
350+
assert_str(s)
351+
if PY2:
352+
return s
353+
return s.encode('utf-8')

0 commit comments

Comments
 (0)