Skip to content

Commit bee8d9d

Browse files
committed
CLN: GH11271 move _get_handle, UTF encoders to io.common
1 parent a89b96d commit bee8d9d

File tree

4 files changed

+162
-162
lines changed

4 files changed

+162
-162
lines changed

pandas/core/common.py

+1-151
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
import re
66
import collections
77
import numbers
8-
import codecs
9-
import csv
108
import types
119
from datetime import datetime, timedelta
1210
from functools import partial
@@ -19,7 +17,7 @@
1917
import pandas.lib as lib
2018
import pandas.tslib as tslib
2119
from pandas import compat
22-
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems
20+
from pandas.compat import BytesIO, range, long, u, zip, map, string_types, iteritems
2321
from pandas.core.dtypes import CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType
2422
from pandas.core.config import get_option
2523

@@ -2808,154 +2806,6 @@ def _all_none(*args):
28082806
return True
28092807

28102808

2811-
class UTF8Recoder(object):

    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Parameters
    ----------
    f : file-like object
        Byte stream handed to the ``codecs`` stream reader for `encoding`.
    encoding : str
        Name of the codec used to decode `f`.

    Notes
    -----
    Inherits from ``object`` so that the class is new-style on Python 2 as
    well as on Python 3.
    """

    def __init__(self, f, encoding):
        # codecs.getreader returns a StreamReader factory for the codec;
        # calling it wraps ``f`` so that reads yield decoded text.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def read(self, bytes=-1):
        """Read from the wrapped reader and return the text as UTF-8 bytes.

        ``bytes`` is forwarded unchanged to the stream reader's ``read``.
        """
        return self.reader.read(bytes).encode('utf-8')

    def readline(self):
        """Return the next line of the stream re-encoded as UTF-8."""
        return self.reader.readline().encode('utf-8')

    def next(self):
        """Return the next item of the wrapped reader re-encoded as UTF-8."""
        return next(self.reader).encode("utf-8")

    # Python 3 iterator
    __next__ = next
2834-
2835-
2836-
def _get_handle(path, mode, encoding=None, compression=None):
    """Open `path` with `mode` and return the resulting file handle.

    Parameters
    ----------
    path : str
        Location of the file to open.
    mode : str
        Mode string passed to the underlying opener.
    encoding : str, optional
        Text encoding. On Python 3 it is passed to ``open`` (or to the
        ``TextIOWrapper`` placed around a compressed file).
    compression : {'gzip', 'bz2'}, optional
        When given, the file is opened through the matching decompressor.

    Returns
    -------
    file-like object

    Raises
    ------
    ValueError
        If `compression` is not 'gzip' or 'bz2', or if both `encoding` and
        `compression` are given on Python 2.
    """
    is_py3 = compat.PY3

    # Plain (uncompressed) files first.
    if compression is None:
        if not is_py3:
            return open(path, mode)
        if encoding:
            return open(path, mode, encoding=encoding)
        # No encoding requested: open with the default one but replace
        # undecodable bytes instead of raising.
        return open(path, mode, errors='replace')

    if encoding is not None and not is_py3:
        msg = 'encoding + compression not yet supported in Python 2'
        raise ValueError(msg)

    # The compression modules are imported lazily, only when needed.
    if compression == 'gzip':
        import gzip
        handle = gzip.GzipFile(path, mode)
    elif compression == 'bz2':
        import bz2
        handle = bz2.BZ2File(path, mode)
    else:
        raise ValueError('Unrecognized compression type: %s' %
                         compression)

    if is_py3:
        # The compressed-file objects deal in bytes; wrap for text access.
        from io import TextIOWrapper
        handle = TextIOWrapper(handle, encoding=encoding)
    return handle
2870-
2871-
2872-
if compat.PY3:  # pragma: no cover
    def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a plain ``csv.reader`` over `f`; `encoding` is ignored."""
        # ignore encoding
        return csv.reader(f, dialect=dialect, **kwds)

    def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a plain ``csv.writer`` over `f`; `encoding` is ignored."""
        return csv.writer(f, dialect=dialect, **kwds)
else:
    class UnicodeReader(object):

        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.

        On Python 3 this class is not defined; the ``UnicodeReader``
        function in the other branch returns ``csv.reader`` directly.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Recode the input to UTF-8 bytes before handing it to csv.
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            """Return the next row with every cell decoded from UTF-8."""
            row = next(self.reader)
            return [compat.text_type(s, "utf-8") for s in row]

        # python 3 iterator
        __next__ = next

        def __iter__(self):  # pragma: no cover
            return self

    class UnicodeWriter(object):

        """
        A CSV writer which will write rows to CSV file "f",
        which is encoded in the given encoding.

        Rows are first written as UTF-8 to an in-memory queue, then
        re-encoded to the target encoding and written to "f".
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Redirect output to a queue
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            self.quoting = kwds.get("quoting", None)

        def _check_as_is(self, x):
            """Return True when `x` is handed to the csv writer unchanged."""
            return (self.quoting == csv.QUOTE_NONNUMERIC and
                    is_number(x)) or isinstance(x, str)

        def _encode_row(self, row):
            """Return a new list with the cells of `row` prepared for csv.

            Cells that are not passed through as-is are rendered with
            ``pprint_thing`` and encoded as UTF-8. `row` is not modified.
            """
            return [x if self._check_as_is(x)
                    else pprint_thing(x).encode('utf-8') for x in row]

        def _flush_queue(self):
            """Move the queued CSV text to the target stream."""
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)

        def writerow(self, row):
            """Write a single row to the target stream."""
            self.writer.writerow(self._encode_row(row))
            self._flush_queue()

        def writerows(self, rows):
            """Write every row of `rows` to the target stream.

            `rows` may be any iterable of rows; it is consumed once and is
            never modified in place.
            """
            self.writer.writerows([self._encode_row(row) for row in rows])
            self._flush_queue()
2957-
2958-
29592809
def get_dtype_kinds(l):
29602810
"""
29612811
Parameters

pandas/core/format.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
OrderedDict)
1414
from pandas.util.terminal import get_terminal_size
1515
from pandas.core.config import get_option, set_option
16+
from pandas.io.common import _get_handle, UnicodeWriter
1617
import pandas.core.common as com
1718
import pandas.lib as lib
1819
from pandas.tslib import iNaT, Timestamp, Timedelta, format_array_from_datetime
@@ -1475,7 +1476,7 @@ def save(self):
14751476
f = self.path_or_buf
14761477
close = False
14771478
else:
1478-
f = com._get_handle(self.path_or_buf, self.mode,
1479+
f = _get_handle(self.path_or_buf, self.mode,
14791480
encoding=self.encoding,
14801481
compression=self.compression)
14811482
close = True
@@ -1488,7 +1489,7 @@ def save(self):
14881489
quotechar=self.quotechar)
14891490
if self.encoding is not None:
14901491
writer_kwargs['encoding'] = self.encoding
1491-
self.writer = com.UnicodeWriter(f, **writer_kwargs)
1492+
self.writer = UnicodeWriter(f, **writer_kwargs)
14921493
else:
14931494
self.writer = csv.writer(f, **writer_kwargs)
14941495

pandas/io/common.py

+148
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,14 @@
22

33
import sys
44
import os
5+
import csv
6+
import codecs
57
import zipfile
68
from contextlib import contextmanager, closing
79

810
from pandas.compat import StringIO, string_types, BytesIO
911
from pandas import compat
12+
from pandas.core.common import pprint_thing, is_number
1013

1114

1215
if compat.PY3:
@@ -284,3 +287,148 @@ def ZipFile(*args, **kwargs):
284287
yield zf
285288
else:
286289
ZipFile = zipfile.ZipFile
290+
291+
292+
def _get_handle(path, mode, encoding=None, compression=None):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path : str
        Location of the file to open.
    mode : str
        Mode string passed to the underlying opener.
    encoding : str, optional
        Text encoding. On Python 3 it is passed to ``open`` (or to the
        ``TextIOWrapper`` placed around a compressed file).
    compression : {'gzip', 'bz2'}, optional
        When given, the file is opened through the matching decompressor.

    Returns
    -------
    file-like object

    Raises
    ------
    ValueError
        If `compression` is not 'gzip' or 'bz2', or if both `encoding` and
        `compression` are given on Python 2.
    """
    is_py3 = compat.PY3

    # Plain (uncompressed) files first.
    if compression is None:
        if not is_py3:
            return open(path, mode)
        if encoding:
            return open(path, mode, encoding=encoding)
        # No encoding requested: open with the default one but replace
        # undecodable bytes instead of raising.
        return open(path, mode, errors='replace')

    if encoding is not None and not is_py3:
        msg = 'encoding + compression not yet supported in Python 2'
        raise ValueError(msg)

    # The compression modules are imported lazily, only when needed.
    if compression == 'gzip':
        import gzip
        handle = gzip.GzipFile(path, mode)
    elif compression == 'bz2':
        import bz2
        handle = bz2.BZ2File(path, mode)
    else:
        raise ValueError('Unrecognized compression type: %s' %
                         compression)

    if is_py3:
        # The compressed-file objects deal in bytes; wrap for text access.
        from io import TextIOWrapper
        handle = TextIOWrapper(handle, encoding=encoding)
    return handle
323+
324+
325+
class UTF8Recoder(object):

    """
    Iterator that reads an encoded stream and reencodes the input to UTF-8.

    Parameters
    ----------
    f : file-like object
        Byte stream handed to the ``codecs`` stream reader for `encoding`.
    encoding : str
        Name of the codec used to decode `f`.

    Notes
    -----
    Inherits from ``object`` so that the class is new-style on Python 2 as
    well as on Python 3.
    """

    def __init__(self, f, encoding):
        # codecs.getreader returns a StreamReader factory for the codec;
        # calling it wraps ``f`` so that reads yield decoded text.
        self.reader = codecs.getreader(encoding)(f)

    def __iter__(self):
        return self

    def read(self, bytes=-1):
        """Read from the wrapped reader and return the text as UTF-8 bytes.

        ``bytes`` is forwarded unchanged to the stream reader's ``read``.
        """
        return self.reader.read(bytes).encode("utf-8")

    def readline(self):
        """Return the next line of the stream re-encoded as UTF-8."""
        return self.reader.readline().encode("utf-8")

    def next(self):
        """Return the next item of the wrapped reader re-encoded as UTF-8."""
        return next(self.reader).encode("utf-8")

    # Python 3 iterator
    __next__ = next
348+
349+
350+
if compat.PY3:  # pragma: no cover
    def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a plain ``csv.reader`` over `f`; `encoding` is ignored."""
        # ignore encoding
        return csv.reader(f, dialect=dialect, **kwds)

    def UnicodeWriter(f, dialect=csv.excel, encoding="utf-8", **kwds):
        """Return a plain ``csv.writer`` over `f`; `encoding` is ignored."""
        return csv.writer(f, dialect=dialect, **kwds)
else:
    class UnicodeReader(object):

        """
        A CSV reader which will iterate over lines in the CSV file "f",
        which is encoded in the given encoding.

        On Python 3 this class is not defined; the ``UnicodeReader``
        function in the other branch returns ``csv.reader`` directly.
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Recode the input to UTF-8 bytes before handing it to csv.
            f = UTF8Recoder(f, encoding)
            self.reader = csv.reader(f, dialect=dialect, **kwds)

        def next(self):
            """Return the next row with every cell decoded from UTF-8."""
            row = next(self.reader)
            return [compat.text_type(s, "utf-8") for s in row]

        # python 3 iterator
        __next__ = next

        def __iter__(self):  # pragma: no cover
            return self

    class UnicodeWriter(object):

        """
        A CSV writer which will write rows to CSV file "f",
        which is encoded in the given encoding.

        Rows are first written as UTF-8 to an in-memory queue, then
        re-encoded to the target encoding and written to "f".
        """

        def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
            # Redirect output to a queue
            self.queue = StringIO()
            self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
            self.stream = f
            self.encoder = codecs.getincrementalencoder(encoding)()
            self.quoting = kwds.get("quoting", None)

        def _check_as_is(self, x):
            """Return True when `x` is handed to the csv writer unchanged."""
            return (self.quoting == csv.QUOTE_NONNUMERIC and
                    is_number(x)) or isinstance(x, str)

        def _encode_row(self, row):
            """Return a new list with the cells of `row` prepared for csv.

            Cells that are not passed through as-is are rendered with
            ``pprint_thing`` and encoded as UTF-8. `row` is not modified.
            """
            return [x if self._check_as_is(x)
                    else pprint_thing(x).encode("utf-8") for x in row]

        def _flush_queue(self):
            """Move the queued CSV text to the target stream."""
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and reencode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)

        def writerow(self, row):
            """Write a single row to the target stream."""
            self.writer.writerow(self._encode_row(row))
            self._flush_queue()

        def writerows(self, rows):
            """Write every row of `rows` to the target stream.

            `rows` may be any iterable of rows; it is consumed once and is
            never modified in place.
            """
            self.writer.writerows([self._encode_row(row) for row in rows])
            self._flush_queue()

0 commit comments

Comments
 (0)