From 2a96074b1311b33d3ef8f3b3d8e3029b02f9d2b7 Mon Sep 17 00:00:00 2001
From: sinhrks <sinhrks@gmail.com>
Date: Sat, 9 Aug 2014 22:25:57 +0900
Subject: [PATCH] ENH: Data formatting with unicode length

---
 doc/source/options.rst           |  53 +++++
 doc/source/whatsnew/v0.17.0.txt  |  31 +++
 pandas/compat/__init__.py        |  40 ++++
 pandas/core/common.py            |  34 ++-
 pandas/core/config_init.py       |  15 ++
 pandas/core/format.py            | 152 +++++++------
 pandas/core/index.py             |  22 +-
 pandas/tests/test_categorical.py |  29 ++-
 pandas/tests/test_common.py      |  94 ++++++++
 pandas/tests/test_format.py      | 353 ++++++++++++++++++++++++++++++-
 pandas/tests/test_index.py       | 307 ++++++++++++++++++++++++++-
 11 files changed, 1049 insertions(+), 81 deletions(-)

diff --git a/doc/source/options.rst b/doc/source/options.rst
index fb57175f96eaa..46ff2b6e5c343 100644
--- a/doc/source/options.rst
+++ b/doc/source/options.rst
@@ -440,3 +440,56 @@ For instance:
    pd.reset_option('^display\.')
 
 To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`.
+
+.. _options.east_asian_width:
+
+Unicode Formatting
+------------------
+
+.. warning::
+
+   Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower).
+   Use only when it is actually required.
+
+Some East Asian countries use Unicode characters whose display width corresponds to two Latin characters.
+If a DataFrame or Series contains these characters, the default output cannot be aligned properly.
+
+.. ipython:: python
+
+   df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']})
+   df
+
+Enabling ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property.
+These characters can then be aligned properly, but checking this property takes longer than the standard ``len`` function.
+
+.. ipython:: python
+
+   pd.set_option('display.unicode.east_asian_width', True)
+   df
+
+In addition, Unicode contains characters whose width is "Ambiguous". The width of these characters should be either 1 or 2, depending on the terminal setting or encoding. Because this cannot be determined from within Python, the ``display.unicode.ambiguous_as_wide`` option is provided to handle it.
+
+By default, the width of an "Ambiguous" character, such as "¡" (inverted exclamation mark) in the example below, is regarded as 1.
+
+.. note::
+
+   This should be aligned properly in a terminal which uses a monospaced font.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'a': ['xxx', u'¡¡'], 'b': ['yyy', u'¡¡']})
+   df
+
+Enabling ``display.unicode.ambiguous_as_wide`` makes pandas regard the width of these characters as 2. Note that this option is effective only when ``display.unicode.east_asian_width`` is enabled. In the output below, the starting position changes, but the columns are not aligned properly because the setting does not match this environment.
+
+.. ipython:: python
+
+   pd.set_option('display.unicode.ambiguous_as_wide', True)
+   df
+
+.. ipython:: python
+   :suppress:
+
+   pd.set_option('display.unicode.east_asian_width', False)
+   pd.set_option('display.unicode.ambiguous_as_wide', False)
+
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 9990d2bd1c78d..59b69d22c3b62 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -49,6 +49,7 @@ Highlights include:
 - Support for reading SAS xport files, see :ref:`here <whatsnew_0170.enhancements.sas_xport>`
 - Documentation comparing SAS to *pandas*, see :ref:`here <compare_with_sas>`
 - Removal of the automatic TimeSeries broadcasting, deprecated since 0.8.0, see :ref:`here <whatsnew_0170.prior_deprecations>`
+- Display format with plain text can optionally align with Unicode East Asian Width, see :ref:`here <whatsnew_0170.east_asian_width>`
 - Compatibility with Python 3.5 (:issue:`11097`)
 - Compatibility with matplotlib 1.5.0 (:issue:`11111`)
 
@@ -334,6 +335,36 @@ Google BigQuery Enhancements
 - The ``generate_bq_schema()`` function is now deprecated and will be removed in a future version (:issue:`11121`)
 - Update the gbq module to support Python 3 (:issue:`11094`).
 
+.. _whatsnew_0170.east_asian_width:
+
+Display Alignment with Unicode East Asian Width
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. warning::
+
+   Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower).
+   Use only when it is actually required.
+
+Some East Asian countries use Unicode characters whose display width corresponds to two Latin characters. If a DataFrame or Series contains these characters, the default output cannot be aligned properly. The following options are added to enable precise handling of these characters.
+
+- ``display.unicode.east_asian_width``: Whether to use the Unicode East Asian Width to calculate the display text width. (:issue:`2612`)
+- ``display.unicode.ambiguous_as_wide``: Whether to treat Unicode characters whose East Asian Width is Ambiguous as Wide. (:issue:`11102`)
+
+.. ipython:: python
+
+   df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']})
+   df
+
+   pd.set_option('display.unicode.east_asian_width', True)
+   df
+
+For further details, see :ref:`here <options.east_asian_width>`
+
+.. ipython:: python
+   :suppress:
+
+   pd.set_option('display.unicode.east_asian_width', False)
+
 .. _whatsnew_0170.enhancements.other:
 
 Other enhancements
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index bad7192047e19..ba5114dd7d8ba 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -35,6 +35,7 @@
 from itertools import product
 import sys
 import types
+from unicodedata import east_asian_width
 
 PY2 = sys.version_info[0] == 2
 PY3 = (sys.version_info[0] >= 3)
@@ -90,6 +91,7 @@ def lmap(*args, **kwargs):
 
     def lfilter(*args, **kwargs):
         return list(filter(*args, **kwargs))
+
 else:
     # Python 2
     import re
@@ -176,6 +178,11 @@ class to receive bound method
 # The license for this library can be found in LICENSES/SIX and the code can be
 # found at https://bitbucket.org/gutworth/six
 
+# Definition of East Asian Width
+# http://unicode.org/reports/tr11/
+# Ambiguous width can be changed by option
+_EAW_MAP = {'Na': 1, 'N': 1, 'W': 2, 'F': 2, 'H': 1}
+
 if PY3:
     string_types = str,
     integer_types = int,
@@ -188,6 +195,20 @@ def u(s):
 
     def u_safe(s):
         return s
+
+    def strlen(data, encoding=None):
+        # encoding is for compat with PY2
+        return len(data)
+
+    def east_asian_len(data, encoding=None, ambiguous_width=1):
+        """
+        Calculate display width considering unicode East Asian Width
+        """
+        if isinstance(data, text_type):
+            return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+        else:
+            return len(data)
+
 else:
     string_types = basestring,
     integer_types = (int, long)
@@ -204,6 +225,25 @@ def u_safe(s):
         except:
             return s
 
+    def strlen(data, encoding=None):
+        try:
+            data = data.decode(encoding)
+        except UnicodeError:
+            pass
+        return len(data)
+
+    def east_asian_len(data, encoding=None, ambiguous_width=1):
+        """
+        Calculate display width considering unicode East Asian Width
+        """
+        if isinstance(data, text_type):
+            try:
+                data = data.decode(encoding)
+            except UnicodeError:
+                pass
+            return sum([_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data])
+        else:
+            return len(data)
 
 string_and_binary_types = string_types + (binary_type,)
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
index 2d403f904a446..2411925207696 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -2149,21 +2149,33 @@ def _count_not_none(*args):
 
 
 
-def adjoin(space, *lists):
+def adjoin(space, *lists, **kwargs):
     """
     Glues together two sets of strings using the amount of space requested.
     The idea is to prettify.
-    """
+
+    Parameters
+    ----------
+    space : int
+        number of spaces for padding
+    lists : list of str
+        lists of str to be joined
+    strlen : callable
+        computes the length of each str. Needed for unicode handling.
+    justfunc : callable
+        function used to justify str. Needed for unicode handling.
+    """
+    strlen = kwargs.pop('strlen', len)
+    justfunc = kwargs.pop('justfunc', _justify)
+
     out_lines = []
     newLists = []
-    lengths = [max(map(len, x)) + space for x in lists[:-1]]
-
+    lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
     # not the last one
     lengths.append(max(map(len, lists[-1])))
-
     maxLen = max(map(len, lists))
     for i, lst in enumerate(lists):
-        nl = [x.ljust(lengths[i]) for x in lst]
+        nl = justfunc(lst, lengths[i], mode='left')
         nl.extend([' ' * lengths[i]] * (maxLen - len(lst)))
         newLists.append(nl)
     toJoin = zip(*newLists)
@@ -2171,6 +2183,16 @@ def adjoin(space, *lists):
         out_lines.append(_join_unicode(lines))
     return _join_unicode(out_lines, sep='\n')
 
+def _justify(texts, max_len, mode='right'):
+    """
+    Perform ljust, center, rjust against string or list-like
+    """
+    if mode == 'left':
+        return [x.ljust(max_len) for x in texts]
+    elif mode == 'center':
+        return [x.center(max_len) for x in texts]
+    else:
+        return [x.rjust(max_len) for x in texts]
 
 def _join_unicode(lines, sep=''):
     try:
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 03eaa45582bef..751a530ce73cc 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -144,6 +144,17 @@
     Deprecated.
 """
 
+pc_east_asian_width_doc = """
+: boolean
+    Whether to use the Unicode East Asian Width to calculate the display text width
+    Enabling this may affect to the performance (default: False)
+"""
+pc_ambiguous_as_wide_doc = """
+: boolean
+    Whether to handle Unicode characters belong to Ambiguous as Wide (width=2)
+    (default: False)
+"""
+
 pc_line_width_deprecation_warning = """\
 line_width has been deprecated, use display.width instead (currently both are
 identical)
@@ -282,6 +293,10 @@ def mpl_style_cb(key):
                        pc_line_width_doc)
     cf.register_option('memory_usage', True, pc_memory_usage_doc,
                         validator=is_instance_factory([type(None), bool]))
+    cf.register_option('unicode.east_asian_width', False,
+                       pc_east_asian_width_doc, validator=is_bool)
+    cf.register_option('unicode.ambiguous_as_wide', False,
+                       pc_ambiguous_as_wide_doc, validator=is_bool)
 
 cf.deprecate_option('display.line_width',
                     msg=pc_line_width_deprecation_warning,
diff --git a/pandas/core/format.py b/pandas/core/format.py
index 0c1a3dbadbd86..5f12abb543513 100644
--- a/pandas/core/format.py
+++ b/pandas/core/format.py
@@ -138,6 +138,7 @@ def __init__(self, series, buf=None, length=True, header=True,
             float_format = get_option("display.float_format")
         self.float_format = float_format
         self.dtype = dtype
+        self.adj = _get_adjustment()
 
         self._chk_truncate()
 
@@ -221,22 +222,24 @@ def to_string(self):
         fmt_index, have_header = self._get_formatted_index()
         fmt_values = self._get_formatted_values()
 
-        maxlen = max(len(x) for x in fmt_index)  # max index len
+        maxlen = max(self.adj.len(x) for x in fmt_index)  # max index len
         pad_space = min(maxlen, 60)
 
         if self.truncate_v:
             n_header_rows = 0
             row_num = self.tr_row_num
-            width = len(fmt_values[row_num-1])
+            width = self.adj.len(fmt_values[row_num-1])
             if width > 3:
                 dot_str = '...'
             else:
                 dot_str = '..'
-            dot_str = dot_str.center(width)
+            # Series uses mode=center because it has single value columns
+            # DataFrame uses mode=left
+            dot_str = self.adj.justify([dot_str], width, mode='center')[0]
             fmt_values.insert(row_num + n_header_rows, dot_str)
             fmt_index.insert(row_num + 1, '')
 
-        result = adjoin(3, *[fmt_index[1:], fmt_values])
+        result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
 
         if self.header and have_header:
             result = fmt_index[0] + '\n' + result
@@ -247,19 +250,54 @@ def to_string(self):
         return compat.text_type(u('').join(result))
 
 
-def _strlen_func():
-    if compat.PY3:  # pragma: no cover
-        _strlen = len
-    else:
-        encoding = get_option("display.encoding")
+class TextAdjustment(object):
+
+    def __init__(self):
+        self.encoding = get_option("display.encoding")
+
+    def len(self, text):
+        return compat.strlen(text, encoding=self.encoding)
+
+    def justify(self, texts, max_len, mode='right'):
+        return com._justify(texts, max_len, mode=mode)
+
+    def adjoin(self, space, *lists, **kwargs):
+        return com.adjoin(space, *lists, strlen=self.len,
+                          justfunc=self.justify, **kwargs)
+
+
+class EastAsianTextAdjustment(TextAdjustment):
+
+    def __init__(self):
+        super(EastAsianTextAdjustment, self).__init__()
+        if get_option("display.unicode.ambiguous_as_wide"):
+            self.ambiguous_width = 2
+        else:
+            self.ambiguous_width = 1
+
+    def len(self, text):
+        return compat.east_asian_len(text, encoding=self.encoding,
+                                     ambiguous_width=self.ambiguous_width)
+
+    def justify(self, texts, max_len, mode='right'):
+        # re-calculate padding space per str considering East Asian Width
+        def _get_pad(t):
+            return max_len - self.len(t) + len(t)
+
+        if mode == 'left':
+            return [x.ljust(_get_pad(x)) for x in texts]
+        elif mode == 'center':
+            return [x.center(_get_pad(x)) for x in texts]
+        else:
+            return [x.rjust(_get_pad(x)) for x in texts]
 
-        def _strlen(x):
-            try:
-                return len(x.decode(encoding))
-            except UnicodeError:
-                return len(x)
 
-    return _strlen
+def _get_adjustment():
+    use_east_asian_width = get_option("display.unicode.east_asian_width")
+    if use_east_asian_width:
+        return EastAsianTextAdjustment()
+    else:
+        return TextAdjustment()
 
 
 class TableFormatter(object):
@@ -338,6 +376,7 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
             self.columns = frame.columns
 
         self._chk_truncate()
+        self.adj = _get_adjustment()
 
     def _chk_truncate(self):
         '''
@@ -414,7 +453,6 @@ def _to_str_columns(self):
         """
         Render a DataFrame to a list of columns (as lists of strings).
         """
-        _strlen = _strlen_func()
         frame = self.tr_frame
 
         # may include levels names also
@@ -427,27 +465,23 @@ def _to_str_columns(self):
             for i, c in enumerate(frame):
                 cheader = str_columns[i]
                 max_colwidth = max(self.col_space or 0,
-                                   *(_strlen(x) for x in cheader))
-
+                                   *(self.adj.len(x) for x in cheader))
                 fmt_values = self._format_col(i)
-
                 fmt_values = _make_fixed_width(fmt_values, self.justify,
-                                               minimum=max_colwidth)
+                                               minimum=max_colwidth,
+                                               adj=self.adj)
 
-                max_len = max(np.max([_strlen(x) for x in fmt_values]),
+                max_len = max(np.max([self.adj.len(x) for x in fmt_values]),
                               max_colwidth)
-                if self.justify == 'left':
-                    cheader = [x.ljust(max_len) for x in cheader]
-                else:
-                    cheader = [x.rjust(max_len) for x in cheader]
-
+                cheader = self.adj.justify(cheader, max_len, mode=self.justify)
                 stringified.append(cheader + fmt_values)
         else:
             stringified = []
             for i, c in enumerate(frame):
                 fmt_values = self._format_col(i)
                 fmt_values = _make_fixed_width(fmt_values, self.justify,
-                                               minimum=(self.col_space or 0))
+                                               minimum=(self.col_space or 0),
+                                               adj=self.adj)
 
                 stringified.append(fmt_values)
 
@@ -461,13 +495,13 @@ def _to_str_columns(self):
 
         if truncate_h:
             col_num = self.tr_col_num
-            col_width = len(strcols[self.tr_size_col][0])  # infer from column header
+            col_width = self.adj.len(strcols[self.tr_size_col][0])  # infer from column header
             strcols.insert(self.tr_col_num + 1, ['...'.center(col_width)] * (len(str_index)))
         if truncate_v:
             n_header_rows = len(str_index) - len(frame)
             row_num = self.tr_row_num
             for ix, col in enumerate(strcols):
-                cwidth = len(strcols[ix][row_num])  # infer from above row
+                cwidth = self.adj.len(strcols[ix][row_num])  # infer from above row
                 is_dot_col = False
                 if truncate_h:
                     is_dot_col = ix == col_num + 1
@@ -477,13 +511,13 @@ def _to_str_columns(self):
                     my_str = '..'
 
                 if ix == 0:
-                    dot_str = my_str.ljust(cwidth)
+                    dot_mode = 'left'
                 elif is_dot_col:
-                    cwidth = len(strcols[self.tr_size_col][0])
-                    dot_str = my_str.center(cwidth)
+                    cwidth = self.adj.len(strcols[self.tr_size_col][0])
+                    dot_mode = 'center'
                 else:
-                    dot_str = my_str.rjust(cwidth)
-
+                    dot_mode = 'right'
+                dot_str = self.adj.justify([my_str], cwidth, mode=dot_mode)[0]
                 strcols[ix].insert(row_num + n_header_rows, dot_str)
         return strcols
 
@@ -492,6 +526,7 @@ def to_string(self):
         Render a DataFrame to a console-friendly tabular output.
         """
         from pandas import Series
+
         frame = self.frame
 
         if len(frame.columns) == 0 or len(frame.index) == 0:
@@ -503,11 +538,11 @@ def to_string(self):
         else:
             strcols = self._to_str_columns()
             if self.line_width is None:  # no need to wrap around just print the whole frame
-                text = adjoin(1, *strcols)
+                text = self.adj.adjoin(1, *strcols)
             elif not isinstance(self.max_cols, int) or self.max_cols > 0:  # need to wrap around
                 text = self._join_multiline(*strcols)
             else:  # max_cols == 0. Try to fit frame to terminal
-                text = adjoin(1, *strcols).split('\n')
+                text = self.adj.adjoin(1, *strcols).split('\n')
                 row_lens = Series(text).apply(len)
                 max_len_col_ix = np.argmax(row_lens)
                 max_len = row_lens[max_len_col_ix]
@@ -535,7 +570,7 @@ def to_string(self):
                 # and then generate string representation
                 self._chk_truncate()
                 strcols = self._to_str_columns()
-                text = adjoin(1, *strcols)
+                text = self.adj.adjoin(1, *strcols)
 
         self.buf.writelines(text)
 
@@ -549,9 +584,9 @@ def _join_multiline(self, *strcols):
         strcols = list(strcols)
         if self.index:
             idx = strcols.pop(0)
-            lwidth -= np.array([len(x) for x in idx]).max() + adjoin_width
+            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
 
-        col_widths = [np.array([len(x) for x in col]).max()
+        col_widths = [np.array([self.adj.len(x) for x in col]).max()
                       if len(col) > 0 else 0
                       for col in strcols]
         col_bins = _binify(col_widths, lwidth)
@@ -572,8 +607,7 @@ def _join_multiline(self, *strcols):
                     row.append([' \\'] + ['  '] * (nrows - 1))
                 else:
                     row.append([' '] * nrows)
-
-            str_lst.append(adjoin(adjoin_width, *row))
+            str_lst.append(self.adj.adjoin(adjoin_width, *row))
             st = ed
         return '\n\n'.join(str_lst)
 
@@ -776,11 +810,12 @@ def _get_formatted_index(self, frame):
                                      formatter=fmt)
         else:
             fmt_index = [index.format(name=show_index_names, formatter=fmt)]
-        fmt_index = [tuple(_make_fixed_width(
-            list(x), justify='left', minimum=(self.col_space or 0)))
-            for x in fmt_index]
+        fmt_index = [tuple(_make_fixed_width(list(x), justify='left',
+                                             minimum=(self.col_space or 0),
+                                             adj=self.adj))
+                     for x in fmt_index]
 
-        adjoined = adjoin(1, *fmt_index).split('\n')
+        adjoined = self.adj.adjoin(1, *fmt_index).split('\n')
 
         # empty space for columns
         if show_col_names:
@@ -2222,13 +2257,16 @@ def _formatter(x):
     return _formatter
 
 
-def _make_fixed_width(strings, justify='right', minimum=None):
+def _make_fixed_width(strings, justify='right', minimum=None,
+                      adj=None):
+
     if len(strings) == 0 or justify == 'all':
         return strings
 
-    _strlen = _strlen_func()
+    if adj is None:
+        adj = _get_adjustment()
 
-    max_len = np.max([_strlen(x) for x in strings])
+    max_len = np.max([adj.len(x) for x in strings])
 
     if minimum is not None:
         max_len = max(minimum, max_len)
@@ -2237,22 +2275,14 @@ def _make_fixed_width(strings, justify='right', minimum=None):
     if conf_max is not None and max_len > conf_max:
         max_len = conf_max
 
-    if justify == 'left':
-        justfunc = lambda self, x: self.ljust(x)
-    else:
-        justfunc = lambda self, x: self.rjust(x)
-
     def just(x):
-        eff_len = max_len
-
         if conf_max is not None:
-            if (conf_max > 3) & (_strlen(x) > max_len):
-                x = x[:eff_len - 3] + '...'
-
-        return justfunc(x, eff_len)
-
-    result = [just(x) for x in strings]
+            if (conf_max > 3) & (adj.len(x) > max_len):
+                x = x[:max_len - 3] + '...'
+        return x
 
+    strings = [just(x) for x in strings]
+    result = adj.justify(strings, max_len, mode=justify)
     return result
 
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
index d64a20fc9563c..1daa0e1b52d02 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -488,7 +488,7 @@ def _format_data(self):
         """
         Return the formatted data as a unicode string
         """
-        from pandas.core.format import get_console_size
+        from pandas.core.format import get_console_size, _get_adjustment
         display_width, _ = get_console_size()
         if display_width is None:
             display_width = get_option('display.width') or 80
@@ -502,14 +502,19 @@ def _format_data(self):
         formatter = self._formatter_func
 
         # do we want to justify (only do so for non-objects)
-        is_justify = not (self.inferred_type == 'string' or self.inferred_type == 'categorical' and is_object_dtype(self.categories))
+        is_justify = not (self.inferred_type in ('string', 'unicode') or
+                          (self.inferred_type == 'categorical' and
+                           is_object_dtype(self.categories)))
 
         # are we a truncated display
         is_truncated = n > max_seq_items
 
+        # adj can optionally handle unicode East Asian Width
+        adj = _get_adjustment()
+
         def _extend_line(s, line, value, display_width, next_line_prefix):
 
-            if len(line.rstrip()) + len(value.rstrip()) >= display_width:
+            if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
                 s += line.rstrip()
                 line = next_line_prefix
             line += value
@@ -517,7 +522,7 @@ def _extend_line(s, line, value, display_width, next_line_prefix):
 
         def best_len(values):
             if values:
-                return max([len(x) for x in values])
+                return max([adj.len(x) for x in values])
             else:
                 return 0
 
@@ -556,8 +561,10 @@ def best_len(values):
                 word = head[i] + sep + ' '
                 summary, line = _extend_line(summary, line, word,
                                              display_width, space2)
+
             if is_truncated:
-                summary += line + space2 + '...'
+                # remove trailing space of last line
+                summary += line.rstrip() + space2 + '...'
                 line = space2
 
             for i in range(len(tail)-1):
@@ -4501,8 +4508,11 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                                       start=int(names),
                                       sentinel=sentinel)
 
+
         if adjoin:
-            return com.adjoin(space, *result_levels).split('\n')
+            from pandas.core.format import  _get_adjustment
+            adj = _get_adjustment()
+            return adj.adjoin(space, *result_levels).split('\n')
         else:
             return result_levels
 
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 9173c0a87f6c2..e97010e1cb552 100755
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -2,7 +2,7 @@
 # pylint: disable=E1101,E1103,W0232
 
 from datetime import datetime
-from pandas.compat import range, lrange, u
+from pandas.compat import range, lrange, u, PY3
 import os
 import pickle
 import re
@@ -534,6 +534,33 @@ def test_print_none_width(self):
         with option_context("display.width", None):
             self.assertEqual(exp, repr(a))
 
+    def test_unicode_print(self):
+        if PY3:
+            _rep = repr
+        else:
+            _rep = unicode
+
+        c = pd.Categorical(['aaaaa', 'bb', 'cccc'] * 20)
+        expected = u"""[aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc]
+Length: 60
+Categories (3, object): [aaaaa, bb, cccc]"""
+        self.assertEqual(_rep(c), expected)
+
+        c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
+        expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+Length: 60
+Categories (3, object): [ああああ, いいいいい, ううううううう]"""
+        self.assertEqual(_rep(c), expected)
+
+        # the unicode option should not affect Categorical, as it does not consider the repr width
+        with option_context('display.unicode.east_asian_width', True):
+
+            c = pd.Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20)
+            expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう]
+Length: 60
+Categories (3, object): [ああああ, いいいいい, ううううううう]"""
+            self.assertEqual(_rep(c), expected)
+
     def test_periodindex(self):
         idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
                             '2014-03', '2014-03'], freq='M')
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
index c488d22da7dfe..003fd134cf210 100644
--- a/pandas/tests/test_common.py
+++ b/pandas/tests/test_common.py
@@ -14,6 +14,7 @@
 from pandas.core.common import notnull, isnull, array_equivalent
 import pandas.core.common as com
 import pandas.core.convert as convert
+import pandas.core.format as fmt
 import pandas.util.testing as tm
 import pandas.core.config as cf
 
@@ -332,6 +333,99 @@ def test_adjoin():
     assert(adjoined == expected)
 
 
+
+class TestFormattBase(tm.TestCase):
+
+    def test_adjoin(self):
+        data = [['a', 'b', 'c'],
+                ['dd', 'ee', 'ff'],
+                ['ggg', 'hhh', 'iii']]
+        expected = 'a  dd  ggg\nb  ee  hhh\nc  ff  iii'
+
+        adjoined = com.adjoin(2, *data)
+
+        self.assertEqual(adjoined, expected)
+
+    def test_adjoin_unicode(self):
+        data = [[u'あ', 'b', 'c'],
+                ['dd', u'ええ', 'ff'],
+                ['ggg', 'hhh', u'いいい']]
+        expected = u'あ  dd  ggg\nb  ええ  hhh\nc  ff  いいい'
+        adjoined = com.adjoin(2, *data)
+        self.assertEqual(adjoined, expected)
+
+        adj = fmt.EastAsianTextAdjustment()
+
+        expected = u"""あ  dd    ggg
+b   ええ  hhh
+c   ff    いいい"""
+        adjoined = adj.adjoin(2, *data)
+        self.assertEqual(adjoined, expected)
+        cols = adjoined.split('\n')
+        self.assertEqual(adj.len(cols[0]), 13)
+        self.assertEqual(adj.len(cols[1]), 13)
+        self.assertEqual(adj.len(cols[2]), 16)
+
+        expected = u"""あ       dd         ggg
+b        ええ       hhh
+c        ff         いいい"""
+        adjoined = adj.adjoin(7, *data)
+        self.assertEqual(adjoined, expected)
+        cols = adjoined.split('\n')
+        self.assertEqual(adj.len(cols[0]), 23)
+        self.assertEqual(adj.len(cols[1]), 23)
+        self.assertEqual(adj.len(cols[2]), 26)
+
+    def test_justify(self):
+        adj = fmt.EastAsianTextAdjustment()
+
+        def just(x, *args, **kwargs):
+            # wrapper to test single str
+            return adj.justify([x], *args, **kwargs)[0]
+
+        self.assertEqual(just('abc', 5, mode='left'), 'abc  ')
+        self.assertEqual(just('abc', 5, mode='center'), ' abc ')
+        self.assertEqual(just('abc', 5, mode='right'), '  abc')
+        self.assertEqual(just(u'abc', 5, mode='left'), 'abc  ')
+        self.assertEqual(just(u'abc', 5, mode='center'), ' abc ')
+        self.assertEqual(just(u'abc', 5, mode='right'), '  abc')
+
+        self.assertEqual(just(u'パンダ', 5, mode='left'), u'パンダ')
+        self.assertEqual(just(u'パンダ', 5, mode='center'), u'パンダ')
+        self.assertEqual(just(u'パンダ', 5, mode='right'), u'パンダ')
+
+        self.assertEqual(just(u'パンダ', 10, mode='left'), u'パンダ    ')
+        self.assertEqual(just(u'パンダ', 10, mode='center'), u'  パンダ  ')
+        self.assertEqual(just(u'パンダ', 10, mode='right'), u'    パンダ')
+
+    def test_east_asian_len(self):
+        adj = fmt.EastAsianTextAdjustment()
+
+        self.assertEqual(adj.len('abc'), 3)
+        self.assertEqual(adj.len(u'abc'), 3)
+
+        self.assertEqual(adj.len(u'パンダ'), 6)
+        self.assertEqual(adj.len(u'ﾊﾟﾝﾀﾞ'), 5)
+        self.assertEqual(adj.len(u'パンダpanda'), 11)
+        self.assertEqual(adj.len(u'ﾊﾟﾝﾀﾞpanda'), 10)
+
+
+    def test_ambiguous_width(self):
+        adj = fmt.EastAsianTextAdjustment()
+        self.assertEqual(adj.len(u'¡¡ab'), 4)
+
+        with cf.option_context('display.unicode.ambiguous_as_wide', True):
+            adj = fmt.EastAsianTextAdjustment()
+            self.assertEqual(adj.len(u'¡¡ab'), 6)
+
+        data = [[u'あ', 'b', 'c'],
+                ['dd', u'ええ', 'ff'],
+                ['ggg', u'¡¡ab', u'いいい']]
+        expected = u'あ  dd    ggg \nb   ええ  ¡¡ab\nc   ff    いいい'
+        adjoined = adj.adjoin(2, *data)
+        self.assertEqual(adjoined, expected)
+
+
 def test_iterpairs():
     data = [1, 2, 3, 4]
     expected = [(1, 2),
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index 58c365029a694..b5220c8cb2706 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -162,10 +162,10 @@ def test_repr_truncation(self):
             r = repr(df)
             r = r[r.find('\n') + 1:]
 
-            _strlen = fmt._strlen_func()
+            adj = fmt._get_adjustment()
 
             for line, value in lzip(r.split('\n'), df['B']):
-                if _strlen(value) + 1 > max_len:
+                if adj.len(value) + 1 > max_len:
                     self.assertIn('...', line)
                 else:
                     self.assertNotIn('...', line)
@@ -438,6 +438,209 @@ def test_to_string_with_formatters_unicode(self):
         self.assertEqual(result, u('  c/\u03c3\n') +
                                  '0   1\n1   2\n2   3')
 
+    def test_east_asian_unicode_frame(self):
+        if PY3:
+            _rep = repr
+        else:
+            _rep = unicode
+
+        # not aligned properly because of East Asian width
+
+        # mid col
+        df = DataFrame({'a': [u'あ', u'いいい', u'う', u'ええええええ'],
+                        'b': [1, 222, 33333, 4]},
+                       index=['a', 'bb', 'c', 'ddd'])
+        expected = (u"          a      b\na         あ      1\n"
+                    u"bb      いいい    222\nc         う  33333\n"
+                    u"ddd  ええええええ      4")
+        self.assertEqual(_rep(df), expected)
+
+        # last col
+        df = DataFrame({'a': [1, 222, 33333, 4],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                       index=['a', 'bb', 'c', 'ddd'])
+        expected = (u"         a       b\na        1       あ\n"
+                    u"bb     222     いいい\nc    33333       う\n"
+                    u"ddd      4  ええええええ")
+        self.assertEqual(_rep(df), expected)
+
+        # all col
+        df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                       index=['a', 'bb', 'c', 'ddd'])
+        expected = (u"         a       b\na    あああああ       あ\n"
+                    u"bb       い     いいい\nc        う       う\n"
+                    u"ddd    えええ  ええええええ")
+        self.assertEqual(_rep(df), expected)
+
+        # column name
+        df = DataFrame({u'あああああ': [1, 222, 33333, 4],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                       index=['a', 'bb', 'c', 'ddd'])
+        expected = (u"          b  あああああ\na         あ      1\n"
+                    u"bb      いいい    222\nc         う  33333\n"
+                    u"ddd  ええええええ      4")
+        self.assertEqual(_rep(df), expected)
+
+        # index
+        df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                       index=[u'あああ', u'いいいいいい', u'うう', u'え'])
+        expected = (u"            a       b\nあああ     あああああ       あ\n"
+                    u"いいいいいい      い     いいい\nうう          う       う\n"
+                    u"え         えええ  ええええええ")
+        self.assertEqual(_rep(df), expected)
+
+        # index name
+        df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                       index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお'))
+        expected = (u"          a       b\nおおおお               \nあ     あああああ       あ\n"
+                    u"い         い     いいい\nうう        う       う\nえ       えええ  ええええええ")
+        self.assertEqual(_rep(df), expected)
+
+        # all
+        df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'],
+                        u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']},
+                       index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お'))
+        expected = (u"       あああ いいいいい\nお               \nあ      あああ     あ\n"
+                    u"いいい      い   いいい\nうう       う     う\nえ    えええええ    ええ")
+        self.assertEqual(_rep(df), expected)
+
+        # MultiIndex
+        idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'),
+                                         (u'おおお', u'かかかか'), (u'き', u'くく')])
+        df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                        'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx)
+        expected = (u"              a       b\nあ   いい    あああああ       あ\n"
+                    u"う   え         い     いいい\nおおお かかかか      う       う\n"
+                    u"き   くく      えええ  ええええええ")
+        self.assertEqual(_rep(df), expected)
+
+        # truncate
+        with option_context('display.max_rows', 3, 'display.max_columns', 3):
+            df = pd.DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                               'b': [u'あ', u'いいい', u'う', u'ええええええ'],
+                               'c': [u'お', u'か', u'ききき', u'くくくくくく'],
+                               u'ああああ': [u'さ', u'し', u'す', u'せ']},
+                              columns=['a', 'b', 'c', u'ああああ'])
+
+            expected = (u"        a ...  ああああ\n0   あああああ ...     さ\n"
+                        u"..    ... ...   ...\n3     えええ ...     せ\n"
+                        u"\n[4 rows x 4 columns]")
+            self.assertEqual(_rep(df), expected)
+
+            df.index = [u'あああ', u'いいいい',  u'う', 'aaa']
+            expected = (u"         a ...  ああああ\nあああ  あああああ ...     さ\n"
+                        u"..     ... ...   ...\naaa    えええ ...     せ\n"
+                        u"\n[4 rows x 4 columns]")
+            self.assertEqual(_rep(df), expected)
+
+        # Enable Unicode option -----------------------------------------
+        with option_context('display.unicode.east_asian_width', True):
+
+            # mid col
+            df = DataFrame({'a': [u'あ', u'いいい', u'う', u'ええええええ'],
+                          'b': [1, 222, 33333, 4]},
+                         index=['a', 'bb', 'c', 'ddd'])
+            expected = (u"                a      b\na              あ      1\n"
+                        u"bb         いいい    222\nc              う  33333\n"
+                        u"ddd  ええええええ      4")
+            self.assertEqual(_rep(df), expected)
+
+            # last col
+            df = DataFrame({'a': [1, 222, 33333, 4],
+                          'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                         index=['a', 'bb', 'c', 'ddd'])
+            expected = (u"         a             b\na        1            あ\n"
+                        u"bb     222        いいい\nc    33333            う\n"
+                        u"ddd      4  ええええええ")
+            self.assertEqual(_rep(df), expected)
+
+            # all col
+            df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                          'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                         index=['a', 'bb', 'c', 'ddd'])
+            expected = (u"              a             b\na    あああああ            あ\n"
+                        u"bb           い        いいい\nc            う            う\n"
+                        u"ddd      えええ  ええええええ""")
+            self.assertEqual(_rep(df), expected)
+
+            # column name
+            df = DataFrame({u'あああああ': [1, 222, 33333, 4],
+                          'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                         index=['a', 'bb', 'c', 'ddd'])
+            expected = (u"                b  あああああ\na              あ           1\n"
+                        u"bb         いいい         222\nc              う       33333\n"
+                        u"ddd  ええええええ           4")
+            self.assertEqual(_rep(df), expected)
+
+            # index
+            df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                            'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                           index=[u'あああ', u'いいいいいい', u'うう', u'え'])
+            expected = (u"                       a             b\nあああ        あああああ            あ\n"
+                        u"いいいいいい          い        いいい\nうう                  う            う\n"
+                        u"え                えええ  ええええええ")
+            self.assertEqual(_rep(df), expected)
+
+            # index name
+            df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                            'b': [u'あ', u'いいい', u'う', u'ええええええ']},
+                           index=pd.Index([u'あ', u'い', u'うう', u'え'], name=u'おおおお'))
+            expected = (u"                   a             b\nおおおお                          \n"
+                        u"あ        あああああ            あ\nい                い        いいい\n"
+                        u"うう              う            う\nえ            えええ  ええええええ")
+            self.assertEqual(_rep(df), expected)
+
+            # all
+            df = DataFrame({u'あああ': [u'あああ', u'い', u'う', u'えええええ'],
+                            u'いいいいい': [u'あ', u'いいい', u'う', u'ええ']},
+                           index=pd.Index([u'あ', u'いいい', u'うう', u'え'], name=u'お'))
+            expected = (u"            あああ いいいいい\nお                           \n"
+                        u"あ          あああ         あ\nいいい          い     いいい\n"
+                        u"うう            う         う\nえ      えええええ       ええ")
+            self.assertEqual(_rep(df), expected)
+
+            # MultiIndex
+            idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'),
+                                             (u'おおお', u'かかかか'), (u'き', u'くく')])
+            df = DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                            'b': [u'あ', u'いいい', u'う', u'ええええええ']}, index=idx)
+            expected = (u"                          a             b\nあ     いい      あああああ            あ\n"
+                        u"う     え                い        いいい\nおおお かかかか          う            う\n"
+                        u"き     くく          えええ  ええええええ")
+            self.assertEqual(_rep(df), expected)
+
+            # truncate
+            with option_context('display.max_rows', 3, 'display.max_columns', 3):
+
+                df = pd.DataFrame({'a': [u'あああああ', u'い', u'う', u'えええ'],
+                                   'b': [u'あ', u'いいい', u'う', u'ええええええ'],
+                                   'c': [u'お', u'か', u'ききき', u'くくくくくく'],
+                                   u'ああああ': [u'さ', u'し', u'す', u'せ']},
+                                  columns=['a', 'b', 'c', u'ああああ'])
+
+                expected = (u"             a   ...    ああああ\n0   あああああ   ...          さ\n"
+                            u"..         ...   ...         ...\n3       えええ   ...          せ\n"
+                            u"\n[4 rows x 4 columns]")
+                self.assertEqual(_rep(df), expected)
+
+                df.index = [u'あああ', u'いいいい',  u'う', 'aaa']
+                expected = (u"                 a   ...    ああああ\nあああ  あああああ   ...          さ\n"
+                            u"...            ...   ...         ...\naaa         えええ   ...          せ\n"
+                            u"\n[4 rows x 4 columns]")
+                self.assertEqual(_rep(df), expected)
+
+            # ambiguous unicode
+            df = DataFrame({u'あああああ': [1, 222, 33333, 4],
+                          'b': [u'あ', u'いいい', u'¡¡', u'ええええええ']},
+                         index=['a', 'bb', 'c', '¡¡¡'])
+            expected = (u"                b  あああああ\na              あ           1\n"
+                        u"bb         いいい         222\nc              ¡¡       33333\n"
+                        u"¡¡¡  ええええええ           4")
+            self.assertEqual(_rep(df), expected)
+
     def test_to_string_buffer_all_unicode(self):
         buf = StringIO()
 
@@ -895,10 +1098,6 @@ def test_to_html_regression_GH6098(self):
         # it works
         df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_()
 
-
-
-
-
     def test_to_html_truncate(self):
         raise nose.SkipTest("unreliable on travis")
         index = pd.DatetimeIndex(start='20010101',freq='D',periods=20)
@@ -2888,6 +3087,148 @@ def test_unicode_name_in_footer(self):
         sf = fmt.SeriesFormatter(s, name=u('\u05e2\u05d1\u05e8\u05d9\u05ea'))
         sf._get_footer()  # should not raise exception
 
+    def test_east_asian_unicode_series(self):
+        if PY3:
+            _rep = repr
+        else:
+            _rep = unicode
+        # not aligned properly because of East Asian width
+
+        # unicode index
+        s = Series(['a', 'bb', 'CCC', 'D'],
+                   index=[u'あ', u'いい', u'ううう', u'ええええ'])
+        expected = (u"あ         a\nいい       bb\nううう     CCC\n"
+                    u"ええええ      D\ndtype: object")
+        self.assertEqual(_rep(s), expected)
+
+        # unicode values
+        s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=['a', 'bb', 'c', 'ddd'])
+        expected = (u"a         あ\nbb       いい\nc       ううう\n"
+                    u"ddd    ええええ\ndtype: object")
+        self.assertEqual(_rep(s), expected)
+
+        # both
+        s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                   index=[u'ああ', u'いいいい', u'う', u'えええ'])
+        expected = (u"ああ         あ\nいいいい      いい\nう        ううう\n"
+                    u"えええ     ええええ\ndtype: object")
+        self.assertEqual(_rep(s), expected)
+
+        # unicode footer
+        s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                   index=[u'ああ', u'いいいい', u'う', u'えええ'],
+                   name=u'おおおおおおお')
+        expected = (u"ああ         あ\nいいいい      いい\nう        ううう\n"
+                    u"えええ     ええええ\nName: おおおおおおお, dtype: object")
+        self.assertEqual(_rep(s), expected)
+
+        # MultiIndex
+        idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'),
+                                         (u'おおお', u'かかかか'), (u'き', u'くく')])
+        s = Series([1, 22, 3333, 44444], index=idx)
+        expected = (u"あ    いい          1\nう    え          22\nおおお  かかかか     3333\n"
+                    u"き    くく      44444\ndtype: int64")
+        self.assertEqual(_rep(s), expected)
+
+        # object dtype, shorter than unicode repr
+        s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ'])
+        expected = (u"1          1\nAB        22\nNaN     3333\n"
+                    u"あああ    44444\ndtype: int64")
+        self.assertEqual(_rep(s), expected)
+
+        # object dtype, longer than unicode repr
+        s = Series([1, 22, 3333, 44444],
+                   index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ'])
+        expected = (u"1                          1\nAB                        22\n"
+                    u"2011-01-01 00:00:00     3333\nあああ                    44444\ndtype: int64")
+        self.assertEqual(_rep(s), expected)
+
+        # truncate
+        with option_context('display.max_rows', 3):
+            s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                       name=u'おおおおおおお')
+
+            expected = (u"0       あ\n     ... \n"
+                        u"3    ええええ\nName: おおおおおおお, dtype: object")
+            self.assertEqual(_rep(s), expected)
+
+            s.index = [u'ああ', u'いいいい', u'う', u'えええ']
+            expected = (u"ああ        あ\n       ... \n"
+                        u"えええ    ええええ\nName: おおおおおおお, dtype: object")
+            self.assertEqual(_rep(s), expected)
+
+        # Enable Unicode option -----------------------------------------
+        with option_context('display.unicode.east_asian_width', True):
+
+            # unicode index
+            s = Series(['a', 'bb', 'CCC', 'D'],
+                       index=[u'あ', u'いい', u'ううう', u'ええええ'])
+            expected = (u"あ            a\nいい         bb\nううう      CCC\n"
+                        u"ええええ      D\ndtype: object")
+            self.assertEqual(_rep(s), expected)
+
+            # unicode values
+            s = Series([u'あ', u'いい', u'ううう', u'ええええ'], index=['a', 'bb', 'c', 'ddd'])
+            expected = (u"a            あ\nbb         いい\nc        ううう\n"
+                        u"ddd    ええええ\ndtype: object")
+            self.assertEqual(_rep(s), expected)
+
+            # both
+            s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                       index=[u'ああ', u'いいいい', u'う', u'えええ'])
+            expected = (u"ああ              あ\nいいいい        いい\nう            ううう\n"
+                        u"えええ      ええええ\ndtype: object")
+            self.assertEqual(_rep(s), expected)
+
+            # unicode footer
+            s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                       index=[u'ああ', u'いいいい', u'う', u'えええ'],
+                       name=u'おおおおおおお')
+            expected = (u"ああ              あ\nいいいい        いい\nう            ううう\n"
+                        u"えええ      ええええ\nName: おおおおおおお, dtype: object")
+            self.assertEqual(_rep(s), expected)
+
+            # MultiIndex
+            idx = pd.MultiIndex.from_tuples([(u'あ', u'いい'), (u'う', u'え'),
+                                             (u'おおお', u'かかかか'), (u'き', u'くく')])
+            s = Series([1, 22, 3333, 44444], index=idx)
+            expected = (u"あ      いい            1\nう      え             22\nおおお  かかかか     3333\n"
+                        u"き      くく        44444\ndtype: int64")
+            self.assertEqual(_rep(s), expected)
+
+            # object dtype, shorter than unicode repr
+            s = Series([1, 22, 3333, 44444], index=[1, 'AB', np.nan, u'あああ'])
+            expected = (u"1             1\nAB           22\nNaN        3333\n"
+                        u"あああ    44444\ndtype: int64")
+            self.assertEqual(_rep(s), expected)
+
+            # object dtype, longer than unicode repr
+            s = Series([1, 22, 3333, 44444],
+                       index=[1, 'AB', pd.Timestamp('2011-01-01'), u'あああ'])
+            expected = (u"1                          1\nAB                        22\n"
+                        u"2011-01-01 00:00:00     3333\nあああ                 44444\ndtype: int64")
+            self.assertEqual(_rep(s), expected)
+
+            # truncate
+            with option_context('display.max_rows', 3):
+                s = Series([u'あ', u'いい', u'ううう', u'ええええ'],
+                           name=u'おおおおおおお')
+                expected = (u"0          あ\n       ...   \n"
+                            u"3    ええええ\nName: おおおおおおお, dtype: object")
+                self.assertEqual(_rep(s), expected)
+
+                s.index = [u'ああ', u'いいいい', u'う', u'えええ']
+                expected = (u"ああ            あ\n            ...   \n"
+                            u"えええ    ええええ\nName: おおおおおおお, dtype: object")
+                self.assertEqual(_rep(s), expected)
+
+            # ambiguous unicode
+            s = Series([u'¡¡', u'い¡¡', u'ううう', u'ええええ'],
+                       index=[u'ああ', u'¡¡¡¡いい', u'¡¡', u'えええ'])
+            expected = (u"ああ              ¡¡\n¡¡¡¡いい        い¡¡\n¡¡            ううう\n"
+                        u"えええ      ええええ\ndtype: object")
+            self.assertEqual(_rep(s), expected)
+
     def test_float_trim_zeros(self):
         vals = [2.08430917305e+10, 3.52205017305e+10, 2.30674817305e+10,
                 2.03954217305e+10, 5.59897817305e+10]
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 75daabe2dab67..81ebc7efdbdd9 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -2,7 +2,7 @@
 # pylint: disable=E1101,E1103,W0232
 
 from datetime import datetime, timedelta, time
-from pandas.compat import range, lrange, lzip, u, zip
+from pandas.compat import range, lrange, lzip, u, zip, PY3
 import operator
 import re
 import nose
@@ -1842,6 +1842,137 @@ def test_conversion_preserves_name(self):
         self.assertEqual(i.name, pd.to_datetime(i).name)
         self.assertEqual(i.name, pd.to_timedelta(i).name)
 
+    def test_string_index_repr(self):
+        # py3/py2 repr can differ because of "u" prefix
+        # which also affects the displayed width of each element
+
+        # short
+        idx = pd.Index(['a', 'bb', 'ccc'])
+        if PY3:
+            expected = u"""Index(['a', 'bb', 'ccc'], dtype='object')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'a', u'bb', u'ccc'], dtype='object')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # multiple lines
+        idx = pd.Index(['a', 'bb', 'ccc'] * 10)
+        if PY3:
+            expected = u"""Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
+       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc',
+       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+      dtype='object')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
+       u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb',
+       u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'],
+      dtype='object')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # truncated
+        idx = pd.Index(['a', 'bb', 'ccc'] * 100)
+        if PY3:
+            expected = u"""Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
+       ...
+       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+      dtype='object', length=300)"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
+       ...
+       u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'],
+      dtype='object', length=300)"""
+            self.assertEqual(unicode(idx), expected)
+
+        # short
+        idx = pd.Index([u'あ', u'いい', u'ううう'])
+        if PY3:
+            expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # multiple lines
+        idx = pd.Index([u'あ', u'いい', u'ううう'] * 10)
+        if PY3:
+            expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+      dtype='object')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+       u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+       u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'],
+      dtype='object')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # truncated
+        idx = pd.Index([u'あ', u'いい', u'ううう'] * 100)
+        if PY3:
+            expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
+       ...
+       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+      dtype='object', length=300)"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+       ...
+       u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'],
+      dtype='object', length=300)"""
+            self.assertEqual(unicode(idx), expected)
+
+        # Enable Unicode option -----------------------------------------
+        with cf.option_context('display.unicode.east_asian_width', True):
+
+            # short
+            idx = pd.Index([u'あ', u'いい', u'ううう'])
+            if PY3:
+                expected = u"""Index(['あ', 'いい', 'ううう'], dtype='object')"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""Index([u'あ', u'いい', u'ううう'], dtype='object')"""
+                self.assertEqual(unicode(idx), expected)
+
+            # multiple lines
+            idx = pd.Index([u'あ', u'いい', u'ううう'] * 10)
+            if PY3:
+                expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ', 'いい', 'ううう'],
+      dtype='object')"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+       u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+       u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+       u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'],
+      dtype='object')"""
+                self.assertEqual(unicode(idx), expected)
+
+            # truncated
+            idx = pd.Index([u'あ', u'いい', u'ううう'] * 100)
+            if PY3:
+                expected = u"""Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+       'あ',
+       ...
+       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+       'ううう'],
+      dtype='object', length=300)"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""Index([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+       u'ううう', u'あ',
+       ...
+       u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+       u'いい', u'ううう'],
+      dtype='object', length=300)"""
+                self.assertEqual(unicode(idx), expected)
+
+
 class TestCategoricalIndex(Base, tm.TestCase):
     _holder = CategoricalIndex
 
@@ -2211,6 +2342,180 @@ def test_equals(self):
         self.assertFalse(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b']).equals(list('aabca')))
         self.assertTrue(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b']).equals(list('aabca') + [np.nan]))
 
+    def test_string_categorical_index_repr(self):
+        # ASCII labels -- short: the whole repr fits on a single line
+        idx = pd.CategoricalIndex(['a', 'bb', 'ccc'])
+        if PY3:  # Python 2 expectations carry u'' prefixes and go through unicode()
+            expected = u"""CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'a', u'bb', u'ccc'], categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # ASCII labels -- multiple lines: 30 values wrap onto continuation lines
+        idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 10)
+        if PY3:
+            expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
+                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
+                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb',
+                  u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
+                  u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc',
+                  u'a', u'bb', u'ccc', u'a', u'bb', u'ccc'],
+                 categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # ASCII labels -- truncated: 300 values, middle replaced by '...' and length reported
+        idx = pd.CategoricalIndex(['a', 'bb', 'ccc'] * 100)
+        if PY3:
+            expected = u"""CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
+                  ...
+                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
+                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a', u'bb',
+                  u'ccc', u'a',
+                  ...
+                  u'ccc', u'a', u'bb', u'ccc', u'a', u'bb', u'ccc', u'a',
+                  u'bb', u'ccc'],
+                 categories=[u'a', u'bb', u'ccc'], ordered=False, dtype='category', length=300)"""
+            self.assertEqual(unicode(idx), expected)
+
+        # ASCII labels -- larger categories: 15 values ('m' repeated), categories list ends in '...'
+        idx = pd.CategoricalIndex(list('abcdefghijklmmo'))
+        if PY3:
+            expected = u"""CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
+                  'm', 'm', 'o'],
+                 categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', u'i', u'j',
+                  u'k', u'l', u'm', u'm', u'o'],
+                 categories=[u'a', u'b', u'c', u'd', u'e', u'f', u'g', u'h', ...], ordered=False, dtype='category')"""
+
+            self.assertEqual(unicode(idx), expected)
+
+        # Japanese labels, east_asian_width option not enabled here -- short
+        idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'])
+        if PY3:
+            expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # Japanese labels, option not enabled -- multiple lines (same wrap points as the ASCII case)
+        idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10)
+        if PY3:
+            expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
+                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+                  u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう',
+                  u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう'],
+                 categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # Japanese labels, option not enabled -- truncated: 300 values, middle replaced by '...'
+        idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100)
+        if PY3:
+            expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
+                  ...
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+                  u'ううう', u'あ',
+                  ...
+                  u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう'],
+                 categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)"""
+            self.assertEqual(unicode(idx), expected)
+
+        # Japanese labels, option not enabled -- larger categories: 15 categories, list ends in '...'
+        idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ'))
+        if PY3:
+            expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
+                  'す', 'せ', 'そ'],
+                 categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')"""
+            self.assertEqual(repr(idx), expected)
+        else:
+            expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', u'け', u'こ',
+                  u'さ', u'し', u'す', u'せ', u'そ'],
+                 categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')"""
+            self.assertEqual(unicode(idx), expected)
+
+        # Enable Unicode option -----------------------------------------
+        with cf.option_context('display.unicode.east_asian_width', True):
+
+            # short: still a single line, identical to the expectation without the option
+            idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'])
+            if PY3:
+                expected = u"""CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう'], categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')"""
+                self.assertEqual(unicode(idx), expected)
+
+            # multiple lines: fewer values per line than in the same case without the option
+            idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 10)
+            if PY3:
+                expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+                  'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ', u'いい', u'ううう'],
+                 categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category')"""
+                self.assertEqual(unicode(idx), expected)
+
+            # truncated: 300 values, middle replaced by '...' and length reported
+            idx = pd.CategoricalIndex([u'あ', u'いい', u'ううう'] * 100)
+            if PY3:
+                expected = u"""CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
+                  'ううう', 'あ',
+                  ...
+                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
+                  'あ', 'いい', 'ううう'],
+                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""CategoricalIndex([u'あ', u'いい', u'ううう', u'あ', u'いい', u'ううう', u'あ',
+                  u'いい', u'ううう', u'あ',
+                  ...
+                  u'ううう', u'あ', u'いい', u'ううう', u'あ', u'いい',
+                  u'ううう', u'あ', u'いい', u'ううう'],
+                 categories=[u'あ', u'いい', u'ううう'], ordered=False, dtype='category', length=300)"""
+                self.assertEqual(unicode(idx), expected)
+
+            # larger categories: 15 categories, categories list ends in '...'
+            idx = pd.CategoricalIndex(list(u'あいうえおかきくけこさしすせそ'))
+            if PY3:
+                expected = u"""CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
+                  'さ', 'し', 'す', 'せ', 'そ'],
+                 categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')"""
+                self.assertEqual(repr(idx), expected)
+            else:
+                expected = u"""CategoricalIndex([u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く',
+                  u'け', u'こ', u'さ', u'し', u'す', u'せ', u'そ'],
+                 categories=[u'あ', u'い', u'う', u'え', u'お', u'か', u'き', u'く', ...], ordered=False, dtype='category')"""
+                self.assertEqual(unicode(idx), expected)
+
 
 class Numeric(Base):