ENH/CLN: redo pprint_warts merge, rebased PR lost the show

y-p · y-p · commit 4aea3f28ca38 · 2013-04-25T14:56:17.000+03:00
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -1858,7 +1858,7 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds):
     return fmt % body
 
 
-def _pprint_dict(seq, _nest_lvl=0):
+def _pprint_dict(seq, _nest_lvl=0,**kwds):
     """
     internal. pprinter for iterables. you should probably use pprint_thing()
     rather then calling this directly.
@@ -1871,15 +1871,17 @@ def _pprint_dict(seq, _nest_lvl=0):
     nitems = get_option("max_seq_items") or len(seq)
 
     for k, v in seq.items()[:nitems]:
-        pairs.append(pfmt % (pprint_thing(k,_nest_lvl+1), pprint_thing(v,_nest_lvl+1)))
+        pairs.append(pfmt % (pprint_thing(k,_nest_lvl+1,**kwds),
+                             pprint_thing(v,_nest_lvl+1,**kwds)))
 
     if nitems < len(seq):
         return fmt % (", ".join(pairs) + ", ...")
     else:
         return fmt % ", ".join(pairs)
 
 
-def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False):
+def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
+                 quote_strings=False):
     """
     This function is the sanctioned way of converting objects
     to a unicode representation.
@@ -1904,26 +1906,10 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False):
     result - unicode object on py2, str on py3. Always Unicode.
 
     """
-
-    if thing is None:
-        result = ''
-    elif (py3compat.PY3 and hasattr(thing, '__next__')) or \
-            hasattr(thing, 'next'):
-        return unicode(thing)
-    elif (isinstance(thing, dict) and
-          _nest_lvl < get_option("display.pprint_nest_depth")):
-        result = _pprint_dict(thing, _nest_lvl)
-    elif _is_sequence(thing) and _nest_lvl < \
-            get_option("display.pprint_nest_depth"):
-        result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars)
-    else:
-        # when used internally in the package, everything
-        # should be unicode text. However as an aid to transition
-        # we also accept utf8 encoded strings,
-        # if that's not it either, we have no way of knowing,
-        # and the user should deal with it himself.
-        # we resort to utf-8 with replacing errors, rather then throwing
-        # an exception.
+    def as_escaped_unicode(thing,escape_chars=escape_chars):
+        # Unicode is fine; otherwise we try to decode using utf-8 with
+        # 'replace'. If that's not it either, we have no way of knowing
+        # and the user should deal with it themselves.
 
         try:
             result = unicode(thing)  # we should try this first
@@ -1946,6 +1932,27 @@ def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False):
         for c in escape_chars:
             result = result.replace(c, translate[c])
 
+        return unicode(result)
+
+    if (py3compat.PY3 and hasattr(thing, '__next__')) or \
+            hasattr(thing, 'next'):
+        return unicode(thing)
+    elif (isinstance(thing, dict) and
+          _nest_lvl < get_option("display.pprint_nest_depth")):
+        result = _pprint_dict(thing, _nest_lvl,quote_strings=True)
+    elif _is_sequence(thing) and _nest_lvl < \
+            get_option("display.pprint_nest_depth"):
+        result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
+                             quote_strings=quote_strings)
+    elif isinstance(thing,basestring) and quote_strings:
+        if py3compat.PY3:
+            fmt = "'%s'"
+        else:
+            fmt = "u'%s'"
+        result = fmt % as_escaped_unicode(thing)
+    else:
+        result = as_escaped_unicode(thing)
+
     return unicode(result)  # always unicode
 
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -175,7 +175,7 @@ def __unicode__(self):
         else:
             data = self.format()
 
-        prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n'))
+        prepr = com.pprint_thing(data, escape_chars=('\t', '\r', '\n'),quote_strings=True)
         return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)
 
     def __repr__(self):
@@ -1506,7 +1506,8 @@ def __unicode__(self):
         else:
             values = self.format()
 
-        summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n'))
+        summary = com.pprint_thing(values, escape_chars=('\t', '\r', '\n'),
+                                   quote_strings=True)
 
         np.set_printoptions(threshold=options['threshold'])
 
diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py
@@ -288,30 +288,6 @@ def test_ensure_platform_int():
 #         expected = u"\u05d0".encode('utf-8')
 #         assert (result == expected)
 
-
-def test_pprint_thing():
-    if py3compat.PY3:
-        raise nose.SkipTest
-
-    pp_t = com.pprint_thing
-
-    assert(pp_t('a') == u'a')
-    assert(pp_t(u'a') == u'a')
-    assert(pp_t(None) == '')
-    assert(pp_t(u'\u05d0') == u'\u05d0')
-    assert(pp_t((u'\u05d0', u'\u05d1')) == u'(\u05d0, \u05d1)')
-    assert(pp_t((u'\u05d0', (u'\u05d1', u'\u05d2'))) ==
-           u'(\u05d0, (\u05d1, \u05d2))')
-    assert(pp_t(('foo', u'\u05d0', (u'\u05d0', u'\u05d0'))) ==
-           u'(foo, \u05d0, (\u05d0, \u05d0))')
-
-    # escape embedded tabs in string
-    # GH #2038
-    assert not "\t" in pp_t("a\tb", escape_chars=("\t",))
-
-    assert(pp_t((1,)) == u'(1,)')
-    assert("set" in pp_t(set([1,2,3]))) # it works
-
 class TestTake(unittest.TestCase):
 
     _multiprocess_can_split_ = True
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -16,7 +16,8 @@
 import numpy as np
 
 from pandas import DataFrame, Series, Index
-from pandas.util.py3compat import lzip
+from pandas.util.py3compat import lzip, PY3
+
 import pandas.core.format as fmt
 import pandas.util.testing as tm
 from pandas.util.terminal import get_terminal_size
@@ -136,6 +137,14 @@ def test_repr_obeys_max_seq_limit(self):
         with option_context("display.max_seq_items",5):
             self.assertTrue(len(com.pprint_thing(range(1000)))< 100)
 
+    def test_repr_is_valid_construction_code(self):
+        import pandas as pd
+
+        # for the case of Index, where the repr is traditional rather than stylized
+        idx = pd.Index(['a','b'])
+        res = eval("pd."+repr(idx))
+        tm.assert_series_equal(Series(res),Series(idx))
+
     def test_repr_should_return_str(self):
         # http://docs.python.org/py3k/reference/datamodel.html#object.__repr__
         # http://docs.python.org/reference/datamodel.html#object.__repr__
@@ -540,7 +549,7 @@ def test_to_html_index_formatter(self):
     <tr style="text-align: right;">
       <th></th>
       <th>foo</th>
-      <th></th>
+      <th>None</th>
     </tr>
   </thead>
   <tbody>
@@ -637,6 +646,29 @@ def test_setting(value, nrows=3, ncols=2):
         test_setting(3)
         self.assertRaises(ValueError, test_setting, 'string')
 
+    def test_pprint_thing(self):
+        import nose
+        from pandas.core.common import pprint_thing as pp_t
+
+        if PY3:
+            raise nose.SkipTest()
+
+        self.assertEquals(pp_t('a') , u'a')
+        self.assertEquals(pp_t(u'a') , u'a')
+        self.assertEquals(pp_t(None) , 'None')
+        self.assertEquals(pp_t(u'\u05d0',quote_strings=True) , u"u'\u05d0'")
+        self.assertEquals(pp_t(u'\u05d0',quote_strings=False) , u'\u05d0')
+        self.assertEquals(pp_t((u'\u05d0', u'\u05d1'),quote_strings=True) ,
+                          u"(u'\u05d0', u'\u05d1')")
+        self.assertEquals(pp_t((u'\u05d0', (u'\u05d1', u'\u05d2')),quote_strings=True) ,
+               u"(u'\u05d0', (u'\u05d1', u'\u05d2'))")
+        self.assertEquals(pp_t(('foo', u'\u05d0', (u'\u05d0', u'\u05d0')),quote_strings=True)
+                          , u"(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")
+
+        # escape embedded tabs in string
+        # GH #2038
+        self.assertTrue(not "\t" in pp_t("a\tb", escape_chars=("\t",)))
+
     def test_wide_repr(self):
         with option_context('mode.sim_interactive', True):
             col = lambda l, k: [tm.rands(k) for _ in xrange(l)]
@@ -1316,9 +1348,8 @@ def test_dict_entries(self):
         df = DataFrame({'A': [{'a': 1, 'b': 2}]})
 
         val = df.to_string()
-        # to be fixed ot 'a': 1 when #3038 comes to town
-        self.assertTrue("a: 1" in val)
-        self.assertTrue("b: 2" in val)
+        self.assertTrue("'a': 1" in val)
+        self.assertTrue("'b': 2" in val)
 
     def test_to_latex(self):
         # it works!