Skip to content

Period.strftime should return unicode strings always #3410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
3 commits merged into from Apr 21, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import pandas.core.common as com
from pandas.core.common import isnull
from pandas.util import py3compat

from pandas.lib import Timestamp
import pandas.lib as lib
Expand Down Expand Up @@ -264,12 +265,49 @@ def __repr__(self):
base, mult = _gfc(self.freq)
formatted = tslib.period_format(self.ordinal, base)
freqstr = _freq_mod._reverse_period_code_map[base]

if not py3compat.PY3:
encoding = com.get_option("display.encoding")
formatted = formatted.encode(encoding)

return "Period('%s', '%s')" % (formatted, freqstr)

def __str__(self):
    """
    Return the native ``str`` representation of this Period.

    Invoked by str(period) in both py2/py3.
    Yields a bytestring in py2 and a unicode string in py3.
    """
    # The native str type is bytes on py2 and unicode on py3, so pick the
    # conversion that matches the running interpreter and delegate to it.
    to_native = self.__unicode__ if py3compat.PY3 else self.__bytes__
    return to_native()

def __bytes__(self):
    """
    Return this Period rendered as an encoded bytestring.

    Invoked by bytes(period) in py3 only.
    Yields a bytestring in both py2/py3.
    """
    display_encoding = com.get_option("display.encoding")
    text = self.__unicode__()
    # 'replace' substitutes characters the display encoding cannot
    # represent instead of raising an encoding error.
    return text.encode(display_encoding, 'replace')

def __unicode__(self):
    """
    Return the unicode string representation of this Period.

    Invoked by unicode(period) in py2 only. Yields a unicode string in both
    py2/py3.
    """
    # Only the base frequency code is needed for formatting; the multiplier
    # returned alongside it is not used here.
    base, _ = _gfc(self.freq)
    formatted = tslib.period_format(self.ordinal, base)

    # Format through a unicode template so the result is never a bytestring.
    # (A stale early `return ("%s" % formatted)` used to sit here and made
    # the coercion and the check below unreachable.)
    value = u"%s" % formatted

    # NOTE(review): `unicode` is a Python 2 builtin; presumably the py3 build
    # relies on 2to3 to rewrite it to `str` -- confirm.
    assert isinstance(value, unicode)

    return value


def strftime(self, fmt):
"""
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,9 @@ def test_repr(self):

def test_strftime(self):
    """Check that Period.strftime formats a second-frequency period and returns unicode."""
    p = Period('2000-1-1 12:34:12', freq='S')
    # Format once and reuse the result for both checks (the superseded
    # pre-change assertion used to call strftime a second time).
    res = p.strftime('%Y-%m-%d %H:%M:%S')
    self.assert_(res == '2000-01-01 12:34:12')
    # GH3363: strftime must hand back a unicode string, not a bytestring
    self.assert_(isinstance(res, unicode))

def test_sub_delta(self):
left, right = Period('2011', freq='A'), Period('2007', freq='A')
Expand Down
5 changes: 5 additions & 0 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2283,6 +2283,7 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"),
cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^"]

cdef _period_strftime(int64_t value, int freq, object fmt):
import sys
cdef:
Py_ssize_t i
date_info dinfo
Expand Down Expand Up @@ -2325,6 +2326,10 @@ cdef _period_strftime(int64_t value, int freq, object fmt):
if not PyString_Check(result):
result = str(result)

# GH3363
if sys.version_info[0] == 2:
result = result.decode('utf-8','strict')

return result

# period accessors
Expand Down
114 changes: 41 additions & 73 deletions scripts/use_build_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,21 @@
Tested on releases back to 0.7.0.

"""
import argparse
argparser = argparse.ArgumentParser(description="""
'Program description.
""".strip())

try:
import argparse
argparser = argparse.ArgumentParser(description="""
'Program description.
""".strip())

argparser.add_argument('-f', '--force-overwrite',
argparser.add_argument('-f', '--force-overwrite',
default=False,
help='Setting this will overwrite any existing cache results for the current commit',
action='store_true')
argparser.add_argument('-d', '--debug',
argparser.add_argument('-d', '--debug',
default=False,
help='Report cache hits/misses',
action='store_true')

args = argparser.parse_args()
except:
class Foo(object):
debug=False
force_overwrite=False

args = Foo() # for 2.6, no argparse
args = argparser.parse_args()

#print args.accumulate(args.integers)

Expand Down Expand Up @@ -78,28 +70,18 @@ class Foo(object):
import shutil
import multiprocessing
pyver = "%d.%d" % (sys.version_info[:2])
fileq = ["pandas"]
files = ["pandas"]
to_process = dict()
orig_hashes= dict((f.split("-")[0],f) for f in os.listdir(BUILD_CACHE_DIR)
if "-" in f and f.endswith(pyver))
post_hashes= dict((f.split("-")[1],f) for f in os.listdir(BUILD_CACHE_DIR)
if "-" in f and f.endswith(pyver))

# retrieve the hashes existing in the cache
orig_hashes=dict()
post_hashes=dict()
for path,dirs,files in os.walk(os.path.join(BUILD_CACHE_DIR,'pandas')):
for f in files:
s=f.split(".py-")[-1]
try:
prev_h,post_h,ver = s.split('-')
if ver == pyver:
orig_hashes[prev_h] = os.path.join(path,f)
post_hashes[post_h] = os.path.join(path,f)
except:
pass

while fileq:
f = fileq.pop()
while files:
f = files.pop()

if os.path.isdir(f):
fileq.extend([os.path.join(f,x) for x in os.listdir(f)])
files.extend([os.path.join(f,x) for x in os.listdir(f)])
else:
if not f.endswith(".py"):
continue
Expand All @@ -108,54 +90,40 @@ class Foo(object):
h = sha1(open(f,"rb").read()).hexdigest()
except IOError:
to_process[h] = f
else:
if h in orig_hashes and not BC_FORCE_OVERWRITE:
src = orig_hashes[h]
if BC_DEBUG:
print("2to3 cache hit %s,%s" % (f,h))
shutil.copyfile(src,f)
elif h not in post_hashes:
# we're not in a dev dir with already processed files
if BC_DEBUG:
print("2to3 cache miss (will process) %s,%s" % (f,h))
to_process[h] = f
if h in orig_hashes and not BC_FORCE_OVERWRITE:
src = os.path.join(BUILD_CACHE_DIR,orig_hashes[h])
if BC_DEBUG:
print("2to3 cache hit %s,%s" % (f,h))
shutil.copyfile(src,f)
elif h not in post_hashes:

# we're not in a dev dir with already processed files
if BC_DEBUG:
print("2to3 cache miss %s,%s" % (f,h))
print("2to3 will process " + f)
to_process[h] = f

avail_fixes = set(refactor.get_fixers_from_package("lib2to3.fixes"))
avail_fixes.discard('lib2to3.fixes.fix_next')
t=refactor.RefactoringTool(avail_fixes)
if to_process:
print("Starting 2to3 refactoring...")
for orig_h,f in to_process.items():
print("Starting 2to3 refactoring...")
for f in to_process.values():
if BC_DEBUG:
print("2to3 on %s" % f)
try:
t.refactor([f],True)
post_h = sha1(open(f, "rb").read()).hexdigest()
cached_fname = f + "-" + post_h + "-" + pyver
if BC_DEBUG:
print("2to3 on %s" % f)
try:
t.refactor([f],True)
post_h = sha1(open(f, "rb").read()).hexdigest()
cached_fname = f + '-' + orig_h + '-' + post_h + '-' + pyver
path = os.path.join(BUILD_CACHE_DIR, cached_fname)
pathdir =os.path.dirname(path)
if BC_DEBUG:
print("cache put %s in %s" % (f, path))
try:
os.makedirs(pathdir)
except OSError as exc:
import errno
if exc.errno == errno.EEXIST and os.path.isdir(pathdir):
pass
else:
raise

shutil.copyfile(f, path)
print("cache put %s,%s in %s" % (f, h, cached_fname))
shutil.copyfile(f, os.path.join(BUILD_CACHE_DIR, cached_fname))

except Exception as e:
print("While processing %s 2to3 raised: %s" % (f,str(e)))

pass
print("2to3 done refactoring.")
except:
pass
print("2to3 done refactoring.")

except Exception as e:
if not isinstance(e,ZeroDivisionError):
print( "Exception: " + str(e))
print( "Exception: " + str(e))
BUILD_CACHE_DIR = None

class CompilationCacheMixin(object):
Expand Down