Skip to content

np.vectorize can cause failures in TextParser if converter returns ints and floats #753

Closed
@wesm

Description

@wesm

import StringIO
import numpy as np
import pandas
# Sample input: the "score" column contains ranges such as "2-5", the "days"
# column contains a value with a trailing "+", and each of those two columns
# has one empty cell.
csv = """\
id,score,days
1,2,12
2,2-5,
3,,14+
4,6-12,2
"""

def convert_days(x):
    """Convert a raw ``days`` cell into a number.

    Surrounding whitespace is ignored.  A blank cell yields ``np.nan``.
    A value ending in ``'+'`` (e.g. ``'14+'``) yields the integer before
    the sign plus one; any other value is parsed with ``int``.

    Note that the result is an ``int`` for populated cells and a float
    NaN for blank ones, so the return type is mixed.
    """
    text = x.strip()
    if not text:
        return np.nan

    # "N+" is read as N + 1.
    if text.endswith('+'):
        return int(text[:-1]) + 1
    return int(text)



def convert_score(x):
    """Convert a raw ``score`` cell into a float.

    Surrounding whitespace is ignored.  A blank cell yields ``np.nan``.
    A range such as ``'2-5'`` yields the midpoint of its two integer
    endpoints; any other value is parsed with ``float``.
    """
    text = x.strip()
    if not text:
        return np.nan

    # A dash at index 0 (or no dash at all) is not a range separator,
    # so the value is parsed as a plain float.
    if text.find('-') <= 0:
        return float(text)

    low, high = map(int, text.split('-'))
    return 0.5 * (low + high)

# Wrap the CSV text in an in-memory file-like object to pass to read_csv.
fh = StringIO.StringIO(csv)

# Parse with a custom converter for the "score" and "days" columns.  Per the
# issue title, this is the call that can fail when a converter returns a mix
# of ints and floats.
p = pandas.read_csv(fh, converters={'score':convert_score,
'days':convert_days}, na_values=[-1,'',None])

print p

cc @jdh2358

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions