
Commit 1ee7aa3

PythonAPI/cocoeval.py add keypoint evaluation tools!
1 parent 1a32502 commit 1ee7aa3

2 files changed: +154 -51 lines changed

PythonAPI/pycocotools/coco.py (+4 -1)
@@ -72,6 +72,7 @@ def __init__(self, annotation_file=None):
             print 'loading annotations into memory...'
             tic = time.time()
             dataset = json.load(open(annotation_file, 'r'))
+            assert type(dataset)==dict, "annotation file format %s not supported"%(type(dataset))
             print 'Done (t=%0.2fs)'%(time.time()- tic)
             self.dataset = dataset
             self.createIndex()
@@ -332,8 +333,10 @@ def loadRes(self, resFile):
                 s = ann['keypoints']
                 x = s[0::3]
                 y = s[1::3]
-                ann['area'] = float((np.max(x)-np.min(x))*(np.max(y)-np.min(y)))
+                x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+                ann['area'] = (x1-x0)*(y1-y0)
                 ann['id'] = id + 1
+                ann['bbox'] = [x0,y0,x1-x0,y1-y0]
         print 'DONE (t=%0.2fs)'%(time.time()- tic)

         res.dataset['annotations'] = anns
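
Editor's note: for keypoint result files, loadRes now derives both 'area' and 'bbox' from the extent of the predicted keypoints. A rough standalone sketch of that derivation (the sample triples are invented for illustration):

    import numpy as np

    # hypothetical result entry: flat [x1,y1,v1, x2,y2,v2, ...] keypoint triples
    keypoints = [100., 50., 2, 140., 60., 2, 120., 200., 1]
    x, y = keypoints[0::3], keypoints[1::3]
    x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
    area = (x1 - x0) * (y1 - y0)        # tight extent of the keypoints, as in the diff
    bbox = [x0, y0, x1 - x0, y1 - y0]   # [x, y, width, height]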

PythonAPI/pycocotools/cocoeval.py (+150 -50)
@@ -26,8 +26,9 @@ class COCOeval:
     #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
     #  areaRng    - [...] A=4 object area ranges for evaluation
     #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
-    #  useSegm    - [1] if true evaluate against ground-truth segments
-    #  useCats    - [1] if true use category labels for evaluation    # Note: if useSegm=0 the evaluation is run on bounding boxes.
+    #  iouType    - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
+    #               iouType replaced the now DEPRECATED useSegm parameter.
+    #  useCats    - [1] if true use category labels for evaluation
     # Note: if useCats=0 category labels are ignored as in proposal scoring.
     # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
     #
@@ -56,21 +57,23 @@ class COCOeval:
     # Data, paper, and tutorials available at: http://mscoco.org/
     # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
     # Licensed under the Simplified BSD License [see coco/license.txt]
-    def __init__(self, cocoGt=None, cocoDt=None):
+    def __init__(self, cocoGt=None, cocoDt=None, iouType="segm"):
         '''
         Initialize CocoEval using coco APIs for gt and dt
         :param cocoGt: coco object with ground truth annotations
         :param cocoDt: coco object with detection results
         :return: None
         '''
+        if not iouType:
+            print("iouType not specified. use default iouType segm")
         self.cocoGt   = cocoGt              # ground truth COCO API
         self.cocoDt   = cocoDt              # detections COCO API
         self.params   = {}                  # evaluation parameters
         self.evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
         self.eval     = {}                  # accumulated evaluation results
         self._gts = defaultdict(list)       # gt for evaluation
         self._dts = defaultdict(list)       # dt for evaluation
-        self.params = Params()              # parameters
+        self.params = Params(iouType=iouType) # parameters
         self._paramsEval = {}               # parameters for evaluation
         self.stats = []                     # result summarization
         self.ious = {}                      # ious between all gts and dts
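
Editor's note: the evaluation mode is now chosen at construction time. A minimal usage sketch, assuming cocoGt and cocoDt are COCO objects already loaded with keypoint ground truth and results:

    from pycocotools.cocoeval import COCOeval

    cocoEval = COCOeval(cocoGt, cocoDt, iouType='keypoints')  # or 'segm' / 'bbox'
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()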
@@ -84,7 +87,6 @@ def _prepare(self):
         Prepare ._gts and ._dts for evaluation based on params
         :return: None
         '''
-        #
         def _toMask(objs, coco):
             # modify segmentation by reference
             for obj in objs:
@@ -114,9 +116,16 @@ def _toMask(objs, coco):
             gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
             dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))

-        if p.useSegm:
+        # convert ground truth to mask if iouType == "segm"
+        if p.iouType == "segm":
             _toMask(gts, self.cocoGt)
             _toMask(dts, self.cocoDt)
+        # set ignore flag
+        for gt in gts:
+            gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+            gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+            if p.iouType == "keypoints":
+                gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
         self._gts = defaultdict(list)       # gt for evaluation
         self._dts = defaultdict(list)       # dt for evaluation
         for gt in gts:
@@ -134,6 +143,10 @@ def evaluate(self):
         tic = time.time()
         print 'Running per image evaluation... '
         p = self.params
+        # add backward compatibility if useSegm is specified in params
+        if not p.useSegm is None:
+            p.iouType = "segm" if p.useSegm == 1 else "bbox"
+            print("useSegm (deprecated) is not None. Running %s evaluation"%(p.iouType))
         p.imgIds = list(np.unique(p.imgIds))
         if p.useCats:
             p.catIds = list(np.unique(p.catIds))
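
Editor's note: the backward-compatibility block above means older scripts that still set the deprecated useSegm flag keep working; roughly:

    cocoEval = COCOeval(cocoGt, cocoDt)   # iouType defaults to 'segm'
    cocoEval.params.useSegm = 0           # legacy flag set by older code
    cocoEval.evaluate()                   # remapped internally to iouType='bbox'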
@@ -144,7 +157,10 @@ def evaluate(self):
         # loop through images, area range, max detection number
         catIds = p.catIds if p.useCats else [-1]

-        computeIoU = self.computeIoU
+        if p.iouType == "segm" or p.iouType == "bbox":
+            computeIoU = self.computeIoU
+        elif p.iouType == "keypoints":
+            computeIoU = self.computeOks
         self.ious = {(imgId, catId): computeIoU(imgId, catId) \
                         for imgId in p.imgIds
                         for catId in catIds}
@@ -170,28 +186,73 @@ def computeIoU(self, imgId, catId):
             dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
         if len(gt) == 0 and len(dt) ==0:
             return []
-        dt = sorted(dt, key=lambda x: -x['score'])
+        inds = np.argsort(map(lambda x:-x["score"], dt), kind='mergesort')
+        dt = [dt[i] for i in inds]
         if len(dt) > p.maxDets[-1]:
             dt=dt[0:p.maxDets[-1]]

-        if p.useSegm:
+        if p.iouType == "segm":
             g = [g['segmentation'] for g in gt]
             d = [d['segmentation'] for d in dt]
-        else:
+        elif p.iouType == "bbox":
             g = [g['bbox'] for g in gt]
             d = [d['bbox'] for d in dt]
+        else:
+            raise Exception("unknown iouType for iou computation")

         # compute iou between each dt and gt region
         iscrowd = [int(o['iscrowd']) for o in gt]
         ious = mask.iou(d,g,iscrowd)
         return ious

+    def computeOks(self, imgId, catId):
+        p = self.params
+        # dimension here should be Nxm
+        gts = self._gts[imgId, catId]
+        dts = self._dts[imgId, catId]
+        inds = np.argsort(map(lambda x: -x["score"], dts), kind='mergesort')
+        dts = [dts[i] for i in inds]
+        if len(dts) > p.maxDets[-1]:
+            dts = dts[0:p.maxDets[-1]]
+        # if len(gts) == 0 and len(dts) == 0:
+        if len(gts) == 0 or len(dts) == 0:
+            return []
+        ious = np.zeros((len(dts), len(gts)))
+        sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89])/10.0
+        vars = (sigmas * 2)**2
+        k = len(sigmas)
+        # compute oks between each detection and ground truth object
+        for j, gt in enumerate(gts):
+            # create bounds for ignore regions (double the gt bbox)
+            g = np.array(gt["keypoints"])
+            xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
+            k1 = np.count_nonzero(vg > 0)
+            bb = gt["bbox"]
+            x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
+            y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
+            for i, dt in enumerate(dts):
+                d = np.array(dt["keypoints"])
+                xd = d[0::3]; yd = d[1::3]
+                if k1 > 0:
+                    # measure the per-keypoint distance if keypoints visible
+                    dx = xd - xg
+                    dy = yd - yg
+                else:
+                    # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
+                    z = np.zeros((k))
+                    dx = np.max((z, x0-xd), axis=0) + np.max((z, xd-x1), axis=0)
+                    dy = np.max((z, y0-yd), axis=0) + np.max((z, yd-y1), axis=0)
+                e = (dx**2 + dy**2) / vars / (gt["area"] + np.spacing(1)) / 2
+                if k1 > 0:
+                    e = e[vg > 0]
+                ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
+        return ious
+
     def evaluateImg(self, imgId, catId, aRng, maxDet):
         '''
         perform evaluation for single category and image
         :return: dict (single image results)
         '''
-        #
         p = self.params
         if p.useCats:
             gt = self._gts[imgId,catId]
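
Editor's note: computeOks scores each detection/ground-truth pair with the object keypoint similarity, OKS = mean_i exp(-d_i^2 / (2 * s^2 * k_i^2)) over the labeled keypoints i, where d_i is the distance between predicted and ground-truth keypoint, s^2 is the ground-truth area, and k_i = 2*sigma_i is the per-keypoint constant. A self-contained sketch of the visible-keypoint branch (function and argument names are invented; assumes at least one labeled keypoint):

    import numpy as np

    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62,
                       1.07, 1.07, .87, .87, .89, .89]) / 10.0
    kappa = (2 * sigmas) ** 2

    def oks(gt_kpts, dt_kpts, gt_area):
        # gt_kpts / dt_kpts: flat [x1,y1,v1, ...] arrays of 17 COCO keypoints
        g, d = np.asarray(gt_kpts, dtype=float), np.asarray(dt_kpts, dtype=float)
        xg, yg, vg = g[0::3], g[1::3], g[2::3]
        xd, yd = d[0::3], d[1::3]
        e = ((xd - xg) ** 2 + (yd - yg) ** 2) / kappa / (gt_area + np.spacing(1)) / 2
        e = e[vg > 0]                       # only labeled keypoints contribute
        return np.sum(np.exp(-e)) / e.shape[0]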
@@ -203,23 +264,19 @@ def evaluateImg(self, imgId, catId, aRng, maxDet):
             return None

         for g in gt:
-            if 'ignore' not in g:
-                g['ignore'] = 0
-            if g['iscrowd'] == 1 or g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
+            if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
                 g['_ignore'] = 1
             else:
                 g['_ignore'] = 0

         # sort dt highest score first, sort gt ignore last
-        # gt = sorted(gt, key=lambda x: x['_ignore'])
-        gtind = [ind for (ind, g) in sorted(enumerate(gt), key=lambda (ind, g): g['_ignore']) ]
-
-        gt = [gt[ind] for ind in gtind]
-        dt = sorted(dt, key=lambda x: -x['score'])[0:maxDet]
+        gtind = np.argsort([g['_ignore'] for g in gt], kind="mergesort")
+        gt = map(lambda i: gt[i], gtind)
+        dtind = np.argsort([-d['score'] for d in dt], kind="mergesort")
+        dt = map(lambda i: dt[i], dtind[0:maxDet])
         iscrowd = [int(o['iscrowd']) for o in gt]
         # load computed ious
-        N_iou = len(self.ious[imgId, catId])
-        ious = self.ious[imgId, catId][0:maxDet, np.array(gtind)] if N_iou >0 else self.ious[imgId, catId]
+        ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

         T = len(p.iouThrs)
         G = len(gt)
@@ -244,7 +301,7 @@ def evaluateImg(self, imgId, catId, aRng, maxDet):
                     # continue to next gt unless better match made
                     if ious[dind,gind] < iou:
                         continue
-                    # match successful and best so far, store appropriately
+                    # if match successful and best so far, store appropriately
                     iou=ious[dind,gind]
                     m=gind
                 # if match made store id of match for both dt and gt
@@ -305,7 +362,6 @@ def accumulate(self, p = None):
         m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
         a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
         i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
-        # K0 = len(_pe.catIds)
         I0 = len(_pe.imgIds)
         A0 = len(_pe.areaRng)
         # retrieve E at each category, area range, and max number of detections
@@ -326,8 +382,8 @@ def accumulate(self, p = None):

                 dtm  = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
                 dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds]
-                gtIg = np.concatenate([e['gtIgnore'] for e in E])
-                npig = len([ig for ig in gtIg if ig == 0])
+                gtIg = np.concatenate([e['gtIgnore'] for e in E])
+                npig = np.count_nonzero(gtIg==0 )
                 if npig == 0:
                     continue
                 tps = np.logical_and( dtm, np.logical_not(dtIg) )
@@ -356,7 +412,7 @@ def accumulate(self, p = None):
                         if pr[i] > pr[i-1]:
                             pr[i-1] = pr[i]

-                    inds = np.searchsorted(rc, p.recThrs)
+                    inds = np.searchsorted(rc, p.recThrs, side='left')
                     try:
                         for ri, pi in enumerate(inds):
                             q[ri] = pr[pi]
@@ -387,43 +443,65 @@ def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
             areaStr = areaRng
             maxDetsStr = '%d'%(maxDets)

-            aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng == areaRng]
-            mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets]
+            aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
             if ap == 1:
                 # dimension of precision: [TxRxKxAxM]
                 s = self.eval['precision']
                 # IoU
                 if iouThr is not None:
                     t = np.where(iouThr == p.iouThrs)[0]
                     s = s[t]
-                # areaRng
                 s = s[:,:,:,aind,mind]
             else:
                 # dimension of recall: [TxKxAxM]
                 s = self.eval['recall']
+                if iouThr is not None:
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
                 s = s[:,:,aind,mind]
             if len(s[s>-1])==0:
                 mean_s = -1
             else:
                 mean_s = np.mean(s[s>-1])
             print iStr.format(titleStr, typeStr, iouStr, areaStr, maxDetsStr, '%.3f'%(float(mean_s)))
             return mean_s
-
+        def _summarizeDets():
+            stats = np.zeros((12,))
+            stats[0] = _summarize(1)
+            stats[1] = _summarize(1, iouThr=.5)
+            stats[2] = _summarize(1, iouThr=.75)
+            stats[3] = _summarize(1, areaRng='small')
+            stats[4] = _summarize(1, areaRng='medium')
+            stats[5] = _summarize(1, areaRng='large')
+            stats[6] = _summarize(0, maxDets=1)
+            stats[7] = _summarize(0, maxDets=10)
+            stats[8] = _summarize(0, maxDets=100)
+            stats[9] = _summarize(0, areaRng='small')
+            stats[10] = _summarize(0, areaRng='medium')
+            stats[11] = _summarize(0, areaRng='large')
+            return stats
+        def _summarizeKps():
+            stats = np.zeros((10,))
+            stats[0] = _summarize(1, maxDets=20)
+            stats[1] = _summarize(1, maxDets=20, iouThr=.5)
+            stats[2] = _summarize(1, maxDets=20, iouThr=.75)
+            stats[3] = _summarize(1, maxDets=20, areaRng='medium')
+            stats[4] = _summarize(1, maxDets=20, areaRng='large')
+            stats[5] = _summarize(0, maxDets=20)
+            stats[6] = _summarize(0, maxDets=20, iouThr=.5)
+            stats[7] = _summarize(0, maxDets=20, iouThr=.75)
+            stats[8] = _summarize(0, maxDets=20, areaRng='medium')
+            stats[9] = _summarize(0, maxDets=20, areaRng='large')
+            return stats
         if not self.eval:
             raise Exception('Please run accumulate() first')
-        self.stats = np.zeros((12,))
-        self.stats[0] = _summarize(1)
-        self.stats[1] = _summarize(1,iouThr=.5)
-        self.stats[2] = _summarize(1,iouThr=.75)
-        self.stats[3] = _summarize(1,areaRng='small')
-        self.stats[4] = _summarize(1,areaRng='medium')
-        self.stats[5] = _summarize(1,areaRng='large')
-        self.stats[6] = _summarize(0,maxDets=1)
-        self.stats[7] = _summarize(0,maxDets=10)
-        self.stats[8] = _summarize(0,maxDets=100)
-        self.stats[9] = _summarize(0,areaRng='small')
-        self.stats[10] = _summarize(0,areaRng='medium')
-        self.stats[11] = _summarize(0,areaRng='large')
+        iouType = self.params.iouType
+        if iouType == "segm" or iouType == "bbox":
+            summarize = _summarizeDets
+        elif iouType == "keypoints":
+            summarize = _summarizeKps
+        self.stats = summarize()

     def __str__(self):
         self.summarize()
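
Editor's note: after summarize(), self.stats has 12 entries for 'segm'/'bbox' but 10 for 'keypoints', all computed at maxDets=20 and without a 'small' area range. A hedged sketch for reading the keypoint metrics back (cocoEval is assumed to be an already-evaluated COCOeval instance):

    # order follows _summarizeKps above; AP/AR are averaged over OKS .50:.05:.95 unless noted
    kp_metric_names = ['AP', 'AP@.5', 'AP@.75', 'AP (medium)', 'AP (large)',
                       'AR', 'AR@.5', 'AR@.75', 'AR (medium)', 'AR (large)']
    for name, value in zip(kp_metric_names, cocoEval.stats):
        print('%-12s %.3f' % (name, value))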
@@ -432,13 +510,35 @@ class Params:
     '''
     Params for coco evaluation api
     '''
-    def __init__(self):
+    def setDetParams(self):
         self.imgIds = []
         self.catIds = []
         # np.arange causes trouble. the data point on arange is slightly larger than the true value
-        self.iouThrs = np.linspace(.5, 0.95, np.round((0.95-.5)/.05)+1, endpoint=True)
-        self.recThrs = np.linspace(.0, 1.00, np.round((1.00-.0)/.01)+1, endpoint=True)
-        self.maxDets = [1,10,100]
-        self.areaRng = [ [0**2,1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2] ]
-        self.useSegm = 0
-        self.useCats = 1
+        self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True)
+        self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
+        self.maxDets = [1, 10, 100]
+        self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+        self.areaRngLbl = ['all', 'small', 'medium', 'large']
+        self.useCats = 1
+
+    def setKpParams(self):
+        self.imgIds = []
+        self.catIds = []
+        # np.arange causes trouble. the data point on arange is slightly larger than the true value
+        self.iouThrs = np.linspace(.5, 0.95, np.round((0.95 - .5) / .05) + 1, endpoint=True)
+        self.recThrs = np.linspace(.0, 1.00, np.round((1.00 - .0) / .01) + 1, endpoint=True)
+        self.maxDets = [20]
+        self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+        self.areaRngLbl = ['all', 'medium', 'large']
+        self.useCats = 1
+
+    def __init__(self, iouType="segm"):
+        if iouType == "segm" or iouType == "bbox":
+            self.setDetParams()
+        elif iouType == "keypoints":
+            self.setKpParams()
+        else:
+            raise Exception("iouType not supported")
+        self.iouType = iouType
+        # useSegm is deprecated
+        self.useSegm = None
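
Editor's note: the practical effect of splitting Params is a different set of defaults per iouType; a small sketch (attribute names match the diff above):

    from pycocotools.cocoeval import Params

    det = Params(iouType='bbox')
    kp  = Params(iouType='keypoints')
    # det.maxDets == [1, 10, 100], det.areaRngLbl == ['all', 'small', 'medium', 'large']
    # kp.maxDets  == [20],         kp.areaRngLbl  == ['all', 'medium', 'large']
    # kp.useSegm is None (the flag is kept only for backward compatibility)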
