faster-rcnn增强自己的数据集annotations(四)

关于faster-rcnn数据集的制作，尤其是xml文件的制作以及为了增强自己的数据集这方面详细讲解一下。因为最近参加了比赛，目标检测，我用的faster-rcnn网络，奈何数据量太小。于是为了增强自己的数据集，并载入自己的训练网络中。我写了几个脚本文件进行数据的增强及加载。

数据集的增强，关于检测的数据集的增强，并不是很好处理，因为我们除了对图像处理外，还要得到图像中我们要定位的物体(比如person、dog)的具体坐标，才能进行训练。那么该如何得到处理后图像的坐标呢？

首先我进行了图像的旋转(30度、90度、180度、60度等等)，旋转以后根据旋转的角度来计算旋转后图像的坐标。

输入：原图像、该图像中物体的坐标值(一般坐标值是存在txt文本中的)。注意：图像与其坐标值要一一对应。

输出：旋转后的图像、旋转后图像的坐标值。

相关代码如下(具体代码下面有链接)：

#设置旋转角度，这里设置了270度

angle = 270.0

# 旋转后图像的四点坐标

[[pt1[0]], [pt1[1]]] = np.dot(rotateMat, np.array([[pt1[0]], [pt1[1]], [1]]))

[[pt2[0]], [pt2[1]]] = np.dot(rotateMat, np.array([[pt2[0]], [pt2[1]], [1]]))

主要是根据旋转的角度来计算坐标值。还有注意的一点是：不同的旋转角度，对于最后的两个坐标点的处理不同。以270度为例，最后得到的坐标值还需要 pt1[0] = pt1[0] - heightRect 和 pt2[0] = pt2[0] + heightRect 这两步的处理。

# -*- coding:utf-8 -*-
import cv2
from math import *
import numpy as np
import time,math
import os
import re

def rotate(
        img,  # source image; shape[0] is read as height, shape[1] as width
        pt1, pt2,
        i,
        angle=270.0,
        out_dir="/home/hqd/桌面/数据集增强集/270data/images/"
):
    """Rotate an image, save it as a JPEG, and return the rotated bounding box.

    The image is rotated about its centre by ``angle`` degrees on a canvas
    enlarged to hold the whole rotated image (uncovered border pixels are
    filled with white), then written to ``<out_dir>/<i>.jpg`` at JPEG
    quality 100.  The same affine transform is applied to the four corners
    of the input box, and the axis-aligned box enclosing them is returned.

    Args:
        img: Image array as returned by ``cv2.imread``.
        pt1: ``[x, y]`` of one corner of the box (top-left in the caller).
        pt2: ``[x, y]`` of the opposite corner (bottom-right in the caller).
        i: Base name (without extension) of the output image file.
        angle: Rotation angle in degrees, passed to
            ``cv2.getRotationMatrix2D``.  Defaults to 270.
        out_dir: Directory the rotated image is written to.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` of the box in the rotated image, as
        ints rounded to the nearest pixel.  ``pt1`` and ``pt2`` are not
        modified.
    """
    height = img.shape[0]  # original image height
    width = img.shape[1]   # original image width

    # Use float division everywhere: under Python 2 ``/ 2`` on ints floors,
    # which shifts the transform by a pixel for odd-sized images.
    rotateMat = cv2.getRotationMatrix2D((width / 2.0, height / 2.0), angle, 1)
    abs_sin = math.fabs(math.sin(math.radians(angle)))
    abs_cos = math.fabs(math.cos(math.radians(angle)))
    heightNew = int(width * abs_sin + height * abs_cos)
    widthNew = int(height * abs_sin + width * abs_cos)

    # Re-centre the rotated content inside the enlarged canvas.
    rotateMat[0, 2] += (widthNew - width) / 2.0
    rotateMat[1, 2] += (heightNew - height) / 2.0
    imgRotation = cv2.warpAffine(img, rotateMat, (widthNew, heightNew), borderValue=(255, 255, 255))
    cv2.imwrite(os.path.join(out_dir, i + ".jpg"), imgRotation, [int(cv2.IMWRITE_JPEG_QUALITY), 100])

    # Transform all four corners of the box (homogeneous coordinates, one
    # corner per column) and take their extent.  For 270 degrees this equals
    # the previous "rotate two points, then shift x by the box height"
    # correction, but it stays valid for any other angle as well.
    corners = np.array([
        [pt1[0], pt2[0], pt2[0], pt1[0]],
        [pt1[1], pt1[1], pt2[1], pt2[1]],
        [1, 1, 1, 1],
    ], dtype=float)
    rotated = np.dot(rotateMat, corners)
    xs = rotated[0]
    ys = rotated[1]

    # Round instead of truncating so that floating-point noise such as
    # 99.99999999 does not become 99.
    return [
        int(round(float(xs.min()))),
        int(round(float(ys.min()))),
        int(round(float(xs.max()))),
        int(round(float(ys.max()))),
    ]

#　读出文件中的坐标值
def ReadTxt(imageName, list_tmp,
            fileTxt="/home/hqd/桌面/数据集增强集/zuobiao/train.txt",
            outTxt="/home/hqd/桌面/数据集增强集/270data/zuobiao.txt"):
    """Rotate every listed image and write the rotated box coordinates.

    Line ``n`` of ``fileTxt`` holds the box of image ``n`` as four
    whitespace-separated numbers ``x1 y1 x2 y2``; it is paired by position
    with ``imageName[n]`` and ``list_tmp[n]``.  Each image is read with
    ``cv2.imread`` and passed to ``rotate`` together with its box and a new
    output name; the boxes returned by ``rotate`` are then written to
    ``outTxt``, one per line, each value followed by a single space.

    Args:
        imageName: Sequence of image file paths.
        list_tmp: Sequence of image base names of the form
            ``<prefix>_<number>``; the number is used to derive the output
            name.
        fileTxt: Path of the input coordinate file.
        outTxt: Path of the output coordinate file (overwritten).

    Raises:
        IOError: If an image cannot be read.
    """
    # Read everything up front and close the handle deterministically.
    with open(fileTxt, 'r') as getTxt:
        lines = getTxt.readlines()

    mlist = []
    for idx, line in enumerate(lines):
        fields = line.split()
        if not fields:
            # Tolerate blank lines (typically a trailing one).  ``idx`` still
            # counts them, so positional pairing with the image lists holds.
            continue

        # Coordinates may be written as floats; truncate them to ints.
        coords = [int(float(value)) for value in fields[:4]]
        pt1 = coords[:2]
        pt2 = coords[2:4]

        # Output name: the numeric suffix of the source name shifted by 5704
        # and prefixed with "IMG_00".
        # NOTE(review): 5704 presumably moves the rotated copies past the
        # existing image numbering - confirm against the dataset.
        number = int(list_tmp[idx].split('_')[1]) + 5704
        msj = "IMG_" + "00" + str(number)
        print(msj)

        imgSrc = cv2.imread(imageName[idx])
        if imgSrc is None:
            # cv2.imread signals failure by returning None; fail here with
            # the offending path instead of deep inside rotate().
            raise IOError("cannot read image: " + imageName[idx])
        mlist.append(rotate(imgSrc, pt1, pt2, msj))

    with open(outTxt, "w") as fl:
        for box in mlist:
            fl.write("".join(str(value) + ' ' for value in box) + "\n")

def direct(list_path="/home/hqd/桌面/数据集增强集/ImageSets/Main/train.txt",
           image_dir="/home/hqd/桌面/数据集增强集/JPEGImages/"):
    """Read the image list file and build the matching image paths.

    Args:
        list_path: Text file with one image base name (no extension) per
            line; surrounding whitespace on each line is stripped.
        image_dir: Directory containing the ``.jpg`` images.

    Returns:
        A tuple ``(list_direct, list_tmp)``: ``list_tmp`` holds the stripped
        base names in file order and ``list_direct`` the corresponding
        ``<image_dir>/<name>.jpg`` paths, index-aligned with ``list_tmp``.
    """
    # ``with`` closes the file even if reading fails; the handle is no longer
    # called ``file``, which shadowed the Python 2 builtin.
    with open(list_path, "r") as list_file:
        list_tmp = [line.strip() for line in list_file]
    list_direct = [os.path.join(image_dir, name + ".jpg") for name in list_tmp]
    return list_direct, list_tmp

if __name__ == "__main__":
    # Script entry point: collect the image paths and their base names, then
    # rotate every image and write out the transformed box coordinates.
    image_paths, base_names = direct()
    ReadTxt(image_paths, base_names)