分类任务数据增强方法总结
图像旋转
代码:
import os
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
# import gc
# test for one image now
# Rotate every image in Folder_PATH by a fixed set of angles and save each
# rotated copy under save_path as <name>_rotation_<angle>.jpg.
Folder_PATH = "./ship_cut_images/Train"
save_path = './rotation'

# The output directory only needs to exist once — create it before the loop
# instead of re-checking on every saved image.
if not os.path.isdir(save_path):
    os.mkdir(save_path)

# get image files list
image_file_list = os.listdir(Folder_PATH)
plt.figure()
for ind, image_name in enumerate(image_file_list):
    # image read in (prefix strips the 4-char extension, e.g. ".jpg")
    image_name_prefix = image_name[:-4]
    image_filepath = Folder_PATH + '/' + image_name
    image_pil = Image.open(image_filepath)
    image = np.array(image_pil)
    # rotation augment
    h, w = image.shape[0], image.shape[1]
    for rot_angle in [45, 90, 135, 180, 225, 270, 315]:
        # BUGFIX: OpenCV expects the rotation center as (x, y) == (w/2, h/2)
        # and warpAffine's dsize as (width, height). The original passed
        # (h/2, w/2) and (h, w), which is only correct for square images.
        M = cv2.getRotationMatrix2D((w / 2, h / 2), rot_angle, 1)
        rotation = cv2.warpAffine(image, M, (w, h))
        # save rotation result: 2-D arrays are grayscale ('L'), 3-channel 'RGB'
        shape = list(rotation.shape)
        if len(shape) == 2:
            shape.append(1)
        img = Image.fromarray(rotation, 'RGB' if shape[2] == 3 else 'L')
        img.save(save_path + '/{}_rotation_{}.jpg'.format(image_name_prefix, rot_angle))
        plt.imshow(rotation)
        plt.show()
# os.listdir(FILE_PATH)
tensorflow内部增强代码
数据尺寸变化
tf.image.resize_images(img_tensor, [H, W]): 将图像resize到(H, W)大小,其中H = h * 1.2, W = w * 1.2.
不使用tf.image.resize_image_with_crop_or_pad.无法使用该函数完成resize功能——不能保持原始图像的完整性(存在crop);也无法在数据增强时使用该函数完成crop功能,因为其没有随机性。
数据增强——裁剪
tf.random_crop(image_tensor, [h, w, c]): 将图像从(H, W, c)随机裁剪为(h, w, c).
数据增强——翻转
tf.image.random_flip_left_right(image_tensor)
tf.image.random_flip_up_down(image_tensor)
数据增强——色调、饱和度、亮度(HSB)与对比度
tf.image.random_hue(image_tensor, max_delta=0.1): 色调随机改变,参数决定变化大小范围,注意灰度图不能使用
tf.image.random_saturation(image_tensor, lower=0.0, upper=2.0) :饱和度随机改变,参数设置选值范围,注意灰度图不能使用
tf.image.random_brightness(image_tensor, max_delta=0.2): 亮度随机改变,参数设置变化大小范围,可用于灰度图
tf.image.random_contrast(image_tensor, lower=0.3, upper=1.0):对比度随机改变,参数设置选值范围,可用于灰度图
3 channel data process: 代码如下:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
_HEIGHT = 256   # target image height after preprocessing
_WIDTH = 256    # target image width after preprocessing
_CHANNELS = 3   # RGB input
# preprocessing parameters
random_extend_ratio = 1.2        # enlarge before random crop so the crop has slack
random_hue_max_delta = 0.1       # max hue shift (color images only)
random_contrast_lower = 0.3      # contrast factor sampled in [lower, upper]
random_contrast_upper = 1.0
random_saturation_lower = 0.0    # saturation factor sampled in [lower, upper]
random_saturation_upper = 2.0
random_brightness_max_delta = 0.5  # max additive brightness delta
def preprocess_image(image, is_training):
    """Resize and (for training) randomly augment a single 3-channel image.

    Args:
        image: 3-D image tensor of shape (H, W, 3), typically uint8.
        is_training: Python bool; True enables random augmentation.

    Returns:
        A float image tensor of shape (_HEIGHT, _WIDTH, 3).
    """
    if is_training:
        # Enlarge first so the subsequent random crop can shift the content.
        image = tf.image.resize_images(
            images=image,
            size=[tf.cast(_HEIGHT * random_extend_ratio, tf.int32),
                  tf.cast(_WIDTH * random_extend_ratio, tf.int32)])
        image = tf.random_crop(image, [_HEIGHT, _WIDTH, _CHANNELS])
        # Random flips.
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        # Random hue, contrast, saturation, brightness.
        # NOTE(review): resize_images returns a float tensor still in the
        # 0-255 range, while random_hue/random_saturation assume float images
        # in [0, 1] — the color jitter scale may be off; confirm against the
        # tf.image documentation for the TF version in use.
        image = tf.image.random_hue(image, max_delta=random_hue_max_delta)
        image = tf.image.random_contrast(image, lower=random_contrast_lower,
                                         upper=random_contrast_upper)
        image = tf.image.random_saturation(image, lower=random_saturation_lower,
                                           upper=random_saturation_upper)
        image = tf.image.random_brightness(image, max_delta=random_brightness_max_delta)
    else:
        # Plain resize for evaluation. Per the author's test, resize_images and
        # resize_area behave the same here, while resize_bicubic showed a side
        # effect when align_corners was set to True.
        image = tf.image.resize_images(images=image, size=[_HEIGHT, _WIDTH])
    return image
def read_single_example_and_decode(filename_queue):
    """Read one serialized example off the queue and decode it.

    Returns:
        (img, label): a uint8 image tensor of shape [height, width, channel]
        and an int32 scalar label.
    """
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channel': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=feature_spec)
    # Shape fields are stored as int64; cast down for use in reshape.
    h = tf.cast(parsed['height'], tf.int32)
    w = tf.cast(parsed['width'], tf.int32)
    c = tf.cast(parsed['channel'], tf.int32)
    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.reshape(img, shape=[h, w, c])
    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def next_batch(dataset_name, batch_size, is_training):
    """Queue-based input pipeline: decode, preprocess and batch one dataset."""
    # Guard clause: only the demo dataset is supported.
    if dataset_name != "one_image":
        raise ValueError("one_image only")
    pattern = "./data/one_image.tfrecords"
    print('tfrecord path is -->', os.path.abspath(pattern))
    filename_queue = tf.train.string_input_producer([pattern])
    image, label = read_single_example_and_decode(filename_queue)
    image = preprocess_image(image, is_training)
    img_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        capacity=1,
        num_threads=1,
        dynamic_pad=True)
    return img_batch, label_batch
# obtain the mask for seg
def input_fn(filename, is_training, batch_size, shuffle_buffer, num_epochs=1):
    """tf.data input pipeline: parse TFRecords, preprocess, batch.

    Args:
        filename: path to a TFRecord file.
        is_training: bool; enables shuffling and random augmentation.
        batch_size: number of examples per batch.
        shuffle_buffer: shuffle buffer size (used only when training).
        num_epochs: number of passes over the data.

    Returns:
        (images, labels) tensors produced by a one-shot iterator.

    Raises:
        ValueError: if `filename` does not exist.
    """
    if not os.path.exists(filename):
        raise ValueError("no such file exists")

    def _parser(example_proto):
        # Decode one serialized example into a preprocessed (image, label).
        features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'channel': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        parsed_features = tf.parse_single_example(example_proto, features=features)
        height = tf.cast(parsed_features['height'], tf.int32)
        width = tf.cast(parsed_features['width'], tf.int32)
        c = tf.cast(parsed_features['channel'], tf.int32)
        image = tf.decode_raw(parsed_features['img_raw'], tf.uint8)
        image = tf.reshape(image, [height, width, c])
        image = preprocess_image(image, is_training)
        label = tf.cast(parsed_features['label'], tf.int32)
        return image, label

    dataset = tf.data.TFRecordDataset(filename)
    if is_training:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.map(_parser)
    dataset = dataset.batch(batch_size)
    # FIX: prefetch at the END of the pipeline so fully prepared batches
    # overlap with training. The original prefetched raw serialized records
    # before map/batch, which hides none of the preprocessing latency.
    dataset = dataset.prefetch(buffer_size=1)
    iterator = dataset.make_one_shot_iterator()
    next_image, next_label = iterator.get_next()
    return next_image, next_label
if __name__ == '__main__':
    # Demo: pull one augmented batch, display it and log it to TensorBoard.
    image, label = next_batch(dataset_name="one_image", batch_size=1, is_training=True)
    tf.summary.image("image", image)
    summary_op = tf.summary.merge_all()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    global_step = tf.train.get_or_create_global_step()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./debug_summary", sess.graph)
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        # BUGFIX: fetch the image and its summary in a single run() call.
        # Two separate sess.run calls each dequeue a *different* batch, so
        # the displayed image and the summarized image would not match.
        img_, summary = sess.run([image, summary_op])
        img_show = np.array(np.squeeze(img_), dtype=np.uint8)
        plt.figure()
        plt.imshow(img_show)
        plt.show()
        writer.add_summary(summary, 0)
        coord.request_stop()
        coord.join(threads)
1 channel data process:代码如下:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
_HEIGHT = 256   # target image height after preprocessing
_WIDTH = 256    # target image width after preprocessing
_CHANNELS = 1   # single-channel (grayscale / SAR) input
# preprocessing parameters
random_extend_ratio = 1.2        # enlarge before random crop so the crop has slack
random_contrast_lower = 0.3      # contrast factor sampled in [lower, upper]
random_contrast_upper = 1.0
random_brightness_max_delta = 0.5  # max additive brightness delta
def preprocess_image(image, is_training):
    """Resize, (optionally) augment, and standardize a single-channel image.

    Args:
        image: 3-D image tensor of shape (H, W, 1), typically uint8.
        is_training: Python bool; True enables random augmentation.

    Returns:
        A standardized float image tensor of shape (_HEIGHT, _WIDTH, 1).
    """
    if is_training:
        # Enlarge first so the subsequent random crop can shift the content.
        image = tf.image.resize_images(
            images=image,
            size=[tf.cast(_HEIGHT * random_extend_ratio, tf.int32),
                  tf.cast(_WIDTH * random_extend_ratio, tf.int32)])
        image = tf.random_crop(image, [_HEIGHT, _WIDTH, _CHANNELS])
        # flip
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        # contrast / brightness only — hue and saturation are not supported
        # for one-channel gray images.
        image = tf.image.random_contrast(image, lower=random_contrast_lower,
                                         upper=random_contrast_upper)
        image = tf.image.random_brightness(image, max_delta=random_brightness_max_delta)
    else:
        # Plain resize for evaluation. Per the author's test, resize_images and
        # resize_area behave the same here, while resize_bicubic showed a side
        # effect when align_corners was set to True.
        image = tf.image.resize_images(images=image, size=[_HEIGHT, _WIDTH])
    # BUGFIX: the original standardized only the eval branch, so training and
    # evaluation saw differently scaled inputs. Standardize both branches so
    # the model receives a consistent input distribution.
    image = tf.image.per_image_standardization(image)
    return image
def read_single_example_and_decode(filename_queue):
    """Read one serialized example off the queue and decode it.

    Returns:
        (img, label): a uint8 image tensor of shape [height, width, channel]
        and an int32 scalar label.
    """
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channel': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=feature_spec)
    # Shape fields are stored as int64; cast down for use in reshape.
    h = tf.cast(parsed['height'], tf.int32)
    w = tf.cast(parsed['width'], tf.int32)
    c = tf.cast(parsed['channel'], tf.int32)
    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.reshape(img, shape=[h, w, c])
    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def next_batch(dataset_name, batch_size, is_training):
    """Queue-based input pipeline: decode, preprocess and batch one dataset."""
    # Guard clause: only the demo dataset is supported.
    if dataset_name != "one_image":
        raise ValueError("one_image only")
    pattern = "./data/one_sar.tfrecords"
    print('tfrecord path is -->', os.path.abspath(pattern))
    filename_queue = tf.train.string_input_producer([pattern])
    image, label = read_single_example_and_decode(filename_queue)
    image = preprocess_image(image, is_training)
    img_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        capacity=1,
        num_threads=1,
        dynamic_pad=True)
    return img_batch, label_batch
# obtain the mask for seg
def input_fn(filename, is_training, batch_size, shuffle_buffer, num_epochs=1):
    """tf.data input pipeline: parse TFRecords, preprocess, batch.

    Args:
        filename: path to a TFRecord file.
        is_training: bool; enables shuffling and random augmentation.
        batch_size: number of examples per batch.
        shuffle_buffer: shuffle buffer size (used only when training).
        num_epochs: number of passes over the data.

    Returns:
        (images, labels) tensors produced by a one-shot iterator.

    Raises:
        ValueError: if `filename` does not exist.
    """
    if not os.path.exists(filename):
        raise ValueError("no such file exists")

    def _parser(example_proto):
        # Decode one serialized example into a preprocessed (image, label).
        features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'channel': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        parsed_features = tf.parse_single_example(example_proto, features=features)
        height = tf.cast(parsed_features['height'], tf.int32)
        width = tf.cast(parsed_features['width'], tf.int32)
        c = tf.cast(parsed_features['channel'], tf.int32)
        image = tf.decode_raw(parsed_features['img_raw'], tf.uint8)
        image = tf.reshape(image, [height, width, c])
        image = preprocess_image(image, is_training)
        label = tf.cast(parsed_features['label'], tf.int32)
        return image, label

    dataset = tf.data.TFRecordDataset(filename)
    if is_training:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.map(_parser)
    dataset = dataset.batch(batch_size)
    # FIX: prefetch at the END of the pipeline so fully prepared batches
    # overlap with training. The original prefetched raw serialized records
    # before map/batch, which hides none of the preprocessing latency.
    dataset = dataset.prefetch(buffer_size=1)
    iterator = dataset.make_one_shot_iterator()
    next_image, next_label = iterator.get_next()
    return next_image, next_label
if __name__ == '__main__':
    # Demo: pull one augmented batch, display it and log it to TensorBoard.
    image, label = next_batch(dataset_name="one_image", batch_size=1, is_training=True)
    tf.summary.image("image", image)
    summary_op = tf.summary.merge_all()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    global_step = tf.train.get_or_create_global_step()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./sar_summary", sess.graph)
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        # BUGFIX: fetch the image and its summary in a single run() call.
        # Two separate sess.run calls each dequeue a *different* batch, so
        # the displayed image and the summarized image would not match.
        img_, summary = sess.run([image, summary_op])
        img_show = np.array(np.squeeze(img_), dtype=np.uint8)
        plt.figure()
        plt.imshow(img_show)
        plt.show()
        writer.add_summary(summary, 0)
        coord.request_stop()
        coord.join(threads)
HSB与RGB的对应关系

|