分类任务数据增强方法总结
图像旋转
代码:
import os
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
# import gc
# test for one image now
# Rotate every image in Folder_PATH by a fixed set of angles and save each
# rotated copy under save_path as <name>_rotation_<angle>.jpg.
Folder_PATH = "./ship_cut_images/Train"
save_path = './rotation'

# The output directory only needs to exist once — create it before the loop
# instead of re-checking on every saved image.
if not os.path.isdir(save_path):
    os.mkdir(save_path)

# get image files list
image_file_list = os.listdir(Folder_PATH)
plt.figure()
for ind, image_name in enumerate(image_file_list):
    # image read in (prefix strips the 4-char extension, e.g. ".jpg")
    image_name_prefix = image_name[:-4]
    image_filepath = Folder_PATH + '/' + image_name
    image_pil = Image.open(image_filepath)
    image = np.array(image_pil)
    # rotation augment
    h, w = image.shape[0], image.shape[1]
    for rot_angle in [45, 90, 135, 180, 225, 270, 315]:
        # BUGFIX: OpenCV expects the rotation center as (x, y) == (w/2, h/2)
        # and warpAffine's dsize as (width, height). The original passed
        # (h/2, w/2) and (h, w), which is only correct for square images.
        M = cv2.getRotationMatrix2D((w / 2, h / 2), rot_angle, 1)
        rotation = cv2.warpAffine(image, M, (w, h))
        # save rotation result: 2-D arrays are grayscale ('L'), 3-channel 'RGB'
        shape = list(rotation.shape)
        if len(shape) == 2:
            shape.append(1)
        img = Image.fromarray(rotation, 'RGB' if shape[2] == 3 else 'L')
        img.save(save_path + '/{}_rotation_{}.jpg'.format(image_name_prefix, rot_angle))
        plt.imshow(rotation)
        plt.show()
# os.listdir(FILE_PATH)
tensorflow内部增强代码
数据尺寸变化
tf.image.resize_images(img_tensor, [H, W]): 将图像resize到(H, W)大小,其中H = h * 1.2, W = w * 1.2.
不使用tf.image.resize_image_with_crop_or_pad.无法使用该函数完成resize功能——不能保持原始图像的完整性(存在crop);也无法在数据增强时使用该函数完成crop功能,因为其没有随机性。
数据增强——裁剪
tf.random_crop(image_tensor, [h, w, c]): 将图像从(H, W, c)随机裁剪为(h, w, c).
数据增强——翻转
tf.image.random_flip_left_right(image_tensor)
tf.image.random_flip_up_down(image_tensor)
数据增强——色调、饱和度、亮度(HSB)与对比度
tf.image.random_hue(image_tensor, max_delta=0.1): 色调随机改变,参数决定变化大小范围,注意灰度图不能使用
tf.image.random_saturation(image_tensor, lower=0.0, upper=2.0) :饱和度随机改变,参数设置选值范围,注意灰度图不能使用
tf.image.random_brightness(image_tensor, max_delta=0.2): 亮度随机改变,参数设置变化大小范围,可用于灰度图
tf.image.random_contrast(image_tensor, lower=0.3, upper=1.0):对比度随机改变,参数设置选值范围,可用于灰度图
3 channel data process: 代码如下:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
_HEIGHT = 256   # target image height after preprocessing
_WIDTH = 256    # target image width after preprocessing
_CHANNELS = 3   # RGB input
# preprocessing parameters
random_extend_ratio = 1.2        # enlarge before random crop so the crop has slack
random_hue_max_delta = 0.1       # max hue shift (color images only)
random_contrast_lower = 0.3      # contrast factor sampled in [lower, upper]
random_contrast_upper = 1.0
random_saturation_lower = 0.0    # saturation factor sampled in [lower, upper]
random_saturation_upper = 2.0
random_brightness_max_delta = 0.5  # max additive brightness delta
def preprocess_image(image, is_training):
    """Resize and (for training) randomly augment a single 3-channel image.

    Args:
        image: 3-D image tensor of shape (H, W, 3), typically uint8.
        is_training: Python bool; True enables random augmentation.

    Returns:
        A float image tensor of shape (_HEIGHT, _WIDTH, 3).
    """
    if is_training:
        # Enlarge first so the subsequent random crop can shift the content.
        image = tf.image.resize_images(
            images=image,
            size=[tf.cast(_HEIGHT * random_extend_ratio, tf.int32),
                  tf.cast(_WIDTH * random_extend_ratio, tf.int32)])
        image = tf.random_crop(image, [_HEIGHT, _WIDTH, _CHANNELS])
        # Random flips.
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        # Random hue, contrast, saturation, brightness.
        # NOTE(review): resize_images returns a float tensor still in the
        # 0-255 range, while random_hue/random_saturation assume float images
        # in [0, 1] — the color jitter scale may be off; confirm against the
        # tf.image documentation for the TF version in use.
        image = tf.image.random_hue(image, max_delta=random_hue_max_delta)
        image = tf.image.random_contrast(image, lower=random_contrast_lower,
                                         upper=random_contrast_upper)
        image = tf.image.random_saturation(image, lower=random_saturation_lower,
                                           upper=random_saturation_upper)
        image = tf.image.random_brightness(image, max_delta=random_brightness_max_delta)
    else:
        # Plain resize for evaluation. Per the author's test, resize_images and
        # resize_area behave the same here, while resize_bicubic showed a side
        # effect when align_corners was set to True.
        image = tf.image.resize_images(images=image, size=[_HEIGHT, _WIDTH])
    return image
def read_single_example_and_decode(filename_queue):
    """Read one serialized example off the queue and decode it.

    Returns:
        (img, label): a uint8 image tensor of shape [height, width, channel]
        and an int32 scalar label.
    """
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channel': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=feature_spec)
    # Shape fields are stored as int64; cast down for use in reshape.
    h = tf.cast(parsed['height'], tf.int32)
    w = tf.cast(parsed['width'], tf.int32)
    c = tf.cast(parsed['channel'], tf.int32)
    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.reshape(img, shape=[h, w, c])
    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def next_batch(dataset_name, batch_size, is_training):
    """Queue-based input pipeline: decode, preprocess and batch one dataset."""
    # Guard clause: only the demo dataset is supported.
    if dataset_name != "one_image":
        raise ValueError("one_image only")
    pattern = "./data/one_image.tfrecords"
    print('tfrecord path is -->', os.path.abspath(pattern))
    filename_queue = tf.train.string_input_producer([pattern])
    image, label = read_single_example_and_decode(filename_queue)
    image = preprocess_image(image, is_training)
    img_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        capacity=1,
        num_threads=1,
        dynamic_pad=True)
    return img_batch, label_batch
# obtain the mask for seg
def input_fn(filename, is_training, batch_size, shuffle_buffer, num_epochs=1):
    """tf.data input pipeline: parse TFRecords, preprocess, batch.

    Args:
        filename: path to a TFRecord file.
        is_training: bool; enables shuffling and random augmentation.
        batch_size: number of examples per batch.
        shuffle_buffer: shuffle buffer size (used only when training).
        num_epochs: number of passes over the data.

    Returns:
        (images, labels) tensors produced by a one-shot iterator.

    Raises:
        ValueError: if `filename` does not exist.
    """
    if not os.path.exists(filename):
        raise ValueError("no such file exists")

    def _parser(example_proto):
        # Decode one serialized example into a preprocessed (image, label).
        features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'channel': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        parsed_features = tf.parse_single_example(example_proto, features=features)
        height = tf.cast(parsed_features['height'], tf.int32)
        width = tf.cast(parsed_features['width'], tf.int32)
        c = tf.cast(parsed_features['channel'], tf.int32)
        image = tf.decode_raw(parsed_features['img_raw'], tf.uint8)
        image = tf.reshape(image, [height, width, c])
        image = preprocess_image(image, is_training)
        label = tf.cast(parsed_features['label'], tf.int32)
        return image, label

    dataset = tf.data.TFRecordDataset(filename)
    if is_training:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.map(_parser)
    dataset = dataset.batch(batch_size)
    # FIX: prefetch at the END of the pipeline so fully prepared batches
    # overlap with training. The original prefetched raw serialized records
    # before map/batch, which hides none of the preprocessing latency.
    dataset = dataset.prefetch(buffer_size=1)
    iterator = dataset.make_one_shot_iterator()
    next_image, next_label = iterator.get_next()
    return next_image, next_label
if __name__ == '__main__':
    # Demo: pull one augmented batch, display it and log it to TensorBoard.
    image, label = next_batch(dataset_name="one_image", batch_size=1, is_training=True)
    tf.summary.image("image", image)
    summary_op = tf.summary.merge_all()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    global_step = tf.train.get_or_create_global_step()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./debug_summary", sess.graph)
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        # BUGFIX: fetch the image and its summary in a single run() call.
        # Two separate sess.run calls each dequeue a *different* batch, so
        # the displayed image and the summarized image would not match.
        img_, summary = sess.run([image, summary_op])
        img_show = np.array(np.squeeze(img_), dtype=np.uint8)
        plt.figure()
        plt.imshow(img_show)
        plt.show()
        writer.add_summary(summary, 0)
        coord.request_stop()
        coord.join(threads)
1 channel data process:代码如下:
import tensorflow as tf
import os
import numpy as np
import matplotlib.pyplot as plt
_HEIGHT = 256   # target image height after preprocessing
_WIDTH = 256    # target image width after preprocessing
_CHANNELS = 1   # single-channel (grayscale / SAR) input
# preprocessing parameters
random_extend_ratio = 1.2        # enlarge before random crop so the crop has slack
random_contrast_lower = 0.3      # contrast factor sampled in [lower, upper]
random_contrast_upper = 1.0
random_brightness_max_delta = 0.5  # max additive brightness delta
def preprocess_image(image, is_training):
    """Resize, (optionally) augment, and standardize a single-channel image.

    Args:
        image: 3-D image tensor of shape (H, W, 1), typically uint8.
        is_training: Python bool; True enables random augmentation.

    Returns:
        A standardized float image tensor of shape (_HEIGHT, _WIDTH, 1).
    """
    if is_training:
        # Enlarge first so the subsequent random crop can shift the content.
        image = tf.image.resize_images(
            images=image,
            size=[tf.cast(_HEIGHT * random_extend_ratio, tf.int32),
                  tf.cast(_WIDTH * random_extend_ratio, tf.int32)])
        image = tf.random_crop(image, [_HEIGHT, _WIDTH, _CHANNELS])
        # flip
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_flip_up_down(image)
        # contrast / brightness only — hue and saturation are not supported
        # for one-channel gray images.
        image = tf.image.random_contrast(image, lower=random_contrast_lower,
                                         upper=random_contrast_upper)
        image = tf.image.random_brightness(image, max_delta=random_brightness_max_delta)
    else:
        # Plain resize for evaluation. Per the author's test, resize_images and
        # resize_area behave the same here, while resize_bicubic showed a side
        # effect when align_corners was set to True.
        image = tf.image.resize_images(images=image, size=[_HEIGHT, _WIDTH])
    # BUGFIX: the original standardized only the eval branch, so training and
    # evaluation saw differently scaled inputs. Standardize both branches so
    # the model receives a consistent input distribution.
    image = tf.image.per_image_standardization(image)
    return image
def read_single_example_and_decode(filename_queue):
    """Read one serialized example off the queue and decode it.

    Returns:
        (img, label): a uint8 image tensor of shape [height, width, channel]
        and an int32 scalar label.
    """
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    feature_spec = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'channel': tf.FixedLenFeature([], tf.int64),
        'img_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }
    parsed = tf.parse_single_example(serialized=serialized, features=feature_spec)
    # Shape fields are stored as int64; cast down for use in reshape.
    h = tf.cast(parsed['height'], tf.int32)
    w = tf.cast(parsed['width'], tf.int32)
    c = tf.cast(parsed['channel'], tf.int32)
    img = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.reshape(img, shape=[h, w, c])
    label = tf.cast(parsed['label'], tf.int32)
    return img, label
def next_batch(dataset_name, batch_size, is_training):
    """Queue-based input pipeline: decode, preprocess and batch one dataset."""
    # Guard clause: only the demo dataset is supported.
    if dataset_name != "one_image":
        raise ValueError("one_image only")
    pattern = "./data/one_sar.tfrecords"
    print('tfrecord path is -->', os.path.abspath(pattern))
    filename_queue = tf.train.string_input_producer([pattern])
    image, label = read_single_example_and_decode(filename_queue)
    image = preprocess_image(image, is_training)
    img_batch, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        capacity=1,
        num_threads=1,
        dynamic_pad=True)
    return img_batch, label_batch
# obtain the mask for seg
def input_fn(filename, is_training, batch_size, shuffle_buffer, num_epochs=1):
    """tf.data input pipeline: parse TFRecords, preprocess, batch.

    Args:
        filename: path to a TFRecord file.
        is_training: bool; enables shuffling and random augmentation.
        batch_size: number of examples per batch.
        shuffle_buffer: shuffle buffer size (used only when training).
        num_epochs: number of passes over the data.

    Returns:
        (images, labels) tensors produced by a one-shot iterator.

    Raises:
        ValueError: if `filename` does not exist.
    """
    if not os.path.exists(filename):
        raise ValueError("no such file exists")

    def _parser(example_proto):
        # Decode one serialized example into a preprocessed (image, label).
        features = {
            'height': tf.FixedLenFeature([], tf.int64),
            'width': tf.FixedLenFeature([], tf.int64),
            'channel': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
        parsed_features = tf.parse_single_example(example_proto, features=features)
        height = tf.cast(parsed_features['height'], tf.int32)
        width = tf.cast(parsed_features['width'], tf.int32)
        c = tf.cast(parsed_features['channel'], tf.int32)
        image = tf.decode_raw(parsed_features['img_raw'], tf.uint8)
        image = tf.reshape(image, [height, width, c])
        image = preprocess_image(image, is_training)
        label = tf.cast(parsed_features['label'], tf.int32)
        return image, label

    dataset = tf.data.TFRecordDataset(filename)
    if is_training:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.map(_parser)
    dataset = dataset.batch(batch_size)
    # FIX: prefetch at the END of the pipeline so fully prepared batches
    # overlap with training. The original prefetched raw serialized records
    # before map/batch, which hides none of the preprocessing latency.
    dataset = dataset.prefetch(buffer_size=1)
    iterator = dataset.make_one_shot_iterator()
    next_image, next_label = iterator.get_next()
    return next_image, next_label
if __name__ == '__main__':
    # Demo: pull one augmented batch, display it and log it to TensorBoard.
    image, label = next_batch(dataset_name="one_image", batch_size=1, is_training=True)
    tf.summary.image("image", image)
    summary_op = tf.summary.merge_all()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    global_step = tf.train.get_or_create_global_step()
    with tf.Session() as sess:
        writer = tf.summary.FileWriter("./sar_summary", sess.graph)
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        # BUGFIX: fetch the image and its summary in a single run() call.
        # Two separate sess.run calls each dequeue a *different* batch, so
        # the displayed image and the summarized image would not match.
        img_, summary = sess.run([image, summary_op])
        img_show = np.array(np.squeeze(img_), dtype=np.uint8)
        plt.figure()
        plt.imshow(img_show)
        plt.show()
        writer.add_summary(summary, 0)
        coord.request_stop()
        coord.join(threads)
HSB与RGB的对应关系

|