Source code for trojai.datagen.image_size_xforms

import logging

import cv2
import numpy as np
from numpy.random import RandomState

from .image_entity import ImageEntity, GenericImageEntity
from .transform_interface import Transform

logger = logging.getLogger(__name__)

"""
Module contains various classes that relate to size transformations of input objects
"""


class Resize(Transform):
    """
    Transform that rescales an Entity's image and mask to one fixed size
    """

    def __init__(self, new_size: tuple = (200, 200), interpolation: int = cv2.INTER_CUBIC) -> None:
        """
        Store the resize configuration
        :param new_size: a tuple of the size in pixels for x and y dimensions; it is handed
            unchanged to cv2.resize as the target size
        :param interpolation: the cv2 interpolation flag used for both the image and the mask
        """
        self.new_size = new_size
        self.interpolation = interpolation

    def do(self, img_obj: ImageEntity, random_state_obj: RandomState) -> ImageEntity:
        """
        Resize the image data and the mask of the input Entity
        :param img_obj: The input object to be resized according to the stored configuration
        :param random_state_obj: ignored
        :return: A new GenericImageEntity wrapping the resized image and mask
        """
        target = self.new_size
        method = self.interpolation

        resized_img = cv2.resize(img_obj.get_data(), target, interpolation=method)

        # The mask is resized as float32 and cast back to bool afterwards, so every
        # non-zero interpolated value ends up marked as True in the output mask.
        float_mask = img_obj.get_mask().astype(np.float32)
        resized_mask = cv2.resize(float_mask, target, interpolation=method).astype(bool)

        message = "Resized image of shape=%s to shape=%s using %d interpolation" % (
            img_obj.get_data().shape, target, method)
        logger.debug(message)
        return GenericImageEntity(resized_img, resized_mask)
class RandomResize(Transform):
    """
    Resizes an Entity to a size drawn at random from a configured range
    """

    def __init__(self, new_size_minimum: tuple = (200, 200), new_size_maximum: tuple = (300, 300),
                 interpolation: int = cv2.INTER_CUBIC) -> None:
        """
        Initialize the resizer object
        :param new_size_minimum: a tuple of the minimum size in pixels for each of the two dimensions
        :param new_size_maximum: a tuple of the maximum size in pixels for each of the two dimensions;
            the maximum is an exclusive bound of the random draw, except when it equals the
            minimum, in which case that single value is used
        :param interpolation: the interpolation method to resize the input Entity
        """
        self.new_size_minimum = new_size_minimum
        self.new_size_maximum = new_size_maximum
        self.interpolation = interpolation

    @staticmethod
    def _draw_dimension(random_state_obj: RandomState, low: int, high: int) -> int:
        """
        Draw one dimension from [low, high); a degenerate range (low == high) yields low
        """
        # randint raises ValueError for an empty range, so a fixed dimension
        # (minimum == maximum) is returned directly without consuming a draw.
        if low == high:
            return low
        return random_state_obj.randint(low, high)

    def do(self, img_obj: ImageEntity, random_state_obj: RandomState) -> ImageEntity:
        """
        Perform the resizing
        :param img_obj: The input object to be resized according the specified configuration
        :param random_state_obj: source of randomness used to draw the new size
        :return: The resized object
        """
        # select a new size from within the range of valid new sizes; index 0 is drawn
        # first and index 1 second so the consumption order of random_state_obj is stable
        dim0 = self._draw_dimension(random_state_obj, self.new_size_minimum[0], self.new_size_maximum[0])
        dim1 = self._draw_dimension(random_state_obj, self.new_size_minimum[1], self.new_size_maximum[1])
        # NOTE(review): the pair is passed to cv2.resize in index order as the target size;
        # confirm that this matches the axis order intended by callers.
        new_size = (dim0, dim1)

        img_out = cv2.resize(img_obj.get_data(), new_size, interpolation=self.interpolation)
        # the mask is resized as float32 and cast back to bool, so any non-zero
        # interpolated value is treated as part of the mask
        mask_out = cv2.resize(img_obj.get_mask().astype(np.float32), new_size,
                              interpolation=self.interpolation).astype(bool)

        logger.debug("Resized image of shape=%s to shape=%s using %d interpolation",
                     img_obj.get_data().shape, new_size, self.interpolation)
        return GenericImageEntity(img_out, mask_out)
class RandomPadToSize(Transform):
    """
    Pads an Entity up to a target size, splitting the padding at random between the two
    sides of each of the first two axes
    """

    def __init__(self, new_size: tuple = (200, 200), mode: str = 'constant', pad_value: int = 0) -> None:
        """
        Initialize the padding object
        :param new_size: a tuple of the output size in pixels; index 0 is compared against
            axis 0 of the image data and index 1 against axis 1
        :param mode: what type of padding to use, supports numpy.pad options
        :param pad_value: the value to use when padding
        """
        self.new_size = new_size
        self.mode = mode
        self.pad_value = pad_value

    def do(self, img_obj: ImageEntity, random_state_obj: RandomState) -> ImageEntity:
        """
        Perform the padding
        :param img_obj: The input object to be padded according the specified configuration
        :param random_state_obj: source of randomness used to split the padding between
            the leading and trailing side of each axis
        :return: The padded object
        :raises RuntimeError: if the requested size is smaller than the input in either axis
        """
        input_shape = img_obj.get_data().shape
        if self.new_size[0] < input_shape[0] or self.new_size[1] < input_shape[1]:
            raise RuntimeError('Invalid pad new_size {} smaller than input image size {}'.format(
                self.new_size, input_shape))

        total_pad_y = self.new_size[0] - input_shape[0]
        total_pad_x = self.new_size[1] - input_shape[1]

        # randint's upper bound is exclusive, so +1 lets the leading pad take any value in
        # [0, total_pad]. It also keeps the range non-empty when no padding is needed on
        # an axis (total_pad == 0), which previously raised ValueError.
        pre_pad_value_y = random_state_obj.randint(0, total_pad_y + 1)
        pre_pad_value_x = random_state_obj.randint(0, total_pad_x + 1)

        # (y_pre, y_post, x_pre, x_post), the layout expected by Pad
        pad_values = (pre_pad_value_y, total_pad_y - pre_pad_value_y,
                      pre_pad_value_x, total_pad_x - pre_pad_value_x)

        return Pad(pad_values, self.mode, self.pad_value).do(img_obj, random_state_obj)
class Pad(Transform):
    """
    Pads the image data and mask of an Entity by fixed amounts along the first two axes
    """

    def __init__(self, pad_amounts: tuple = (0, 0, 0, 0), mode: str = 'constant', pad_value: int = 0) -> None:
        """
        Initialize the padding object
        :param pad_amounts: a tuple of the pixel count to add to each side (y_pre, y_post, x_pre, x_post)
        :param mode: what type of padding to use, supports numpy.pad options
        :param pad_value: the value to use when padding; only used when mode is 'constant'
        """
        self.pad_amounts = pad_amounts
        self.mode = mode
        self.pad_value = pad_value

    def _pad_array(self, arr: np.ndarray, label: str) -> np.ndarray:
        """
        Pad a 2-D or 3-D array along its first two axes using the stored configuration
        :param arr: the array to pad
        :param label: name used in the error message ('image' or 'mask')
        :return: the padded array
        :raises RuntimeError: if arr is neither 2-D nor 3-D
        """
        amounts = self.pad_amounts
        pad_width = ((amounts[0], amounts[1]), (amounts[2], amounts[3]))
        rank = len(arr.shape)
        if rank == 3:
            # leave the third axis untouched
            pad_width = pad_width + ((0, 0),)
        elif rank != 2:
            raise RuntimeError('Unexpected {} shape: {}'.format(label, arr.shape))

        # constant_values is only passed along for the 'constant' mode
        kwargs = {'constant_values': self.pad_value} if self.mode == 'constant' else {}
        return np.pad(arr, pad_width=pad_width, mode=self.mode, **kwargs)

    def do(self, img_obj: ImageEntity, random_state_obj: RandomState) -> ImageEntity:
        """
        Perform the padding
        :param img_obj: The input object to be padded according the specified configuration
        :param random_state_obj: ignored
        :return: The padded object; the input object itself is returned when every pad amount is zero
        :raises RuntimeError: if the image data or the mask is neither 2-D nor 3-D
        """
        if np.count_nonzero(np.asarray(self.pad_amounts)) == 0:
            return img_obj

        img = img_obj.get_data()
        msk = img_obj.get_mask()

        # the mask is padded with the same mode and pad value as the image
        img_out = self._pad_array(img, 'image')
        mask_out = self._pad_array(msk, 'mask')

        # report the padded output shape (the original logged the input shape twice)
        logger.debug("Padded image of shape=%s to shape=%s", img.shape, img_out.shape)
        return GenericImageEntity(img_out, mask_out)
class RandomSubCrop(Transform):
    """
    Crops a randomly positioned window of a fixed size out of an Entity
    """

    def __init__(self, new_size: tuple = (200, 200)) -> None:
        """
        Initialize the crop object
        :param new_size: a tuple of the crop size in pixels; index 0 is the height (compared
            against axis 0 of the image data) and index 1 is the width (axis 1)
        """
        self.new_size = new_size

    def do(self, img_obj: ImageEntity, random_state_obj: RandomState) -> ImageEntity:
        """
        Perform the cropping
        :param img_obj: The input object to be cropped according the specified configuration
        :param random_state_obj: source of randomness used to place the crop window; when
            None, the global numpy random state is used instead
        :return: The cropped object; the input object itself is returned when it already has
            the requested size
        :raises RuntimeError: if the requested size exceeds the image in either axis
        """
        img = img_obj.get_data()
        msk = img_obj.get_mask()
        crop_h, crop_w = self.new_size[0], self.new_size[1]

        if crop_h > img.shape[0]:
            raise RuntimeError(
                'Invalid subcrop size: requested height {} is larger than source image height {}'.format(
                    crop_h, img.shape[0]))
        if crop_w > img.shape[1]:
            raise RuntimeError(
                'Invalid subcrop size: requested width {} is larger than source image width {}'.format(
                    crop_w, img.shape[1]))

        if crop_h == img.shape[0] and crop_w == img.shape[1]:
            return img_obj

        # Draw from the caller's random state so crops are reproducible; fall back to the
        # global numpy state for callers that pass None.
        rng = np.random if random_state_obj is None else random_state_obj

        # randint's upper bound is exclusive: +1 makes the last valid offset reachable and
        # keeps the range non-empty when one axis already matches the crop size.
        y_st = rng.randint(0, img.shape[0] - crop_h + 1)
        x_st = rng.randint(0, img.shape[1] - crop_w + 1)

        img_out = img[y_st:y_st + crop_h, x_st:x_st + crop_w]
        mask_out = msk[y_st:y_st + crop_h, x_st:x_st + crop_w]

        logger.debug("Cropped source image size %s to output size %s", img.shape, img_out.shape)
        return GenericImageEntity(img_out, mask_out)