From e9df99b9e8edb475e218b4eb918ad06dd5df238f Mon Sep 17 00:00:00 2001
From: deeppomf
Date: Wed, 7 Aug 2019 04:04:56 -0400
Subject: [PATCH] Add DCPv2 files

---
 config.py                                 |  38 +++
 decensor.py                               | 251 ++++++++++++++++++
 file.py                                   |  56 ++++
 libs/utils.py                             | 135 ++++++++++
 model.py                                  | 105 ++++++++
 models/download and unzip model here!.URL |   6 -
 module.py                                 | 305 ++++++++++++++++++++++
 ops.py                                    | 233 +++++++++++++++++
 requirements.txt                          | Bin 0 -> 774 bytes
 9 files changed, 1123 insertions(+), 6 deletions(-)
 create mode 100755 config.py
 create mode 100755 decensor.py
 create mode 100755 file.py
 create mode 100755 libs/utils.py
 create mode 100755 model.py
 delete mode 100644 models/download and unzip model here!.URL
 create mode 100755 module.py
 create mode 100755 ops.py
 create mode 100755 requirements.txt

diff --git a/config.py b/config.py
new file mode 100755
index 0000000..a345081
--- /dev/null
+++ b/config.py
@@ -0,0 +1,38 @@
+import argparse
+
+def str2floatarr(v):
+    if type(v) == str:
+        try:
+            return [float(v) for v in v.split(',')]
+        except:
+            raise argparse.ArgumentTypeError('Floats separated by commas expected.')
+    else:
+        raise argparse.ArgumentTypeError('Floats separated by commas expected.')
+
+def str2bool(v):
+    if v.lower() in ('yes', 'true', 't', 'y', '1'):
+        return True
+    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
+        return False
+    else:
+        raise argparse.ArgumentTypeError('Boolean value expected.')
+
+def get_args():
+    parser = argparse.ArgumentParser(description='')
+
+    #input/output folder settings
+    parser.add_argument('--decensor_input_path', dest='decensor_input_path', default='./decensor_input/', help='path to the folder of input images, with censored regions colored green, to be decensored by decensor.py')
+    parser.add_argument('--decensor_input_original_path', dest='decensor_input_original_path', default='./decensor_input_original/', help='path to the folder of unmodified input images, used by decensor.py for mosaic decensoring')
+    parser.add_argument('--decensor_output_path', dest='decensor_output_path', default='./decensor_output/', help='path to the folder of output images generated by decensor.py')
+
+    #decensor settings
+    parser.add_argument('--mask_color_red', dest='mask_color_red', default=0, type=int, help='red channel of the mask color used in decensoring')
+    parser.add_argument('--mask_color_green', dest='mask_color_green', default=255, type=int, help='green channel of the mask color used in decensoring')
+    parser.add_argument('--mask_color_blue', dest='mask_color_blue', default=0, type=int, help='blue channel of the mask color used in decensoring')
+    parser.add_argument('--is_mosaic', dest='is_mosaic', default='False', type=str2bool, help='true if the image has mosaic censoring, false otherwise')
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    get_args()
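
A note on usage: these flags are consumed by decensor.py below. As a minimal sketch of what get_args() yields with the defaults (demo_args.py is a hypothetical file name, not part of this patch):

    # demo_args.py -- hypothetical usage sketch
    import config

    args = config.get_args()  # parses sys.argv
    mask_color = (args.mask_color_red, args.mask_color_green, args.mask_color_blue)
    print(mask_color, args.is_mosaic)  # with no flags given: (0, 255, 0) False

Invoked as, e.g., 'python demo_args.py --is_mosaic=true'; str2bool accepts 'yes'/'no', 'true'/'false', 't'/'f', 'y'/'n', and '1'/'0' in any letter case.
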
diff --git a/decensor.py b/decensor.py
new file mode 100755
index 0000000..c90c1d7
--- /dev/null
+++ b/decensor.py
@@ -0,0 +1,251 @@
+#!/usr/bin/env python3
+
+try:
+    import numpy as np
+    from PIL import Image
+
+    import os
+    from copy import deepcopy
+
+    import config
+    import file
+    from model import InpaintNN
+    from libs.utils import *
+except ImportError as e:
+    print("Error when importing libraries: ", e)
+    print("Some Python libraries are missing. You can install all requirements by running 'pip install -r requirements.txt' in the command line.")
+    exit(1)
+
+class Decensor:
+
+    def __init__(self):
+        self.args = config.get_args()
+        self.is_mosaic = self.args.is_mosaic
+
+        self.mask_color = [self.args.mask_color_red/255.0, self.args.mask_color_green/255.0, self.args.mask_color_blue/255.0]
+
+        if not os.path.exists(self.args.decensor_output_path):
+            os.makedirs(self.args.decensor_output_path)
+
+        self.load_model()
+
+    def get_mask(self, colored):
+        mask = np.ones(colored.shape, np.uint8)
+        i, j = np.where(np.all(colored[0] == self.mask_color, axis=-1))
+        mask[0, i, j] = 0
+        return mask
+
+    def load_model(self):
+        self.model = InpaintNN(bar_model_name = "./models/bar/Train_775000.meta", bar_checkpoint_name = "./models/bar/", mosaic_model_name = "./models/mosaic/Train_290000.meta", mosaic_checkpoint_name = "./models/mosaic/", is_mosaic=self.is_mosaic)
+
+    def decensor_all_images_in_folder(self):
+        #load the model once at the beginning and reuse it
+        #self.load_model()
+        color_dir = self.args.decensor_input_path
+        file_names = os.listdir(color_dir)
+
+        input_dir = self.args.decensor_input_path
+        output_dir = self.args.decensor_output_path
+
+        # Change False to True before release --> file.check_file(input_dir, output_dir, True)
+        file_names, self.files_removed = file.check_file(input_dir, output_dir, False)
+
+        #convert all images into np arrays and put them in a list
+        for file_name in file_names:
+            color_file_path = os.path.join(color_dir, file_name)
+            color_bn, color_ext = os.path.splitext(file_name)
+            if os.path.isfile(color_file_path) and color_ext.casefold() == ".png":
+                print("--------------------------------------------------------------------------")
+                print("Decensoring the image {}".format(color_file_path))
+                try:
+                    colored_img = Image.open(color_file_path)
+                except:
+                    print("Cannot identify image file (" + str(color_file_path) + ")")
+                    self.files_removed.append((color_file_path, 2))
+                    # in case of an abnormal file format change (e.g. text.txt -> text.png)
+                    continue
+
+                #if we are doing a mosaic decensor
+                if self.is_mosaic:
+                    #get the original file that hasn't been colored
+                    ori_dir = self.args.decensor_input_original_path
+                    #since the original image might not be a png, test multiple file formats
+                    valid_formats = {".png", ".jpg", ".jpeg"}
+                    for test_file_name in os.listdir(ori_dir):
+                        test_bn, test_ext = os.path.splitext(test_file_name)
+                        if (test_bn == color_bn) and (test_ext.casefold() in valid_formats):
+                            ori_file_path = os.path.join(ori_dir, test_file_name)
+                            ori_img = Image.open(ori_file_path)
+                            # colored_img.show()
+                            self.decensor_image(ori_img, colored_img, file_name)
+                            break
+                    else: #for...else, i.e. the loop finished without encountering a break
+                        print("Corresponding original, uncolored image not found for {}".format(color_file_path))
+                        print("Check that it exists and is in the PNG or JPG format.")
+                else:
+                    self.decensor_image(colored_img, colored_img, file_name)
+            else:
+                print("--------------------------------------------------------------------------")
+                print("Irregular file detected: " + str(color_file_path))
+        print("--------------------------------------------------------------------------")
+        if(self.files_removed is not None):
+            file.error_messages(None, self.files_removed)
+        print("\nDecensoring complete!")
+
+    #decensors one image at a time
+    #TODO: decensor all cropped parts of the same image in one batch (this needs the colored input to be an array of those crops, plus additional changes)
+    def
decensor_image(self, ori, colored, file_name=None): + width, height = ori.size + #save the alpha channel if the image has an alpha channel + has_alpha = False + if (ori.mode == "RGBA"): + has_alpha = True + alpha_channel = np.asarray(ori)[:,:,3] + alpha_channel = np.expand_dims(alpha_channel, axis =-1) + ori = ori.convert('RGB') + + ori_array = image_to_array(ori) + ori_array = np.expand_dims(ori_array, axis = 0) + + if self.is_mosaic: + #if mosaic decensor, mask is empty + # mask = np.ones(ori_array.shape, np.uint8) + # print(mask.shape) + colored = colored.convert('RGB') + color_array = image_to_array(colored) + color_array = np.expand_dims(color_array, axis = 0) + mask = self.get_mask(color_array) + mask_reshaped = mask[0,:,:,:] * 255.0 + mask_img = Image.fromarray(mask_reshaped.astype('uint8')) + # mask_img.show() + + else: + mask = self.get_mask(ori_array) + + #colored image is only used for finding the regions + regions = find_regions(colored.convert('RGB'), [v*255 for v in self.mask_color]) + print("Found {region_count} censored regions in this image!".format(region_count = len(regions))) + + if len(regions) == 0 and not self.is_mosaic: + print("No green regions detected! Make sure you're using exactly the right color.") + return + + output_img_array = ori_array[0].copy() + + for region_counter, region in enumerate(regions, 1): + bounding_box = expand_bounding(ori, region, expand_factor=1.5) + crop_img = ori.crop(bounding_box) + # crop_img.show() + #convert mask back to image + mask_reshaped = mask[0,:,:,:] * 255.0 + mask_img = Image.fromarray(mask_reshaped.astype('uint8')) + #resize the cropped images + crop_img = crop_img.resize((256, 256)) + crop_img_array = image_to_array(crop_img) + #resize the mask images + mask_img = mask_img.crop(bounding_box) + mask_img = mask_img.resize((256, 256)) + # mask_img.show() + #convert mask_img back to array + mask_array = image_to_array(mask_img) + #the mask has been upscaled so there will be values not equal to 0 or 1 + + # mask_array[mask_array > 0] = 1 + # crop_img_array[..., :-1][mask_array==0] = (0,0,0) + + if not self.is_mosaic: + a, b = np.where(np.all(mask_array == 0, axis = -1)) + # print(a,b) + # print(crop_img_array[a,b]) + # print(crop_img_array[a,b,0]) + # print(crop_img_array.shape) + # print(type(crop_img_array[0,0])) + crop_img_array[a,b,:] = 0. 
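+                # At this point np.where has collected the (row, col) indices of the
+                # fully-masked pixels and the line above has zeroed them in the crop,
+                # so the bar model must in-paint those regions from scratch. The
+                # mosaic branch skips this zeroing, presumably because mosaic pixels
+                # still carry usable information for the mosaic model.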
+ temp = Image.fromarray((crop_img_array * 255.0).astype('uint8')) + # temp.show() + + # if self.is_mosaic: + # a, b = np.where(np.all(mask_array == 0, axis = -1)) + # print(a, b) + # coords = [coord for coord in zip(a,b) if ((coord[0] + coord[1]) % 2 == 0)] + # a,b = zip(*coords) + + # mask_array[a,b] = 1 + # mask_array = mask_array * 255.0 + # img = Image.fromarray(mask_array.astype('uint8')) + # img.show() + # return + + crop_img_array = np.expand_dims(crop_img_array, axis = 0) + mask_array = np.expand_dims(mask_array, axis = 0) + + # print(np.amax(crop_img_array)) + # print(np.amax(mask_array)) + # # print(np.amax(masked)) + + # print(np.amin(crop_img_array)) + # print(np.amin(mask_array)) + # # print(np.amin(masked)) + + # print(mask_array) + + crop_img_array = crop_img_array * 2.0 - 1 + # mask_array = mask_array / 255.0 + + # Run predictions for this batch of images + pred_img_array = self.model.predict(crop_img_array, crop_img_array, mask_array) + + pred_img_array = np.squeeze(pred_img_array, axis = 0) + pred_img_array = (255.0 * ((pred_img_array + 1.0) / 2.0)).astype(np.uint8) + + #scale prediction image back to original size + bounding_width = bounding_box[2]-bounding_box[0] + bounding_height = bounding_box[3]-bounding_box[1] + #convert np array to image + + # print(bounding_width,bounding_height) + # print(pred_img_array.shape) + + pred_img = Image.fromarray(pred_img_array.astype('uint8')) + # pred_img.show() + pred_img = pred_img.resize((bounding_width, bounding_height), resample = Image.BICUBIC) + # pred_img.show() + + pred_img_array = image_to_array(pred_img) + + # print(pred_img_array.shape) + pred_img_array = np.expand_dims(pred_img_array, axis = 0) + + # copy the decensored regions into the output image + for i in range(len(ori_array)): + for col in range(bounding_width): + for row in range(bounding_height): + bounding_width_index = col + bounding_box[0] + bounding_height_index = row + bounding_box[1] + if (bounding_width_index, bounding_height_index) in region: + output_img_array[bounding_height_index][bounding_width_index] = pred_img_array[i,:,:,:][row][col] + print("{region_counter} out of {region_count} regions decensored.".format(region_counter=region_counter, region_count=len(regions))) + + output_img_array = output_img_array * 255.0 + + #restore the alpha channel if the image had one + if has_alpha: + output_img_array = np.concatenate((output_img_array, alpha_channel), axis = 2) + + output_img = Image.fromarray(output_img_array.astype('uint8')) + + if file_name != None: + #save the decensored image + #file_name, _ = os.path.splitext(file_name) + save_path = os.path.join(self.args.decensor_output_path, file_name) + output_img.save(save_path) + + print("Decensored image saved to {save_path}!".format(save_path=save_path)) + return + else: + print("Decensored image. 
Returning it.") + return output_img + +if __name__ == '__main__': + decensor = Decensor() + decensor.decensor_all_images_in_folder() diff --git a/file.py b/file.py new file mode 100755 index 0000000..ab902d3 --- /dev/null +++ b/file.py @@ -0,0 +1,56 @@ +import os + +def check_file(input_dir, output_dir, Release_version = True): + file_list = [] + output_file_list = [] + files_removed = [] + + input_dir = os.listdir(input_dir) + output_dir = os.listdir(output_dir) + + for file_in in input_dir: + if not file_in.startswith('.'): + file_list.append(file_in) + + if(Release_version is True): + print("\nChecking valid files...") + for file_out in output_dir: + if file_out.lower().endswith('.png'): + output_file_list.append(file_out) + + # solving https://github.com/deeppomf/DeepCreamPy/issues/25 + # appending in list with reason as tuple (file name, reason) + for lhs in file_list: + lhs.lower() + if not lhs.lower().endswith('.png') : + files_removed.append((lhs, 0)) + for rhs in output_file_list: + if(lhs == rhs): + files_removed.append((lhs, 1)) + + # seperated detecting same file names and deleting file name list + # just in case of index_error and show list of files which will not go though + # decensor process + print("\n### These files will not be decensored for following reason ###\n") + + error_messages(file_list, files_removed) + input("\nPress anything to continue...") + + print("\n###################################\n") + + return file_list, files_removed + +def error_messages(file_list, files_removed): + + if files_removed is None: + return + + for remove_this,reason in files_removed: + if(file_list is not None): + file_list.remove(remove_this) + if reason == 0: + print(" REMOVED : (" + str(remove_this) +") is not PNG file format") + elif reason == 1: + print(" REMOVED : (" + str(remove_this) +") already exists") + elif reason == 2: + print(" REMOVED : (" + str(remove_this) +") file unreadable") \ No newline at end of file diff --git a/libs/utils.py b/libs/utils.py new file mode 100755 index 0000000..2e4d1f9 --- /dev/null +++ b/libs/utils.py @@ -0,0 +1,135 @@ +import numpy as np +from PIL import Image, ImageDraw + +#convert PIL image to numpy array +def image_to_array(image): + array = np.asarray(image) + return np.array(array / 255.0) + +#find strongly connected components with the mask color +def find_regions(image, mask_color): + pixel = image.load() + neighbors = dict() + width, height = image.size + for x in range(width): + for y in range(height): + if is_right_color(pixel[x,y], *mask_color): + neighbors[x, y] = {(x,y)} + for x, y in neighbors: + candidates = (x + 1, y), (x, y + 1) + for candidate in candidates: + if candidate in neighbors: + neighbors[x, y].add(candidate) + neighbors[candidate].add((x, y)) + closed_list = set() + + def connected_component(pixel): + region = set() + open_list = {pixel} + while open_list: + pixel = open_list.pop() + closed_list.add(pixel) + open_list |= neighbors[pixel] - closed_list + region.add(pixel) + return region + + regions = [] + for pixel in neighbors: + if pixel not in closed_list: + regions.append(connected_component(pixel)) + regions.sort(key = len, reverse = True) + return regions + +# risk of box being bigger than the image +def expand_bounding(img, region, expand_factor=1.5, min_size = 256): + #expand bounding box to capture more context + x, y = zip(*region) + min_x, min_y, max_x, max_y = min(x), min(y), max(x), max(y) + width, height = img.size + width_center = width//2 + height_center = height//2 + bb_width = max_x - min_x + 
bb_height = max_y - min_y + x_center = (min_x + max_x)//2 + y_center = (min_y + max_y)//2 + current_size = max(bb_width, bb_height) + current_size = int(current_size * expand_factor) + max_size = min(width, height) + if current_size > max_size: + current_size = max_size + elif current_size < min_size: + current_size = min_size + x1 = x_center - current_size//2 + x2 = x_center + current_size//2 + y1 = y_center - current_size//2 + y2 = y_center + current_size//2 + x1_square = x1 + y1_square = y1 + x2_square = x2 + y2_square = y2 + #move bounding boxes that are partially outside of the image inside the image + if (y1_square < 0 or y2_square > (height - 1)) and (x1_square < 0 or x2_square > (width - 1)): + #conservative square region + if x1_square < 0 and y1_square < 0: + x1_square = 0 + y1_square = 0 + x2_square = current_size + y2_square = current_size + elif x2_square > (width - 1) and y1_square < 0: + x1_square = width - current_size - 1 + y1_square = 0 + x2_square = width - 1 + y2_square = current_size + elif x1_square < 0 and y2_square > (height - 1): + x1_square = 0 + y1_square = height - current_size - 1 + x2_square = current_size + y2_square = height - 1 + elif x2_square > (width - 1) and y2_square > (height - 1): + x1_square = width - current_size - 1 + y1_square = height - current_size - 1 + x2_square = width - 1 + y2_square = height - 1 + else: + x1_square = x1 + y1_square = y1 + x2_square = x2 + y2_square = y2 + else: + if x1_square < 0: + difference = x1_square + x1_square -= difference + x2_square -= difference + if x2_square > (width - 1): + difference = x2_square - width + 1 + x1_square -= difference + x2_square -= difference + if y1_square < 0: + difference = y1_square + y1_square -= difference + y2_square -= difference + if y2_square > (height - 1): + difference = y2_square - height + 1 + y1_square -= difference + y2_square -= difference + # if y1_square < 0 or y2_square > (height - 1): + + #if bounding box goes outside of the image for some reason, set bounds to original, unexpanded values + #print(width, height) + if x2_square > width or y2_square > height: + print("bounding box out of bounds!") + print(x1_square, y1_square, x2_square, y2_square) + x1_square, y1_square, x2_square, y2_square = min_x, min_y, max_x, max_y + return x1_square, y1_square, x2_square, y2_square + +def is_right_color(pixel, r2, g2, b2): + r1, g1, b1 = pixel + return r1 == r2 and g1 == g2 and b1 == b2 + +if __name__ == '__main__': + image = Image.open('') + no_alpha_image = image.convert('RGB') + draw = ImageDraw.Draw(no_alpha_image) + for region in find_regions(no_alpha_image, [0,255,0]): + draw.rectangle(expand_bounding(no_alpha_image, region), outline=(0, 255, 0)) + no_alpha_image.show() \ No newline at end of file diff --git a/model.py b/model.py new file mode 100755 index 0000000..118d1c1 --- /dev/null +++ b/model.py @@ -0,0 +1,105 @@ +import tensorflow as tf +import os +import numpy as np +import module as mm + +#suppress tensorflow deprecation warnings +tf.logging.set_verbosity(tf.logging.ERROR) + +class InpaintNN(): + + def __init__(self, input_height=256, input_width=256, batch_size = 1, bar_model_name=None, bar_checkpoint_name=None, mosaic_model_name=None, mosaic_checkpoint_name = None, is_mosaic=False): + self.bar_model_name = bar_model_name + self.bar_checkpoint_name = bar_checkpoint_name + self.mosaic_model_name = mosaic_model_name + self.mosaic_checkpoint_name = mosaic_checkpoint_name + self.is_mosaic = is_mosaic + + self.input_height = input_height + self.input_width = input_width + 
self.batch_size = batch_size + + self.build_model() + + def build_model(self): + # ------- variables + + self.X = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3]) + self.Y = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3]) + + self.MASK = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3]) + IT = tf.placeholder(tf.float32) + + # ------- structure + + input = tf.concat([self.X, self.MASK], 3) + + vec_en = mm.encoder(input, reuse=False, name='G_en') + + vec_con = mm.contextual_block(vec_en, vec_en, self.MASK, 3, 50.0, 'CB1', stride=1) + + I_co = mm.decoder(vec_en, self.input_height, self.input_height, reuse=False, name='G_de') + I_ge = mm.decoder(vec_con, self.input_height, self.input_height, reuse=True, name='G_de') + + self.image_result = I_ge * (1-self.MASK) + self.Y*self.MASK + + D_real_red = mm.discriminator_red(self.Y, reuse=False, name='disc_red') + D_fake_red = mm.discriminator_red(self.image_result, reuse=True, name='disc_red') + + # ------- Loss + + Loss_D_red = tf.reduce_mean(tf.nn.relu(1+D_fake_red)) + tf.reduce_mean(tf.nn.relu(1-D_real_red)) + + Loss_D = Loss_D_red + + Loss_gan_red = -tf.reduce_mean(D_fake_red) + + Loss_gan = Loss_gan_red + + Loss_s_re = tf.reduce_mean(tf.abs(I_ge - self.Y)) + Loss_hat = tf.reduce_mean(tf.abs(I_co - self.Y)) + + A = tf.image.rgb_to_yuv((self.image_result+1)/2.0) + A_Y = tf.to_int32(A[:, :, :, 0:1]*255.0) + + B = tf.image.rgb_to_yuv((self.Y+1)/2.0) + B_Y = tf.to_int32(B[:, :, :, 0:1]*255.0) + + ssim = tf.reduce_mean(tf.image.ssim(A_Y, B_Y, 255.0)) + + alpha = IT/1000000 + + Loss_G = 0.1*Loss_gan + 10*Loss_s_re + 5*(1-alpha) * Loss_hat + + # --------------------- variable & optimizer + + var_D = [v for v in tf.global_variables() if v.name.startswith('disc_red')] + var_G = [v for v in tf.global_variables() if v.name.startswith('G_en') or v.name.startswith('G_de') or v.name.startswith('CB1')] + + update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) + + with tf.control_dependencies(update_ops): + optimize_D = tf.train.AdamOptimizer(learning_rate=0.0004, beta1=0.5, beta2=0.9).minimize(Loss_D, var_list=var_D) + optimize_G = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.9).minimize(Loss_G, var_list=var_G) + + config = tf.ConfigProto() + # config.gpu_options.per_process_gpu_memory_fraction = 0.4 + # config.gpu_options.allow_growth = False + + self.sess = tf.Session(config=config) + + init = tf.global_variables_initializer() + self.sess.run(init) + saver = tf.train.Saver() + + if self.is_mosaic: + Restore = tf.train.import_meta_graph(self.mosaic_model_name) + Restore.restore(self.sess, tf.train.latest_checkpoint(self.mosaic_checkpoint_name)) + else: + Restore = tf.train.import_meta_graph(self.bar_model_name) + Restore.restore(self.sess, tf.train.latest_checkpoint(self.bar_checkpoint_name)) + + def predict(self, censored, unused, mask): + img_sample = self.sess.run(self.image_result, feed_dict={self.X: censored, self.Y: unused, self.MASK: mask}) + + return img_sample \ No newline at end of file diff --git a/models/download and unzip model here!.URL b/models/download and unzip model here!.URL deleted file mode 100644 index 57be7a1..0000000 --- a/models/download and unzip model here!.URL +++ /dev/null @@ -1,6 +0,0 @@ -[InternetShortcut] -URL=https://drive.google.com/file/d/1Nzh2KAUO_rYPu2vDM_YP1EgbWtjI2hRX/view -IDList= -HotKey=0 
-IconFile=C:\Users\larry\AppData\Local\Mozilla\Firefox\Profiles\znm4q5yy.default\shortcutCache\pFyHNsPIdcXl_5JGSJ4Z+A==.ico -IconIndex=0 diff --git a/module.py b/module.py new file mode 100755 index 0000000..5545191 --- /dev/null +++ b/module.py @@ -0,0 +1,305 @@ +from __future__ import division +from ops import * +import tensorflow.contrib.layers as layers +import math + +def conv_nn(input, dims1, dims2, size1, size2, k_size = 3): + + pp = tf.pad(input, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + L1 = layers.conv2d(pp, dims1, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None) + L1 = tf.nn.elu(L1) + + pp = tf.pad(L1, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + L2 = layers.conv2d(pp, dims2, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None) + L2 = tf.nn.elu(L2) + L2 = tf.image.resize_nearest_neighbor(L2, (size1, size2)) + + return L2 + +def encoder(input, reuse, name): + with tf.variable_scope(name): + if reuse: + tf.get_variable_scope().reuse_variables() + else: + assert tf.get_variable_scope().reuse is False + + p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + CL1 = layers.conv2d(p, 32, [5, 5], stride=[1, 1], padding='VALID', activation_fn=None) + CL1 = tf.nn.elu(CL1) # 256 256 32 + + p = tf.pad(CL1, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + CL2 = layers.conv2d(p, 64, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None) + CL2 = tf.nn.elu(CL2) # 128 128 64 + + p = tf.pad(CL2, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + CL3 = layers.conv2d(p, 64, [3, 3], stride=[1, 1], padding='VALID', activation_fn=None) + CL3 = tf.nn.elu(CL3) # 128 128 64 + + p = tf.pad(CL3, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + CL4 = layers.conv2d(p, 128, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None) + CL4 = tf.nn.elu(CL4) # 64 64 128 + + p = tf.pad(CL4, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + CL5 = layers.conv2d(p, 128, [3, 3], stride=[1, 1], padding='VALID', activation_fn=None) + CL5 = tf.nn.elu(CL5) # 64 64 128 + + p = tf.pad(CL5, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT") + CL6 = layers.conv2d(p, 256, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None) + CL6 = tf.nn.elu(CL6) # 32 32 128 + + p = tf.pad(CL6, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + DCL1 = layers.conv2d(p, 256, [3, 3], rate=2, stride=[1, 1], padding='VALID', activation_fn=None) + DCL1 = tf.nn.elu(DCL1) + p = tf.pad(DCL1, [[0, 0], [4, 4], [4, 4], [0, 0]], "REFLECT") + DCL2 = layers.conv2d(p, 256, [3, 3], rate=4, stride=[1, 1], padding='VALID', activation_fn=None) + DCL2 = tf.nn.elu(DCL2) + p = tf.pad(DCL2, [[0, 0], [8, 8], [8, 8], [0, 0]], "REFLECT") + DCL3 = layers.conv2d(p, 256, [3, 3], rate=8, stride=[1, 1], padding='VALID', activation_fn=None) + DCL3 = tf.nn.elu(DCL3) + p = tf.pad(DCL3, [[0, 0], [16, 16], [16, 16], [0, 0]], "REFLECT") + DCL4 = layers.conv2d(p, 256, [3, 3], rate=16, stride=[1, 1], padding='VALID', activation_fn=None) + DCL4 = tf.nn.elu(DCL4) # 32 32 128 + + return DCL4 + +def decoder(input, size1, size2, reuse, name): + with tf.variable_scope(name): + if reuse: + tf.get_variable_scope().reuse_variables() + else: + assert tf.get_variable_scope().reuse is False + + DL1 = conv_nn(input, 128, 128, int(size1/4), int(size2/4)) # 64 64 128 + + DL2 = conv_nn(DL1, 64, 64, int(size1/2), int(size2/2)) # 128 128 64 + + DL3 = conv_nn(DL2, 32, 32, int(size1), int(size2)) + + DL4 = conv_nn(DL3, 16, 16, int(size1), int(size2)) + + LL2 = layers.conv2d(DL4, 3, [3, 3], stride=[1, 1], padding='SAME', activation_fn=None) # 256 256 3 + 
LL2 = tf.clip_by_value(LL2, -1.0, 1.0) + + return LL2 + +def discriminator_G(input, reuse, name): + with tf.variable_scope(name): + # image is 256 x 256 x input_c_dim + if reuse: + tf.get_variable_scope().reuse_variables() + else: + assert tf.get_variable_scope().reuse is False + + p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L1 = layers.conv2d(p, 64, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L1 = instance_norm(L1, 'di1') + L1 = tf.nn.leaky_relu(L1) + + p = tf.pad(L1, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L2 = layers.conv2d(p, 128, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L2 = instance_norm(L2, 'di2') + L2 = tf.nn.leaky_relu(L2) + + p = tf.pad(L2, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L3 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L3 = instance_norm(L3, 'di3') + L3 = tf.nn.leaky_relu(L3) + + p = tf.pad(L3, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L4 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L4 = instance_norm(L4, 'di4') + L4 = tf.nn.leaky_relu(L4) + L4 = layers.flatten(L4) + + L5 = tf.layers.dense(L4, 1) + + return L5 + +def discriminator_L(input, reuse, name): + with tf.variable_scope(name): + # image is 256 x 256 x input_c_dim + if reuse: + tf.get_variable_scope().reuse_variables() + else: + assert tf.get_variable_scope().reuse is False + + p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L1 = layers.conv2d(p, 64, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L1 = instance_norm(L1, 'di1l') + L1 = tf.nn.leaky_relu(L1) # 32 32 64 + + p = tf.pad(L1, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L2 = layers.conv2d(p, 128, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L2 = instance_norm(L2, 'di2l') + L2 = tf.nn.leaky_relu(L2) # 16 16 128 + + p = tf.pad(L2, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L3 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L3 = instance_norm(L3, 'di3l') + L3 = tf.nn.leaky_relu(L3) # 8 8 256 + + p = tf.pad(L3, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT") + L4 = layers.conv2d(p, 512, [5, 5], stride=2, padding='VALID', activation_fn=None) + #L4 = instance_norm(L4, 'di4l') + L4 = tf.nn.leaky_relu(L4) # 4 4 512 + L4 = layers.flatten(L4) + + L5 = tf.layers.dense(L4, 1) + + return L5 + +def discriminator_red(input, reuse, name): + with tf.variable_scope(name): + # image is 256 x 256 x input_c_dim + if reuse: + tf.get_variable_scope().reuse_variables() + else: + assert tf.get_variable_scope().reuse is False + + L1 = convolution_SN(input, 64, 5, 2, 'l1') + # L1 = instance_norm(L1, 'di1') + L1 = tf.nn.leaky_relu(L1) + + L2 = convolution_SN(L1, 128, 5, 2, 'l2') + # L2 = instance_norm(L2, 'di2') + L2 = tf.nn.leaky_relu(L2) + + L3 = convolution_SN(L2, 256, 5, 2, 'l3') + # L3 = instance_norm(L3, 'di3') + L3 = tf.nn.leaky_relu(L3) + + L4 = convolution_SN(L3, 256, 5, 2, 'l4') + # L4 = instance_norm(L4, 'di4') + L4 = tf.nn.leaky_relu(L4) + + L5 = convolution_SN(L4, 256, 5, 2, 'l5') + # L5 = instance_norm(L5, 'di5') + L5 = tf.nn.leaky_relu(L5) + + L6 = convolution_SN(L5, 512, 5, 2, 'l6') + # L6 = instance_norm(L6, 'di6') + L6 = tf.nn.leaky_relu(L6) + + L7 = dense_RED_SN(L6, 'l7') + + return L7 + +def contextual_block(bg_in, fg_in, mask, k_size, lamda, name, stride=1): + with tf.variable_scope(name): + b, h, w, dims = [i.value for i in bg_in.get_shape()] + temp = tf.image.resize_nearest_neighbor(mask, (h, w)) + temp = tf.expand_dims(temp[:, :, :, 0], 3) # 
b 128 128 1 + mask_r = tf.tile(temp, [1, 1, 1, dims]) # b 128 128 128 + bg = bg_in * mask_r + + kn = int((k_size - 1) / 2) + c = 0 + for p in range(kn, h - kn, stride): + for q in range(kn, w - kn, stride): + c += 1 + + patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID') + + patch1 = tf.reshape(patch1, (b, 1, c, k_size*k_size*dims)) + patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims)) + patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3]) + + patch2 = tf.extract_image_patches(fg_in, [1,k_size,k_size,1], [1,1,1,1], [1,1,1,1], 'SAME') + ACL = [] + + for ib in range(b): + + k1 = patch1[ib, :, :, :, :] + k1d = tf.reduce_sum(tf.square(k1), axis=2) + k2 = tf.reshape(k1, (k_size, k_size, dims, c)) + ww = patch2[ib, :, :, :] + wwd = tf.reduce_sum(tf.square(ww), axis=2, keepdims=True) + ft = tf.expand_dims(ww, 0) + + CS = tf.nn.conv2d(ft, k1, strides=[1, 1, 1, 1], padding='SAME') + + tt = k1d + wwd + + DS1 = tf.expand_dims(tt, 0) - 2 * CS + + DS2 = (DS1 - tf.reduce_mean(DS1, 3, True)) / reduce_std(DS1, 3, True) + DS2 = -1 * tf.nn.tanh(DS2) + + CA = softmax(lamda * DS2) + + ACLt = tf.nn.conv2d_transpose(CA, k2, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME') + ACLt = ACLt / (k_size ** 2) + + if ib == 0: + ACL = ACLt + else: + ACL = tf.concat((ACL, ACLt), 0) + + ACL = bg + ACL * (1.0 - mask_r) + + con1 = tf.concat([bg_in, ACL], 3) + ACL2 = layers.conv2d(con1, dims, [1, 1], stride=[1, 1], padding='VALID', activation_fn=None, scope='ML') + ACL2 = tf.nn.elu(ACL2) + + return ACL2 + +def contextual_block_cs(bg_in, fg_in, mask, k_size, lamda, name, stride=1): + with tf.variable_scope(name): + b, h, w, dims = [i.value for i in bg_in.get_shape()] + temp = tf.image.resize_nearest_neighbor(mask, (h, w)) + temp = tf.expand_dims(temp[:, :, :, 0], 3) # b 128 128 1 + mask_r = tf.tile(temp, [1, 1, 1, dims]) # b 128 128 128 + bg = bg_in * mask_r + + kn = int((k_size - 1) / 2) + c = 0 + for p in range(kn, h - kn, stride): + for q in range(kn, w - kn, stride): + c += 1 + + patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID') + + patch1 = tf.reshape(patch1, (b, 1, c, k_size*k_size*dims)) + patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims)) + patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3]) + + patch2 = tf.extract_image_patches(fg_in, [1,k_size,k_size,1], [1,1,1,1], [1,1,1,1], 'SAME') + ACL = [] + + fuse_weight = tf.reshape(tf.eye(3), [3, 3, 1, 1]) + + for ib in range(b): + + k1 = patch1[ib, :, :, :, :] + k2 = k1 / tf.sqrt(tf.reduce_sum(tf.square(k1), axis=2, keepdims=True) + 1e-16) + k1 = tf.reshape(k1, (k_size, k_size, dims, c)) + ww = patch2[ib, :, :, :] + ft = ww / tf.sqrt(tf.reduce_sum(tf.square(ww), axis=2, keepdims=True) + 1e-16) + ft = tf.expand_dims(ft, 0) + + CA = tf.nn.conv2d(ft, k2, strides=[1, 1, 1, 1], padding='SAME') + + CA = tf.reshape(CA, [1, h * w, c, 1]) + CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME') + CA = tf.reshape(CA, [1, h, w, int(math.sqrt(c)), int(math.sqrt(c))]) + CA = tf.transpose(CA, [0, 2, 1, 4, 3]) + CA = tf.reshape(CA, [1, h * w, c, 1]) + CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME') + CA = tf.reshape(CA, [1, h, w, int(math.sqrt(c)), int(math.sqrt(c))]) + CA = tf.transpose(CA, [0, 2, 1, 4, 3]) + CA = tf.reshape(CA, [1, h, w, c]) + + CA2 = softmax(lamda * CA) + + ACLt = tf.nn.conv2d_transpose(CA2, k1, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME') + ACLt = ACLt / 
(k_size ** 2)
+
+            if ib == 0:
+                ACL = ACLt
+            else:
+                ACL = tf.concat((ACL, ACLt), 0)
+
+        ACL2 = bg + ACL * (1.0 - mask_r)
+
+        return ACL2
+
diff --git a/ops.py b/ops.py
new file mode 100755
index 0000000..cd4e36b
--- /dev/null
+++ b/ops.py
@@ -0,0 +1,233 @@
+import tensorflow as tf
+import tensorflow.contrib.layers as layers
+import numpy as np
+import random as rr
+import math as mt
+import cv2
+from scipy import misc
+
+def instance_norm(input, name="instance_norm"):
+    with tf.variable_scope(name):
+        depth = input.get_shape()[3]
+        scale = tf.get_variable("scale", [depth], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
+        offset = tf.get_variable("offset", [depth], initializer=tf.constant_initializer(0.0))
+        mean, variance = tf.nn.moments(input, axes=[1,2], keep_dims=True)
+        epsilon = 1e-5
+        inv = tf.rsqrt(variance + epsilon)
+        normalized = (input-mean)*inv
+        return scale*normalized + offset
+
+def make_sq_mask(size, m_size, batch_size):
+
+    start_x = rr.randint(0, size - m_size-1)
+    start_y = rr.randint(0, size - m_size-1)
+
+    temp = np.ones([batch_size, size, size, 3])
+
+    temp[:, start_x:start_x + m_size, start_y:start_y + m_size, 0:3] *= 0
+
+    return temp, start_x, start_y
+
+def softmax(input):
+
+    k = tf.exp(input - 3)
+    k = tf.reduce_sum(k, 3, True)
+    # k = k - num * tf.ones_like(k)
+
+    output = tf.exp(input - 3) / k
+
+    return output
+
+def reduce_var(x, axis=None, keepdims=False):
+    """Variance of a tensor, alongside the specified axis.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to compute the variance.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`,
+            the reduced dimension is retained with length 1.
+
+    # Returns
+        A tensor with the variance of elements of `x`.
+    """
+    m = tf.reduce_mean(x, axis=axis, keepdims=True)
+    devs_squared = tf.square(x - m)
+    return tf.reduce_mean(devs_squared, axis=axis, keepdims=keepdims)
+
+def reduce_std(x, axis=None, keepdims=False):
+    """Standard deviation of a tensor, alongside the specified axis.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to compute the standard deviation.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`,
+            the reduced dimension is retained with length 1.
+
+    # Returns
+        A tensor with the standard deviation of elements of `x`.
+    """
+    return tf.sqrt(reduce_var(x, axis=axis, keepdims=keepdims))
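+
+# For reference, reduce_var/reduce_std compute the population (biased) variance
+# and standard deviation; an equivalent NumPy check (arbitrary values):
+#
+#   x = np.array([[1., 2.], [3., 4.]], dtype=np.float32)
+#   std = np.sqrt(np.mean((x - x.mean(axis=1, keepdims=True)) ** 2, axis=1))
+#   print(std)  # [0.5 0.5] -- matches reduce_std(x, axis=1) under a tf.Session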
+ """ + return tf.sqrt(reduce_var(x, axis=axis, keepdims=keepdims)) + +def l2_norm(v, eps=1e-12): + return v / (tf.reduce_sum(v ** 2) ** 0.5 + eps) + +def ff_mask(size, b_zise, maxLen, maxWid, maxAng, maxNum, maxVer, minLen = 20, minWid = 15, minVer = 5): + + mask = np.ones((b_zise, size, size, 3)) + + num = rr.randint(3, maxNum) + + for i in range(num): + startX = rr.randint(0, size) + startY = rr.randint(0, size) + numVer = rr.randint(minVer, maxVer) + width = rr.randint(minWid, maxWid) + for j in range(numVer): + angle = rr.uniform(-maxAng, maxAng) + length = rr.randint(minLen, maxLen) + + endX = min(size-1, max(0, int(startX + length * mt.sin(angle)))) + endY = min(size-1, max(0, int(startY + length * mt.cos(angle)))) + + if endX >= startX: + lowx = startX + highx = endX + else: + lowx = endX + highx = startX + if endY >= startY: + lowy = startY + highy = endY + else: + lowy = endY + highy = startY + + if abs(startY-endY) + abs(startX - endX) != 0: + + wlx = max(0, lowx-int(abs(width * mt.cos(angle)))) + whx = min(size - 1, highx+1 + int(abs(width * mt.cos(angle)))) + wly = max(0, lowy - int(abs(width * mt.sin(angle)))) + why = min(size - 1, highy+1 + int(abs(width * mt.sin(angle)))) + + for x in range(wlx, whx): + for y in range(wly, why): + + d = abs((endY-startY)*x - (endX -startX)*y - endY*startX + startY*endX) / mt.sqrt((startY-endY)**2 + (startX -endX)**2) + + if d <= width: + mask[:, x, y, :] = 0 + + wlx = max(0, lowx-width) + whx = min(size - 1, highx+width+1) + wly = max(0, lowy - width) + why = min(size - 1, highy + width + 1) + + for x2 in range(wlx, whx): + for y2 in range(wly, why): + + d1 = (startX - x2) ** 2 + (startY - y2) ** 2 + d2 = (endX - x2) ** 2 + (endY - y2) ** 2 + + if np.sqrt(d1) <= width: + mask[:, x2, y2, :] = 0 + if np.sqrt(d2) <= width: + mask[:, x2, y2, :] = 0 + startX = endX + startY = endY + + return mask + +def ff_mask_batch(size, b_size, maxLen, maxWid, maxAng, maxNum, maxVer, minLen = 20, minWid = 15, minVer = 5): + + mask = None + temp = ff_mask(size, 1, maxLen, maxWid, maxAng, maxNum, maxVer, minLen=minLen, minWid=minWid, minVer=minVer) + temp = temp[0] + for ib in range(b_size): + if ib == 0: + mask = np.expand_dims(temp, 0) + else: + mask = np.concatenate((mask, np.expand_dims(temp, 0)), 0) + + temp = cv2.rotate(temp, cv2.ROTATE_90_CLOCKWISE) + if ib == 3: + temp = cv2.flip(temp, 0) + + return mask + +def spectral_norm(w, name, iteration=1): + w_shape = w.shape.as_list() + w = tf.reshape(w, [-1, w_shape[-1]]) + + u = tf.get_variable(name+"u", [1, w_shape[-1]], initializer=tf.truncated_normal_initializer(), trainable=False) + + u_hat = u + v_hat = None + for i in range(iteration): + """ + power iteration + Usually iteration = 1 will be enough + """ + v_ = tf.matmul(u_hat, tf.transpose(w)) + v_hat = l2_norm(v_) + + u_ = tf.matmul(v_hat, w) + u_hat = l2_norm(u_) + + sigma = tf.matmul(tf.matmul(v_hat, w), tf.transpose(u_hat)) + w_norm = w / sigma + + with tf.control_dependencies([u.assign(u_hat)]): + w_norm = tf.reshape(w_norm, w_shape) + + return w_norm + +def convolution_SN(tensor, output_dim, kernel_size, stride, name): + _, h, w, c = [i.value for i in tensor.get_shape()] + + w = tf.get_variable(name=name + 'w', shape=[kernel_size, kernel_size, c, output_dim], initializer=layers.xavier_initializer()) + b = tf.get_variable(name=name + 'b', shape=[output_dim], initializer=tf.constant_initializer(0.0)) + + output = tf.nn.conv2d(tensor, filter=spectral_norm(w, name=name + 'w'), strides=[1, stride, stride, 1], padding='SAME') + b + + return output + 
+def dense_SN(tensor, output_dim, name): + _, h, w, c = [i.value for i in tensor.get_shape()] + + w = tf.get_variable(name=name + 'w', shape=[h, w, c, output_dim], initializer=layers.xavier_initializer()) + b = tf.get_variable(name=name + 'b', shape=[output_dim], initializer=tf.constant_initializer(0.0)) + + output = tf.nn.conv2d(tensor, filter=spectral_norm(w, name=name + 'w'), strides=[1, 1, 1, 1], padding='VALID') + b + + return output + +def dense_RED_SN(tensor, name): + sn_w = None + + _, h, w, c = [i.value for i in tensor.get_shape()] + h = int(h) + w = int(w) + c = int(c) + + weight = tf.get_variable(name=name + '_w', shape=[h*w, 1, c, 1], initializer=layers.xavier_initializer()) + b = tf.get_variable(name=name + '_b', shape=[1, h, w, 1], initializer=tf.constant_initializer(0.0)) + + for it in range(h*w): + w_pixel = weight[it:it+1, :, :, :] + sn_w_pixel = spectral_norm(w_pixel, name=name + 'w_%d' %it) + + if it == 0: + sn_w = sn_w_pixel + else: + sn_w = tf.concat([sn_w, sn_w_pixel], axis=0) + + w_rs = tf.reshape(sn_w, [h, w, c, 1]) + w_rs_t = tf.transpose(w_rs, [3, 0, 1, 2]) + + output_RED = tf.reduce_sum(tensor*w_rs_t + b, axis=3, keepdims=True) + + return output_RED \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100755 index 0000000000000000000000000000000000000000..b1c5d8f9f47f62b403bea9ee3abd28939f5ee96b GIT binary patch literal 774 zcmaJX1Q^Re^h_4}*9 z8Uyyo(BT6QJaf!&$KL{%xIjU4M2kLQOJZ}w>VQU7P9zUVOHEDR%ADP$NFEEjL;f|L zJCtb1myeK~C*HBcO-iP>_X&BTcW((YJd)G#wuVjx^UCeW4k@d@SC6i5iB!~;cne*R*jY+BT?Z%S z%sH`|Sux9=NB4a!}wKlWny<1}_kNWXEgS8sBsyxCyjplqejpzLBd$g1TeS12$ zHuYx0R8setGPjeQ?)X11v$nOTgY7GRUw>(Pa!Mvq6aE!CE9Q8imp1ds|5MnVdR(x_ SS?KaiM#0X;$s}^|uk{BicW|u$ literal 0 HcmV?d00001
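
Taken together, a minimal end-to-end sketch of the pieces this patch adds (it assumes the pretrained checkpoints have been unpacked under ./models/bar/ and ./models/mosaic/ as load_model() expects, and that inputs sit in the default folders from config.py):

    # run from the repository root
    from decensor import Decensor

    d = Decensor()                     # parses args, builds InpaintNN, restores the checkpoint
    d.decensor_all_images_in_folder()  # decensors every .png in ./decensor_input/

Equivalently, 'python decensor.py' runs the same two steps via the __main__ block, with '--is_mosaic=True' selecting the mosaic model and the ./decensor_input_original/ folder.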