Add DCPv2 files

deeppomf 2019-08-07 04:04:56 -04:00
parent 58c6b714a4
commit e9df99b9e8
9 changed files with 1123 additions and 6 deletions

config.py Executable file (+38)

@@ -0,0 +1,38 @@
import argparse
def str2floatarr(v):
if type(v) == str:
try:
return [float(x) for x in v.split(',')]
except ValueError:
raise argparse.ArgumentTypeError('Floats separated by commas expected.')
else:
raise argparse.ArgumentTypeError('Floats separated by commas expected.')
def str2bool(v):
if isinstance(v, bool):
return v
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def get_args():
parser = argparse.ArgumentParser(description='')
#Input output folders settings
parser.add_argument('--decensor_input_path', dest='decensor_input_path', default='./decensor_input/', help='path to the input images, with censored regions colored in the mask color, that decensor.py will decensor')
parser.add_argument('--decensor_input_original_path', dest='decensor_input_original_path', default='./decensor_input_original/', help='path to the unmodified original input images used for mosaic decensoring')
parser.add_argument('--decensor_output_path', dest='decensor_output_path', default='./decensor_output/', help='path where decensor.py will save the decensored output images')
#Decensor settings
parser.add_argument('--mask_color_red', dest='mask_color_red', default=0, type=int, help='red channel of mask color in decensoring')
parser.add_argument('--mask_color_green', dest='mask_color_green', default=255, type=int, help='green channel of mask color in decensoring')
parser.add_argument('--mask_color_blue', dest='mask_color_blue', default=0, type=int, help='blue channel of mask color in decensoring')
parser.add_argument('--is_mosaic', dest='is_mosaic', default='False', type=str2bool, help='true if image has mosaic censoring, false otherwise')
args = parser.parse_args()
return args
if __name__ == '__main__':
get_args()
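For reference, a minimal sketch of how the two custom argparse types behave (hypothetical example values, not part of the commit):

str2bool('yes')          #True; 'true', 't', 'y' and '1' also parse as True
str2bool('0')            #False; 'no', 'false', 'f' and 'n' also parse as False
str2floatarr('0,255,0')  #[0.0, 255.0, 0.0]; unparsable input raises argparse.ArgumentTypeError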

decensor.py Executable file (+251)

@@ -0,0 +1,251 @@
#!/usr/bin/env python3
try:
import numpy as np
from PIL import Image
import os
from copy import deepcopy
import config
import file
from model import InpaintNN
from libs.utils import *
except ImportError as e:
print("Error when importing libraries: ", e)
print("Some Python libraries are missing. You can install all requirements by running in the command line 'pip install -r requirements.txt' ")
exit(1)
class Decensor:
def __init__(self):
self.args = config.get_args()
self.is_mosaic = self.args.is_mosaic
self.mask_color = [self.args.mask_color_red/255.0, self.args.mask_color_green/255.0, self.args.mask_color_blue/255.0]
if not os.path.exists(self.args.decensor_output_path):
os.makedirs(self.args.decensor_output_path)
self.load_model()
def get_mask(self, colored):
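#returns an array of ones the same shape as the batched input, with zeros at every pixel that matches the mask color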
mask = np.ones(colored.shape, np.uint8)
i, j = np.where(np.all(colored[0] == self.mask_color, axis=-1))
mask[0, i, j] = 0
return mask
def load_model(self):
self.model = InpaintNN(bar_model_name = "./models/bar/Train_775000.meta", bar_checkpoint_name = "./models/bar/", mosaic_model_name = "./models/mosaic/Train_290000.meta", mosaic_checkpoint_name = "./models/mosaic/", is_mosaic=self.is_mosaic)
def decensor_all_images_in_folder(self):
#load model once at beginning and reuse same model
#self.load_model()
color_dir = self.args.decensor_input_path
file_names = os.listdir(color_dir)
input_dir = self.args.decensor_input_path
output_dir = self.args.decensor_output_path
# Change False to True before release --> file.check_file( input_dir, output_dir, True)
file_names, self.files_removed = file.check_file( input_dir, output_dir, False)
#convert all images into np arrays and put them in a list
for file_name in file_names:
color_file_path = os.path.join(color_dir, file_name)
color_bn, color_ext = os.path.splitext(file_name)
if os.path.isfile(color_file_path) and color_ext.casefold() == ".png":
print("--------------------------------------------------------------------------")
print("Decensoring the image {}".format(color_file_path))
try:
colored_img = Image.open(color_file_path)
except Exception:
print("Cannot identify image file (" + str(color_file_path) + ")")
self.files_removed.append((color_file_path, 2))
#in case the file extension does not match the actual format (e.g. text.txt renamed to text.png)
continue
#if we are doing a mosaic decensor
if self.is_mosaic:
#get the original file that hasn't been colored
ori_dir = self.args.decensor_input_original_path
#since the original image might not be a png, test multiple file formats
valid_formats = {".png", ".jpg", ".jpeg"}
for test_file_name in os.listdir(ori_dir):
test_bn, test_ext = os.path.splitext(test_file_name)
if (test_bn == color_bn) and (test_ext.casefold() in valid_formats):
ori_file_path = os.path.join(ori_dir, test_file_name)
ori_img = Image.open(ori_file_path)
# colored_img.show()
self.decensor_image(ori_img, colored_img, file_name)
break
else: #for...else, i.e if the loop finished without encountering break
print("Corresponding original, uncolored image not found in {}".format(color_file_path))
print("Check if it exists and is in the PNG or JPG format.")
else:
self.decensor_image(colored_img, colored_img, file_name)
else:
print("--------------------------------------------------------------------------")
print("Irregular file detected : "+str(color_file_path))
print("--------------------------------------------------------------------------")
if self.files_removed is not None:
file.error_messages(None, self.files_removed)
print("\nDecensoring complete!")
#decensors one image at a time
#TODO: decensor all cropped parts of the same image in a batch (then i need input for colored an array of those images and make additional changes)
def decensor_image(self, ori, colored, file_name=None):
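#ori is the image to inpaint; colored is the same image with the censored areas painted in the mask color (the two are identical for bar censors)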
width, height = ori.size
#save the alpha channel if the image has an alpha channel
has_alpha = False
if (ori.mode == "RGBA"):
has_alpha = True
alpha_channel = np.asarray(ori)[:,:,3]
alpha_channel = np.expand_dims(alpha_channel, axis =-1)
ori = ori.convert('RGB')
ori_array = image_to_array(ori)
ori_array = np.expand_dims(ori_array, axis = 0)
if self.is_mosaic:
#if mosaic decensor, mask is empty
# mask = np.ones(ori_array.shape, np.uint8)
# print(mask.shape)
colored = colored.convert('RGB')
color_array = image_to_array(colored)
color_array = np.expand_dims(color_array, axis = 0)
mask = self.get_mask(color_array)
mask_reshaped = mask[0,:,:,:] * 255.0
mask_img = Image.fromarray(mask_reshaped.astype('uint8'))
# mask_img.show()
else:
mask = self.get_mask(ori_array)
#colored image is only used for finding the regions
regions = find_regions(colored.convert('RGB'), [v*255 for v in self.mask_color])
print("Found {region_count} censored regions in this image!".format(region_count = len(regions)))
if len(regions) == 0 and not self.is_mosaic:
print("No green regions detected! Make sure you're using exactly the right color.")
return
output_img_array = ori_array[0].copy()
for region_counter, region in enumerate(regions, 1):
bounding_box = expand_bounding(ori, region, expand_factor=1.5)
crop_img = ori.crop(bounding_box)
# crop_img.show()
#convert mask back to image
mask_reshaped = mask[0,:,:,:] * 255.0
mask_img = Image.fromarray(mask_reshaped.astype('uint8'))
#resize the cropped images
crop_img = crop_img.resize((256, 256))
crop_img_array = image_to_array(crop_img)
#resize the mask images
mask_img = mask_img.crop(bounding_box)
mask_img = mask_img.resize((256, 256))
# mask_img.show()
#convert mask_img back to array
mask_array = image_to_array(mask_img)
#the mask has been upscaled so there will be values not equal to 0 or 1
# mask_array[mask_array > 0] = 1
# crop_img_array[..., :-1][mask_array==0] = (0,0,0)
if not self.is_mosaic:
a, b = np.where(np.all(mask_array == 0, axis = -1))
# print(a,b)
# print(crop_img_array[a,b])
# print(crop_img_array[a,b,0])
# print(crop_img_array.shape)
# print(type(crop_img_array[0,0]))
crop_img_array[a,b,:] = 0.
temp = Image.fromarray((crop_img_array * 255.0).astype('uint8'))
# temp.show()
# if self.is_mosaic:
# a, b = np.where(np.all(mask_array == 0, axis = -1))
# print(a, b)
# coords = [coord for coord in zip(a,b) if ((coord[0] + coord[1]) % 2 == 0)]
# a,b = zip(*coords)
# mask_array[a,b] = 1
# mask_array = mask_array * 255.0
# img = Image.fromarray(mask_array.astype('uint8'))
# img.show()
# return
crop_img_array = np.expand_dims(crop_img_array, axis = 0)
mask_array = np.expand_dims(mask_array, axis = 0)
# print(np.amax(crop_img_array))
# print(np.amax(mask_array))
# # print(np.amax(masked))
# print(np.amin(crop_img_array))
# print(np.amin(mask_array))
# # print(np.amin(masked))
# print(mask_array)
crop_img_array = crop_img_array * 2.0 - 1
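#rescale pixel values from [0, 1] to the [-1, 1] range the model works in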
# mask_array = mask_array / 255.0
# Run predictions for this batch of images
pred_img_array = self.model.predict(crop_img_array, crop_img_array, mask_array)
pred_img_array = np.squeeze(pred_img_array, axis = 0)
pred_img_array = (255.0 * ((pred_img_array + 1.0) / 2.0)).astype(np.uint8)
#scale prediction image back to original size
bounding_width = bounding_box[2]-bounding_box[0]
bounding_height = bounding_box[3]-bounding_box[1]
#convert np array to image
# print(bounding_width,bounding_height)
# print(pred_img_array.shape)
pred_img = Image.fromarray(pred_img_array.astype('uint8'))
# pred_img.show()
pred_img = pred_img.resize((bounding_width, bounding_height), resample = Image.BICUBIC)
# pred_img.show()
pred_img_array = image_to_array(pred_img)
# print(pred_img_array.shape)
pred_img_array = np.expand_dims(pred_img_array, axis = 0)
# copy the decensored regions into the output image
for i in range(len(ori_array)):
for col in range(bounding_width):
for row in range(bounding_height):
bounding_width_index = col + bounding_box[0]
bounding_height_index = row + bounding_box[1]
if (bounding_width_index, bounding_height_index) in region:
output_img_array[bounding_height_index][bounding_width_index] = pred_img_array[i,:,:,:][row][col]
print("{region_counter} out of {region_count} regions decensored.".format(region_counter=region_counter, region_count=len(regions)))
output_img_array = output_img_array * 255.0
#restore the alpha channel if the image had one
if has_alpha:
output_img_array = np.concatenate((output_img_array, alpha_channel), axis = 2)
output_img = Image.fromarray(output_img_array.astype('uint8'))
if file_name is not None:
#save the decensored image
#file_name, _ = os.path.splitext(file_name)
save_path = os.path.join(self.args.decensor_output_path, file_name)
output_img.save(save_path)
print("Decensored image saved to {save_path}!".format(save_path=save_path))
return
else:
print("Decensored image. Returning it.")
return output_img
if __name__ == '__main__':
decensor = Decensor()
decensor.decensor_all_images_in_folder()

file.py Executable file (+56)

@@ -0,0 +1,56 @@
import os
def check_file(input_dir, output_dir, release_version = True):
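#returns the list of input file names to process and a list of (file name, reason code) tuples for skipped files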
file_list = []
output_file_list = []
files_removed = []
input_dir = os.listdir(input_dir)
output_dir = os.listdir(output_dir)
for file_in in input_dir:
if not file_in.startswith('.'):
file_list.append(file_in)
if release_version:
print("\nChecking valid files...")
for file_out in output_dir:
if file_out.lower().endswith('.png'):
output_file_list.append(file_out)
# solving https://github.com/deeppomf/DeepCreamPy/issues/25
# appending in list with reason as tuple (file name, reason)
for lhs in file_list:
if not lhs.lower().endswith('.png'):
files_removed.append((lhs, 0))
for rhs in output_file_list:
if lhs == rhs:
files_removed.append((lhs, 1))
# detecting duplicate file names is separated from removing them from the file name list
# to avoid an index error and to show the list of files which will not go through
# the decensor process
print("\nThese files will not be decensored for the following reasons:\n")
error_messages(file_list, files_removed)
input("\nPress anything to continue...")
print("\n\n")
return file_list, files_removed
def error_messages(file_list, files_removed):
if files_removed is None:
return
for remove_this, reason in files_removed:
if file_list is not None:
file_list.remove(remove_this)
if reason == 0:
print(" REMOVED : (" + str(remove_this) + ") is not in the PNG file format")
elif reason == 1:
print(" REMOVED : (" + str(remove_this) + ") already exists in the output folder")
elif reason == 2:
print(" REMOVED : (" + str(remove_this) + ") is unreadable")

libs/utils.py Executable file (+135)

@@ -0,0 +1,135 @@
import numpy as np
from PIL import Image, ImageDraw
#convert PIL image to numpy array
def image_to_array(image):
array = np.asarray(image)
return np.array(array / 255.0)
#find connected components of pixels with the mask color
def find_regions(image, mask_color):
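#returns a list of sets of (x, y) pixel coordinates, one per connected region of the mask color, sorted largest first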
pixel = image.load()
neighbors = dict()
width, height = image.size
for x in range(width):
for y in range(height):
if is_right_color(pixel[x,y], *mask_color):
neighbors[x, y] = {(x,y)}
for x, y in neighbors:
candidates = (x + 1, y), (x, y + 1)
for candidate in candidates:
if candidate in neighbors:
neighbors[x, y].add(candidate)
neighbors[candidate].add((x, y))
closed_list = set()
def connected_component(pixel):
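#iterative flood fill: pop a pixel from the open list, mark it closed, and enqueue its unvisited neighbors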
region = set()
open_list = {pixel}
while open_list:
pixel = open_list.pop()
closed_list.add(pixel)
open_list |= neighbors[pixel] - closed_list
region.add(pixel)
return region
regions = []
for pixel in neighbors:
if pixel not in closed_list:
regions.append(connected_component(pixel))
regions.sort(key = len, reverse = True)
return regions
# risk of box being bigger than the image
def expand_bounding(img, region, expand_factor=1.5, min_size = 256):
#expand bounding box to capture more context
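#the box is grown to a square of side max(expand_factor * region size, min_size), capped at the image size and shifted to stay inside the image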
x, y = zip(*region)
min_x, min_y, max_x, max_y = min(x), min(y), max(x), max(y)
width, height = img.size
width_center = width//2
height_center = height//2
bb_width = max_x - min_x
bb_height = max_y - min_y
x_center = (min_x + max_x)//2
y_center = (min_y + max_y)//2
current_size = max(bb_width, bb_height)
current_size = int(current_size * expand_factor)
max_size = min(width, height)
if current_size > max_size:
current_size = max_size
elif current_size < min_size:
current_size = min_size
x1 = x_center - current_size//2
x2 = x_center + current_size//2
y1 = y_center - current_size//2
y2 = y_center + current_size//2
x1_square = x1
y1_square = y1
x2_square = x2
y2_square = y2
#move bounding boxes that are partially outside of the image inside the image
if (y1_square < 0 or y2_square > (height - 1)) and (x1_square < 0 or x2_square > (width - 1)):
#conservative square region
if x1_square < 0 and y1_square < 0:
x1_square = 0
y1_square = 0
x2_square = current_size
y2_square = current_size
elif x2_square > (width - 1) and y1_square < 0:
x1_square = width - current_size - 1
y1_square = 0
x2_square = width - 1
y2_square = current_size
elif x1_square < 0 and y2_square > (height - 1):
x1_square = 0
y1_square = height - current_size - 1
x2_square = current_size
y2_square = height - 1
elif x2_square > (width - 1) and y2_square > (height - 1):
x1_square = width - current_size - 1
y1_square = height - current_size - 1
x2_square = width - 1
y2_square = height - 1
else:
x1_square = x1
y1_square = y1
x2_square = x2
y2_square = y2
else:
if x1_square < 0:
difference = x1_square
x1_square -= difference
x2_square -= difference
if x2_square > (width - 1):
difference = x2_square - width + 1
x1_square -= difference
x2_square -= difference
if y1_square < 0:
difference = y1_square
y1_square -= difference
y2_square -= difference
if y2_square > (height - 1):
difference = y2_square - height + 1
y1_square -= difference
y2_square -= difference
# if y1_square < 0 or y2_square > (height - 1):
#if bounding box goes outside of the image for some reason, set bounds to original, unexpanded values
#print(width, height)
if x2_square > width or y2_square > height:
print("bounding box out of bounds!")
print(x1_square, y1_square, x2_square, y2_square)
x1_square, y1_square, x2_square, y2_square = min_x, min_y, max_x, max_y
return x1_square, y1_square, x2_square, y2_square
def is_right_color(pixel, r2, g2, b2):
r1, g1, b1 = pixel
return r1 == r2 and g1 == g2 and b1 == b2
if __name__ == '__main__':
image = Image.open('')
no_alpha_image = image.convert('RGB')
draw = ImageDraw.Draw(no_alpha_image)
for region in find_regions(no_alpha_image, [0,255,0]):
draw.rectangle(expand_bounding(no_alpha_image, region), outline=(0, 255, 0))
no_alpha_image.show()

model.py Executable file (+105)

@@ -0,0 +1,105 @@
import tensorflow as tf
import os
import numpy as np
import module as mm
#suppress tensorflow deprecation warnings
tf.logging.set_verbosity(tf.logging.ERROR)
class InpaintNN():
def __init__(self, input_height=256, input_width=256, batch_size = 1, bar_model_name=None, bar_checkpoint_name=None, mosaic_model_name=None, mosaic_checkpoint_name = None, is_mosaic=False):
self.bar_model_name = bar_model_name
self.bar_checkpoint_name = bar_checkpoint_name
self.mosaic_model_name = mosaic_model_name
self.mosaic_checkpoint_name = mosaic_checkpoint_name
self.is_mosaic = is_mosaic
self.input_height = input_height
self.input_width = input_width
self.batch_size = batch_size
self.build_model()
def build_model(self):
# ------- variables
self.X = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3])
self.Y = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3])
self.MASK = tf.placeholder(tf.float32, [self.batch_size, self.input_height, self.input_width, 3])
IT = tf.placeholder(tf.float32)
# ------- structure
input = tf.concat([self.X, self.MASK], 3)
vec_en = mm.encoder(input, reuse=False, name='G_en')
vec_con = mm.contextual_block(vec_en, vec_en, self.MASK, 3, 50.0, 'CB1', stride=1)
I_co = mm.decoder(vec_en, self.input_height, self.input_width, reuse=False, name='G_de')
I_ge = mm.decoder(vec_con, self.input_height, self.input_width, reuse=True, name='G_de')
self.image_result = I_ge * (1-self.MASK) + self.Y*self.MASK
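#composite the result: keep the real pixels from Y where MASK is 1 and take the generated pixels where MASK is 0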
D_real_red = mm.discriminator_red(self.Y, reuse=False, name='disc_red')
D_fake_red = mm.discriminator_red(self.image_result, reuse=True, name='disc_red')
# ------- Loss
Loss_D_red = tf.reduce_mean(tf.nn.relu(1+D_fake_red)) + tf.reduce_mean(tf.nn.relu(1-D_real_red))
Loss_D = Loss_D_red
Loss_gan_red = -tf.reduce_mean(D_fake_red)
Loss_gan = Loss_gan_red
Loss_s_re = tf.reduce_mean(tf.abs(I_ge - self.Y))
Loss_hat = tf.reduce_mean(tf.abs(I_co - self.Y))
A = tf.image.rgb_to_yuv((self.image_result+1)/2.0)
A_Y = tf.to_int32(A[:, :, :, 0:1]*255.0)
B = tf.image.rgb_to_yuv((self.Y+1)/2.0)
B_Y = tf.to_int32(B[:, :, :, 0:1]*255.0)
ssim = tf.reduce_mean(tf.image.ssim(A_Y, B_Y, 255.0))
alpha = IT/1000000
Loss_G = 0.1*Loss_gan + 10*Loss_s_re + 5*(1-alpha) * Loss_hat
# --------------------- variable & optimizer
var_D = [v for v in tf.global_variables() if v.name.startswith('disc_red')]
var_G = [v for v in tf.global_variables() if v.name.startswith('G_en') or v.name.startswith('G_de') or v.name.startswith('CB1')]
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
optimize_D = tf.train.AdamOptimizer(learning_rate=0.0004, beta1=0.5, beta2=0.9).minimize(Loss_D, var_list=var_D)
optimize_G = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0.5, beta2=0.9).minimize(Loss_G, var_list=var_G)
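#everything from the losses down to the optimizers is training-time code; predict() below only evaluates image_result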
config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.4
# config.gpu_options.allow_growth = False
self.sess = tf.Session(config=config)
init = tf.global_variables_initializer()
self.sess.run(init)
saver = tf.train.Saver()
if self.is_mosaic:
Restore = tf.train.import_meta_graph(self.mosaic_model_name)
Restore.restore(self.sess, tf.train.latest_checkpoint(self.mosaic_checkpoint_name))
else:
Restore = tf.train.import_meta_graph(self.bar_model_name)
Restore.restore(self.sess, tf.train.latest_checkpoint(self.bar_checkpoint_name))
def predict(self, censored, original, mask):
#censored and original feed X and Y; Y supplies the known pixels kept by the mask composite
img_sample = self.sess.run(self.image_result, feed_dict={self.X: censored, self.Y: original, self.MASK: mask})
return img_sample
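A minimal usage sketch, assuming the bar model files referenced in decensor.py are present; shapes follow the placeholders declared in build_model (batch_size defaults to 1):

import numpy as np
from model import InpaintNN
net = InpaintNN(bar_model_name="./models/bar/Train_775000.meta", bar_checkpoint_name="./models/bar/")
censored = np.zeros((1, 256, 256, 3), dtype=np.float32) #input image scaled to [-1, 1]
mask = np.ones((1, 256, 256, 3), dtype=np.float32) #1 = known pixel, 0 = region to inpaint
result = net.predict(censored, censored, mask) #returns a (1, 256, 256, 3) array in [-1, 1]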

Deleted file (-6)

@@ -1,6 +0,0 @@
[InternetShortcut]
URL=https://drive.google.com/file/d/1Nzh2KAUO_rYPu2vDM_YP1EgbWtjI2hRX/view
IDList=
HotKey=0
IconFile=C:\Users\larry\AppData\Local\Mozilla\Firefox\Profiles\znm4q5yy.default\shortcutCache\pFyHNsPIdcXl_5JGSJ4Z+A==.ico
IconIndex=0

module.py Executable file (+305)

@@ -0,0 +1,305 @@
from __future__ import division
from ops import *
import tensorflow.contrib.layers as layers
import math
def conv_nn(input, dims1, dims2, size1, size2, k_size = 3):
pp = tf.pad(input, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
L1 = layers.conv2d(pp, dims1, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None)
L1 = tf.nn.elu(L1)
pp = tf.pad(L1, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
L2 = layers.conv2d(pp, dims2, [k_size, k_size], stride=[1, 1], padding='VALID', activation_fn=None)
L2 = tf.nn.elu(L2)
L2 = tf.image.resize_nearest_neighbor(L2, (size1, size2))
return L2
def encoder(input, reuse, name):
with tf.variable_scope(name):
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
CL1 = layers.conv2d(p, 32, [5, 5], stride=[1, 1], padding='VALID', activation_fn=None)
CL1 = tf.nn.elu(CL1) # 256 256 32
p = tf.pad(CL1, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
CL2 = layers.conv2d(p, 64, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None)
CL2 = tf.nn.elu(CL2) # 128 128 64
p = tf.pad(CL2, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
CL3 = layers.conv2d(p, 64, [3, 3], stride=[1, 1], padding='VALID', activation_fn=None)
CL3 = tf.nn.elu(CL3) # 128 128 64
p = tf.pad(CL3, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
CL4 = layers.conv2d(p, 128, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None)
CL4 = tf.nn.elu(CL4) # 64 64 128
p = tf.pad(CL4, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
CL5 = layers.conv2d(p, 128, [3, 3], stride=[1, 1], padding='VALID', activation_fn=None)
CL5 = tf.nn.elu(CL5) # 64 64 128
p = tf.pad(CL5, [[0, 0], [1, 1], [1, 1], [0, 0]], "REFLECT")
CL6 = layers.conv2d(p, 256, [3, 3], stride=[2, 2], padding='VALID', activation_fn=None)
CL6 = tf.nn.elu(CL6) # 32 32 256
p = tf.pad(CL6, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
DCL1 = layers.conv2d(p, 256, [3, 3], rate=2, stride=[1, 1], padding='VALID', activation_fn=None)
DCL1 = tf.nn.elu(DCL1)
p = tf.pad(DCL1, [[0, 0], [4, 4], [4, 4], [0, 0]], "REFLECT")
DCL2 = layers.conv2d(p, 256, [3, 3], rate=4, stride=[1, 1], padding='VALID', activation_fn=None)
DCL2 = tf.nn.elu(DCL2)
p = tf.pad(DCL2, [[0, 0], [8, 8], [8, 8], [0, 0]], "REFLECT")
DCL3 = layers.conv2d(p, 256, [3, 3], rate=8, stride=[1, 1], padding='VALID', activation_fn=None)
DCL3 = tf.nn.elu(DCL3)
p = tf.pad(DCL3, [[0, 0], [16, 16], [16, 16], [0, 0]], "REFLECT")
DCL4 = layers.conv2d(p, 256, [3, 3], rate=16, stride=[1, 1], padding='VALID', activation_fn=None)
DCL4 = tf.nn.elu(DCL4) # 32 32 256
return DCL4
def decoder(input, size1, size2, reuse, name):
with tf.variable_scope(name):
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
DL1 = conv_nn(input, 128, 128, int(size1/4), int(size2/4)) # 64 64 128
DL2 = conv_nn(DL1, 64, 64, int(size1/2), int(size2/2)) # 128 128 64
DL3 = conv_nn(DL2, 32, 32, int(size1), int(size2))
DL4 = conv_nn(DL3, 16, 16, int(size1), int(size2))
LL2 = layers.conv2d(DL4, 3, [3, 3], stride=[1, 1], padding='SAME', activation_fn=None) # 256 256 3
LL2 = tf.clip_by_value(LL2, -1.0, 1.0)
return LL2
def discriminator_G(input, reuse, name):
with tf.variable_scope(name):
# image is 256 x 256 x input_c_dim
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L1 = layers.conv2d(p, 64, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L1 = instance_norm(L1, 'di1')
L1 = tf.nn.leaky_relu(L1)
p = tf.pad(L1, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L2 = layers.conv2d(p, 128, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L2 = instance_norm(L2, 'di2')
L2 = tf.nn.leaky_relu(L2)
p = tf.pad(L2, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L3 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L3 = instance_norm(L3, 'di3')
L3 = tf.nn.leaky_relu(L3)
p = tf.pad(L3, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L4 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L4 = instance_norm(L4, 'di4')
L4 = tf.nn.leaky_relu(L4)
L4 = layers.flatten(L4)
L5 = tf.layers.dense(L4, 1)
return L5
def discriminator_L(input, reuse, name):
with tf.variable_scope(name):
# image is 256 x 256 x input_c_dim
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
p = tf.pad(input, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L1 = layers.conv2d(p, 64, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L1 = instance_norm(L1, 'di1l')
L1 = tf.nn.leaky_relu(L1) # 32 32 64
p = tf.pad(L1, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L2 = layers.conv2d(p, 128, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L2 = instance_norm(L2, 'di2l')
L2 = tf.nn.leaky_relu(L2) # 16 16 128
p = tf.pad(L2, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L3 = layers.conv2d(p, 256, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L3 = instance_norm(L3, 'di3l')
L3 = tf.nn.leaky_relu(L3) # 8 8 256
p = tf.pad(L3, [[0, 0], [2, 2], [2, 2], [0, 0]], "REFLECT")
L4 = layers.conv2d(p, 512, [5, 5], stride=2, padding='VALID', activation_fn=None)
#L4 = instance_norm(L4, 'di4l')
L4 = tf.nn.leaky_relu(L4) # 4 4 512
L4 = layers.flatten(L4)
L5 = tf.layers.dense(L4, 1)
return L5
def discriminator_red(input, reuse, name):
with tf.variable_scope(name):
# image is 256 x 256 x input_c_dim
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
L1 = convolution_SN(input, 64, 5, 2, 'l1')
# L1 = instance_norm(L1, 'di1')
L1 = tf.nn.leaky_relu(L1)
L2 = convolution_SN(L1, 128, 5, 2, 'l2')
# L2 = instance_norm(L2, 'di2')
L2 = tf.nn.leaky_relu(L2)
L3 = convolution_SN(L2, 256, 5, 2, 'l3')
# L3 = instance_norm(L3, 'di3')
L3 = tf.nn.leaky_relu(L3)
L4 = convolution_SN(L3, 256, 5, 2, 'l4')
# L4 = instance_norm(L4, 'di4')
L4 = tf.nn.leaky_relu(L4)
L5 = convolution_SN(L4, 256, 5, 2, 'l5')
# L5 = instance_norm(L5, 'di5')
L5 = tf.nn.leaky_relu(L5)
L6 = convolution_SN(L5, 512, 5, 2, 'l6')
# L6 = instance_norm(L6, 'di6')
L6 = tf.nn.leaky_relu(L6)
L7 = dense_RED_SN(L6, 'l7')
return L7
def contextual_block(bg_in, fg_in, mask, k_size, lamda, name, stride=1):
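#contextual attention: extract k_size x k_size patches from the known (background) features, score their similarity against every location of fg_in, softmax the scores, and reconstruct the masked area as a weighted blend of known patches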
with tf.variable_scope(name):
b, h, w, dims = [i.value for i in bg_in.get_shape()]
temp = tf.image.resize_nearest_neighbor(mask, (h, w))
temp = tf.expand_dims(temp[:, :, :, 0], 3) # b 128 128 1
mask_r = tf.tile(temp, [1, 1, 1, dims]) # b 128 128 128
bg = bg_in * mask_r
kn = int((k_size - 1) / 2)
c = 0
for p in range(kn, h - kn, stride):
for q in range(kn, w - kn, stride):
c += 1
patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
patch1 = tf.reshape(patch1, (b, 1, c, k_size*k_size*dims))
patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims))
patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3])
patch2 = tf.extract_image_patches(fg_in, [1,k_size,k_size,1], [1,1,1,1], [1,1,1,1], 'SAME')
ACL = []
for ib in range(b):
k1 = patch1[ib, :, :, :, :]
k1d = tf.reduce_sum(tf.square(k1), axis=2)
k2 = tf.reshape(k1, (k_size, k_size, dims, c))
ww = patch2[ib, :, :, :]
wwd = tf.reduce_sum(tf.square(ww), axis=2, keepdims=True)
ft = tf.expand_dims(ww, 0)
CS = tf.nn.conv2d(ft, k1, strides=[1, 1, 1, 1], padding='SAME')
tt = k1d + wwd
DS1 = tf.expand_dims(tt, 0) - 2 * CS
DS2 = (DS1 - tf.reduce_mean(DS1, 3, True)) / reduce_std(DS1, 3, True)
DS2 = -1 * tf.nn.tanh(DS2)
CA = softmax(lamda * DS2)
ACLt = tf.nn.conv2d_transpose(CA, k2, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME')
ACLt = ACLt / (k_size ** 2)
if ib == 0:
ACL = ACLt
else:
ACL = tf.concat((ACL, ACLt), 0)
ACL = bg + ACL * (1.0 - mask_r)
con1 = tf.concat([bg_in, ACL], 3)
ACL2 = layers.conv2d(con1, dims, [1, 1], stride=[1, 1], padding='VALID', activation_fn=None, scope='ML')
ACL2 = tf.nn.elu(ACL2)
return ACL2
def contextual_block_cs(bg_in, fg_in, mask, k_size, lamda, name, stride=1):
with tf.variable_scope(name):
b, h, w, dims = [i.value for i in bg_in.get_shape()]
temp = tf.image.resize_nearest_neighbor(mask, (h, w))
temp = tf.expand_dims(temp[:, :, :, 0], 3) # b 128 128 1
mask_r = tf.tile(temp, [1, 1, 1, dims]) # b 128 128 128
bg = bg_in * mask_r
kn = int((k_size - 1) / 2)
c = 0
for p in range(kn, h - kn, stride):
for q in range(kn, w - kn, stride):
c += 1
patch1 = tf.extract_image_patches(bg, [1, k_size, k_size, 1], [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
patch1 = tf.reshape(patch1, (b, 1, c, k_size*k_size*dims))
patch1 = tf.reshape(patch1, (b, 1, 1, c, k_size * k_size * dims))
patch1 = tf.transpose(patch1, [0, 1, 2, 4, 3])
patch2 = tf.extract_image_patches(fg_in, [1,k_size,k_size,1], [1,1,1,1], [1,1,1,1], 'SAME')
ACL = []
fuse_weight = tf.reshape(tf.eye(3), [3, 3, 1, 1])
for ib in range(b):
k1 = patch1[ib, :, :, :, :]
k2 = k1 / tf.sqrt(tf.reduce_sum(tf.square(k1), axis=2, keepdims=True) + 1e-16)
k1 = tf.reshape(k1, (k_size, k_size, dims, c))
ww = patch2[ib, :, :, :]
ft = ww / tf.sqrt(tf.reduce_sum(tf.square(ww), axis=2, keepdims=True) + 1e-16)
ft = tf.expand_dims(ft, 0)
CA = tf.nn.conv2d(ft, k2, strides=[1, 1, 1, 1], padding='SAME')
CA = tf.reshape(CA, [1, h * w, c, 1])
CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
CA = tf.reshape(CA, [1, h, w, int(math.sqrt(c)), int(math.sqrt(c))])
CA = tf.transpose(CA, [0, 2, 1, 4, 3])
CA = tf.reshape(CA, [1, h * w, c, 1])
CA = tf.nn.conv2d(CA, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
CA = tf.reshape(CA, [1, h, w, int(math.sqrt(c)), int(math.sqrt(c))])
CA = tf.transpose(CA, [0, 2, 1, 4, 3])
CA = tf.reshape(CA, [1, h, w, c])
CA2 = softmax(lamda * CA)
ACLt = tf.nn.conv2d_transpose(CA2, k1, output_shape=[1, h, w, dims], strides=[1, 1, 1, 1], padding='SAME')
ACLt = ACLt / (k_size ** 2)
if ib == 0:
ACL = ACLt
else:
ACL = tf.concat((ACL, ACLt), 0)
ACL2 = bg + ACL * (1.0 - mask_r)
return ACL2

ops.py Executable file (+233)

@@ -0,0 +1,233 @@
import tensorflow as tf
import tensorflow.contrib.layers as layers
import numpy as np
import random as rr
import math as mt
import cv2
from scipy import misc
def instance_norm(input, name="instance_norm"):
with tf.variable_scope(name):
depth = input.get_shape()[3]
scale = tf.get_variable("scale", [depth], initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
offset = tf.get_variable("offset", [depth], initializer=tf.constant_initializer(0.0))
mean, variance = tf.nn.moments(input, axes=[1,2], keep_dims=True)
epsilon = 1e-5
inv = tf.rsqrt(variance + epsilon)
normalized = (input-mean)*inv
return scale*normalized + offset
def make_sq_mask(size, m_size, batch_size):
start_x = rr.randint(0, size - m_size-1)
start_y = rr.randint(0, size - m_size-1)
temp = np.ones([batch_size, size, size, 3])
temp[:, start_x:start_x + m_size, start_y:start_y + m_size, 0:3] *= 0
return temp, start_x, start_y
def softmax(input):
k = tf.exp(input - 3)
k = tf.reduce_sum(k, 3, True)
# k = k - num * tf.ones_like(k)
output = tf.exp(input - 3) / k
return output
def reduce_var(x, axis=None, keepdims=False):
"""Variance of a tensor, alongside the specified axis.
# Arguments
x: A tensor or variable.
axis: An integer, the axis to compute the variance.
keepdims: A boolean, whether to keep the dimensions or not.
If `keepdims` is `False`, the rank of the tensor is reduced
by 1. If `keepdims` is `True`,
the reduced dimension is retained with length 1.
# Returns
A tensor with the variance of elements of `x`.
"""
m = tf.reduce_mean(x, axis=axis, keepdims=True)
devs_squared = tf.square(x - m)
return tf.reduce_mean(devs_squared, axis=axis, keepdims=keepdims)
def reduce_std(x, axis=None, keepdims=False):
"""Standard deviation of a tensor, alongside the specified axis.
# Arguments
x: A tensor or variable.
axis: An integer, the axis to compute the standard deviation.
keepdims: A boolean, whether to keep the dimensions or not.
If `keepdims` is `False`, the rank of the tensor is reduced
by 1. If `keepdims` is `True`,
the reduced dimension is retained with length 1.
# Returns
A tensor with the standard deviation of elements of `x`.
"""
return tf.sqrt(reduce_var(x, axis=axis, keepdims=keepdims))
def l2_norm(v, eps=1e-12):
return v / (tf.reduce_sum(v ** 2) ** 0.5 + eps)
#free-form mask: ones everywhere except maxNum random polyline strokes of random width, which are zeroed
def ff_mask(size, b_size, maxLen, maxWid, maxAng, maxNum, maxVer, minLen = 20, minWid = 15, minVer = 5):
mask = np.ones((b_size, size, size, 3))
num = rr.randint(3, maxNum)
for i in range(num):
startX = rr.randint(0, size)
startY = rr.randint(0, size)
numVer = rr.randint(minVer, maxVer)
width = rr.randint(minWid, maxWid)
for j in range(numVer):
angle = rr.uniform(-maxAng, maxAng)
length = rr.randint(minLen, maxLen)
endX = min(size-1, max(0, int(startX + length * mt.sin(angle))))
endY = min(size-1, max(0, int(startY + length * mt.cos(angle))))
if endX >= startX:
lowx = startX
highx = endX
else:
lowx = endX
highx = startX
if endY >= startY:
lowy = startY
highy = endY
else:
lowy = endY
highy = startY
if abs(startY-endY) + abs(startX - endX) != 0:
wlx = max(0, lowx-int(abs(width * mt.cos(angle))))
whx = min(size - 1, highx+1 + int(abs(width * mt.cos(angle))))
wly = max(0, lowy - int(abs(width * mt.sin(angle))))
why = min(size - 1, highy+1 + int(abs(width * mt.sin(angle))))
for x in range(wlx, whx):
for y in range(wly, why):
d = abs((endY-startY)*x - (endX -startX)*y - endY*startX + startY*endX) / mt.sqrt((startY-endY)**2 + (startX -endX)**2)
if d <= width:
mask[:, x, y, :] = 0
wlx = max(0, lowx-width)
whx = min(size - 1, highx+width+1)
wly = max(0, lowy - width)
why = min(size - 1, highy + width + 1)
for x2 in range(wlx, whx):
for y2 in range(wly, why):
d1 = (startX - x2) ** 2 + (startY - y2) ** 2
d2 = (endX - x2) ** 2 + (endY - y2) ** 2
if np.sqrt(d1) <= width:
mask[:, x2, y2, :] = 0
if np.sqrt(d2) <= width:
mask[:, x2, y2, :] = 0
startX = endX
startY = endY
return mask
def ff_mask_batch(size, b_size, maxLen, maxWid, maxAng, maxNum, maxVer, minLen = 20, minWid = 15, minVer = 5):
mask = None
temp = ff_mask(size, 1, maxLen, maxWid, maxAng, maxNum, maxVer, minLen=minLen, minWid=minWid, minVer=minVer)
temp = temp[0]
for ib in range(b_size):
if ib == 0:
mask = np.expand_dims(temp, 0)
else:
mask = np.concatenate((mask, np.expand_dims(temp, 0)), 0)
temp = cv2.rotate(temp, cv2.ROTATE_90_CLOCKWISE)
if ib == 3:
temp = cv2.flip(temp, 0)
return mask
def spectral_norm(w, name, iteration=1):
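#spectral normalization: estimate the largest singular value of w with power iteration and divide w by it, constraining the layer's Lipschitz constant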
w_shape = w.shape.as_list()
w = tf.reshape(w, [-1, w_shape[-1]])
u = tf.get_variable(name+"u", [1, w_shape[-1]], initializer=tf.truncated_normal_initializer(), trainable=False)
u_hat = u
v_hat = None
for i in range(iteration):
#power iteration; usually iteration = 1 is enough
v_ = tf.matmul(u_hat, tf.transpose(w))
v_hat = l2_norm(v_)
u_ = tf.matmul(v_hat, w)
u_hat = l2_norm(u_)
sigma = tf.matmul(tf.matmul(v_hat, w), tf.transpose(u_hat))
w_norm = w / sigma
with tf.control_dependencies([u.assign(u_hat)]):
w_norm = tf.reshape(w_norm, w_shape)
return w_norm
def convolution_SN(tensor, output_dim, kernel_size, stride, name):
_, h, w, c = [i.value for i in tensor.get_shape()]
weight = tf.get_variable(name=name + 'w', shape=[kernel_size, kernel_size, c, output_dim], initializer=layers.xavier_initializer())
b = tf.get_variable(name=name + 'b', shape=[output_dim], initializer=tf.constant_initializer(0.0))
output = tf.nn.conv2d(tensor, filter=spectral_norm(weight, name=name + 'w'), strides=[1, stride, stride, 1], padding='SAME') + b
return output
def dense_SN(tensor, output_dim, name):
_, h, w, c = [i.value for i in tensor.get_shape()]
weight = tf.get_variable(name=name + 'w', shape=[h, w, c, output_dim], initializer=layers.xavier_initializer())
b = tf.get_variable(name=name + 'b', shape=[output_dim], initializer=tf.constant_initializer(0.0))
output = tf.nn.conv2d(tensor, filter=spectral_norm(weight, name=name + 'w'), strides=[1, 1, 1, 1], padding='VALID') + b
return output
def dense_RED_SN(tensor, name):
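#applies a separate spectrally normalized weight vector at each spatial position, then sums over channels to produce one logit per position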
sn_w = None
_, h, w, c = [i.value for i in tensor.get_shape()]
h = int(h)
w = int(w)
c = int(c)
weight = tf.get_variable(name=name + '_w', shape=[h*w, 1, c, 1], initializer=layers.xavier_initializer())
b = tf.get_variable(name=name + '_b', shape=[1, h, w, 1], initializer=tf.constant_initializer(0.0))
for it in range(h*w):
w_pixel = weight[it:it+1, :, :, :]
sn_w_pixel = spectral_norm(w_pixel, name=name + 'w_%d' %it)
if it == 0:
sn_w = sn_w_pixel
else:
sn_w = tf.concat([sn_w, sn_w_pixel], axis=0)
w_rs = tf.reshape(sn_w, [h, w, c, 1])
w_rs_t = tf.transpose(w_rs, [3, 0, 1, 2])
output_RED = tf.reduce_sum(tensor*w_rs_t + b, axis=3, keepdims=True)
return output_RED

requirements.txt Executable file (BIN)

Binary file not shown.