model version 0.1

yunjey
Commit 3a5e20df92eac09ce9a3158789021ac32f06b151 3a5e20df 1 parent 982b4b52
Showing 4 changed files with 412 additions and 0 deletions
model.py
ops.py
solver.py
train.py
--- a/model.py 0 → 100644
View file @3a5e20d
+++ b/model.py 0 → 100644
View file @3a5e20d
+import tensorflow as tf
+from ops import * 
+
+class DTN(object):
+    """Domain Transfer Network for unsupervised cross-domain image generation.
+
+    Defines the feature extractor f, the generator and the discriminator, and
+    wires them into losses, optimizers and summaries in build_model().
+    """
+
+    def __init__(self, batch_size=100, learning_rate=0.0002, image_size=32, output_size=32,
+                 dim_color=3, dim_fout=100, dim_df=64, dim_gf=64, dim_ff=64):
+        """
+        Args:
+            batch_size: (optional) number of images per batch; fixed in the placeholder and in every reshape
+            learning_rate: (optional) learning rate shared by all Adam optimizers
+            image_size: (optional) spatial size of the input images
+            output_size: (optional) spatial size of image generated by generator; it is floor-divided
+                         by 2, 4, 8 and 16 inside the generator, and build_model() subtracts generated
+                         images from input images, so it has to match image_size
+            dim_color: (optional) dimension of image color; default is 3 for rgb
+            dim_fout: (optional) dimension of the vector returned by function f (the generator's input)
+            dim_df: (optional) dimension of discriminator's filter in first convolution layer
+            dim_gf: (optional) dimension of generator's filter in last convolution layer
+            dim_ff: (optional) dimension of function f's filter in first convolution layer
+        """
+        # hyper parameters
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+        self.image_size = image_size
+        self.output_size = output_size
+        self.dim_color = dim_color
+        self.dim_fout = dim_fout
+        self.dim_df = dim_df
+        self.dim_gf = dim_gf
+        self.dim_ff = dim_ff
+
+        # placeholder for a batch of input images (either domain)
+        self.images = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, dim_color], name='images')
+
+        # batch normalization layers for discriminator, generator and function f
+        self.d_bn1 = batch_norm(name='d_bn1')
+        self.d_bn2 = batch_norm(name='d_bn2')
+        self.d_bn3 = batch_norm(name='d_bn3')
+
+        self.g_bn1 = batch_norm(name='g_bn1')
+        self.g_bn2 = batch_norm(name='g_bn2')
+        self.g_bn3 = batch_norm(name='g_bn3')
+        self.g_bn4 = batch_norm(name='g_bn4')
+
+        self.f_bn1 = batch_norm(name='f_bn1')
+        self.f_bn2 = batch_norm(name='f_bn2')
+        self.f_bn3 = batch_norm(name='f_bn3')
+        self.f_bn4 = batch_norm(name='f_bn4')   # currently not used by function_f
+
+    def function_f(self, images, reuse=False):
+        """Function f: convolutional feature extractor with leaky relu activations.
+
+        First conv layer does not use batch normalization.
+
+        Args:
+            images: images for domain S and T, of shape (batch_size, image_size, image_size, dim_color)
+            reuse: if True, reuse the variables already created under the 'function_f' scope
+
+        Returns:
+            out: tanh-squashed output vectors, of shape (batch_size, dim_fout)
+        """
+        with tf.variable_scope('function_f', reuse=reuse):
+            # shape comments assume the default image_size=32 and dim_ff=64
+            # f has its own batch norm layers (f_bn*) instead of borrowing the discriminator's objects
+            h1 = lrelu(conv2d(images, self.dim_ff, name='f_h1'))                  # (batch_size, 16, 16, 64)
+            h2 = lrelu(self.f_bn1(conv2d(h1, self.dim_ff*2, name='f_h2')))        # (batch_size, 8, 8, 128)
+            h3 = lrelu(self.f_bn2(conv2d(h2, self.dim_ff*4, name='f_h3')))        # (batch_size, 4, 4, 256)
+            h4 = lrelu(self.f_bn3(conv2d(h3, self.dim_ff*8, name='f_h4')))        # (batch_size, 2, 2, 512)
+
+            # flatten and project to the feature vector
+            h4 = tf.reshape(h4, [self.batch_size, -1])
+            out = linear(h4, self.dim_fout, name='f_out')
+
+        return tf.nn.tanh(out)
+
+    def generator(self, z, reuse=False):
+        """Generator: Deconvolutional neural network with relu activations.
+
+        Last deconv layer does not use batch normalization.
+
+        Args:
+            z: input vectors, of shape (batch_size, dim_fout); build_model() feeds the output of function f
+            reuse: if True, reuse the variables under the 'generator' scope and run
+                   batch normalization in test mode (train=False)
+
+        Returns:
+            out: generated images in tanh range, of shape (batch_size, output_size, output_size, dim_color)
+        """
+        # the reusing (second) instance is the sampling path, so it must not use mini-batch statistics
+        train = not reuse
+
+        with tf.variable_scope('generator', reuse=reuse):
+
+            # spatial sizes of the intermediate feature maps; floor division keeps them
+            # integers on both Python 2 and Python 3
+            s = self.output_size
+            s2, s4, s8, s16 = s // 2, s // 4, s // 8, s // 16     # 16, 8, 4, 2 for output_size=32
+
+            # project and reshape z
+            h1 = linear(z, s16*s16*self.dim_gf*8, name='g_h1')    # (batch_size, 2*2*512)
+            h1 = tf.reshape(h1, [-1, s16, s16, self.dim_gf*8])    # (batch_size, 2, 2, 512)
+            h1 = relu(self.g_bn1(h1, train=train))
+
+            h2 = deconv2d(h1, [self.batch_size, s8, s8, self.dim_gf*4], name='g_h2')   # (batch_size, 4, 4, 256)
+            h2 = relu(self.g_bn2(h2, train=train))
+
+            h3 = deconv2d(h2, [self.batch_size, s4, s4, self.dim_gf*2], name='g_h3')   # (batch_size, 8, 8, 128)
+            h3 = relu(self.g_bn3(h3, train=train))
+
+            h4 = deconv2d(h3, [self.batch_size, s2, s2, self.dim_gf], name='g_h4')     # (batch_size, 16, 16, 64)
+            h4 = relu(self.g_bn4(h4, train=train))
+
+            out = deconv2d(h4, [self.batch_size, s, s, self.dim_color], name='g_out')  # (batch_size, 32, 32, dim_color)
+
+            return tf.nn.tanh(out)
+
+    def discriminator(self, images, reuse=False):
+        """Discriminator: Convolutional neural network with leaky relu activations.
+
+        First conv layer does not use batch normalization.
+
+        Args:
+            images: real or fake images of shape (batch_size, image_size, image_size, dim_color)
+            reuse: if True, reuse the variables already created under the 'discriminator' scope
+
+        Returns:
+            out: unnormalized scores (logits) for whether it is a real image or a fake image,
+                 of shape (batch_size, 1)
+        """
+        with tf.variable_scope('discriminator', reuse=reuse):
+
+            # convolution layers; shape comments assume the default image_size=32 and dim_df=64
+            h1 = lrelu(conv2d(images, self.dim_df, name='d_h1'))                  # (batch_size, 16, 16, 64)
+            h2 = lrelu(self.d_bn1(conv2d(h1, self.dim_df*2, name='d_h2')))        # (batch_size, 8, 8, 128)
+            h3 = lrelu(self.d_bn2(conv2d(h2, self.dim_df*4, name='d_h3')))        # (batch_size, 4, 4, 256)
+            h4 = lrelu(self.d_bn3(conv2d(h3, self.dim_df*8, name='d_h4')))        # (batch_size, 2, 2, 512)
+
+            # fully connected layer
+            h4 = tf.reshape(h4, [self.batch_size, -1])
+            out = linear(h4, 1, name='d_out')                                     # (batch_size, 1)
+
+            return out
+
+    def build_model(self):
+        """Build the training graph: forward passes, losses, optimizers, summaries and saver.
+
+        Stores every created tensor / op as an attribute (f_x, fake_images, *_loss,
+        *_optimizer*, summary_op, saver, ...). Meant to be called once per graph, since the
+        non-reusing variable scopes are created here.
+        """
+        # construct function f, generator and discriminator for training phase
+        self.f_x = self.function_f(self.images)                                  # (batch_size, dim_fout)
+        self.fake_images = self.generator(self.f_x)                              # (batch_size, 32, 32, 3)
+        self.logits_real = self.discriminator(self.images)                       # (batch_size, 1)
+        self.logits_fake = self.discriminator(self.fake_images, reuse=True)      # (batch_size, 1)
+        self.fgf_x = self.function_f(self.fake_images, reuse=True)               # (batch_size, dim_fout)
+
+        # construct generator for test phase (shares weights, batch norm in test mode)
+        self.sampled_images = self.generator(self.f_x, reuse=True)               # (batch_size, 32, 32, 3)
+
+        # compute loss
+        self.d_loss_real = tf.reduce_mean(
+            tf.nn.sigmoid_cross_entropy_with_logits(self.logits_real, tf.ones_like(self.logits_real)))
+        self.d_loss_fake = tf.reduce_mean(
+            tf.nn.sigmoid_cross_entropy_with_logits(self.logits_fake, tf.zeros_like(self.logits_fake)))
+        self.d_loss = self.d_loss_real + self.d_loss_fake
+        self.g_loss = tf.reduce_mean(
+            tf.nn.sigmoid_cross_entropy_with_logits(self.logits_fake, tf.ones_like(self.logits_fake)))
+        self.g_const_loss = tf.reduce_mean(tf.square(self.images - self.fake_images))  # L_TID
+        self.f_const_loss = tf.reduce_mean(tf.square(self.f_x - self.fgf_x))           # L_CONST
+
+        # divide variables by the scope they were created in
+        t_vars = tf.trainable_variables()
+        self.d_vars = [var for var in t_vars if 'discriminator' in var.name]
+        self.g_vars = [var for var in t_vars if 'generator' in var.name]
+        self.f_vars = [var for var in t_vars if 'function_f' in var.name]
+
+        # one independent Adam optimizer per loss
+        with tf.name_scope('optimizer'):
+            def adam_step(loss, var_list):
+                return tf.train.AdamOptimizer(self.learning_rate, beta1=0.5).minimize(loss, var_list=var_list)
+
+            self.d_optimizer_real = adam_step(self.d_loss_real, self.d_vars)
+            self.d_optimizer_fake = adam_step(self.d_loss_fake, self.d_vars)
+            self.g_optimizer = adam_step(self.g_loss, self.g_vars+self.f_vars)
+            self.g_optimizer_const = adam_step(self.g_const_loss, self.g_vars+self.f_vars)
+            self.f_optimizer_const = adam_step(self.f_const_loss, self.f_vars+self.g_vars)
+
+        # summary ops for tensorboard visualization
+        tf.scalar_summary('d_loss_real', self.d_loss_real)
+        tf.scalar_summary('d_loss_fake', self.d_loss_fake)
+        tf.scalar_summary('d_loss', self.d_loss)
+        tf.scalar_summary('g_loss', self.g_loss)
+        tf.scalar_summary('g_const_loss', self.g_const_loss)
+        tf.scalar_summary('f_const_loss', self.f_const_loss)
+        tf.image_summary('original_images', self.images, max_images=6)
+        tf.image_summary('sampled_images', self.sampled_images, max_images=6)
+
+        for var in tf.trainable_variables():
+            tf.histogram_summary(var.op.name, var)
+
+        self.summary_op = tf.merge_all_summaries()
+
+        self.saver = tf.train.Saver()
\ No newline at end of file
--- a/ops.py 0 → 100644
View file @3a5e20d
+++ b/ops.py 0 → 100644
View file @3a5e20d
+import tensorflow as tf
+    
+
+class batch_norm(object):
+    """Callable batch normalization layer tied to a fixed scope name.
+
+    The object only stores the scope name; calling it applies
+    tf.contrib.layers.batch_norm to the given tensor under that scope.
+    """
+
+    def __init__(self, name):
+        """
+        Args:
+            name: scope name handed to tf.contrib.layers.batch_norm on every call
+        """
+        self.name = name
+
+    def __call__(self, x, train=True):
+        """Applies batch normalization to x
+
+        Args:
+            x: input tensor of shape (batch_size, width, height, channels_in) or (batch_size, dim_in)
+            train: True or False;  At train mode, it normalizes the input with mini-batch statistics
+                                   At test mode, it normalizes the input with the moving averages and variances
+
+        Returns:
+            batch normalized output of the same shape as x
+        """
+        return tf.contrib.layers.batch_norm(x, scope=self.name, is_training=train,
+                                            decay=0.99, center=True, scale=True,
+                                            activation_fn=None, updates_collections=None)
+    
+    
+def conv2d(x, channel_out, k_w=5, k_h=5, s_w=2, s_h=2, name=None):
+    """Strided 2-D convolution with 'SAME' padding and a bias term
+
+    Creates (or reuses) the variables 'w' and 'b' under the variable scope `name`.
+
+    Args:
+        x: input tensor of shape (batch_size, width_in, height_in, channel_in)
+        channel_out: number of channels of the output tensor
+        k_w: kernel width size; default is 5
+        k_h: kernel height size; default is 5
+        s_w: stride size for width; default is 2
+        s_h: stride size for height; default is 2
+        name: variable scope that holds the kernel and the bias
+
+    Returns:
+        output tensor of shape (batch_size, width_out, height_out, channel_out)
+    """
+    in_channels = x.get_shape()[-1]
+    kernel_shape = [k_w, k_h, in_channels, channel_out]
+    strides = [1, s_w, s_h, 1]
+
+    with tf.variable_scope(name):
+        kernel = tf.get_variable('w', shape=kernel_shape,
+                                 initializer=tf.contrib.layers.xavier_initializer())
+        bias = tf.get_variable('b', shape=[channel_out], initializer=tf.constant_initializer(0.0))
+
+        return tf.nn.conv2d(x, kernel, strides=strides, padding='SAME') + bias
+    
+    
+def deconv2d(x, output_shape, k_w=5, k_h=5, s_w=2, s_h=2, name=None):
+    """Strided transposed convolution (deconvolution) with a bias term
+
+    Creates (or reuses) the variables 'w' and 'b' under the variable scope `name`.
+
+    Args:
+        x: input tensor of shape (batch_size, width_in, height_in, channel_in)
+        output_shape: list corresponding to [batch_size, width_out, height_out, channel_out]
+        k_w: kernel width size; default is 5
+        k_h: kernel height size; default is 5
+        s_w: stride size for width; default is 2
+        s_h: stride size for height; default is 2
+        name: variable scope that holds the kernel and the bias
+
+    Returns:
+        output tensor of shape (batch_size, width_out, height_out, channel_out)
+    """
+    in_channels = x.get_shape()[-1]
+    out_channels = output_shape[-1]
+    # the transposed-convolution kernel lists the output channels before the input channels
+    kernel_shape = [k_w, k_h, out_channels, in_channels]
+    strides = [1, s_w, s_h, 1]
+
+    with tf.variable_scope(name):
+        kernel = tf.get_variable('w', shape=kernel_shape,
+                                 initializer=tf.contrib.layers.xavier_initializer())
+        bias = tf.get_variable('b', shape=[out_channels], initializer=tf.constant_initializer(0.0))
+
+        return tf.nn.conv2d_transpose(x, filter=kernel, output_shape=output_shape, strides=strides) + bias
+    
+def linear(x, dim_out, name=None):
+    """Fully-connected layer: x * w + b
+
+    Creates (or reuses) the variables 'w' and 'b' under the variable scope `name`.
+
+    Args:
+        x: input tensor of shape (batch_size, dim_in)
+        dim_out: dimension for output tensor
+        name: variable scope that holds the weight matrix and the bias
+
+    Returns:
+        output tensor of shape (batch_size, dim_out)
+    """
+    input_dim = x.get_shape()[-1]
+
+    with tf.variable_scope(name):
+        weights = tf.get_variable('w', shape=[input_dim, dim_out],
+                                  initializer=tf.contrib.layers.xavier_initializer())
+        bias = tf.get_variable('b', shape=[dim_out], initializer=tf.constant_initializer(0.0))
+
+        return tf.matmul(x, weights) + bias
+    
+
+def relu(x):
+    """Rectified linear unit; thin wrapper around tf.nn.relu"""
+    activated = tf.nn.relu(x)
+    return activated
+
+
+def lrelu(x, leak=0.2):
+    """Leaky relu: element-wise maximum of x and leak*x
+
+    Args:
+        x: input tensor
+        leak: slope applied to x for the second operand of the maximum; default is 0.2
+    """
+    scaled = leak*x
+    return tf.maximum(x, scaled)
\ No newline at end of file
--- a/solver.py 0 → 100644
View file @3a5e20d
+++ b/solver.py 0 → 100644
View file @3a5e20d
+import tensorflow as tf
+import numpy as np
+import os
+import scipy.io 
+import hickle
+from scipy import ndimage
+
+
+class Solver(object):
+    """Load the SVHN and MNIST datasets and train the given model on them"""
+
+    def __init__(self, model, num_epoch=10, mnist_path='mnist/', svhn_path='svhn/', model_save_path='model/', log_path='log/'):
+        """
+        Args:
+            model: model object providing build_model(), batch_size, the images placeholder,
+                   the optimizer / loss / summary ops and a saver (e.g. DTN)
+            num_epoch: (optional) number of passes over the training batches
+            mnist_path: (optional) directory containing the MNIST .hkl files
+            svhn_path: (optional) directory containing the SVHN .mat files
+            model_save_path: (optional) directory where checkpoints are written
+            log_path: (optional) directory where tensorboard summaries are written
+        """
+        self.model = model
+        self.num_epoch = num_epoch
+        self.mnist_path = mnist_path
+        self.svhn_path = svhn_path
+        self.model_save_path = model_save_path
+        self.log_path = log_path
+
+        # create directory if not exists
+        if not os.path.exists(log_path):
+            os.makedirs(log_path)
+        if not os.path.exists(model_save_path):
+            os.makedirs(model_save_path)
+
+        # construct the model graph
+        model.build_model()
+
+        # load dataset
+        self.svhn = self.load_svhn(self.svhn_path)
+        self.mnist = self.load_mnist(self.mnist_path)
+
+    def load_svhn(self, image_path, split='train'):
+        """Load SVHN images from a .mat file and rescale them with x / 127.5 - 1
+
+        Args:
+            image_path: directory containing 'train_32x32.mat' and 'test_32x32.mat'
+            split: 'train' loads the training file; any other value loads the test file
+
+        Returns:
+            images: array read from key 'X' with its last axis moved to the front
+                    (presumably (num_images, 32, 32, 3) -- confirm against the data files)
+        """
+        print ('loading svhn image dataset..')
+        file_name = 'train_32x32.mat' if split == 'train' else 'test_32x32.mat'
+        svhn = scipy.io.loadmat(os.path.join(image_path, file_name))
+
+        # the image index is stored on the last axis; move it to the front
+        images = np.transpose(svhn['X'], [3, 0, 1, 2])
+        images = images / 127.5 - 1
+        print ('finished loading svhn image dataset..!')
+        return images
+
+    def load_mnist(self, image_path, split='train'):
+        """Load MNIST images from a hickle file and rescale them with x / 127.5 - 1
+
+        Args:
+            image_path: directory containing 'train.images.hkl' and 'test.images.hkl'
+            split: 'train' loads the training file; any other value loads the test file
+
+        Returns:
+            images: rescaled array; NOTE(review): train() feeds it to the same placeholder as
+                    SVHN, so the stored images are assumed to already have the model's input shape
+        """
+        print ('loading mnist image dataset..')
+        file_name = 'train.images.hkl' if split == 'train' else 'test.images.hkl'
+        image_file = os.path.join(image_path, file_name)
+
+        images = hickle.load(image_file)
+        images = images / 127.5 - 1
+        print ('finished loading mnist image dataset..!')
+        return images
+
+    def train(self):
+        """Run the training loop, alternating one SVHN step and one MNIST step per iteration
+
+        Writes summaries every 10 iterations and a checkpoint every 500 iterations.
+        """
+        model = self.model
+        batch_size = model.batch_size
+
+        # image datasets loaded in __init__
+        svhn = self.svhn
+        mnist = self.mnist
+
+        # both datasets are sliced with the same batch index and the placeholder has a fixed
+        # batch dimension, so only iterate over batches that both datasets can fill completely
+        num_iter_per_epoch = min(svhn.shape[0], mnist.shape[0]) // batch_size
+
+        config = tf.ConfigProto(allow_soft_placement = True)
+        config.gpu_options.allow_growth = True
+        with tf.Session(config=config) as sess:
+            # initialize parameters
+            tf.initialize_all_variables().run()
+            summary_writer = tf.train.SummaryWriter(logdir=self.log_path, graph=tf.get_default_graph())
+
+            for e in range(self.num_epoch):
+                for i in range(num_iter_per_epoch):
+                    start, end = i*batch_size, (i+1)*batch_size
+
+                    # train model for domain S
+                    feed_dict = {model.images: svhn[start:end]}
+                    sess.run(model.d_optimizer_fake, feed_dict)
+                    sess.run(model.f_optimizer_const, feed_dict)
+                    sess.run(model.g_optimizer, feed_dict)
+
+                    if i % 10 == 0:
+                        # summaries and printed losses are evaluated on the domain S batch
+                        summary, d_loss, g_loss = sess.run([model.summary_op, model.d_loss, model.g_loss], feed_dict)
+                        summary_writer.add_summary(summary, e*num_iter_per_epoch + i)
+                        print ('Epoch: [%d] Step: [%d/%d] d_loss: [%.6f] g_loss: [%.6f]' %(e+1, i+1, num_iter_per_epoch, d_loss, g_loss))
+
+                    # train model for domain T
+                    feed_dict = {model.images: mnist[start:end]}
+                    sess.run(model.d_optimizer_real, feed_dict)
+                    sess.run(model.d_optimizer_fake, feed_dict)
+                    sess.run(model.g_optimizer, feed_dict)
+                    sess.run(model.g_optimizer_const, feed_dict)
+
+                    if i % 500 == 0:
+                        # report the path returned by the saver instead of a hard-coded 'model/' prefix
+                        save_path = model.saver.save(sess, os.path.join(self.model_save_path, 'dcgan-%d' %(e+1)), global_step=i+1)
+                        print ('%s saved' %save_path)
\ No newline at end of file
--- a/train.py 0 → 100644
View file @3a5e20d
+++ b/train.py 0 → 100644
View file @3a5e20d
+from model import DTN
+from solver import Solver
+
+def main():
+    """Build the DTN model and train it for 10 epochs with the default data and output paths"""
+    solver = Solver(DTN(), num_epoch=10, svhn_path='svhn/', model_save_path='model/', log_path='log/')
+    solver.train()
+    
+
+# start training only when this file is executed as a script, not when it is imported
+if __name__ == "__main__":
+    main()
\ No newline at end of file