linear_modern.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

use_weight_normalization_default = False


def linear(args, output_size, bias, bias_start=0.0, use_l2_loss=False,
           use_weight_normalization=use_weight_normalization_default, scope=None,
           timestep=-1, weight_initializer=None, orthogonal_scale_factor=1.1):
"""Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
Args:
args: a 2D Tensor or a list of 2D, batch x n, Tensors.
output_size: int, second dimension of W[i].
bias: boolean, whether to add a bias term or not.
bias_start: starting value to initialize the bias; 0 by default.
scope: VariableScope for the created subgraph; defaults to "Linear".
Returns:
A 2D Tensor with shape [batch x output_size] equal to
sum_i(args[i] * W[i]), where W[i]s are newly created matrices.
Raises:
ValueError: if some of the arguments has unspecified or wrong shape.
"""
  # assert args  # disabled: caused an error under upgraded TensorFlow
  if not isinstance(args, (list, tuple)):
    args = [args]

  if len(args) > 1 and use_weight_normalization:
    raise ValueError('you cannot use weight_normalization with multiple inputs '
                     'because the Euclidean norm will be incorrect; consider '
                     'multiplicative integration instead')

  # Calculate the total size of the arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape().as_list() for a in args]
  for shape in shapes:
    if len(shape) != 2:
      raise ValueError("Linear is expecting 2D arguments: %s" % str(shapes))
    if not shape[1]:
      raise ValueError("Linear expects shape[1] of arguments: %s" % str(shapes))
    else:
      total_arg_size += shape[1]

  # Reuse the weights if the variable already exists in this scope; this lets
  # the layer be called repeatedly (e.g. once per timestep) with shared
  # parameters.
  weights_found = False
  try:
    with tf.variable_scope(scope or "Linear", reuse=True):
      tf.get_variable("Matrix")
    weights_found = True
  except ValueError:
    pass
  if use_l2_loss:
    l_regularizer = tf.contrib.layers.l2_regularizer(1e-5)
  else:
    l_regularizer = None

  # Now the computation.
  with tf.variable_scope(scope or "Linear", reuse=weights_found):
    matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                             initializer=tf.uniform_unit_scaling_initializer(),
                             regularizer=l_regularizer)
    if use_weight_normalization:
      # weight_normalization is assumed to be defined elsewhere in this file.
      matrix = weight_normalization(matrix, timestep=timestep)

    if len(args) == 1:
      res = tf.matmul(args[0], matrix)
    else:
      # tf.concat takes (values, axis) in TensorFlow >= 1.0.
      res = tf.matmul(tf.concat(args, 1), matrix)

    if not bias:
      return res

    bias_term = tf.get_variable("Bias", [output_size],
                                initializer=tf.constant_initializer(bias_start),
                                regularizer=l_regularizer)
    return res + bias_term
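
# A minimal usage sketch for `linear` (hypothetical names and shapes, assuming
# TF 1.x graph mode); `x` and `h` are illustrative, not part of this module:
#
#   x = tf.placeholder(tf.float32, [None, 128])           # batch x input_size
#   h = linear(x, output_size=64, bias=True, scope="proj")
#   # h has shape [batch, 64]: x @ Matrix + Bias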

def batch_timesteps_linear(input, output_size, bias, bias_start=0.0,
                           use_l2_loss=False,
                           use_weight_normalization=use_weight_normalization_default,
                           scope=None, transpose_input=True, timestep=-1):
  """Applies the same linear map to every timestep of a sequence.

  Args:
    input: a 3D Tensor [timesteps, batch_size, input_size], or
      [batch_size, timesteps, input_size] if transpose_input is True.
    output_size: int, second dimension of W.
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
    scope: VariableScope for the created subgraph; defaults to "Linear".

  Returns:
    A 3D Tensor with shape [timesteps, batch_size, output_size], transposed
    back to [batch_size, timesteps, output_size] if transpose_input is True.

  Raises:
    ValueError: if the input does not have a static 3D shape.
  """
  # Calculate the total size of the arguments on dimension 2.
  if transpose_input:
    input = tf.transpose(input, [1, 0, 2])

  shape_list = input.get_shape().as_list()
  if len(shape_list) != 3:
    raise ValueError('shape must be of size 3, but received shape of rank %d'
                     % len(shape_list))

  # The timestep dimension must be statically known so the weight matrix can
  # be tiled across it below.
  num_timesteps = shape_list[0]
  batch_size = shape_list[1]
  total_arg_size = shape_list[2]

  if use_l2_loss:
    l_regularizer = tf.contrib.layers.l2_regularizer(1e-5)
  else:
    l_regularizer = None

  # Now the computation.
  with tf.variable_scope(scope or "Linear"):
    matrix = tf.get_variable("Matrix", [total_arg_size, output_size],
                             initializer=tf.uniform_unit_scaling_initializer(),
                             regularizer=l_regularizer)
    if use_weight_normalization:
      matrix = weight_normalization(matrix)

    # Share one weight matrix across all timesteps by tiling it, then use a
    # batched matmul: [T, B, D] x [T, D, O] -> [T, B, O].
    # tf.batch_matmul was removed in TensorFlow 1.0; tf.matmul batches over
    # the leading dimension.
    matrix = tf.tile(tf.expand_dims(matrix, 0), [num_timesteps, 1, 1])
    res = tf.matmul(input, matrix)

    if bias:
      bias_term = tf.get_variable(
          "Bias", [output_size],
          initializer=tf.constant_initializer(bias_start))
      res = res + bias_term

  if transpose_input:
    res = tf.transpose(res, [1, 0, 2])

  return res
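
if __name__ == "__main__":
  # Smoke-test sketch (illustrative shapes, assuming TF 1.x graph mode):
  # project every timestep of a [batch, timesteps, features] tensor with a
  # single shared weight matrix.
  import numpy as np

  seq = tf.placeholder(tf.float32, [4, 7, 16])  # batch x timesteps x features
  out = batch_timesteps_linear(seq, output_size=8, bias=True, scope="demo")

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    result = sess.run(out, {seq: np.zeros((4, 7, 16), np.float32)})
    print(result.shape)  # (4, 7, 8)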