// activation_kernels.cu
#include "cuda_runtime.h"
#include "curand.h"
#include "cublas_v2.h"
extern "C" {
#include "activations.h"
#include "cuda.h"
}
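
// Element-wise activation functions and their derivatives, evaluated on the
// device. Each *_activate_kernel maps a single input value; the matching
// *_gradient_kernel returns the derivative used during backpropagation.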
__device__ float lhtan_activate_kernel(float x)
{
    if(x < 0) return .001f*x;
    if(x > 1) return .001f*(x-1.f) + 1.f;
    return x;
}
__device__ float lhtan_gradient_kernel(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001f;
}
__device__ float hardtan_activate_kernel(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
__device__ float linear_activate_kernel(float x){return x;}
__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));}
__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;}
__device__ float relu_activate_kernel(float x){return x*(x>0);}
__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);}
__device__ float selu_activate_kernel(float x){return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x)-1);}
__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;}
__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;}
__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1f*x;}
__device__ float tanh_activate_kernel(float x){return (2.f/(1 + expf(-2*x)) - 1);}
__device__ float plse_activate_kernel(float x)
{
    if(x < -4) return .01f * (x + 4);
    if(x > 4) return .01f * (x - 4) + 1;
    return .125f*x + .5f;
}
__device__ float stair_activate_kernel(float x)
{
    int n = floorf(x);
    if (n%2 == 0) return floorf(x/2);
    else return (x - n) + floorf(x/2);
}
__device__ float hardtan_gradient_kernel(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
__device__ float linear_gradient_kernel(float x){return 1;}
__device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
__device__ float loggy_gradient_kernel(float x)
{
    float y = (x+1)/2;
    return 2*(1-y)*y;
}
__device__ float relu_gradient_kernel(float x){return (x>0);}
__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
__device__ float selu_gradient_kernel(float x){return (x >= 0)*1.0507 + (x < 0)*(x + 1.0507*1.6732);}
__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;}
__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;}
__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1f;}
__device__ float tanh_gradient_kernel(float x){return 1-x*x;}
__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;}
__device__ float stair_gradient_kernel(float x)
{
    if (floorf(x) == x) return 0;
    return 1;
}
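
// Dispatch a single value through the activation selected by the ACTIVATION
// enum (declared in activations.h). Unhandled values fall through to 0.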
__device__ float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case SELU:
            return selu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}
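
// Dispatch the derivative for the selected activation. Note that several of
// the gradient helpers (logistic, loggy, elu, selu, tanh) are written in
// terms of the activated output rather than the pre-activation input, so
// callers are expected to pass the layer's output values here.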
__device__ float gradient_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient_kernel(x);
        case LOGISTIC:
            return logistic_gradient_kernel(x);
        case LOGGY:
            return loggy_gradient_kernel(x);
        case RELU:
            return relu_gradient_kernel(x);
        case ELU:
            return elu_gradient_kernel(x);
        case SELU:
            return selu_gradient_kernel(x);
        case RELIE:
            return relie_gradient_kernel(x);
        case RAMP:
            return ramp_gradient_kernel(x);
        case LEAKY:
            return leaky_gradient_kernel(x);
        case TANH:
            return tanh_gradient_kernel(x);
        case PLSE:
            return plse_gradient_kernel(x);
        case STAIR:
            return stair_gradient_kernel(x);
        case HARDTAN:
            return hardtan_gradient_kernel(x);
        case LHTAN:
            return lhtan_gradient_kernel(x);
    }
    return 0;
}
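
// "Binary" activation kernels: each thread pairs element i of a block of s
// values with the element s/2 positions later and multiplies the two,
// effectively gating one half of the block by the other. The gradient kernel
// routes the incoming derivative dy to both partners via the product rule.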
__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    int i = id % s;
    int b = id / s;
    if(id < n) {
        // Load the paired elements only for in-range threads, so the excess
        // threads of the last block never read out of bounds.
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        float de = dy[id];
        dx[b*s + i] = x2*de;
        dx[b*s + s/2 + i] = x1*de;
    }
}
extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y)
{
    binary_gradient_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, dx, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    int i = id % s;
    int b = id / s;
    if(id < n) {
        // Guard the loads as well as the store, for the same reason as above.
        float x1 = x[b*s + i];
        float x2 = x[b*s + s/2 + i];
        y[id] = x1*x2;
    }
}
extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y)
{
    binary_activate_array_kernel<<<cuda_gridsize(n/2), BLOCK>>>(x, n/2, size, a, y);
    check_error(cudaPeekAtLastError());
}
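
// Whole-array versions: one thread per element, applied in place for the
// forward pass, and multiplied into the existing delta for the backward pass.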
__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) x[i] = activate_kernel(x[i], a);
}
__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(i < n) delta[i] *= gradient_kernel(x[i], a);
}
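
// Host-side launchers. cuda_gridsize() and BLOCK come from darknet's cuda.h
// (included above) and choose a grid large enough to cover n threads of
// BLOCK size; check_error() reports any launch failure surfaced by
// cudaPeekAtLastError().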
extern "C" void activate_array_gpu(float *x, int n, ACTIVATION a)
{
    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
    check_error(cudaPeekAtLastError());
}
extern "C" void gradient_array_gpu(float *x, int n, ACTIVATION a, float *delta)
{
    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
    check_error(cudaPeekAtLastError());
}
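
/*
 * Minimal usage sketch (not part of the original file): how these launchers
 * are typically driven from host code. It assumes a buffer already resident
 * on the GPU; cuda_make_array() is the upload helper declared in darknet's
 * cuda.h, and `x_cpu`, `n`, and `delta_gpu` are illustrative names.
 *
 *     float *x_gpu = cuda_make_array(x_cpu, n);        // copy n floats to the device
 *     activate_array_gpu(x_gpu, n, LEAKY);             // forward: x_gpu[i] = leaky(x_gpu[i])
 *     ...
 *     gradient_array_gpu(x_gpu, n, LEAKY, delta_gpu);  // backward: delta_gpu[i] *= leaky'(output)
 */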