박해연

Upload modified PCN code

1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import numpy as np
4 +import tensorflow as tf
5 +from tensorpack import dataflow
6 +
7 +
8 +def resample_pcd(pcd, n):
9 + """Drop or duplicate points so that pcd has exactly n points"""
10 + idx = np.random.permutation(pcd.shape[0])
11 + if idx.shape[0] < n:
12 + idx = np.concatenate([idx, np.random.randint(pcd.shape[0], size=n-pcd.shape[0])])
13 + return pcd[idx[:n]]
14 +
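# Editor's note: a minimal sketch of both branches (hypothetical sizes):
#   resample_pcd(np.random.rand(2500, 3), 1024).shape == (1024, 3)  # permute, truncate
#   resample_pcd(np.random.rand(300, 3), 1024).shape == (1024, 3)   # keep all 300, duplicate 724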
15 +
16 +class PreprocessData(dataflow.ProxyDataFlow):
17 + def __init__(self, ds, input_size, output_size):
18 + super(PreprocessData, self).__init__(ds)
19 + self.input_size = input_size
20 + self.output_size = output_size
21 +
22 + def get_data(self):
23 + for model_id, partial, gt in self.ds.get_data():
24 + partial = resample_pcd(partial, self.input_size)
25 + gt = resample_pcd(gt, self.output_size)
26 + yield model_id, partial, gt
27 +
28 +
29 +class BatchData(dataflow.ProxyDataFlow):
30 + def __init__(self, ds, batch_size, input_size, gt_size, remainder=False, use_list=False):
31 + super(BatchData, self).__init__(ds)
32 + self.batch_size = batch_size
33 + self.input_size = input_size
34 + self.gt_size = gt_size
35 + self.remainder = remainder
36 + self.use_list = use_list
37 +
38 + def __len__(self):
39 + ds_size = len(self.ds)
40 + div = ds_size // self.batch_size
41 + rem = ds_size % self.batch_size
42 + if rem == 0:
43 + return div
44 + return div + int(self.remainder)
45 +
46 + def __iter__(self):
47 + holder = []
48 + for data in self.ds:
49 + holder.append(data)
50 + if len(holder) == self.batch_size:
51 + yield self._aggregate_batch(holder, self.use_list)
52 + del holder[:]
53 + if self.remainder and len(holder) > 0:
54 + yield self._aggregate_batch(holder, self.use_list)
55 +
56 + def _aggregate_batch(self, data_holder, use_list=False):
57 + '''Concatenate the input clouds along the point axis into one (1, sum(npts), 3) array;
58 + stack ids and ground truths along a new 0-th (batch) dimension.
59 + '''
60 + ids = np.stack([x[0] for x in data_holder])
61 + inputs = [resample_pcd(x[1], self.input_size) if x[1].shape[0] > self.input_size else x[1]
62 + for x in data_holder]
63 + inputs = np.expand_dims(np.concatenate([x for x in inputs]), 0).astype(np.float32)
64 + npts = np.stack([x[1].shape[0] if x[1].shape[0] < self.input_size else self.input_size
65 + for x in data_holder]).astype(np.int32)
66 + gts = np.stack([resample_pcd(x[2], self.gt_size) for x in data_holder]).astype(np.float32)
67 + return ids, inputs, npts, gts
68 +
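# Editor's note: the batch is "packed" rather than padded -- all clouds are
# concatenated along the point axis into one (1, sum(npts), 3) tensor, and npts
# records each cloud's length so the model can split it back apart (e.g. with
# tf.split or the point_maxpool/point_unpool helpers). With two hypothetical
# clouds of 2000 and 1500 points (input_size larger than both):
#   inputs.shape == (1, 3500, 3); npts == [2000, 1500]; gts.shape == (2, gt_size, 3)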
69 +
70 +def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False):
71 + df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False)
72 + size = df.size()
73 + if is_training:
74 + df = dataflow.LocallyShuffleData(df, buffer_size=2000)
75 + df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1)
76 + df = BatchData(df, batch_size, input_size, output_size)
77 + if is_training:
78 + df = dataflow.PrefetchDataZMQ(df, nr_proc=8)
79 + df = dataflow.RepeatedData(df, -1)
80 + if test_speed:
81 + dataflow.TestDataSpeed(df, size=1000).start()
82 + df.reset_state()
83 + return df, size
84 +
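# Editor's sketch of typical usage (paths and sizes are hypothetical):
#   df_train, num_train = lmdb_dataflow('data/train.lmdb', batch_size=32,
#       input_size=3000, output_size=16384, is_training=True)
#   batch_gen = df_train.get_data()
#   ids, inputs, npts, gts = next(batch_gen)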
85 +
86 +def get_queued_data(generator, dtypes, shapes, queue_capacity=10):
87 + assert len(dtypes) == len(shapes), 'dtypes and shapes must have the same length'
88 + queue = tf.FIFOQueue(queue_capacity, dtypes, shapes)
89 + placeholders = [tf.placeholder(dtype, shape) for dtype, shape in zip(dtypes, shapes)]
90 + enqueue_op = queue.enqueue(placeholders)
91 + close_op = queue.close(cancel_pending_enqueues=True)
92 + feed_fn = lambda: {placeholder: value for placeholder, value in zip(placeholders, next(generator))}
93 + queue_runner = tf.contrib.training.FeedingQueueRunner(queue, [enqueue_op], close_op, feed_fns=[feed_fn])
94 + tf.train.add_queue_runner(queue_runner)
95 + return queue.dequeue()
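# Editor's sketch (TF 1.x queue API; assumes every sample carries a fixed point
# count so the queue shapes are fully defined, as tf.FIFOQueue requires):
#   ids, inputs, npts, gts = get_queued_data(
#       df.get_data(),
#       dtypes=[tf.string, tf.float32, tf.int32, tf.float32],
#       shapes=[[32], [1, 32 * 3000, 3], [32], [32, 16384, 3]])
# followed by tf.train.start_queue_runners(sess) before the training loop.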
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import numpy as np
4 +#from open3d import *
5 +import open3d as o3d
6 +
7 +
8 +def read_pcd(filename):
9 + pcd = o3d.io.read_point_cloud(filename)
10 + return np.array(pcd.points)
11 +
12 +
13 +def save_pcd(filename, points):
14 + pcd = o3d.geometry.PointCloud()
15 + pcd.points = o3d.utility.Vector3dVector(points)
16 + o3d.io.write_point_cloud(filename, pcd)
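# Editor's sketch of a round trip (hypothetical path):
#   pts = np.random.rand(1024, 3)
#   save_pcd('/tmp/example.pcd', pts)
#   assert read_pcd('/tmp/example.pcd').shape == (1024, 3)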
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import argparse
4 +import os
5 +from io_util import read_pcd
6 +from tensorpack import DataFlow, dataflow
7 +
8 +
9 +class pcd_df(DataFlow):
10 + def __init__(self, model_list, num_scans, partial_dir, complete_dir):
11 + self.model_list = model_list
12 + self.num_scans = num_scans
13 + self.partial_dir = partial_dir
14 + self.complete_dir = complete_dir
15 +
16 + def size(self):
17 + return len(self.model_list) * self.num_scans
18 +
19 + def get_data(self):
20 + for model_id in self.model_list:
21 + complete = read_pcd(os.path.join(self.complete_dir, '%s.pcd' % model_id))
22 + for i in range(self.num_scans):
23 + partial = read_pcd(os.path.join(self.partial_dir, model_id, '%d.pcd' % i))
24 + yield model_id.replace('/', '_'), partial, complete
25 +
26 +
27 +if __name__ == '__main__':
28 + parser = argparse.ArgumentParser()
29 + parser.add_argument('--list_path')
30 + parser.add_argument('--num_scans', type=int)
31 + parser.add_argument('--partial_dir')
32 + parser.add_argument('--complete_dir')
33 + parser.add_argument('--output_path')
34 + args = parser.parse_args()
35 +
36 + with open(args.list_path) as file:
37 + model_list = file.read().splitlines()
38 + df = pcd_df(model_list, args.num_scans, args.partial_dir, args.complete_dir)
39 + if os.path.exists(args.output_path):
40 + os.remove(args.output_path)
41 + dataflow.LMDBSerializer.save(df, args.output_path)
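# Editor's sketch of an invocation (file and directory names are hypothetical):
#   python lmdb_writer.py --list_path model_list.txt --num_scans 8 \
#       --partial_dir data/partial --complete_dir data/complete \
#       --output_path data/train.lmdb
# where model_list.txt holds one model id per line (e.g. "02691156/abc123"),
# with scans at data/partial/<id>/<scan>.pcd and data/complete/<id>.pcd.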
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import tensorflow as tf
4 +from tf_util import mlp, mlp_conv, chamfer, add_train_summary, add_valid_summary
5 +
6 +
7 +class Model:
8 + def __init__(self, inputs, gt, alpha):
9 + self.num_output_points = 16384
10 + self.features = self.create_encoder(inputs)
11 + self.outputs = self.create_decoder(self.features)
12 + self.loss, self.update = self.create_loss(self.outputs, gt)
13 + self.visualize_ops = [inputs[0], self.outputs[0], gt[0]]
14 + self.visualize_titles = ['input', 'output', 'ground truth']
15 +
16 + def create_encoder(self, inputs):
17 + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE):
18 + features = mlp_conv(inputs, [128, 256])
19 + features_global = tf.reduce_max(features, axis=1, keepdims=True, name='maxpool_0')
20 + features = tf.concat([features, tf.tile(features_global, [1, tf.shape(inputs)[1], 1])], axis=2)
21 + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE):
22 + features = mlp_conv(features, [512, 1024])
23 + features = tf.reduce_max(features, axis=1, name='maxpool_1')
24 + return features
25 +
26 + def create_decoder(self, features):
27 + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
28 + outputs = mlp(features, [1024, 1024, self.num_output_points * 3])
29 + outputs = tf.reshape(outputs, [-1, self.num_output_points, 3])
30 + return outputs
31 +
32 + def create_loss(self, outputs, gt):
33 + loss = chamfer(outputs, gt)
34 + add_train_summary('train/loss', loss)
35 + update_loss = add_valid_summary('valid/loss', loss)
36 + return loss, update_loss
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import tensorflow as tf
4 +from tf_util import mlp_conv, chamfer, add_train_summary, add_valid_summary
5 +
6 +
7 +class Model:
8 + def __init__(self, inputs, gt, alpha):
9 + self.grid_size = 128
10 + self.grid_scale = 0.5
11 + self.num_output_points = 16384
12 + self.features = self.create_encoder(inputs)
13 + fold1, fold2 = self.create_decoder(self.features)
14 + self.outputs = fold2
15 + self.loss, self.update = self.create_loss(self.outputs, gt)
16 + self.visualize_ops = [inputs[0], fold1[0], fold2[0], gt[0]]
17 + self.visualize_titles = ['input', '1st folding', '2nd folding', 'ground truth']
18 +
19 + def create_encoder(self, inputs):
20 + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE):
21 + features = mlp_conv(inputs, [128, 256])
22 + features_global = tf.reduce_max(features, axis=1, keepdims=True, name='maxpool_0')
23 + features = tf.concat([features, tf.tile(features_global, [1, tf.shape(inputs)[1], 1])], axis=2)
24 + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE):
25 + features = mlp_conv(features, [512, 1024])
26 + features = tf.reduce_max(features, axis=1, name='maxpool_1')
27 + return features
28 +
29 + def create_decoder(self, features):
30 + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
31 + x = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size)
32 + y = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size)
33 + grid = tf.meshgrid(x, y)
34 + grid = tf.reshape(tf.stack(grid, axis=2), [-1, 2])
35 + grid = tf.tile(tf.expand_dims(grid, 0), [features.shape[0], 1, 1])
36 + features = tf.tile(tf.expand_dims(features, 1), [1, self.num_output_points, 1])
37 + with tf.variable_scope('folding_1'):
38 + fold1 = mlp_conv(tf.concat([features, grid], axis=2), [512, 512, 3])
39 + with tf.variable_scope('folding_2'):
40 + fold2 = mlp_conv(tf.concat([features, fold1], axis=2), [512, 512, 3])
41 + return fold1, fold2
42 +
43 + def create_loss(self, outputs, gt):
44 + loss = chamfer(outputs, gt)
45 + add_train_summary('train/loss', loss)
46 + update_loss = add_valid_summary('valid/loss', loss)
47 + return loss, update_loss
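# Editor's note: grid_size ** 2 == 128 * 128 == 16384 == num_output_points, so the
# decoder above deforms exactly one 2D seed point per output point. A NumPy sketch
# of the same seed grid:
#   x = np.linspace(-0.5, 0.5, 128)
#   grid = np.stack(np.meshgrid(x, x), axis=2).reshape(-1, 2)  # (16384, 2)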
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import tensorflow as tf
4 +from tf_util import *
5 +
6 +
7 +class Model:
8 + def __init__(self, inputs, npts, gt, alpha):
9 + self.num_coarse = 1024
10 + self.grid_size = 4
11 + self.grid_scale = 0.05
12 + self.num_fine = self.grid_size ** 2 * self.num_coarse
13 + self.features = self.create_encoder(inputs, npts)
14 + self.coarse, self.fine = self.create_decoder(self.features)
15 + self.loss, self.update = self.create_loss(self.coarse, self.fine, gt, alpha)
16 + self.outputs = self.fine
17 + self.visualize_ops = [tf.split(inputs[0], npts, axis=0), self.coarse, self.fine, gt]
18 + self.visualize_titles = ['input', 'coarse output', 'fine output', 'ground truth']
19 +
20 + def create_encoder(self, inputs, npts):
21 + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE):
22 + features = mlp_conv(inputs, [128, 256])
23 + features_global = point_unpool(point_maxpool(features, npts, keepdims=True), npts)
24 + features = tf.concat([features, features_global], axis=2)
25 + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE):
26 + features = mlp_conv(features, [512, 1024])
27 + features = point_maxpool(features, npts)
28 + return features
29 +
30 + def create_decoder(self, features):
31 + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
32 + coarse = mlp(features, [1024, 1024, self.num_coarse * 3])
33 + coarse = tf.reshape(coarse, [-1, self.num_coarse, 3])
34 +
35 + with tf.variable_scope('folding', reuse=tf.AUTO_REUSE):
36 + grid = tf.meshgrid(tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size), tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size))
37 + grid = tf.expand_dims(tf.reshape(tf.stack(grid, axis=2), [-1, 2]), 0)
38 + grid_feat = tf.tile(grid, [features.shape[0], self.num_coarse, 1])
39 +
40 + point_feat = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1])
41 + point_feat = tf.reshape(point_feat, [-1, self.num_fine, 3])
42 +
43 + global_feat = tf.tile(tf.expand_dims(features, 1), [1, self.num_fine, 1])
44 +
45 + feat = tf.concat([grid_feat, point_feat, global_feat], axis=2)
46 +
47 + center = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1])
48 + center = tf.reshape(center, [-1, self.num_fine, 3])
49 +
50 + fine = mlp_conv(feat, [512, 512, 3]) + center
51 + return coarse, fine
52 +
53 + def create_loss(self, coarse, fine, gt, alpha):
54 + loss_coarse = chamfer(coarse, gt)
55 + add_train_summary('train/coarse_loss', loss_coarse)
56 + update_coarse = add_valid_summary('valid/coarse_loss', loss_coarse)
57 +
58 + loss_fine = chamfer(fine, gt)
59 + add_train_summary('train/fine_loss', loss_fine)
60 + update_fine = add_valid_summary('valid/fine_loss', loss_fine)
61 +
62 + loss = loss_coarse + alpha * loss_fine
63 + add_train_summary('train/loss', loss)
64 + update_loss = add_valid_summary('valid/loss', loss)
65 +
66 + return loss, [update_coarse, update_fine, update_loss]
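# Editor's sketch of wiring this model up for training (TF 1.x; the placeholder
# shapes follow the packed-batch format produced by BatchData above; batch size
# 32 and the learning rate are hypothetical):
#   inputs_pl = tf.placeholder(tf.float32, (1, None, 3), 'inputs')
#   npts_pl = tf.placeholder(tf.int32, (32,), 'num_points')
#   gt_pl = tf.placeholder(tf.float32, (32, 16384, 3), 'ground_truths')
#   alpha_pl = tf.placeholder(tf.float32, (), 'alpha')  # fine-loss weight schedule
#   model = Model(inputs_pl, npts_pl, gt_pl, alpha_pl)
#   train_op = tf.train.AdamOptimizer(1e-4).minimize(model.loss)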
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import tensorflow as tf
4 +from tf_util import *
5 +
6 +
7 +class Model:
8 + def __init__(self, inputs, my_inputs, npts, gt, alpha, beta):
9 + self.num_coarse = 1024
10 + self.grid_size = 4
11 + self.grid_scale = 0.05
12 + self.num_fine = self.grid_size ** 2 * self.num_coarse
13 + self.features = self.create_encoder(inputs, npts)
14 + self.coarse, self.fine = self.create_decoder(self.features)
15 + self.loss, self.update = self.create_loss(my_inputs, self.coarse, self.fine, gt, alpha, beta)
16 + self.outputs = self.fine
17 + self.visualize_ops = [tf.split(inputs[0], npts, axis=0), self.coarse, self.fine, gt]
18 + self.visualize_titles = ['input', 'coarse output', 'fine output', 'ground truth']
19 +
20 + def create_encoder(self, inputs, npts):
21 + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE):
22 + features = mlp_conv(inputs, [128, 256])
23 + features_global = point_unpool(point_maxpool(features, npts, keepdims=True), npts)
24 + features = tf.concat([features, features_global], axis=2)
25 + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE):
26 + features = mlp_conv(features, [512, 1024])
27 + features = point_maxpool(features, npts)
28 + return features
29 +
30 + def create_decoder(self, features):
31 + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE):
32 + coarse = mlp(features, [1024, 1024, self.num_coarse * 3])
33 + coarse = tf.reshape(coarse, [-1, self.num_coarse, 3])
34 +
35 + with tf.variable_scope('folding', reuse=tf.AUTO_REUSE):
36 + x = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size)
37 + y = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size)
38 + grid = tf.meshgrid(x, y)
39 + grid = tf.expand_dims(tf.reshape(tf.stack(grid, axis=2), [-1, 2]), 0)
40 + grid_feat = tf.tile(grid, [features.shape[0], self.num_coarse, 1])
41 +
42 + point_feat = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1])
43 + point_feat = tf.reshape(point_feat, [-1, self.num_fine, 3])
44 +
45 + global_feat = tf.tile(tf.expand_dims(features, 1), [1, self.num_fine, 1])
46 +
47 + feat = tf.concat([grid_feat, point_feat, global_feat], axis=2)
48 +
49 + center = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1])
50 + center = tf.reshape(center, [-1, self.num_fine, 3])
51 +
52 + fine = mlp_conv(feat, [512, 512, 3]) + center
53 + return coarse, fine
54 +
55 + def create_loss(self, my_inputs, coarse, fine, gt, alpha, beta):
56 + gt_ds = gt[:, :coarse.shape[1], :]
57 + loss_coarse = earth_mover(coarse, gt_ds)
58 + add_train_summary('train/coarse_loss', loss_coarse)
59 + update_coarse = add_valid_summary('valid/coarse_loss', loss_coarse)
60 +
61 + loss_fine = chamfer(fine, gt)
62 + add_train_summary('train/fine_loss', loss_fine)
63 + update_fine = add_valid_summary('valid/fine_loss', loss_fine)
64 +
65 + loss_input = my_chamfer(my_inputs, fine)
66 + add_train_summary('train/input_loss', loss_input)
67 + update_input = add_valid_summary('valid/input_loss', loss_input)
68 +
69 + loss = loss_coarse + alpha * loss_fine + beta * loss_input
70 + add_train_summary('train/loss', loss)
71 + update_loss = add_valid_summary('valid/loss', loss)
72 +
73 + return loss, [update_coarse, update_fine, update_input, update_loss]
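# Editor's note: my_chamfer is imported from tf_util but is not shown in this
# upload. Since my_inputs is the partial input cloud, it is presumably a
# one-sided Chamfer distance from the input points to the fine output, which
# penalizes completions that drift away from the observed points. A minimal
# sketch under that assumption, built on the nn_distance op included below:
#   def my_chamfer(inputs, outputs):
#       dist1, _, _, _ = tf_nndistance.nn_distance(inputs, outputs)
#       return tf.reduce_mean(tf.sqrt(dist1))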
1 +cuda_inc = /usr/local/cuda-10.0/include/
2 +cuda_lib = /usr/local/cuda-10.0/lib64/
3 +nvcc = /usr/local/cuda-10.0/bin/nvcc
4 +tf_inc = /usr/local/lib/python3.6/dist-packages/tensorflow_core/include
5 +tf_lib = /usr/local/lib/python3.6/dist-packages/tensorflow_core
6 +
7 +all: tf_nndistance_so.so tf_approxmatch_so.so
8 +
9 +tf_nndistance.cu.o: tf_nndistance.cu
10 + $(nvcc) tf_nndistance.cu -o tf_nndistance.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
11 +
12 +tf_nndistance_so.so: tf_nndistance.cpp tf_nndistance.cu.o
13 + g++ tf_nndistance.cpp tf_nndistance.cu.o -o tf_nndistance_so.so \
14 + -I $(cuda_inc) -I $(tf_inc) -L $(cuda_lib) -lcudart -L $(tf_lib) -ltensorflow_framework \
15 + -shared -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -fPIC -O2
16 +
17 +tf_approxmatch.cu.o: tf_approxmatch.cu
18 + $(nvcc) tf_approxmatch.cu -o tf_approxmatch.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
19 +
20 +tf_approxmatch_so.so: tf_approxmatch.cpp tf_approxmatch.cu.o
21 + g++ $(CPPFLAGS) tf_approxmatch.cpp tf_approxmatch.cu.o -o tf_approxmatch_so.so \
22 + -I $(cuda_inc) -I $(tf_inc) -L $(cuda_lib) -lcudart -L $(tf_lib) -ltensorflow_framework \
23 + -shared -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -fPIC -O2
24 +
25 +clean:
26 + rm -rf *.o *.so
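# Editor's note: after `make`, the two libraries can be smoke-tested from Python
# (TF 1.x) with tf.load_op_library('./tf_nndistance_so.so') and
# tf.load_op_library('./tf_approxmatch_so.so'). With TF >= 1.14 the framework
# library ships as libtensorflow_framework.so.1 (or .so.2), so the
# -ltensorflow_framework flag may need a matching symlink in $(tf_lib).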
1 +#include "tensorflow/core/framework/op.h"
2 +#include "tensorflow/core/framework/op_kernel.h"
3 +#include <algorithm>
4 +#include <vector>
5 +#include <math.h>
6 +using namespace tensorflow;
7 +REGISTER_OP("ApproxMatch")
8 + .Input("xyz1: float32")
9 + .Input("xyz2: float32")
10 + .Output("match: float32");
11 +REGISTER_OP("MatchCost")
12 + .Input("xyz1: float32")
13 + .Input("xyz2: float32")
14 + .Input("match: float32")
15 + .Output("cost: float32");
16 +REGISTER_OP("MatchCostGrad")
17 + .Input("xyz1: float32")
18 + .Input("xyz2: float32")
19 + .Input("match: float32")
20 + .Output("grad1: float32")
21 + .Output("grad2: float32");
22 +
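// Editor's note: approxmatch_cpu computes a soft, approximately mass-balanced
// matching between the n points of xyz1 and the m points of xyz2. Each left/right
// point starts with capacity max(n,m)/n resp. max(n,m)/m; the j-loop anneals a
// temperature (level = -4^j, ending at exactly 0), at each step spreading the
// remaining left capacity over right points via a softmax of negative squared
// distance, then rescaling so no right point exceeds its remaining capacity.
// The accumulated weights form `match`, which MatchCost turns into an
// Earth Mover's-style transport cost.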
23 +void approxmatch_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,float * match){
24 + for (int i=0;i<b;i++){
25 + int factorl=std::max(n,m)/n;
26 + int factorr=std::max(n,m)/m;
27 + std::vector<double> saturatedl(n,double(factorl)),saturatedr(m,double(factorr));
28 + std::vector<double> weight(n*m);
29 + for (int j=0;j<n*m;j++)
30 + match[j]=0;
31 + for (int j=8;j>=-2;j--){
32 + //printf("i=%d j=%d\n",i,j);
33 + double level=-powf(4.0,j);
34 + if (j==-2)
35 + level=0;
36 + for (int k=0;k<n;k++){
37 + double x1=xyz1[k*3+0];
38 + double y1=xyz1[k*3+1];
39 + double z1=xyz1[k*3+2];
40 + for (int l=0;l<m;l++){
41 + double x2=xyz2[l*3+0];
42 + double y2=xyz2[l*3+1];
43 + double z2=xyz2[l*3+2];
44 + weight[k*m+l]=expf(level*((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2)))*saturatedr[l];
45 + }
46 + }
47 + std::vector<double> ss(m,1e-9);
48 + for (int k=0;k<n;k++){
49 + double s=1e-9;
50 + for (int l=0;l<m;l++){
51 + s+=weight[k*m+l];
52 + }
53 + for (int l=0;l<m;l++){
54 + weight[k*m+l]=weight[k*m+l]/s*saturatedl[k];
55 + }
56 + for (int l=0;l<m;l++)
57 + ss[l]+=weight[k*m+l];
58 + }
59 + for (int l=0;l<m;l++){
60 + double s=ss[l];
61 + double r=std::min(saturatedr[l]/s,1.0);
62 + ss[l]=r;
63 + }
64 + std::vector<double> ss2(m,0);
65 + for (int k=0;k<n;k++){
66 + double s=0;
67 + for (int l=0;l<m;l++){
68 + weight[k*m+l]*=ss[l];
69 + s+=weight[k*m+l];
70 + ss2[l]+=weight[k*m+l];
71 + }
72 + saturatedl[k]=std::max(saturatedl[k]-s,0.0);
73 + }
74 + for (int k=0;k<n*m;k++)
75 + match[k]+=weight[k];
76 + for (int l=0;l<m;l++){
77 + saturatedr[l]=std::max(saturatedr[l]-ss2[l],0.0);
78 + }
79 + }
80 + xyz1+=n*3;
81 + xyz2+=m*3;
82 + match+=n*m;
83 + }
84 +}
85 +void matchcost_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * cost){
86 + for (int i=0;i<b;i++){
87 + double s=0;
88 + for (int j=0;j<n;j++)
89 + for (int k=0;k<m;k++){
90 + float x1=xyz1[j*3+0];
91 + float y1=xyz1[j*3+1];
92 + float z1=xyz1[j*3+2];
93 + float x2=xyz2[k*3+0];
94 + float y2=xyz2[k*3+1];
95 + float z2=xyz2[k*3+2];
96 + float d=sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1))*match[j*m+k];
97 + s+=d;
98 + }
99 + cost[0]=s;
100 + xyz1+=n*3;
101 + xyz2+=m*3;
102 + match+=n*m;
103 + cost+=1;
104 + }
105 +}
106 +void matchcostgrad_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2){
107 + for (int i=0;i<b;i++){
108 + for (int j=0;j<n*3;j++)
109 + grad1[j]=0;
110 + for (int j=0;j<m;j++){
111 + float sx=0,sy=0,sz=0;
112 + for (int k=0;k<n;k++){
113 + float x2=xyz2[j*3+0];
114 + float y2=xyz2[j*3+1];
115 + float z2=xyz2[j*3+2];
116 + float x1=xyz1[k*3+0];
117 + float y1=xyz1[k*3+1];
118 + float z1=xyz1[k*3+2];
119 + float d=std::max(sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)),1e-20f);
120 + float dx=match[k*m+j]*((x2-x1)/d);
121 + float dy=match[k*m+j]*((y2-y1)/d);
122 + float dz=match[k*m+j]*((z2-z1)/d);
123 + grad1[k*3+0]-=dx;
124 + grad1[k*3+1]-=dy;
125 + grad1[k*3+2]-=dz;
126 + sx+=dx;
127 + sy+=dy;
128 + sz+=dz;
129 + }
130 + grad2[j*3+0]=sx;
131 + grad2[j*3+1]=sy;
132 + grad2[j*3+2]=sz;
133 + }
134 + xyz1+=n*3;
135 + xyz2+=m*3;
136 + match+=n*m;
137 + grad1+=n*3;
138 + grad2+=m*3;
139 + }
140 +}
141 +void approxmatchLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp);
142 +void matchcostLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * out);
143 +void matchcostgradLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2);
144 +
145 +class ApproxMatchGpuOp: public OpKernel{
146 + public:
147 + explicit ApproxMatchGpuOp(OpKernelConstruction* context):OpKernel(context){}
148 + void Compute(OpKernelContext * context)override{
149 + const Tensor& xyz1_tensor=context->input(0);
150 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz1 shape"));
151 + auto xyz1_flat=xyz1_tensor.flat<float>();
152 + const float * xyz1=&(xyz1_flat(0));
153 + int b=xyz1_tensor.shape().dim_size(0);
154 + int n=xyz1_tensor.shape().dim_size(1);
155 + //OP_REQUIRES(context,n<=4096,errors::InvalidArgument("ApproxMatch handles at most 4096 dataset points"));
156 +
157 + const Tensor& xyz2_tensor=context->input(1);
158 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
159 + int m=xyz2_tensor.shape().dim_size(1);
160 + //OP_REQUIRES(context,m<=1024,errors::InvalidArgument("ApproxMatch handles at most 1024 query points"));
161 + auto xyz2_flat=xyz2_tensor.flat<float>();
162 + const float * xyz2=&(xyz2_flat(0));
163 + Tensor * match_tensor=NULL;
164 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,n},&match_tensor));
165 + auto match_flat=match_tensor->flat<float>();
166 + float * match=&(match_flat(0));
167 + Tensor temp_tensor;
168 + OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum<float>::value,TensorShape{b,(n+m)*2},&temp_tensor));
169 + auto temp_flat=temp_tensor.flat<float>();
170 + float * temp=&(temp_flat(0));
171 + approxmatchLauncher(b,n,m,xyz1,xyz2,match,temp);
172 + }
173 +};
174 +REGISTER_KERNEL_BUILDER(Name("ApproxMatch").Device(DEVICE_GPU), ApproxMatchGpuOp);
175 +class ApproxMatchOp: public OpKernel{
176 + public:
177 + explicit ApproxMatchOp(OpKernelConstruction* context):OpKernel(context){}
178 + void Compute(OpKernelContext * context)override{
179 + const Tensor& xyz1_tensor=context->input(0);
180 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz1 shape"));
181 + auto xyz1_flat=xyz1_tensor.flat<float>();
182 + const float * xyz1=&(xyz1_flat(0));
183 + int b=xyz1_tensor.shape().dim_size(0);
184 + int n=xyz1_tensor.shape().dim_size(1);
185 + //OP_REQUIRES(context,n<=4096,errors::InvalidArgument("ApproxMatch handles at most 4096 dataset points"));
186 +
187 + const Tensor& xyz2_tensor=context->input(1);
188 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
189 + int m=xyz2_tensor.shape().dim_size(1);
190 + //OP_REQUIRES(context,m<=1024,errors::InvalidArgument("ApproxMatch handles at most 1024 query points"));
191 + auto xyz2_flat=xyz2_tensor.flat<float>();
192 + const float * xyz2=&(xyz2_flat(0));
193 + Tensor * match_tensor=NULL;
194 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,n},&match_tensor));
195 + auto match_flat=match_tensor->flat<float>();
196 + float * match=&(match_flat(0));
197 + approxmatch_cpu(b,n,m,xyz1,xyz2,match);
198 + }
199 +};
200 +REGISTER_KERNEL_BUILDER(Name("ApproxMatch").Device(DEVICE_CPU), ApproxMatchOp);
201 +class MatchCostGpuOp: public OpKernel{
202 + public:
203 + explicit MatchCostGpuOp(OpKernelConstruction* context):OpKernel(context){}
204 + void Compute(OpKernelContext * context)override{
205 + const Tensor& xyz1_tensor=context->input(0);
206 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape"));
207 + auto xyz1_flat=xyz1_tensor.flat<float>();
208 + const float * xyz1=&(xyz1_flat(0));
209 + int b=xyz1_tensor.shape().dim_size(0);
210 + int n=xyz1_tensor.shape().dim_size(1);
211 +
212 + const Tensor& xyz2_tensor=context->input(1);
213 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
214 + int m=xyz2_tensor.shape().dim_size(1);
215 + auto xyz2_flat=xyz2_tensor.flat<float>();
216 + const float * xyz2=&(xyz2_flat(0));
217 +
218 + const Tensor& match_tensor=context->input(2);
219 + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape"));
220 + auto match_flat=match_tensor.flat<float>();
221 + const float * match=&(match_flat(0));
222 +
223 + Tensor * cost_tensor=NULL;
224 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b},&cost_tensor));
225 + auto cost_flat=cost_tensor->flat<float>();
226 + float * cost=&(cost_flat(0));
227 + matchcostLauncher(b,n,m,xyz1,xyz2,match,cost);
228 + }
229 +};
230 +REGISTER_KERNEL_BUILDER(Name("MatchCost").Device(DEVICE_GPU), MatchCostGpuOp);
231 +class MatchCostOp: public OpKernel{
232 + public:
233 + explicit MatchCostOp(OpKernelConstruction* context):OpKernel(context){}
234 + void Compute(OpKernelContext * context)override{
235 + const Tensor& xyz1_tensor=context->input(0);
236 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape"));
237 + auto xyz1_flat=xyz1_tensor.flat<float>();
238 + const float * xyz1=&(xyz1_flat(0));
239 + int b=xyz1_tensor.shape().dim_size(0);
240 + int n=xyz1_tensor.shape().dim_size(1);
241 +
242 + const Tensor& xyz2_tensor=context->input(1);
243 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
244 + int m=xyz2_tensor.shape().dim_size(1);
245 + auto xyz2_flat=xyz2_tensor.flat<float>();
246 + const float * xyz2=&(xyz2_flat(0));
247 +
248 + const Tensor& match_tensor=context->input(2);
249 + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape"));
250 + auto match_flat=match_tensor.flat<float>();
251 + const float * match=&(match_flat(0));
252 +
253 + Tensor * cost_tensor=NULL;
254 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b},&cost_tensor));
255 + auto cost_flat=cost_tensor->flat<float>();
256 + float * cost=&(cost_flat(0));
257 + matchcost_cpu(b,n,m,xyz1,xyz2,match,cost);
258 + }
259 +};
260 +REGISTER_KERNEL_BUILDER(Name("MatchCost").Device(DEVICE_CPU), MatchCostOp);
261 +
262 +class MatchCostGradGpuOp: public OpKernel{
263 + public:
264 + explicit MatchCostGradGpuOp(OpKernelConstruction* context):OpKernel(context){}
265 + void Compute(OpKernelContext * context)override{
266 + const Tensor& xyz1_tensor=context->input(0);
267 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCostGrad expects (batch_size,num_points,3) xyz1 shape"));
268 + auto xyz1_flat=xyz1_tensor.flat<float>();
269 + const float * xyz1=&(xyz1_flat(0));
270 + int b=xyz1_tensor.shape().dim_size(0);
271 + int n=xyz1_tensor.shape().dim_size(1);
272 +
273 + const Tensor& xyz2_tensor=context->input(1);
274 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCostGrad expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
275 + int m=xyz2_tensor.shape().dim_size(1);
276 + auto xyz2_flat=xyz2_tensor.flat<float>();
277 + const float * xyz2=&(xyz2_flat(0));
278 +
279 + const Tensor& match_tensor=context->input(2);
280 + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape"));
281 + auto match_flat=match_tensor.flat<float>();
282 + const float * match=&(match_flat(0));
283 +
284 + Tensor * grad1_tensor=NULL;
285 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad1_tensor));
286 + auto grad1_flat=grad1_tensor->flat<float>();
287 + float * grad1=&(grad1_flat(0));
288 + Tensor * grad2_tensor=NULL;
289 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad2_tensor));
290 + auto grad2_flat=grad2_tensor->flat<float>();
291 + float * grad2=&(grad2_flat(0));
292 + matchcostgradLauncher(b,n,m,xyz1,xyz2,match,grad1,grad2);
293 + }
294 +};
295 +REGISTER_KERNEL_BUILDER(Name("MatchCostGrad").Device(DEVICE_GPU), MatchCostGradGpuOp);
296 +class MatchCostGradOp: public OpKernel{
297 + public:
298 + explicit MatchCostGradOp(OpKernelConstruction* context):OpKernel(context){}
299 + void Compute(OpKernelContext * context)override{
300 + const Tensor& xyz1_tensor=context->input(0);
301 + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape"));
302 + auto xyz1_flat=xyz1_tensor.flat<float>();
303 + const float * xyz1=&(xyz1_flat(0));
304 + int b=xyz1_tensor.shape().dim_size(0);
305 + int n=xyz1_tensor.shape().dim_size(1);
306 +
307 + const Tensor& xyz2_tensor=context->input(1);
308 + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match"));
309 + int m=xyz2_tensor.shape().dim_size(1);
310 + auto xyz2_flat=xyz2_tensor.flat<float>();
311 + const float * xyz2=&(xyz2_flat(0));
312 +
313 + const Tensor& match_tensor=context->input(2);
314 + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape"));
315 + auto match_flat=match_tensor.flat<float>();
316 + const float * match=&(match_flat(0));
317 +
318 + Tensor * grad1_tensor=NULL;
319 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad1_tensor));
320 + auto grad1_flat=grad1_tensor->flat<float>();
321 + float * grad1=&(grad1_flat(0));
322 + Tensor * grad2_tensor=NULL;
323 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad2_tensor));
324 + auto grad2_flat=grad2_tensor->flat<float>();
325 + float * grad2=&(grad2_flat(0));
326 + matchcostgrad_cpu(b,n,m,xyz1,xyz2,match,grad1,grad2);
327 + }
328 +};
329 +REGISTER_KERNEL_BUILDER(Name("MatchCostGrad").Device(DEVICE_CPU), MatchCostGradOp);
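// Editor's note: the kernels below are the GPU counterpart of the CPU reference
// implementation above. approxmatch assigns one thread block per batch element
// (grid-stride over b), stages xyz tiles through the shared-memory `buf` in
// chunks of Block=1024 points, and uses `temp` as per-block scratch holding the
// remainL/remainR/ratioL/ratioR vectors, laid out as (n+m)*2 floats per block.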
1 +__global__ void approxmatch(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,float * __restrict__ match,float * temp){
2 + float * remainL=temp+blockIdx.x*(n+m)*2, * remainR=temp+blockIdx.x*(n+m)*2+n,*ratioL=temp+blockIdx.x*(n+m)*2+n+m,*ratioR=temp+blockIdx.x*(n+m)*2+n+m+n;
3 + float multiL,multiR;
4 + if (n>=m){
5 + multiL=1;
6 + multiR=n/m;
7 + }else{
8 + multiL=m/n;
9 + multiR=1;
10 + }
11 + const int Block=1024;
12 + __shared__ float buf[Block*4];
13 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
14 + for (int j=threadIdx.x;j<n*m;j+=blockDim.x)
15 + match[i*n*m+j]=0;
16 + for (int j=threadIdx.x;j<n;j+=blockDim.x)
17 + remainL[j]=multiL;
18 + for (int j=threadIdx.x;j<m;j+=blockDim.x)
19 + remainR[j]=multiR;
20 + __syncthreads();
21 + for (int j=7;j>=-2;j--){
22 + float level=-powf(4.0f,j);
23 + if (j==-2){
24 + level=0;
25 + }
26 + for (int k0=0;k0<n;k0+=blockDim.x){
27 + int k=k0+threadIdx.x;
28 + float x1=0,y1=0,z1=0;
29 + if (k<n){
30 + x1=xyz1[i*n*3+k*3+0];
31 + y1=xyz1[i*n*3+k*3+1];
32 + z1=xyz1[i*n*3+k*3+2];
33 + }
34 + float suml=1e-9f;
35 + for (int l0=0;l0<m;l0+=Block){
36 + int lend=min(m,l0+Block)-l0;
37 + for (int l=threadIdx.x;l<lend;l+=blockDim.x){
38 + float x2=xyz2[i*m*3+l0*3+l*3+0];
39 + float y2=xyz2[i*m*3+l0*3+l*3+1];
40 + float z2=xyz2[i*m*3+l0*3+l*3+2];
41 + buf[l*4+0]=x2;
42 + buf[l*4+1]=y2;
43 + buf[l*4+2]=z2;
44 + buf[l*4+3]=remainR[l0+l];
45 + }
46 + __syncthreads();
47 + for (int l=0;l<lend;l++){
48 + float x2=buf[l*4+0];
49 + float y2=buf[l*4+1];
50 + float z2=buf[l*4+2];
51 + float d=level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1));
52 + float w=__expf(d)*buf[l*4+3];
53 + suml+=w;
54 + }
55 + __syncthreads();
56 + }
57 + if (k<n)
58 + ratioL[k]=remainL[k]/suml;
59 + }
60 + /*for (int k=threadIdx.x;k<n;k+=gridDim.x){
61 + float x1=xyz1[i*n*3+k*3+0];
62 + float y1=xyz1[i*n*3+k*3+1];
63 + float z1=xyz1[i*n*3+k*3+2];
64 + float suml=1e-9f;
65 + for (int l=0;l<m;l++){
66 + float x2=xyz2[i*m*3+l*3+0];
67 + float y2=xyz2[i*m*3+l*3+1];
68 + float z2=xyz2[i*m*3+l*3+2];
69 + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*remainR[l];
70 + suml+=w;
71 + }
72 + ratioL[k]=remainL[k]/suml;
73 + }*/
74 + __syncthreads();
75 + for (int l0=0;l0<m;l0+=blockDim.x){
76 + int l=l0+threadIdx.x;
77 + float x2=0,y2=0,z2=0;
78 + if (l<m){
79 + x2=xyz2[i*m*3+l*3+0];
80 + y2=xyz2[i*m*3+l*3+1];
81 + z2=xyz2[i*m*3+l*3+2];
82 + }
83 + float sumr=0;
84 + for (int k0=0;k0<n;k0+=Block){
85 + int kend=min(n,k0+Block)-k0;
86 + for (int k=threadIdx.x;k<kend;k+=blockDim.x){
87 + buf[k*4+0]=xyz1[i*n*3+k0*3+k*3+0];
88 + buf[k*4+1]=xyz1[i*n*3+k0*3+k*3+1];
89 + buf[k*4+2]=xyz1[i*n*3+k0*3+k*3+2];
90 + buf[k*4+3]=ratioL[k0+k];
91 + }
92 + __syncthreads();
93 + for (int k=0;k<kend;k++){
94 + float x1=buf[k*4+0];
95 + float y1=buf[k*4+1];
96 + float z1=buf[k*4+2];
97 + float w=__expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*buf[k*4+3];
98 + sumr+=w;
99 + }
100 + __syncthreads();
101 + }
102 + if (l<m){
103 + sumr*=remainR[l];
104 + float consumption=fminf(remainR[l]/(sumr+1e-9f),1.0f);
105 + ratioR[l]=consumption*remainR[l];
106 + remainR[l]=fmaxf(0.0f,remainR[l]-sumr);
107 + }
108 + }
109 + /*for (int l=threadIdx.x;l<m;l+=blockDim.x){
110 + float x2=xyz2[i*m*3+l*3+0];
111 + float y2=xyz2[i*m*3+l*3+1];
112 + float z2=xyz2[i*m*3+l*3+2];
113 + float sumr=0;
114 + for (int k=0;k<n;k++){
115 + float x1=xyz1[i*n*3+k*3+0];
116 + float y1=xyz1[i*n*3+k*3+1];
117 + float z1=xyz1[i*n*3+k*3+2];
118 + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*ratioL[k];
119 + sumr+=w;
120 + }
121 + sumr*=remainR[l];
122 + float consumption=fminf(remainR[l]/(sumr+1e-9f),1.0f);
123 + ratioR[l]=consumption*remainR[l];
124 + remainR[l]=fmaxf(0.0f,remainR[l]-sumr);
125 + }*/
126 + __syncthreads();
127 + for (int k0=0;k0<n;k0+=blockDim.x){
128 + int k=k0+threadIdx.x;
129 + float x1=0,y1=0,z1=0;
130 + if (k<n){
131 + x1=xyz1[i*n*3+k*3+0];
132 + y1=xyz1[i*n*3+k*3+1];
133 + z1=xyz1[i*n*3+k*3+2];
134 + }
135 + float suml=0;
136 + for (int l0=0;l0<m;l0+=Block){
137 + int lend=min(m,l0+Block)-l0;
138 + for (int l=threadIdx.x;l<lend;l+=blockDim.x){
139 + buf[l*4+0]=xyz2[i*m*3+l0*3+l*3+0];
140 + buf[l*4+1]=xyz2[i*m*3+l0*3+l*3+1];
141 + buf[l*4+2]=xyz2[i*m*3+l0*3+l*3+2];
142 + buf[l*4+3]=ratioR[l0+l];
143 + }
144 + __syncthreads();
145 + float rl=ratioL[k];
146 + if (k<n){
147 + for (int l=0;l<lend;l++){
148 + float x2=buf[l*4+0];
149 + float y2=buf[l*4+1];
150 + float z2=buf[l*4+2];
151 + float w=__expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*rl*buf[l*4+3];
152 + match[i*n*m+(l0+l)*n+k]+=w;
153 + suml+=w;
154 + }
155 + }
156 + __syncthreads();
157 + }
158 + if (k<n)
159 + remainL[k]=fmaxf(0.0f,remainL[k]-suml);
160 + }
161 + /*for (int k=threadIdx.x;k<n;k+=blockDim.x){
162 + float x1=xyz1[i*n*3+k*3+0];
163 + float y1=xyz1[i*n*3+k*3+1];
164 + float z1=xyz1[i*n*3+k*3+2];
165 + float suml=0;
166 + for (int l=0;l<m;l++){
167 + float x2=xyz2[i*m*3+l*3+0];
168 + float y2=xyz2[i*m*3+l*3+1];
169 + float z2=xyz2[i*m*3+l*3+2];
170 + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*ratioL[k]*ratioR[l];
171 + match[i*n*m+l*n+k]+=w;
172 + suml+=w;
173 + }
174 + remainL[k]=fmaxf(0.0f,remainL[k]-suml);
175 + }*/
176 + __syncthreads();
177 + }
178 + }
179 +}
180 +void approxmatchLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp){
181 + approxmatch<<<32,512>>>(b,n,m,xyz1,xyz2,match,temp);
182 +}
183 +__global__ void matchcost(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ out){
184 + __shared__ float allsum[512];
185 + const int Block=1024;
186 + __shared__ float buf[Block*3];
187 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
188 + float subsum=0;
189 + for (int k0=0;k0<n;k0+=blockDim.x){
190 + int k=k0+threadIdx.x;
191 + float x1=0,y1=0,z1=0;
192 + if (k<n){
193 + x1=xyz1[i*n*3+k*3+0];
194 + y1=xyz1[i*n*3+k*3+1];
195 + z1=xyz1[i*n*3+k*3+2];
196 + }
197 + for (int l0=0;l0<m;l0+=Block){
198 + int lend=min(m,l0+Block)-l0;
199 + for (int l=threadIdx.x;l<lend*3;l+=blockDim.x)
200 + buf[l]=xyz2[i*m*3+l0*3+l];
201 + __syncthreads();
202 + if (k<n){
203 + for (int l=0;l<lend;l++){
204 + float x2=buf[l*3+0];
205 + float y2=buf[l*3+1];
206 + float z2=buf[l*3+2];
207 + float d=sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1));
208 + subsum+=d*match[i*n*m+(l0+l)*n+k];
209 + }
210 + }
211 + __syncthreads();
212 + }
213 + }
214 + allsum[threadIdx.x]=subsum;
215 + for (int j=1;j<blockDim.x;j<<=1){
216 + __syncthreads();
217 + if ((threadIdx.x&j)==0 && threadIdx.x+j<blockDim.x){
218 + allsum[threadIdx.x]+=allsum[threadIdx.x+j];
219 + }
220 + }
221 + if (threadIdx.x==0)
222 + out[i]=allsum[0];
223 + __syncthreads();
224 + }
225 +}
226 +void matchcostLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * out){
227 + matchcost<<<32,512>>>(b,n,m,xyz1,xyz2,match,out);
228 +}
229 +__global__ void matchcostgrad2(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ grad2){
230 + __shared__ float sum_grad[256*3];
231 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
232 + int kbeg=m*blockIdx.y/gridDim.y;
233 + int kend=m*(blockIdx.y+1)/gridDim.y;
234 + for (int k=kbeg;k<kend;k++){
235 + float x2=xyz2[(i*m+k)*3+0];
236 + float y2=xyz2[(i*m+k)*3+1];
237 + float z2=xyz2[(i*m+k)*3+2];
238 + float subsumx=0,subsumy=0,subsumz=0;
239 + for (int j=threadIdx.x;j<n;j+=blockDim.x){
240 + float x1=x2-xyz1[(i*n+j)*3+0];
241 + float y1=y2-xyz1[(i*n+j)*3+1];
242 + float z1=z2-xyz1[(i*n+j)*3+2];
243 + float d=match[i*n*m+k*n+j]*rsqrtf(fmaxf(x1*x1+y1*y1+z1*z1,1e-20f));
244 + subsumx+=x1*d;
245 + subsumy+=y1*d;
246 + subsumz+=z1*d;
247 + }
248 + sum_grad[threadIdx.x*3+0]=subsumx;
249 + sum_grad[threadIdx.x*3+1]=subsumy;
250 + sum_grad[threadIdx.x*3+2]=subsumz;
251 + for (int j=1;j<blockDim.x;j<<=1){
252 + __syncthreads();
253 + int j1=threadIdx.x;
254 + int j2=threadIdx.x+j;
255 + if ((j1&j)==0 && j2<blockDim.x){
256 + sum_grad[j1*3+0]+=sum_grad[j2*3+0];
257 + sum_grad[j1*3+1]+=sum_grad[j2*3+1];
258 + sum_grad[j1*3+2]+=sum_grad[j2*3+2];
259 + }
260 + }
261 + if (threadIdx.x==0){
262 + grad2[(i*m+k)*3+0]=sum_grad[0];
263 + grad2[(i*m+k)*3+1]=sum_grad[1];
264 + grad2[(i*m+k)*3+2]=sum_grad[2];
265 + }
266 + __syncthreads();
267 + }
268 + }
269 +}
270 +__global__ void matchcostgrad1(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ grad1){
271 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
272 + for (int l=threadIdx.x;l<n;l+=blockDim.x){
273 + float x1=xyz1[i*n*3+l*3+0];
274 + float y1=xyz1[i*n*3+l*3+1];
275 + float z1=xyz1[i*n*3+l*3+2];
276 + float dx=0,dy=0,dz=0;
277 + for (int k=0;k<m;k++){
278 + float x2=xyz2[i*m*3+k*3+0];
279 + float y2=xyz2[i*m*3+k*3+1];
280 + float z2=xyz2[i*m*3+k*3+2];
281 + float d=match[i*n*m+k*n+l]*rsqrtf(fmaxf((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2),1e-20f));
282 + dx+=(x1-x2)*d;
283 + dy+=(y1-y2)*d;
284 + dz+=(z1-z2)*d;
285 + }
286 + grad1[i*n*3+l*3+0]=dx;
287 + grad1[i*n*3+l*3+1]=dy;
288 + grad1[i*n*3+l*3+2]=dz;
289 + }
290 + }
291 +}
292 +void matchcostgradLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2){
293 + matchcostgrad1<<<32,512>>>(b,n,m,xyz1,xyz2,match,grad1);
294 + matchcostgrad2<<<dim3(32,32),256>>>(b,n,m,xyz1,xyz2,match,grad2);
295 +}
296 +
1 +import tensorflow as tf
2 +from tensorflow.python.framework import ops
3 +import os.path as osp
4 +
5 +base_dir = osp.dirname(osp.abspath(__file__))
6 +
7 +approxmatch_module = tf.load_op_library(osp.join(base_dir, 'tf_approxmatch_so.so'))
8 +
9 +
10 +def approx_match(xyz1,xyz2):
11 + '''
12 +input:
13 + xyz1 : batch_size * #dataset_points * 3
14 + xyz2 : batch_size * #query_points * 3
15 +returns:
16 + match : batch_size * #query_points * #dataset_points
17 + '''
18 + return approxmatch_module.approx_match(xyz1,xyz2)
19 +ops.NoGradient('ApproxMatch')
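# Editor's note: ops.RegisterShape is a legacy (pre-TF 1.0) Python shape
# registration mechanism; on newer TF 1.x builds, shape inference is expected to
# come from the C++ op definition, and these decorators may need to be removed
# for this module to import.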
20 +#@tf.RegisterShape('ApproxMatch')
21 +@ops.RegisterShape('ApproxMatch')
22 +def _approx_match_shape(op):
23 + shape1=op.inputs[0].get_shape().with_rank(3)
24 + shape2=op.inputs[1].get_shape().with_rank(3)
25 + return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[1]])]
26 +
27 +def match_cost(xyz1,xyz2,match):
28 + '''
29 +input:
30 + xyz1 : batch_size * #dataset_points * 3
31 + xyz2 : batch_size * #query_points * 3
32 + match : batch_size * #query_points * #dataset_points
33 +returns:
34 + cost : batch_size
35 + '''
36 + return approxmatch_module.match_cost(xyz1,xyz2,match)
37 +#@tf.RegisterShape('MatchCost')
38 +@ops.RegisterShape('MatchCost')
39 +def _match_cost_shape(op):
40 + shape1=op.inputs[0].get_shape().with_rank(3)
41 + shape2=op.inputs[1].get_shape().with_rank(3)
42 + shape3=op.inputs[2].get_shape().with_rank(3)
43 + return [tf.TensorShape([shape1.dims[0]])]
44 +@tf.RegisterGradient('MatchCost')
45 +def _match_cost_grad(op,grad_cost):
46 + xyz1=op.inputs[0]
47 + xyz2=op.inputs[1]
48 + match=op.inputs[2]
49 + grad_1,grad_2=approxmatch_module.match_cost_grad(xyz1,xyz2,match)
50 + return [grad_1*tf.expand_dims(tf.expand_dims(grad_cost,1),2),grad_2*tf.expand_dims(tf.expand_dims(grad_cost,1),2),None]
51 +
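# Editor's sketch of computing an approximate EMD loss between two batches of
# clouds (hypothetical shapes):
#   xyz1 = tf.random_uniform((8, 2048, 3))
#   xyz2 = tf.random_uniform((8, 2048, 3))
#   match = approx_match(xyz1, xyz2)
#   emd = tf.reduce_mean(match_cost(xyz1, xyz2, match)) / 2048  # per-point cost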
52 +if __name__=='__main__':
53 + alpha=0.5
54 + beta=2.0
55 + # import bestmatch  # not included in this upload; only used by the commented-out exact-match check below
56 + import numpy as np
57 + import math
58 + import random
59 + import cv2
60 +
61 + import tf_nndistance
62 +
63 + npoint=100
64 +
65 + with tf.device('/gpu:2'):
66 + pt_in=tf.placeholder(tf.float32,shape=(1,npoint*4,3))
67 + mypoints=tf.Variable(np.random.randn(1,npoint,3).astype('float32'))
68 + match=approx_match(pt_in,mypoints)
69 + loss=tf.reduce_sum(match_cost(pt_in,mypoints,match))
70 + #match=approx_match(mypoints,pt_in)
71 + #loss=tf.reduce_sum(match_cost(mypoints,pt_in,match))
72 + #distf,_,distb,_=tf_nndistance.nn_distance(pt_in,mypoints)
73 + #loss=tf.reduce_sum((distf+1e-9)**0.5)*0.5+tf.reduce_sum((distb+1e-9)**0.5)*0.5
74 + #loss=tf.reduce_max((distf+1e-9)**0.5)*0.5*npoint+tf.reduce_max((distb+1e-9)**0.5)*0.5*npoint
75 +
76 + optimizer=tf.train.GradientDescentOptimizer(1e-4).minimize(loss)
77 + with tf.Session('') as sess:
78 + sess.run(tf.global_variables_initializer())
79 + while True:
80 + meanloss=0
81 + meantrueloss=0
82 + for i in range(1001):
83 + #phi=np.random.rand(4*npoint)*math.pi*2
84 + #tpoints=(np.hstack([np.cos(phi)[:,None],np.sin(phi)[:,None],(phi*0)[:,None]])*random.random())[None,:,:]
85 + #tpoints=((np.random.rand(400)-0.5)[:,None]*[0,2,0]+[(random.random()-0.5)*2,0,0]).astype('float32')[None,:,:]
86 + tpoints=np.hstack([np.linspace(-1,1,400)[:,None],(random.random()*2*np.linspace(1,0,400)**2)[:,None],np.zeros((400,1))])[None,:,:]
87 + trainloss,_=sess.run([loss,optimizer],feed_dict={pt_in:tpoints.astype('float32')})
88 + trainloss,trainmatch=sess.run([loss,match],feed_dict={pt_in:tpoints.astype('float32')})
89 + #trainmatch=trainmatch.transpose((0,2,1))
90 + show=np.zeros((400,400,3),dtype='uint8')^255
91 + trainmypoints=sess.run(mypoints)
92 + for i in range(len(tpoints[0])):
93 + u=np.random.choice(range(len(trainmypoints[0])),p=trainmatch[0].T[i])
94 + cv2.line(show,
95 + (int(tpoints[0][i,1]*100+200),int(tpoints[0][i,0]*100+200)),
96 + (int(trainmypoints[0][u,1]*100+200),int(trainmypoints[0][u,0]*100+200)),
97 + (0,255,0))  # green (BGR); cv2.cv.CV_RGB was removed in OpenCV 3+
98 + for x,y,z in tpoints[0]:
99 + cv2.circle(show,(int(y*100+200),int(x*100+200)),2,(0,0,255))  # red (BGR)
100 + for x,y,z in trainmypoints[0]:
101 + cv2.circle(show,(int(y*100+200),int(x*100+200)),3,(255,0,0))  # blue (BGR)
102 + cost=((tpoints[0][:,None,:]-np.repeat(trainmypoints[0][None,:,:],4,axis=1))**2).sum(axis=2)**0.5
103 + #trueloss=bestmatch.bestmatch(cost)[0]
104 + print(trainloss) #,trueloss
105 + cv2.imshow('show',show)
106 + cmd=cv2.waitKey(10)%256
107 + if cmd==ord('q'):
108 + break
1 +#include "tensorflow/core/framework/op.h"
2 +#include "tensorflow/core/framework/op_kernel.h"
3 +REGISTER_OP("NnDistance")
4 + .Input("xyz1: float32")
5 + .Input("xyz2: float32")
6 + .Output("dist1: float32")
7 + .Output("idx1: int32")
8 + .Output("dist2: float32")
9 + .Output("idx2: int32");
10 +REGISTER_OP("NnDistanceGrad")
11 + .Input("xyz1: float32")
12 + .Input("xyz2: float32")
13 + .Input("grad_dist1: float32")
14 + .Input("idx1: int32")
15 + .Input("grad_dist2: float32")
16 + .Input("idx2: int32")
17 + .Output("grad_xyz1: float32")
18 + .Output("grad_xyz2: float32");
19 +using namespace tensorflow;
20 +
21 +static void nnsearch(int b,int n,int m,const float * xyz1,const float * xyz2,float * dist,int * idx){
22 + for (int i=0;i<b;i++){
23 + for (int j=0;j<n;j++){
24 + float x1=xyz1[(i*n+j)*3+0];
25 + float y1=xyz1[(i*n+j)*3+1];
26 + float z1=xyz1[(i*n+j)*3+2];
27 + double best=0;
28 + int besti=0;
29 + for (int k=0;k<m;k++){
30 + float x2=xyz2[(i*m+k)*3+0]-x1;
31 + float y2=xyz2[(i*m+k)*3+1]-y1;
32 + float z2=xyz2[(i*m+k)*3+2]-z1;
33 + double d=x2*x2+y2*y2+z2*z2;
34 + if (k==0 || d<best){
35 + best=d;
36 + besti=k;
37 + }
38 + }
39 + dist[i*n+j]=best;
40 + idx[i*n+j]=besti;
41 + }
42 + }
43 +}
44 +
45 +class NnDistanceOp : public OpKernel{
46 + public:
47 + explicit NnDistanceOp(OpKernelConstruction* context):OpKernel(context){}
48 + void Compute(OpKernelContext * context)override{
49 + const Tensor& xyz1_tensor=context->input(0);
50 + const Tensor& xyz2_tensor=context->input(1);
51 + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz1 be of shape (batch,#points,3)"));
52 + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz1"));
53 + int b=xyz1_tensor.shape().dim_size(0);
54 + int n=xyz1_tensor.shape().dim_size(1);
55 + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz2 be of shape (batch,#points,3)"));
56 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz2"));
57 + int m=xyz2_tensor.shape().dim_size(1);
58 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistance expects xyz1 and xyz2 have same batch size"));
59 + auto xyz1_flat=xyz1_tensor.flat<float>();
60 + const float * xyz1=&xyz1_flat(0);
61 + auto xyz2_flat=xyz2_tensor.flat<float>();
62 + const float * xyz2=&xyz2_flat(0);
63 + Tensor * dist1_tensor=NULL;
64 + Tensor * idx1_tensor=NULL;
65 + Tensor * dist2_tensor=NULL;
66 + Tensor * idx2_tensor=NULL;
67 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n},&dist1_tensor));
68 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,n},&idx1_tensor));
69 + auto dist1_flat=dist1_tensor->flat<float>();
70 + auto idx1_flat=idx1_tensor->flat<int>();
71 + OP_REQUIRES_OK(context,context->allocate_output(2,TensorShape{b,m},&dist2_tensor));
72 + OP_REQUIRES_OK(context,context->allocate_output(3,TensorShape{b,m},&idx2_tensor));
73 + auto dist2_flat=dist2_tensor->flat<float>();
74 + auto idx2_flat=idx2_tensor->flat<int>();
75 + float * dist1=&(dist1_flat(0));
76 + int * idx1=&(idx1_flat(0));
77 + float * dist2=&(dist2_flat(0));
78 + int * idx2=&(idx2_flat(0));
79 + nnsearch(b,n,m,xyz1,xyz2,dist1,idx1);
80 + nnsearch(b,m,n,xyz2,xyz1,dist2,idx2);
81 + }
82 +};
83 +REGISTER_KERNEL_BUILDER(Name("NnDistance").Device(DEVICE_CPU), NnDistanceOp);
84 +class NnDistanceGradOp : public OpKernel{
85 + public:
86 + explicit NnDistanceGradOp(OpKernelConstruction* context):OpKernel(context){}
87 + void Compute(OpKernelContext * context)override{
88 + const Tensor& xyz1_tensor=context->input(0);
89 + const Tensor& xyz2_tensor=context->input(1);
90 + const Tensor& grad_dist1_tensor=context->input(2);
91 + const Tensor& idx1_tensor=context->input(3);
92 + const Tensor& grad_dist2_tensor=context->input(4);
93 + const Tensor& idx2_tensor=context->input(5);
94 + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz1 be of shape (batch,#points,3)"));
95 + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz1"));
96 + int b=xyz1_tensor.shape().dim_size(0);
97 + int n=xyz1_tensor.shape().dim_size(1);
98 + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz2 be of shape (batch,#points,3)"));
99 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz2"));
100 + int m=xyz2_tensor.shape().dim_size(1);
101 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistanceGrad expects xyz1 and xyz2 have same batch size"));
102 + OP_REQUIRES(context,grad_dist1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires grad_dist1 be of shape(batch,#points)"));
103 + OP_REQUIRES(context,idx1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires idx1 be of shape(batch,#points)"));
104 + OP_REQUIRES(context,grad_dist2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires grad_dist2 be of shape(batch,#points)"));
105 + OP_REQUIRES(context,idx2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires idx2 be of shape(batch,#points)"));
106 + auto xyz1_flat=xyz1_tensor.flat<float>();
107 + const float * xyz1=&xyz1_flat(0);
108 + auto xyz2_flat=xyz2_tensor.flat<float>();
109 + const float * xyz2=&xyz2_flat(0);
110 + auto idx1_flat=idx1_tensor.flat<int>();
111 + const int * idx1=&idx1_flat(0);
112 + auto idx2_flat=idx2_tensor.flat<int>();
113 + const int * idx2=&idx2_flat(0);
114 + auto grad_dist1_flat=grad_dist1_tensor.flat<float>();
115 + const float * grad_dist1=&grad_dist1_flat(0);
116 + auto grad_dist2_flat=grad_dist2_tensor.flat<float>();
117 + const float * grad_dist2=&grad_dist2_flat(0);
118 + Tensor * grad_xyz1_tensor=NULL;
119 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad_xyz1_tensor));
120 + Tensor * grad_xyz2_tensor=NULL;
121 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad_xyz2_tensor));
122 + auto grad_xyz1_flat=grad_xyz1_tensor->flat<float>();
123 + float * grad_xyz1=&grad_xyz1_flat(0);
124 + auto grad_xyz2_flat=grad_xyz2_tensor->flat<float>();
125 + float * grad_xyz2=&grad_xyz2_flat(0);
126 + for (int i=0;i<b*n*3;i++)
127 + grad_xyz1[i]=0;
128 + for (int i=0;i<b*m*3;i++)
129 + grad_xyz2[i]=0;
130 + for (int i=0;i<b;i++){
131 + for (int j=0;j<n;j++){
132 + float x1=xyz1[(i*n+j)*3+0];
133 + float y1=xyz1[(i*n+j)*3+1];
134 + float z1=xyz1[(i*n+j)*3+2];
135 + int j2=idx1[i*n+j];
136 + float x2=xyz2[(i*m+j2)*3+0];
137 + float y2=xyz2[(i*m+j2)*3+1];
138 + float z2=xyz2[(i*m+j2)*3+2];
139 + float g=grad_dist1[i*n+j]*2;
140 + grad_xyz1[(i*n+j)*3+0]+=g*(x1-x2);
141 + grad_xyz1[(i*n+j)*3+1]+=g*(y1-y2);
142 + grad_xyz1[(i*n+j)*3+2]+=g*(z1-z2);
143 + grad_xyz2[(i*m+j2)*3+0]-=(g*(x1-x2));
144 + grad_xyz2[(i*m+j2)*3+1]-=(g*(y1-y2));
145 + grad_xyz2[(i*m+j2)*3+2]-=(g*(z1-z2));
146 + }
147 + for (int j=0;j<m;j++){
148 + float x1=xyz2[(i*m+j)*3+0];
149 + float y1=xyz2[(i*m+j)*3+1];
150 + float z1=xyz2[(i*m+j)*3+2];
151 + int j2=idx2[i*m+j];
152 + float x2=xyz1[(i*n+j2)*3+0];
153 + float y2=xyz1[(i*n+j2)*3+1];
154 + float z2=xyz1[(i*n+j2)*3+2];
155 + float g=grad_dist2[i*m+j]*2;
156 + grad_xyz2[(i*m+j)*3+0]+=g*(x1-x2);
157 + grad_xyz2[(i*m+j)*3+1]+=g*(y1-y2);
158 + grad_xyz2[(i*m+j)*3+2]+=g*(z1-z2);
159 + grad_xyz1[(i*n+j2)*3+0]-=(g*(x1-x2));
160 + grad_xyz1[(i*n+j2)*3+1]-=(g*(y1-y2));
161 + grad_xyz1[(i*n+j2)*3+2]-=(g*(z1-z2));
162 + }
163 + }
164 + }
165 +};
166 +REGISTER_KERNEL_BUILDER(Name("NnDistanceGrad").Device(DEVICE_CPU), NnDistanceGradOp);
167 +
168 +void NmDistanceKernelLauncher(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i);
169 +class NnDistanceGpuOp : public OpKernel{
170 + public:
171 + explicit NnDistanceGpuOp(OpKernelConstruction* context):OpKernel(context){}
172 + void Compute(OpKernelContext * context)override{
173 + const Tensor& xyz1_tensor=context->input(0);
174 + const Tensor& xyz2_tensor=context->input(1);
175 + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz1 be of shape (batch,#points,3)"));
176 + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz1"));
177 + int b=xyz1_tensor.shape().dim_size(0);
178 + int n=xyz1_tensor.shape().dim_size(1);
179 + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz2 be of shape (batch,#points,3)"));
180 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz2"));
181 + int m=xyz2_tensor.shape().dim_size(1);
182 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistance expects xyz1 and xyz2 have same batch size"));
183 + auto xyz1_flat=xyz1_tensor.flat<float>();
184 + const float * xyz1=&xyz1_flat(0);
185 + auto xyz2_flat=xyz2_tensor.flat<float>();
186 + const float * xyz2=&xyz2_flat(0);
187 + Tensor * dist1_tensor=NULL;
188 + Tensor * idx1_tensor=NULL;
189 + Tensor * dist2_tensor=NULL;
190 + Tensor * idx2_tensor=NULL;
191 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n},&dist1_tensor));
192 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,n},&idx1_tensor));
193 + auto dist1_flat=dist1_tensor->flat<float>();
194 + auto idx1_flat=idx1_tensor->flat<int>();
195 + OP_REQUIRES_OK(context,context->allocate_output(2,TensorShape{b,m},&dist2_tensor));
196 + OP_REQUIRES_OK(context,context->allocate_output(3,TensorShape{b,m},&idx2_tensor));
197 + auto dist2_flat=dist2_tensor->flat<float>();
198 + auto idx2_flat=idx2_tensor->flat<int>();
199 + float * dist1=&(dist1_flat(0));
200 + int * idx1=&(idx1_flat(0));
201 + float * dist2=&(dist2_flat(0));
202 + int * idx2=&(idx2_flat(0));
203 + NmDistanceKernelLauncher(b,n,xyz1,m,xyz2,dist1,idx1,dist2,idx2);
204 + }
205 +};
206 +REGISTER_KERNEL_BUILDER(Name("NnDistance").Device(DEVICE_GPU), NnDistanceGpuOp);
207 +
208 +void NmDistanceGradKernelLauncher(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2);
209 +class NnDistanceGradGpuOp : public OpKernel{
210 + public:
211 + explicit NnDistanceGradGpuOp(OpKernelConstruction* context):OpKernel(context){}
212 + void Compute(OpKernelContext * context)override{
213 + const Tensor& xyz1_tensor=context->input(0);
214 + const Tensor& xyz2_tensor=context->input(1);
215 + const Tensor& grad_dist1_tensor=context->input(2);
216 + const Tensor& idx1_tensor=context->input(3);
217 + const Tensor& grad_dist2_tensor=context->input(4);
218 + const Tensor& idx2_tensor=context->input(5);
219 + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz1 be of shape (batch,#points,3)"));
220 + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz1"));
221 + int b=xyz1_tensor.shape().dim_size(0);
222 + int n=xyz1_tensor.shape().dim_size(1);
223 + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz2 be of shape (batch,#points,3)"));
224 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz2"));
225 + int m=xyz2_tensor.shape().dim_size(1);
226 + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistanceGrad expects xyz1 and xyz2 have same batch size"));
227 +		OP_REQUIRES(context,grad_dist1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires grad_dist1 be of shape (batch,#points)"));
228 +		OP_REQUIRES(context,idx1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires idx1 be of shape (batch,#points)"));
229 +		OP_REQUIRES(context,grad_dist2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires grad_dist2 be of shape (batch,#points)"));
230 +		OP_REQUIRES(context,idx2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires idx2 be of shape (batch,#points)"));
231 + auto xyz1_flat=xyz1_tensor.flat<float>();
232 + const float * xyz1=&xyz1_flat(0);
233 + auto xyz2_flat=xyz2_tensor.flat<float>();
234 + const float * xyz2=&xyz2_flat(0);
235 + auto idx1_flat=idx1_tensor.flat<int>();
236 + const int * idx1=&idx1_flat(0);
237 + auto idx2_flat=idx2_tensor.flat<int>();
238 + const int * idx2=&idx2_flat(0);
239 + auto grad_dist1_flat=grad_dist1_tensor.flat<float>();
240 + const float * grad_dist1=&grad_dist1_flat(0);
241 + auto grad_dist2_flat=grad_dist2_tensor.flat<float>();
242 + const float * grad_dist2=&grad_dist2_flat(0);
243 + Tensor * grad_xyz1_tensor=NULL;
244 + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad_xyz1_tensor));
245 + Tensor * grad_xyz2_tensor=NULL;
246 + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad_xyz2_tensor));
247 + auto grad_xyz1_flat=grad_xyz1_tensor->flat<float>();
248 + float * grad_xyz1=&grad_xyz1_flat(0);
249 + auto grad_xyz2_flat=grad_xyz2_tensor->flat<float>();
250 + float * grad_xyz2=&grad_xyz2_flat(0);
251 + NmDistanceGradKernelLauncher(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_dist2,idx2,grad_xyz1,grad_xyz2);
252 + }
253 +};
254 +REGISTER_KERNEL_BUILDER(Name("NnDistanceGrad").Device(DEVICE_GPU), NnDistanceGradGpuOp);
1 +#if GOOGLE_CUDA
2 +#define EIGEN_USE_GPU
3 +// #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
4 +
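// Brute-force nearest neighbor: for each point in xyz, find the closest of the m points
// in xyz2 within the same batch element. xyz2 is streamed through shared memory in tiles
// of 512 points and the inner distance loop is unrolled 4x; squared distances are
// compared throughout (no sqrt is taken).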
5 +__global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){
6 + const int batch=512;
7 + __shared__ float buf[batch*3];
8 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
9 + for (int k2=0;k2<m;k2+=batch){
10 + int end_k=min(m,k2+batch)-k2;
11 + for (int j=threadIdx.x;j<end_k*3;j+=blockDim.x){
12 + buf[j]=xyz2[(i*m+k2)*3+j];
13 + }
14 + __syncthreads();
15 + for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
16 + float x1=xyz[(i*n+j)*3+0];
17 + float y1=xyz[(i*n+j)*3+1];
18 + float z1=xyz[(i*n+j)*3+2];
19 + int best_i=0;
20 + float best=0;
21 +				int end_ka=end_k-(end_k&3); // largest multiple of 4 <= end_k, for the unrolled loop below
22 + if (end_ka==batch){
23 + for (int k=0;k<batch;k+=4){
24 + {
25 + float x2=buf[k*3+0]-x1;
26 + float y2=buf[k*3+1]-y1;
27 + float z2=buf[k*3+2]-z1;
28 + float d=x2*x2+y2*y2+z2*z2;
29 + if (k==0 || d<best){
30 + best=d;
31 + best_i=k+k2;
32 + }
33 + }
34 + {
35 + float x2=buf[k*3+3]-x1;
36 + float y2=buf[k*3+4]-y1;
37 + float z2=buf[k*3+5]-z1;
38 + float d=x2*x2+y2*y2+z2*z2;
39 + if (d<best){
40 + best=d;
41 + best_i=k+k2+1;
42 + }
43 + }
44 + {
45 + float x2=buf[k*3+6]-x1;
46 + float y2=buf[k*3+7]-y1;
47 + float z2=buf[k*3+8]-z1;
48 + float d=x2*x2+y2*y2+z2*z2;
49 + if (d<best){
50 + best=d;
51 + best_i=k+k2+2;
52 + }
53 + }
54 + {
55 + float x2=buf[k*3+9]-x1;
56 + float y2=buf[k*3+10]-y1;
57 + float z2=buf[k*3+11]-z1;
58 + float d=x2*x2+y2*y2+z2*z2;
59 + if (d<best){
60 + best=d;
61 + best_i=k+k2+3;
62 + }
63 + }
64 + }
65 + }else{
66 + for (int k=0;k<end_ka;k+=4){
67 + {
68 + float x2=buf[k*3+0]-x1;
69 + float y2=buf[k*3+1]-y1;
70 + float z2=buf[k*3+2]-z1;
71 + float d=x2*x2+y2*y2+z2*z2;
72 + if (k==0 || d<best){
73 + best=d;
74 + best_i=k+k2;
75 + }
76 + }
77 + {
78 + float x2=buf[k*3+3]-x1;
79 + float y2=buf[k*3+4]-y1;
80 + float z2=buf[k*3+5]-z1;
81 + float d=x2*x2+y2*y2+z2*z2;
82 + if (d<best){
83 + best=d;
84 + best_i=k+k2+1;
85 + }
86 + }
87 + {
88 + float x2=buf[k*3+6]-x1;
89 + float y2=buf[k*3+7]-y1;
90 + float z2=buf[k*3+8]-z1;
91 + float d=x2*x2+y2*y2+z2*z2;
92 + if (d<best){
93 + best=d;
94 + best_i=k+k2+2;
95 + }
96 + }
97 + {
98 + float x2=buf[k*3+9]-x1;
99 + float y2=buf[k*3+10]-y1;
100 + float z2=buf[k*3+11]-z1;
101 + float d=x2*x2+y2*y2+z2*z2;
102 + if (d<best){
103 + best=d;
104 + best_i=k+k2+3;
105 + }
106 + }
107 + }
108 + }
109 + for (int k=end_ka;k<end_k;k++){
110 + float x2=buf[k*3+0]-x1;
111 + float y2=buf[k*3+1]-y1;
112 + float z2=buf[k*3+2]-z1;
113 + float d=x2*x2+y2*y2+z2*z2;
114 + if (k==0 || d<best){
115 + best=d;
116 + best_i=k+k2;
117 + }
118 + }
119 +			if (k2==0 || result[(i*n+j)]>best){ // keep the best candidate across shared-memory tiles
120 + result[(i*n+j)]=best;
121 + result_i[(i*n+j)]=best_i;
122 + }
123 + }
124 + __syncthreads();
125 + }
126 + }
127 +}
128 +void NmDistanceKernelLauncher(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i){
129 +	NmDistanceKernel<<<dim3(32,16,1),512>>>(b,n,xyz,m,xyz2,result,result_i); // xyz -> xyz2 direction
130 +	NmDistanceKernel<<<dim3(32,16,1),512>>>(b,m,xyz2,n,xyz,result2,result2_i); // xyz2 -> xyz direction (arguments swapped)
131 +}
132 +__global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){
133 + for (int i=blockIdx.x;i<b;i+=gridDim.x){
134 + for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){
135 + float x1=xyz1[(i*n+j)*3+0];
136 + float y1=xyz1[(i*n+j)*3+1];
137 + float z1=xyz1[(i*n+j)*3+2];
138 + int j2=idx1[i*n+j];
139 + float x2=xyz2[(i*m+j2)*3+0];
140 + float y2=xyz2[(i*m+j2)*3+1];
141 + float z2=xyz2[(i*m+j2)*3+2];
142 +			float g=grad_dist1[i*n+j]*2; // chain rule: d(d^2)/dx = 2*(x1-x2) times the upstream gradient
143 + atomicAdd(&(grad_xyz1[(i*n+j)*3+0]),g*(x1-x2));
144 + atomicAdd(&(grad_xyz1[(i*n+j)*3+1]),g*(y1-y2));
145 + atomicAdd(&(grad_xyz1[(i*n+j)*3+2]),g*(z1-z2));
146 + atomicAdd(&(grad_xyz2[(i*m+j2)*3+0]),-(g*(x1-x2)));
147 + atomicAdd(&(grad_xyz2[(i*m+j2)*3+1]),-(g*(y1-y2)));
148 + atomicAdd(&(grad_xyz2[(i*m+j2)*3+2]),-(g*(z1-z2)));
149 + }
150 + }
151 +}
152 +void NmDistanceGradKernelLauncher(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2){
153 +	cudaMemset(grad_xyz1,0,b*n*3*sizeof(float));
154 +	cudaMemset(grad_xyz2,0,b*m*3*sizeof(float));
155 +	NmDistanceGradKernel<<<dim3(1,16,1),256>>>(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_xyz1,grad_xyz2); // gradients flowing through dist1
156 +	NmDistanceGradKernel<<<dim3(1,16,1),256>>>(b,m,xyz2,n,xyz1,grad_dist2,idx2,grad_xyz2,grad_xyz1); // gradients flowing through dist2 (arguments swapped)
157 +}
158 +
159 +#endif
1 +import os, sys
2 +import tensorflow as tf
3 +from tensorflow.python.framework import ops
4 +BASE_DIR = os.path.dirname(os.path.abspath(__file__))
5 +nn_distance_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_nndistance_so.so'))
6 +
7 +def nn_distance(xyz1,xyz2):
8 +    '''
9 +Computes the squared distance to the nearest neighbor for each point in a pair of point clouds
10 +input: xyz1: (batch_size,#points_1,3) the first point cloud
11 +input: xyz2: (batch_size,#points_2,3) the second point cloud
12 +output: dist1: (batch_size,#points_1) squared distance from each point in xyz1 to its nearest neighbor in xyz2
13 +output: idx1: (batch_size,#points_1) index of the nearest neighbor in xyz2 for each point in xyz1
14 +output: dist2: (batch_size,#points_2) squared distance from each point in xyz2 to its nearest neighbor in xyz1
15 +output: idx2: (batch_size,#points_2) index of the nearest neighbor in xyz1 for each point in xyz2
16 +    '''
17 + return nn_distance_module.nn_distance(xyz1,xyz2)
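# A minimal usage sketch (pcd1/pcd2 are hypothetical (batch, num_points, 3) float32
# tensors). Note that the op returns *squared* distances, so a symmetric Chamfer
# distance takes the square root before averaging, as tf_util.chamfer does in this repo:
#   dist1, _, dist2, _ = nn_distance(pcd1, pcd2)
#   cd = (tf.reduce_mean(tf.sqrt(dist1)) + tf.reduce_mean(tf.sqrt(dist2))) / 2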
18 +#@tf.RegisterShape('NnDistance')
19 +#def _nn_distance_shape(op):
20 + #shape1=op.inputs[0].get_shape().with_rank(3)
21 + #shape2=op.inputs[1].get_shape().with_rank(3)
22 + #return [tf.TensorShape([shape1.dims[0],shape1.dims[1]]),tf.TensorShape([shape1.dims[0],shape1.dims[1]]),
23 + #tf.TensorShape([shape2.dims[0],shape2.dims[1]]),tf.TensorShape([shape2.dims[0],shape2.dims[1]])]
24 +@ops.RegisterGradient('NnDistance')
25 +def _nn_distance_grad(op,grad_dist1,grad_idx1,grad_dist2,grad_idx2):
26 + xyz1=op.inputs[0]
27 + xyz2=op.inputs[1]
28 + idx1=op.outputs[1]
29 + idx2=op.outputs[3]
30 + return nn_distance_module.nn_distance_grad(xyz1,xyz2,grad_dist1,idx1,grad_dist2,idx2)
31 +
32 +
33 +if __name__=='__main__':
34 + import numpy as np
35 + import random
36 + import time
37 + from tensorflow.python.ops.gradient_checker import compute_gradient
38 + random.seed(100)
39 + np.random.seed(100)
40 + with tf.Session('') as sess:
41 + xyz1=np.random.randn(32,16384,3).astype('float32')
42 + xyz2=np.random.randn(32,1024,3).astype('float32')
43 + #with tf.device('/gpu:0'):
44 + if True:
45 + inp1=tf.Variable(xyz1)
46 + inp2=tf.constant(xyz2)
47 + reta,retb,retc,retd=nn_distance(inp1,inp2)
48 + loss=tf.reduce_sum(reta)+tf.reduce_sum(retc)
49 + train=tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss)
50 +        sess.run(tf.global_variables_initializer())  # initialize_all_variables is deprecated
51 + t0=time.time()
52 + t1=t0
53 + best=1e100
54 +        for i in range(100):  # xrange is Python 2 only
55 + trainloss,_=sess.run([loss,train])
56 + newt=time.time()
57 + best=min(best,newt-t1)
58 + print(i,trainloss,(newt-t0)/(i+1),best)
59 + t1=newt
60 + #print sess.run([inp1,retb,inp2,retd])
61 + #grads=compute_gradient([inp1,inp2],[(16,32,3),(16,32,3)],loss,(1,),[xyz1,xyz2])
62 + #for i,j in grads:
63 + #print i.shape,j.shape,np.mean(np.abs(i-j)),np.mean(np.abs(i)),np.mean(np.abs(j))
64 + #for i in xrange(10):
65 + #t0=time.time()
66 + #a,b,c,d=sess.run([reta,retb,retc,retd],feed_dict={inp1:xyz1,inp2:xyz2})
67 + #print 'time',time.time()-t0
68 + #print a.shape,b.shape,c.shape,d.shape
69 + #print a.dtype,b.dtype,c.dtype,d.dtype
70 + #samples=np.array(random.sample(range(xyz2.shape[1]),100),dtype='int32')
71 + #dist1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).min(axis=-1)
72 + #idx1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1)
73 + #print np.abs(dist1-a[:,samples]).max()
74 + #print np.abs(idx1-b[:,samples]).max()
75 + #dist2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).min(axis=-1)
76 + #idx2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1)
77 + #print np.abs(dist2-c[:,samples]).max()
78 + #print np.abs(idx2-d[:,samples]).max()
79 +
1 +*.png
2 +*.log
\ No newline at end of file
1 +This directory contains code that generates partial point clouds from ShapeNet models. To use it:
2 +1. Install [Blender](https://blender.org/download/).
3 +2. Create a model list. Each line of the model list should be in the format `[synset_id]/[model_id]`.
4 +3. Run `blender -b -P render_depth.py [ShapeNet directory] [model list] [output directory] [num scans per model]` to render the depth images. The images will be stored in OpenEXR format.
5 +4. Run `python3 process_exr.py [model list] [intrinsics file] [output directory] [num scans per model]` to convert the `.exr` files into 16 bit PNG depth images and point clouds in the model's coordinate frame.
\ No newline at end of file
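As a concrete example (directory names and the scan count are placeholders, not prescribed by this repo), a model list with lines such as `02958343/<model_id>` could be rendered and processed with:

    blender -b -P render_depth.py /path/to/ShapeNetCore model_list.txt output 8
    python3 process_exr.py model_list.txt output/intrinsics.txt output 8

Note that `render_depth.py` writes `intrinsics.txt` into the output directory, so the second command can point directly at it.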
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import Imath
4 +import OpenEXR
5 +import argparse
6 +import array
7 +import numpy as np
8 +import os
9 +import open3d as o3d  # namespaced open3d API; the legacy star-import names are deprecated
10 +
11 +
12 +def read_exr(exr_path, height, width):
13 + file = OpenEXR.InputFile(exr_path)
14 + depth_arr = array.array('f', file.channel('R', Imath.PixelType(Imath.PixelType.FLOAT)))
15 + depth = np.array(depth_arr).reshape((height, width))
16 + depth[depth < 0] = 0
17 + depth[np.isinf(depth)] = 0
18 + return depth
19 +
20 +
21 +def depth2pcd(depth, intrinsics, pose):
22 + inv_K = np.linalg.inv(intrinsics)
23 + inv_K[2, 2] = -1
24 + depth = np.flipud(depth)
25 + y, x = np.where(depth > 0)
26 + # image coordinates -> camera coordinates
27 +    points = np.dot(inv_K, np.stack([x, y, np.ones_like(x)], 0) * depth[y, x])  # scale homogeneous pixel coords by depth
28 + # camera coordinates -> world coordinates
29 + points = np.dot(pose, np.concatenate([points, np.ones((1, points.shape[1]))], 0)).T[:, :3]
30 + return points
31 +
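# Note on the back-projection above (our reading of the code, not documented in the
# original): for a pixel (x, y) with depth d, the camera-space point is
# d * inv(K) @ [x, y, 1]^T. inv_K[2, 2] is set to -1 because Blender's camera looks
# down its local -z axis, and np.flipud compensates for the image's row ordering.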
32 +
33 +if __name__ == '__main__':
34 + parser = argparse.ArgumentParser()
35 + parser.add_argument('list_file')
36 + parser.add_argument('intrinsics_file')
37 + parser.add_argument('output_dir')
38 + parser.add_argument('num_scans', type=int)
39 + args = parser.parse_args()
40 +
41 + with open(args.list_file) as file:
42 + model_list = file.read().splitlines()
43 + intrinsics = np.loadtxt(args.intrinsics_file)
44 + width = int(intrinsics[0, 2] * 2)
45 + height = int(intrinsics[1, 2] * 2)
46 +
47 + for model_id in model_list:
48 + depth_dir = os.path.join(args.output_dir, 'depth', model_id)
49 + pcd_dir = os.path.join(args.output_dir, 'pcd', model_id)
50 + os.makedirs(depth_dir, exist_ok=True)
51 + os.makedirs(pcd_dir, exist_ok=True)
52 + for i in range(args.num_scans):
53 + exr_path = os.path.join(args.output_dir, 'exr', model_id, '%d.exr' % i)
54 + pose_path = os.path.join(args.output_dir, 'pose', model_id, '%d.txt' % i)
55 +
56 + depth = read_exr(exr_path, height, width)
57 +            depth_img = o3d.geometry.Image(np.uint16(depth * 1000))  # store depth in millimeters
58 +            o3d.io.write_image(os.path.join(depth_dir, '%d.png' % i), depth_img)
59 +
60 + pose = np.loadtxt(pose_path)
61 + points = depth2pcd(depth, intrinsics, pose)
62 +            pcd = o3d.geometry.PointCloud()
63 +            pcd.points = o3d.utility.Vector3dVector(points)
64 +            o3d.io.write_point_cloud(os.path.join(pcd_dir, '%d.pcd' % i), pcd)
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import bpy
4 +import mathutils
5 +import numpy as np
6 +import os
7 +import sys
8 +import time
9 +
10 +# Usage: blender -b -P render_depth.py [ShapeNet directory] [model list] [output directory] [num scans per model]
11 +
12 +
13 +def random_pose():
14 + angle_x = np.random.uniform() * 2 * np.pi
15 + angle_y = np.random.uniform() * 2 * np.pi
16 + angle_z = np.random.uniform() * 2 * np.pi
17 + Rx = np.array([[1, 0, 0],
18 + [0, np.cos(angle_x), -np.sin(angle_x)],
19 + [0, np.sin(angle_x), np.cos(angle_x)]])
20 + Ry = np.array([[np.cos(angle_y), 0, np.sin(angle_y)],
21 + [0, 1, 0],
22 + [-np.sin(angle_y), 0, np.cos(angle_y)]])
23 + Rz = np.array([[np.cos(angle_z), -np.sin(angle_z), 0],
24 + [np.sin(angle_z), np.cos(angle_z), 0],
25 + [0, 0, 1]])
26 + R = np.dot(Rz, np.dot(Ry, Rx))
27 + # Set camera pointing to the origin and 1 unit away from the origin
28 + t = np.expand_dims(R[:, 2], 1)
29 + pose = np.concatenate([np.concatenate([R, t], 1), [[0, 0, 0, 1]]], 0)
30 + return pose
31 +
32 +
33 +def setup_blender(width, height, focal_length):
34 + # camera
35 + camera = bpy.data.objects['Camera']
36 + camera.data.angle = np.arctan(width / 2 / focal_length) * 2
37 +
38 + # render layer
39 + scene = bpy.context.scene
40 + scene.render.filepath = 'buffer'
41 + scene.render.image_settings.color_depth = '16'
42 + scene.render.resolution_percentage = 100
43 + scene.render.resolution_x = width
44 + scene.render.resolution_y = height
45 +
46 + # compositor nodes
47 + scene.use_nodes = True
48 + tree = scene.node_tree
49 + rl = tree.nodes.new('CompositorNodeRLayers')
50 + output = tree.nodes.new('CompositorNodeOutputFile')
51 + output.base_path = ''
52 + output.format.file_format = 'OPEN_EXR'
53 + tree.links.new(rl.outputs['Depth'], output.inputs[0])
54 +
55 + # remove default cube
56 + bpy.data.objects['Cube'].select = True
57 + bpy.ops.object.delete()
58 +
59 + return scene, camera, output
60 +
61 +
62 +if __name__ == '__main__':
63 + model_dir = sys.argv[-4]
64 + list_path = sys.argv[-3]
65 + output_dir = sys.argv[-2]
66 + num_scans = int(sys.argv[-1])
67 +
68 + width = 160
69 + height = 120
70 + focal = 100
71 + scene, camera, output = setup_blender(width, height, focal)
72 + intrinsics = np.array([[focal, 0, width / 2], [0, focal, height / 2], [0, 0, 1]])
73 +
74 + with open(os.path.join(list_path)) as file:
75 + model_list = [line.strip() for line in file]
76 + open('blender.log', 'w+').close()
77 + os.system('rm -rf %s' % output_dir)
78 + os.makedirs(output_dir)
79 + np.savetxt(os.path.join(output_dir, 'intrinsics.txt'), intrinsics, '%f')
80 +
81 + for model_id in model_list:
82 + start = time.time()
83 + exr_dir = os.path.join(output_dir, 'exr', model_id)
84 + pose_dir = os.path.join(output_dir, 'pose', model_id)
85 + os.makedirs(exr_dir)
86 + os.makedirs(pose_dir)
87 +
88 + # Redirect output to log file
89 + old_os_out = os.dup(1)
90 + os.close(1)
91 + os.open('blender.log', os.O_WRONLY)
92 +
93 + # Import mesh model
94 + model_path = os.path.join(model_dir, model_id, 'model.obj')
95 + bpy.ops.import_scene.obj(filepath=model_path)
96 +
97 + # Rotate model by 90 degrees around x-axis (z-up => y-up) to match ShapeNet's coordinates
98 + bpy.ops.transform.rotate(value=-np.pi / 2, axis=(1, 0, 0))
99 +
100 + # Render
101 + for i in range(num_scans):
102 + scene.frame_set(i)
103 + pose = random_pose()
104 + camera.matrix_world = mathutils.Matrix(pose)
105 + output.file_slots[0].path = os.path.join(exr_dir, '#.exr')
106 + bpy.ops.render.render(write_still=True)
107 + np.savetxt(os.path.join(pose_dir, '%d.txt' % i), pose, '%f')
108 +
109 + # Clean up
110 + bpy.ops.object.delete()
111 + for m in bpy.data.meshes:
112 + bpy.data.meshes.remove(m)
113 + for m in bpy.data.materials:
114 + m.user_clear()
115 + bpy.data.materials.remove(m)
116 +
117 + # Show time
118 + os.close(1)
119 + os.dup(old_os_out)
120 + os.close(old_os_out)
121 + print('%s done, time=%.4f sec' % (model_id, time.time() - start))
1 +lmdb>=0.9
2 +msgpack==0.5.6
3 +pyarrow==0.10.0
4 +tensorpack==0.8.9
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import argparse
4 +import csv
5 +import importlib
6 +import models
7 +import numpy as np
8 +import os
9 +import tensorflow as tf
10 +import time
11 +from io_util import read_pcd, save_pcd
12 +from tf_util import chamfer, earth_mover
13 +from visu_util import plot_pcd_three_views
14 +
15 +
16 +def test(args):
17 + inputs = tf.placeholder(tf.float32, (1, None, 3))
18 + npts = tf.placeholder(tf.int32, (1,))
19 + gt = tf.placeholder(tf.float32, (1, args.num_gt_points, 3))
20 + model_module = importlib.import_module('.%s' % args.model_type, 'models')
21 + model = model_module.Model(inputs, npts, gt, tf.constant(1.0))
22 +
23 + output = tf.placeholder(tf.float32, (1, args.num_gt_points, 3))
24 + cd_op = chamfer(output, gt)
25 + emd_op = earth_mover(output, gt)
26 +
27 + config = tf.ConfigProto()
28 + config.gpu_options.allow_growth = True
29 + config.allow_soft_placement = True
30 + sess = tf.Session(config=config)
31 +
32 + saver = tf.train.Saver()
33 + saver.restore(sess, args.checkpoint)
34 +
35 + os.makedirs(args.results_dir, exist_ok=True)
36 + csv_file = open(os.path.join(args.results_dir, 'results.csv'), 'w')
37 + writer = csv.writer(csv_file)
38 + writer.writerow(['id', 'cd', 'emd'])
39 +
40 + with open(args.list_path) as file:
41 + model_list = file.read().splitlines()
42 + total_time = 0
43 + total_cd = 0
44 + total_emd = 0
45 + cd_per_cat = {}
46 + emd_per_cat = {}
47 + for i, model_id in enumerate(model_list):
48 + partial = read_pcd(os.path.join(args.data_dir, 'partial', '%s.pcd' % model_id))
49 + complete = read_pcd(os.path.join(args.data_dir, 'complete', '%s.pcd' % model_id))
50 + start = time.time()
51 + completion = sess.run(model.outputs, feed_dict={inputs: [partial], npts: [partial.shape[0]]})
52 + total_time += time.time() - start
53 + cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]})
54 + total_cd += cd
55 + total_emd += emd
56 + writer.writerow([model_id, cd, emd])
57 +
58 + synset_id, model_id = model_id.split('/')
59 + if not cd_per_cat.get(synset_id):
60 + cd_per_cat[synset_id] = []
61 + if not emd_per_cat.get(synset_id):
62 + emd_per_cat[synset_id] = []
63 + cd_per_cat[synset_id].append(cd)
64 + emd_per_cat[synset_id].append(emd)
65 +
66 + if i % args.plot_freq == 0:
67 + os.makedirs(os.path.join(args.results_dir, 'plots', synset_id), exist_ok=True)
68 + plot_path = os.path.join(args.results_dir, 'plots', synset_id, '%s.png' % model_id)
69 + plot_pcd_three_views(plot_path, [partial, completion[0], complete],
70 + ['input', 'output', 'ground truth'],
71 + 'CD %.4f EMD %.4f' % (cd, emd),
72 + [5, 0.5, 0.5])
73 + if args.save_pcd:
74 + os.makedirs(os.path.join(args.results_dir, 'pcds', synset_id), exist_ok=True)
75 +            save_pcd(os.path.join(args.results_dir, 'pcds', synset_id, '%s.pcd' % model_id), completion[0])  # save under the per-synset directory created above
76 + csv_file.close()
77 + sess.close()
78 +
79 + print('Average time: %f' % (total_time / len(model_list)))
80 + print('Average Chamfer distance: %f' % (total_cd / len(model_list)))
81 + print('Average Earth mover distance: %f' % (total_emd / len(model_list)))
82 + print('Chamfer distance per category')
83 + for synset_id in cd_per_cat.keys():
84 + print(synset_id, '%f' % np.mean(cd_per_cat[synset_id]))
85 + print('Earth mover distance per category')
86 + for synset_id in emd_per_cat.keys():
87 + print(synset_id, '%f' % np.mean(emd_per_cat[synset_id]))
88 +
89 +
90 +if __name__ == '__main__':
91 + parser = argparse.ArgumentParser()
92 + parser.add_argument('--list_path', default='data/shapenet/test.list')
93 + parser.add_argument('--data_dir', default='data/shapenet/test')
94 + parser.add_argument('--model_type', default='pcn_emd')
95 + parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd')
96 + parser.add_argument('--results_dir', default='results/shapenet_pcn_emd')
97 + parser.add_argument('--num_gt_points', type=int, default=16384)
98 + parser.add_argument('--plot_freq', type=int, default=100)
99 + parser.add_argument('--save_pcd', action='store_true')
100 + args = parser.parse_args()
101 +
102 + test(args)
1 +{
2 + "cells": [
3 + {
4 + "cell_type": "markdown",
5 + "metadata": {},
6 + "source": [
7 +    "On valid.lmdb: \n",
8 +    "1. check the cd and emd cost function values\n",
9 +    "2. print the results for each sample\n",
10 +    "3. save the pcd files so we can inspect the results directly - (this should be useful for the presentation material)"
11 + ]
12 + },
13 + {
14 + "cell_type": "code",
15 + "execution_count": null,
16 + "metadata": {},
17 + "outputs": [],
18 + "source": [
19 + " num_eval_steps = num_valid // args.batch_size\n",
20 + " total_loss = 0\n",
21 + " total_time = 0\n",
22 + " sess.run(tf.local_variables_initializer())\n",
23 + " for i in range(num_eval_steps):\n",
24 + " start = time.time()\n",
25 + " ids, inputs, npts, gt = next(valid_gen)\n",
26 + " feed_dict = {inputs_pl: inputs,my_inputs_pl:my_inputs, npts_pl: npts, gt_pl: gt, is_training_pl: False}\n",
27 + " loss, _ = sess.run([model.loss, model.update], feed_dict=feed_dict)\n",
28 + " total_loss += loss\n",
29 + " total_time += time.time() - start\n",
30 + " summary = sess.run(valid_summary, feed_dict={is_training_pl: False})\n",
31 + " writer.add_summary(summary, step)\n",
32 + " print(colored('epoch %d step %d loss %.8f - time per batch %.4f' %\n",
33 + " (epoch, step, total_loss / num_eval_steps, total_time / num_eval_steps),\n",
34 + " 'grey', 'on_green'))\n",
35 + " total_time = 0\n",
36 + " if step % args.steps_per_visu == 0:\n",
37 + " all_pcds = sess.run(model.visualize_ops, feed_dict=feed_dict)\n",
38 + " for i in range(0, args.batch_size, args.visu_freq):\n",
39 + " plot_path = os.path.join(args.log_dir, 'plots',\n",
40 + " 'epoch_%d_step_%d_%s.png' % (epoch, step, ids[i]))\n",
41 + " pcds = [x[i] for x in all_pcds]\n",
42 + " plot_pcd_three_views(plot_path, pcds, model.visualize_titles)"
43 + ]
44 + },
45 + {
46 + "cell_type": "code",
47 + "execution_count": 2,
48 + "metadata": {},
49 + "outputs": [
50 + {
51 + "name": "stdout",
52 + "output_type": "stream",
53 + "text": [
54 + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/models/pcn_emd.py:21: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n",
55 + "\n",
56 + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/models/pcn_emd.py:21: The name tf.AUTO_REUSE is deprecated. Please use tf.compat.v1.AUTO_REUSE instead.\n",
57 + "\n",
58 + "WARNING:tensorflow:\n",
59 + "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
60 + "For more information, please see:\n",
61 + " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
62 + " * https://github.com/tensorflow/addons\n",
63 + " * https://github.com/tensorflow/io (for I/O related ops)\n",
64 + "If you depend on functionality not listed there, please file an issue.\n",
65 + "\n",
66 + "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1057: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
67 + "Instructions for updating:\n",
68 + "Please use `layer.__call__` method instead.\n",
69 + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/tf_util.py:71: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.\n",
70 + "\n",
71 + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/tf_util.py:75: The name tf.metrics.mean is deprecated. Please use tf.compat.v1.metrics.mean instead.\n",
72 + "\n",
73 + "INFO:tensorflow:Restoring parameters from ./log/pcn_emd_car_modify/model-26000\n",
74 + "Average time: 0.049774\n",
75 + "Average Chamfer distance: 0.009361\n",
76 + "Average Earth mover distance: 0.051862\n",
77 + "Chamfer distance per category\n",
78 + "02958343 0.009361\n",
79 + "Earth mover distance per category\n",
80 + "02958343 0.051862\n"
81 + ]
82 + }
83 + ],
84 + "source": [
85 + "# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018\n",
86 + "\n",
87 + "import argparse\n",
88 + "import csv\n",
89 + "import importlib\n",
90 + "import models\n",
91 + "import numpy as np\n",
92 + "import os\n",
93 + "import tensorflow as tf\n",
94 + "import time\n",
95 + "\n",
96 + "from tf_util import chamfer, earth_mover\n",
97 + "from visu_util import plot_pcd_three_views\n",
98 + "from data_util import lmdb_dataflow, get_queued_data, resample_pcd\n",
99 + "\n",
100 + "##################\n",
101 +    "# from io_util import read_pcd, save_pcd - just inlined this part here directly, heh\n",
102 + "import numpy as np\n",
103 + "#from open3d import *\n",
104 + "import open3d as o3d\n",
105 + "\n",
106 + "\n",
107 + "def read_pcd(filename):\n",
108 + " pcd = o3d.io.read_point_cloud(filename)\n",
109 + " return np.array(pcd.points)\n",
110 + "\n",
111 + "\n",
112 + "def save_pcd(filename, points):\n",
113 + " pcd = o3d.geometry.PointCloud()\n",
114 + " pcd.points = o3d.utility.Vector3dVector(points)\n",
115 + " o3d.io.write_point_cloud(filename, pcd)\n",
116 + "##################\n",
117 + "\n",
118 + "list_path ='../../data/shapenet/car_test.list' \n",
119 + "data_dir = '../../data/shapenet/test'\n",
120 + "model_type = 'pcn_emd'\n",
121 + "checkpoint = './log/pcn_emd_car_modify'\n",
122 + "results_dir ='results/shapenet_pcn_emd_car_modify'\n",
123 + "num_gt_points = 16384\n",
124 + "plot_freq = 1\n",
125 + "_save_pcd = True\n",
126 + "lmdb_valid = ''\n",
127 + "num_input_points=3000\n",
128 + "\n",
129 + "def test():\n",
130 + " inputs = tf.placeholder(tf.float32, (1, None, 3))\n",
131 + " my_inputs = tf.placeholder(tf.float32, (1, None, 3))\n",
132 + " npts = tf.placeholder(tf.int32, (1,))\n",
133 + " gt = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n",
134 + " model_module = importlib.import_module('.%s' % model_type, 'models')\n",
135 + " model = model_module.Model(inputs,my_inputs, npts, gt, tf.constant(1.0))\n",
136 + "\n",
137 + " output = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n",
138 + " cd_op = chamfer(output, gt)\n",
139 + " emd_op = earth_mover(output, gt)\n",
140 + "\n",
141 + " #####\n",
142 + " df_valid, num_valid = lmdb_dataflow(\n",
143 + " lmdb_valid, 1, num_input_points, num_gt_points, is_training=False)\n",
144 + " valid_gen = df_valid.get_data()\n",
145 + " #####\n",
146 + " \n",
147 + " config = tf.ConfigProto()\n",
148 + " config.gpu_options.allow_growth = True\n",
149 + " config.allow_soft_placement = True\n",
150 + " sess = tf.Session(config=config)\n",
151 + "\n",
152 + " saver = tf.train.Saver()\n",
153 + " saver.restore(sess, tf.train.latest_checkpoint(checkpoint))\n",
154 + "\n",
155 + " os.makedirs(results_dir, exist_ok=True)\n",
156 +    "    csv_file = open(os.path.join(results_dir, 'results.csv'), 'w') # records cd and emd for each item\n",
157 + " writer = csv.writer(csv_file)\n",
158 + " writer.writerow(['id', 'cd', 'emd'])\n",
159 + "\n",
160 + " with open(list_path) as file:\n",
161 + " model_list = file.read().splitlines()\n",
162 + " total_time = 0\n",
163 + " total_cd = 0\n",
164 + " total_emd = 0\n",
165 + " cd_per_cat = {}\n",
166 + " emd_per_cat = {}\n",
167 + " for i, model_id in enumerate(model_list):\n",
168 + " partial = read_pcd(os.path.join(data_dir, 'partial', '%s.pcd' % model_id))\n",
169 + " complete = read_pcd(os.path.join(data_dir, 'complete', '%s.pcd' % model_id))\n",
170 + " start = time.time()\n",
171 + " completion = sess.run(model.outputs, feed_dict={inputs: [partial],my_inputs:[partial], npts: [partial.shape[0]]})\n",
172 + " total_time += time.time() - start\n",
173 + " cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]})\n",
174 + " total_cd += cd\n",
175 + " total_emd += emd\n",
176 +    "            writer.writerow([model_id, cd, emd]) # write per-item cd, emd\n",
177 + "\n",
178 +    "            # collect cd, emd per category\n",
179 + " synset_id, model_id = model_id.split('/')\n",
180 + " if not cd_per_cat.get(synset_id):\n",
181 + " cd_per_cat[synset_id] = []\n",
182 + " if not emd_per_cat.get(synset_id):\n",
183 + " emd_per_cat[synset_id] = []\n",
184 + " cd_per_cat[synset_id].append(cd)\n",
185 + " emd_per_cat[synset_id].append(emd)\n",
186 + " \n",
187 +    "            # plot the input, ground truth, and output from three views\n",
188 + " if i % plot_freq == 0:\n",
189 + " os.makedirs(os.path.join(results_dir, 'plots', synset_id), exist_ok=True)\n",
190 + " plot_path = os.path.join(results_dir, 'plots', synset_id, '%s.png' % model_id)\n",
191 + " plot_pcd_three_views(plot_path, [partial, completion[0], complete],\n",
192 + " ['input', 'output', 'ground truth'],\n",
193 + " 'CD %.4f EMD %.4f' % (cd, emd),\n",
194 + " [5, 0.5, 0.5])\n",
195 + " if _save_pcd:\n",
196 + " os.makedirs(os.path.join(results_dir, 'pcds', synset_id), exist_ok=True)\n",
197 +    "            save_pcd(os.path.join(results_dir, 'pcds', synset_id, '%s.pcd' % model_id), completion[0])\n",
198 + " csv_file.close()\n",
199 + " sess.close()\n",
200 + "\n",
201 + " print('Average time: %f' % (total_time / len(model_list)))\n",
202 + " print('Average Chamfer distance: %f' % (total_cd / len(model_list)))\n",
203 + " print('Average Earth mover distance: %f' % (total_emd / len(model_list)))\n",
204 + " print('Chamfer distance per category')\n",
205 + " for synset_id in cd_per_cat.keys():\n",
206 + " print(synset_id, '%f' % np.mean(cd_per_cat[synset_id]))\n",
207 + " print('Earth mover distance per category')\n",
208 + " for synset_id in emd_per_cat.keys():\n",
209 + " print(synset_id, '%f' % np.mean(emd_per_cat[synset_id]))\n",
210 + "'''\n",
211 + "\n",
212 + "if __name__ == '__main__':\n",
213 + " parser = argparse.ArgumentParser()\n",
214 + " parser.add_argument('--list_path', default='data/shapenet/test.list')\n",
215 + " parser.add_argument('--data_dir', default='data/shapenet/test')\n",
216 + " parser.add_argument('--model_type', default='pcn_emd')\n",
217 + " parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd')\n",
218 + " parser.add_argument('--results_dir', default='results/shapenet_pcn_emd')\n",
219 + " parser.add_argument('--num_gt_points', type=int, default=16384)\n",
220 + " parser.add_argument('--plot_freq', type=int, default=100)\n",
221 + " parser.add_argument('--save_pcd', action='store_true')\n",
222 + " args = parser.parse_args()\n",
223 + "\n",
224 + " test(args)\n",
225 + "'''\n",
226 + "test()"
227 + ]
228 + },
229 + {
230 + "cell_type": "code",
231 + "execution_count": 9,
232 + "metadata": {},
233 + "outputs": [
234 + {
235 + "ename": "TypeError",
236 + "evalue": "read_point_cloud(): incompatible function arguments. The following argument types are supported:\n 1. (filename: str, format: str = 'auto', remove_nan_points: bool = True, remove_infinite_points: bool = True, print_progress: bool = False) -> open3d.open3d.geometry.PointCloud\n\nInvoked with: ",
237 + "output_type": "error",
238 + "traceback": [
239 + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
240 + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
241 + "\u001b[0;32m<ipython-input-9-77d1fb1ef881>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mopen3d\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_point_cloud\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
242 + "\u001b[0;31mTypeError\u001b[0m: read_point_cloud(): incompatible function arguments. The following argument types are supported:\n 1. (filename: str, format: str = 'auto', remove_nan_points: bool = True, remove_infinite_points: bool = True, print_progress: bool = False) -> open3d.open3d.geometry.PointCloud\n\nInvoked with: "
243 + ]
244 + }
245 + ],
246 + "source": [
247 + "from open3d import *\n",
248 + "io.read_point_cloud()"
249 + ]
250 + }
251 + ],
252 + "metadata": {
253 + "kernelspec": {
254 + "display_name": "Python 3",
255 + "language": "python",
256 + "name": "python3"
257 + },
258 + "language_info": {
259 + "codemirror_mode": {
260 + "name": "ipython",
261 + "version": 3
262 + },
263 + "file_extension": ".py",
264 + "mimetype": "text/x-python",
265 + "name": "python",
266 + "nbconvert_exporter": "python",
267 + "pygments_lexer": "ipython3",
268 + "version": "3.6.9"
269 + }
270 + },
271 + "nbformat": 4,
272 + "nbformat_minor": 4
273 +}
1 +{
2 + "cells": [
3 + {
4 + "cell_type": "code",
5 + "execution_count": 15,
6 + "metadata": {},
7 + "outputs": [
8 + {
9 + "name": "stdout",
10 + "output_type": "stream",
11 + "text": [
12 + "\u001b[32m[0603 12:20:31 @format.py:92]\u001b[0m Found 100 entries in ../../../data/shapenet-car/valid.lmdb\n",
13 + "INFO:tensorflow:Restoring parameters from ./log/pcn_addbeta_lr/model-81500\n",
14 + "Average Chamfer distance: 0.009029\n",
15 + "Average Earth mover distance: 0.053403\n"
16 + ]
17 + }
18 + ],
19 + "source": [
20 + "# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018\n",
21 + "\n",
22 + "import argparse\n",
23 + "import csv\n",
24 + "import importlib\n",
25 + "import models\n",
26 + "import numpy as np\n",
27 + "import os\n",
28 + "import tensorflow as tf\n",
29 + "import time\n",
30 + "\n",
31 + "from tf_util import chamfer, earth_mover\n",
32 + "from visu_util import plot_pcd_three_views\n",
33 + "from data_util import lmdb_dataflow, get_queued_data\n",
34 + "\n",
35 + "##################\n",
36 +    "# from io_util import read_pcd, save_pcd - just inlined this part here directly, heh\n",
37 + "import numpy as np\n",
38 + "#from open3d import *\n",
39 + "import open3d as o3d\n",
40 + "\n",
41 + "\n",
42 + "def read_pcd(filename):\n",
43 + " pcd = o3d.io.read_point_cloud(filename)\n",
44 + " return np.array(pcd.points)\n",
45 + "\n",
46 + "\n",
47 + "def save_pcd(filename, points):\n",
48 + " pcd = o3d.geometry.PointCloud()\n",
49 + " pcd.points = o3d.utility.Vector3dVector(points)\n",
50 + " o3d.io.write_point_cloud(filename, pcd)\n",
51 + "##################\n",
52 + "\n",
53 + "#list_path ='../../data/shapenet/car_test.list' \n",
54 + "#data_dir = '../../data/shapenet/test'\n",
55 + "model_type = 'pcn_emd'\n",
56 + "checkpoint = './log/pcn_addbeta_lr'\n",
57 + "results_dir ='results/pcn_addbeta_lr'\n",
58 + "num_gt_points = 16384\n",
59 + "plot_freq = 1\n",
60 + "_save_pcd = True\n",
61 + "lmdb_valid='../../../data/shapenet-car/valid.lmdb'\n",
62 + "num_input_points=3000\n",
63 + "\n",
64 + "def test():\n",
65 + " inputs = tf.placeholder(tf.float32, (1, None, 3))\n",
66 + " my_inputs = tf.placeholder(tf.float32, (1, None, 3))\n",
67 + " npts = tf.placeholder(tf.int32, (1,))\n",
68 + " gt = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n",
69 + " model_module = importlib.import_module('.%s' % model_type, 'models')\n",
70 + " model = model_module.Model(inputs,my_inputs, npts, gt, tf.constant(1.0),tf.constant(1.0))\n",
71 + "\n",
72 + " output = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n",
73 + " cd_op = chamfer(output, gt)\n",
74 + " emd_op = earth_mover(output, gt)\n",
75 + "\n",
76 + " ###\n",
77 + " df_valid, num_valid = lmdb_dataflow(\n",
78 + " lmdb_valid, 1, num_input_points, num_gt_points, is_training=False)\n",
79 + " valid_gen = df_valid.get_data()\n",
80 + " \n",
81 + " config = tf.ConfigProto()\n",
82 + " config.gpu_options.allow_growth = True\n",
83 + " config.allow_soft_placement = True\n",
84 + " sess = tf.Session(config=config)\n",
85 + "\n",
86 + " saver = tf.train.Saver()\n",
87 + " saver.restore(sess, tf.train.latest_checkpoint(checkpoint))\n",
88 + "\n",
89 + " os.makedirs(results_dir, exist_ok=True)\n",
90 +    "    csv_file = open(os.path.join(results_dir, 'results.csv'), 'w') # records cd and emd for each item\n",
91 + " writer = csv.writer(csv_file)\n",
92 + " writer.writerow(['id', 'cd', 'emd'])\n",
93 + "\n",
94 + " ###\n",
95 + " total_time = 0\n",
96 + " total_cd = 0\n",
97 + " total_emd = 0\n",
98 + " for i in range(num_valid):\n",
99 + " ids,iinputs,inpts,igt = next(valid_gen)\n",
100 + "\n",
101 + " completion = sess.run(model.outputs, feed_dict={inputs:iinputs, my_inputs:iinputs, npts:inpts})\n",
102 + " cd,emd = sess.run([cd_op,emd_op],feed_dict={output: completion, gt:igt})\n",
103 + " total_cd +=cd\n",
104 + " total_emd +=emd\n",
105 +    "        writer.writerow([ids,cd,emd]) # per-item cd, emd\n",
106 + " \n",
107 + " if i % plot_freq == 0:\n",
108 + " os.makedirs(os.path.join(results_dir, 'plots'), exist_ok=True)\n",
109 + " plot_path = os.path.join(results_dir, 'plots', '%s.png' % ids)\n",
110 + "# print(iinputs.shape,completion[0].shape,igt.shape)###\n",
111 + "\n",
112 + " plot_pcd_three_views(plot_path, [iinputs.reshape(iinputs.shape[1],iinputs.shape[2]), completion[0], igt.reshape(igt.shape[1],igt.shape[2])],\n",
113 + " ['input', 'output', 'ground truth'],\n",
114 + " 'CD %.4f EMD %.4f' % (cd, emd),\n",
115 + " [5, 0.5, 0.5])\n",
116 + " if _save_pcd:\n",
117 + " os.makedirs(os.path.join(results_dir, 'pcds'), exist_ok=True)\n",
118 + " save_pcd(os.path.join(results_dir, 'pcds', '%s.pcd' % ids), completion[0])\n",
119 + " csv_file.close()\n",
120 + " sess.close()\n",
121 + "\n",
122 + " print('Average Chamfer distance: %f' % (total_cd / num_valid))\n",
123 + " print('Average Earth mover distance: %f' % (total_emd / num_valid))\n",
124 + "######################\n",
125 + "''' \n",
126 + " with open(list_path) as file:\n",
127 + " model_list = file.read().splitlines()\n",
128 + " total_time = 0\n",
129 + " total_cd = 0\n",
130 + " total_emd = 0\n",
131 + " cd_per_cat = {}\n",
132 + " emd_per_cat = {}\n",
133 + " for i, model_id in enumerate(model_list):\n",
134 + " partial = read_pcd(os.path.join(data_dir, 'partial', '%s.pcd' % model_id))\n",
135 + " complete = read_pcd(os.path.join(data_dir, 'complete', '%s.pcd' % model_id))\n",
136 + " start = time.time()\n",
137 + " completion = sess.run(model.outputs, feed_dict={inputs: [partial],my_inputs:[partial], npts: [partial.shape[0]]})\n",
138 + " total_time += time.time() - start\n",
139 + " cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]})\n",
140 + " total_cd += cd\n",
141 + " total_emd += emd\n",
142 +    "        writer.writerow([model_id, cd, emd]) # write per-item cd, emd\n",
143 + "\n",
144 +    "        # collect cd, emd per category\n",
145 + " synset_id, model_id = model_id.split('/')\n",
146 + " if not cd_per_cat.get(synset_id):\n",
147 + " cd_per_cat[synset_id] = []\n",
148 + " if not emd_per_cat.get(synset_id):\n",
149 + " emd_per_cat[synset_id] = []\n",
150 + " cd_per_cat[synset_id].append(cd)\n",
151 + " emd_per_cat[synset_id].append(emd)\n",
152 + " \n",
153 +    "        # plot the input, ground truth, and output from three views\n",
154 + " if i % plot_freq == 0:\n",
155 + " os.makedirs(os.path.join(results_dir, 'plots', synset_id), exist_ok=True)\n",
156 + " plot_path = os.path.join(results_dir, 'plots', synset_id, '%s.png' % model_id)\n",
157 + " plot_pcd_three_views(plot_path, [partial, completion[0], complete],\n",
158 + " ['input', 'output', 'ground truth'],\n",
159 + " 'CD %.4f EMD %.4f' % (cd, emd),\n",
160 + " [5, 0.5, 0.5])\n",
161 + " if _save_pcd:\n",
162 + " os.makedirs(os.path.join(results_dir, 'pcds', synset_id), exist_ok=True)\n",
163 + " save_pcd(os.path.join(results_dir, 'pcds', '%s.pcd' % model_id), completion[0])\n",
164 + " csv_file.close()\n",
165 + " sess.close()\n",
166 + "\n",
167 + " print('Average time: %f' % (total_time / len(model_list)))\n",
168 + " print('Average Chamfer distance: %f' % (total_cd / len(model_list)))\n",
169 + " print('Average Earth mover distance: %f' % (total_emd / len(model_list)))\n",
170 + " print('Chamfer distance per category')\n",
171 + " for synset_id in cd_per_cat.keys():\n",
172 + " print(synset_id, '%f' % np.mean(cd_per_cat[synset_id]))\n",
173 + " print('Earth mover distance per category')\n",
174 + " for synset_id in emd_per_cat.keys():\n",
175 + " print(synset_id, '%f' % np.mean(emd_per_cat[synset_id]))\n",
176 + " '''\n",
177 + "'''\n",
178 + "\n",
179 + "if __name__ == '__main__':\n",
180 + " parser = argparse.ArgumentParser()\n",
181 + " parser.add_argument('--list_path', default='data/shapenet/test.list')\n",
182 + " parser.add_argument('--data_dir', default='data/shapenet/test')\n",
183 + " parser.add_argument('--model_type', default='pcn_emd')\n",
184 + " parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd')\n",
185 + " parser.add_argument('--results_dir', default='results/shapenet_pcn_emd')\n",
186 + " parser.add_argument('--num_gt_points', type=int, default=16384)\n",
187 + " parser.add_argument('--plot_freq', type=int, default=100)\n",
188 + " parser.add_argument('--save_pcd', action='store_true')\n",
189 + " args = parser.parse_args()\n",
190 + "\n",
191 + " test(args)\n",
192 + "'''\n",
193 + "test()"
194 + ]
195 + },
196 + {
197 + "cell_type": "code",
198 + "execution_count": null,
199 + "metadata": {},
200 + "outputs": [],
201 + "source": []
202 + }
203 + ],
204 + "metadata": {
205 + "kernelspec": {
206 + "display_name": "Python 3",
207 + "language": "python",
208 + "name": "python3"
209 + },
210 + "language_info": {
211 + "codemirror_mode": {
212 + "name": "ipython",
213 + "version": 3
214 + },
215 + "file_extension": ".py",
216 + "mimetype": "text/x-python",
217 + "name": "python",
218 + "nbconvert_exporter": "python",
219 + "pygments_lexer": "ipython3",
220 + "version": "3.6.9"
221 + }
222 + },
223 + "nbformat": 4,
224 + "nbformat_minor": 4
225 +}
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +import tensorflow as tf
4 +from pc_distance import tf_nndistance, tf_approxmatch
5 +
6 +
7 +def mlp(features, layer_dims, bn=None, bn_params=None):
8 + for i, num_outputs in enumerate(layer_dims[:-1]):
9 + features = tf.contrib.layers.fully_connected(
10 + features, num_outputs,
11 + normalizer_fn=bn,
12 + normalizer_params=bn_params,
13 + scope='fc_%d' % i)
14 + outputs = tf.contrib.layers.fully_connected(
15 + features, layer_dims[-1],
16 + activation_fn=None,
17 + scope='fc_%d' % (len(layer_dims) - 1))
18 + return outputs
19 +
20 +
21 +def mlp_conv(inputs, layer_dims, bn=None, bn_params=None):
22 + for i, num_out_channel in enumerate(layer_dims[:-1]):
23 + inputs = tf.contrib.layers.conv1d(
24 + inputs, num_out_channel,
25 + kernel_size=1,
26 + normalizer_fn=bn,
27 + normalizer_params=bn_params,
28 + scope='conv_%d' % i)
29 + outputs = tf.contrib.layers.conv1d(
30 + inputs, layer_dims[-1],
31 + kernel_size=1,
32 + activation_fn=None,
33 + scope='conv_%d' % (len(layer_dims) - 1))
34 + return outputs
35 +
36 +
37 +def point_maxpool(inputs, npts, keepdims=False):
38 + outputs = [tf.reduce_max(f, axis=1, keepdims=keepdims)
39 + for f in tf.split(inputs, npts, axis=1)]
40 + return tf.concat(outputs, axis=0)
41 +
42 +
43 +def point_unpool(inputs, npts):
44 + inputs = tf.split(inputs, inputs.shape[0], axis=0)
45 + outputs = [tf.tile(f, [1, npts[i], 1]) for i,f in enumerate(inputs)]
46 + return tf.concat(outputs, axis=1)
47 +
48 +
49 +def chamfer(pcd1, pcd2):
50 + dist1, _, dist2, _ = tf_nndistance.nn_distance(pcd1, pcd2)
51 + dist1 = tf.reduce_mean(tf.sqrt(dist1))
52 + dist2 = tf.reduce_mean(tf.sqrt(dist2))
53 + return (dist1 + dist2) / 2
54 +
55 +def my_chamfer(pcd1, pcd2):  # one-directional Chamfer: mean nearest-neighbor distance from pcd1 to pcd2
56 + dist1, _, dist2, _ = tf_nndistance.nn_distance(pcd1, pcd2)
57 + dist1 = tf.reduce_mean(tf.sqrt(dist1))
58 + #dist2 = tf.reduce_mean(tf.sqrt(dist2))
59 + return dist1
60 +
61 +
62 +def earth_mover(pcd1, pcd2):
63 + assert pcd1.shape[1] == pcd2.shape[1]
64 + num_points = tf.cast(pcd1.shape[1], tf.float32)
65 + match = tf_approxmatch.approx_match(pcd1, pcd2)
66 + cost = tf_approxmatch.match_cost(pcd1, pcd2, match)
67 + return tf.reduce_mean(cost / num_points)
68 +
69 +
70 +def add_train_summary(name, value):
71 + tf.summary.scalar(name, value, collections=['train_summary'])
72 +
73 +
74 +def add_valid_summary(name, value):
75 + avg, update = tf.metrics.mean(value)
76 + tf.summary.scalar(name, avg, collections=['valid_summary'])
77 + return update
1 +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018
2 +
3 +from matplotlib import pyplot as plt
4 +from mpl_toolkits.mplot3d import Axes3D
5 +
6 +
7 +def plot_pcd_three_views(filename, pcds, titles, suptitle='', sizes=None, cmap='Reds', zdir='y',
8 + xlim=(-0.3, 0.3), ylim=(-0.3, 0.3), zlim=(-0.3, 0.3)):
9 + if sizes is None:
10 + sizes = [0.5 for i in range(len(pcds))]
11 + fig = plt.figure(figsize=(len(pcds) * 3, 9))
12 + for i in range(3):
13 + elev = 30
14 + azim = -45 + 90 * i
15 + for j, (pcd, size) in enumerate(zip(pcds, sizes)):
16 + color = pcd[:, 0]
17 + ax = fig.add_subplot(3, len(pcds), i * len(pcds) + j + 1, projection='3d')
18 + ax.view_init(elev, azim)
19 + ax.scatter(pcd[:, 0], pcd[:, 1], pcd[:, 2], zdir=zdir, c=color, s=size, cmap=cmap, vmin=-1, vmax=0.5)
20 + ax.set_title(titles[j])
21 + ax.set_axis_off()
22 + ax.set_xlim(xlim)
23 + ax.set_ylim(ylim)
24 + ax.set_zlim(zlim)
25 + plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.9, wspace=0.1, hspace=0.1)
26 + plt.suptitle(suptitle)
27 + fig.savefig(filename)
28 + plt.close(fig)