Showing
26 changed files
with
2526 additions
and
0 deletions
code/pcn_modify/pcn/data_util.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import numpy as np | ||
4 | +import tensorflow as tf | ||
5 | +from tensorpack import dataflow | ||
6 | + | ||
7 | + | ||
8 | +def resample_pcd(pcd, n): | ||
9 | + """Drop or duplicate points so that pcd has exactly n points""" | ||
10 | + idx = np.random.permutation(pcd.shape[0]) | ||
11 | + if idx.shape[0] < n: | ||
12 | + idx = np.concatenate([idx, np.random.randint(pcd.shape[0], size=n-pcd.shape[0])]) | ||
13 | + return pcd[idx[:n]] | ||
14 | + | ||
15 | + | ||
16 | +class PreprocessData(dataflow.ProxyDataFlow): | ||
17 | + def __init__(self, ds, input_size, output_size): | ||
18 | + super(PreprocessData, self).__init__(ds) | ||
19 | + self.input_size = input_size | ||
20 | + self.output_size = output_size | ||
21 | + | ||
22 | + def get_data(self): | ||
23 | + for id, input, gt in self.ds.get_data(): | ||
24 | + input = resample_pcd(input, self.input_size) | ||
25 | + gt = resample_pcd(gt, self.output_size) | ||
26 | + yield id, input, gt | ||
27 | + | ||
28 | + | ||
29 | +class BatchData(dataflow.ProxyDataFlow): | ||
30 | + def __init__(self, ds, batch_size, input_size, gt_size, remainder=False, use_list=False): | ||
31 | + super(BatchData, self).__init__(ds) | ||
32 | + self.batch_size = batch_size | ||
33 | + self.input_size = input_size | ||
34 | + self.gt_size = gt_size | ||
35 | + self.remainder = remainder | ||
36 | + self.use_list = use_list | ||
37 | + | ||
38 | + def __len__(self): | ||
39 | + ds_size = len(self.ds) | ||
40 | + div = ds_size // self.batch_size | ||
41 | + rem = ds_size % self.batch_size | ||
42 | + if rem == 0: | ||
43 | + return div | ||
44 | + return div + int(self.remainder) | ||
45 | + | ||
46 | + def __iter__(self): | ||
47 | + holder = [] | ||
48 | + for data in self.ds: | ||
49 | + holder.append(data) | ||
50 | + if len(holder) == self.batch_size: | ||
51 | + yield self._aggregate_batch(holder, self.use_list) | ||
52 | + del holder[:] | ||
53 | + if self.remainder and len(holder) > 0: | ||
54 | + yield self._aggregate_batch(holder, self.use_list) | ||
55 | + | ||
56 | + def _aggregate_batch(self, data_holder, use_list=False): | ||
57 | + ''' Concatenate input points along the 0-th dimension | ||
58 | + Stack all other data along the 0-th dimension | ||
59 | + ''' | ||
60 | + ids = np.stack([x[0] for x in data_holder]) | ||
61 | + inputs = [resample_pcd(x[1], self.input_size) if x[1].shape[0] > self.input_size else x[1] | ||
62 | + for x in data_holder] | ||
63 | + inputs = np.expand_dims(np.concatenate([x for x in inputs]), 0).astype(np.float32) | ||
64 | + npts = np.stack([x[1].shape[0] if x[1].shape[0] < self.input_size else self.input_size | ||
65 | + for x in data_holder]).astype(np.int32) | ||
66 | + gts = np.stack([resample_pcd(x[2], self.gt_size) for x in data_holder]).astype(np.float32) | ||
67 | + return ids, inputs, npts, gts | ||
68 | + | ||
69 | + | ||
70 | +def lmdb_dataflow(lmdb_path, batch_size, input_size, output_size, is_training, test_speed=False): | ||
71 | + df = dataflow.LMDBSerializer.load(lmdb_path, shuffle=False) | ||
72 | + size = df.size() | ||
73 | + if is_training: | ||
74 | + df = dataflow.LocallyShuffleData(df, buffer_size=2000) | ||
75 | + df = dataflow.PrefetchData(df, nr_prefetch=500, nr_proc=1) | ||
76 | + df = BatchData(df, batch_size, input_size, output_size) | ||
77 | + if is_training: | ||
78 | + df = dataflow.PrefetchDataZMQ(df, nr_proc=8) | ||
79 | + df = dataflow.RepeatedData(df, -1) | ||
80 | + if test_speed: | ||
81 | + dataflow.TestDataSpeed(df, size=1000).start() | ||
82 | + df.reset_state() | ||
83 | + return df, size | ||
84 | + | ||
85 | + | ||
86 | +def get_queued_data(generator, dtypes, shapes, queue_capacity=10): | ||
87 | + assert len(dtypes) == len(shapes), 'dtypes and shapes must have the same length' | ||
88 | + queue = tf.FIFOQueue(queue_capacity, dtypes, shapes) | ||
89 | + placeholders = [tf.placeholder(dtype, shape) for dtype, shape in zip(dtypes, shapes)] | ||
90 | + enqueue_op = queue.enqueue(placeholders) | ||
91 | + close_op = queue.close(cancel_pending_enqueues=True) | ||
92 | + feed_fn = lambda: {placeholder: value for placeholder, value in zip(placeholders, next(generator))} | ||
93 | + queue_runner = tf.contrib.training.FeedingQueueRunner(queue, [enqueue_op], close_op, feed_fns=[feed_fn]) | ||
94 | + tf.train.add_queue_runner(queue_runner) | ||
95 | + return queue.dequeue() |
code/pcn_modify/pcn/io_util.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import numpy as np | ||
4 | +#from open3d import * | ||
5 | +import open3d as o3d | ||
6 | + | ||
7 | + | ||
8 | +def read_pcd(filename): | ||
9 | + pcd = o3d.io.read_point_cloud(filename) | ||
10 | + return np.array(pcd.points) | ||
11 | + | ||
12 | + | ||
13 | +def save_pcd(filename, points): | ||
14 | + pcd = o3d.geometry.PointCloud() | ||
15 | + pcd.points = o3d.utility.Vector3dVector(points) | ||
16 | + o3d.io.write_point_cloud(filename, pcd) |
code/pcn_modify/pcn/lmdb_writer.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import argparse | ||
4 | +import os | ||
5 | +from io_util import read_pcd | ||
6 | +from tensorpack import DataFlow, dataflow | ||
7 | + | ||
8 | + | ||
9 | +class pcd_df(DataFlow): | ||
10 | + def __init__(self, model_list, num_scans, partial_dir, complete_dir): | ||
11 | + self.model_list = model_list | ||
12 | + self.num_scans = num_scans | ||
13 | + self.partial_dir = partial_dir | ||
14 | + self.complete_dir = complete_dir | ||
15 | + | ||
16 | + def size(self): | ||
17 | + return len(self.model_list) * self.num_scans | ||
18 | + | ||
19 | + def get_data(self): | ||
20 | + for model_id in model_list: | ||
21 | + complete = read_pcd(os.path.join(self.complete_dir, '%s.pcd' % model_id)) | ||
22 | + for i in range(self.num_scans): | ||
23 | + partial = read_pcd(os.path.join(self.partial_dir, model_id, '%d.pcd' % i)) | ||
24 | + yield model_id.replace('/', '_'), partial, complete | ||
25 | + | ||
26 | + | ||
27 | +if __name__ == '__main__': | ||
28 | + parser = argparse.ArgumentParser() | ||
29 | + parser.add_argument('--list_path') | ||
30 | + parser.add_argument('--num_scans', type=int) | ||
31 | + parser.add_argument('--partial_dir') | ||
32 | + parser.add_argument('--complete_dir') | ||
33 | + parser.add_argument('--output_path') | ||
34 | + args = parser.parse_args() | ||
35 | + | ||
36 | + with open(args.list_path) as file: | ||
37 | + model_list = file.read().splitlines() | ||
38 | + df = pcd_df(model_list, args.num_scans, args.partial_dir, args.complete_dir) | ||
39 | + if os.path.exists(args.output_path): | ||
40 | + os.system('rm %s' % args.output_path) | ||
41 | + dataflow.LMDBSerializer.save(df, args.output_path) |
code/pcn_modify/pcn/models/__init__.py
0 → 100644
File mode changed
code/pcn_modify/pcn/models/fc.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import tensorflow as tf | ||
4 | +from tf_util import mlp, mlp_conv, chamfer, add_train_summary, add_valid_summary | ||
5 | + | ||
6 | + | ||
7 | +class Model: | ||
8 | + def __init__(self, inputs, gt, alpha): | ||
9 | + self.num_output_points = 16384 | ||
10 | + self.features = self.create_encoder(inputs) | ||
11 | + self.outputs = self.create_decoder(self.features) | ||
12 | + self.loss, self.update = self.create_loss(self.outputs, gt) | ||
13 | + self.visualize_ops = [inputs[0], self.outputs[0], gt[0]] | ||
14 | + self.visualize_titles = ['input', 'output', 'ground truth'] | ||
15 | + | ||
16 | + def create_encoder(self, inputs): | ||
17 | + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE): | ||
18 | + features = mlp_conv(inputs, [128, 256]) | ||
19 | + features_global = tf.reduce_max(features, axis=1, keep_dims=True, name='maxpool_0') | ||
20 | + features = tf.concat([features, tf.tile(features_global, [1, tf.shape(inputs)[1], 1])], axis=2) | ||
21 | + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE): | ||
22 | + features = mlp_conv(features, [512, 1024]) | ||
23 | + features = tf.reduce_max(features, axis=1, name='maxpool_1') | ||
24 | + return features | ||
25 | + | ||
26 | + def create_decoder(self, features): | ||
27 | + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE): | ||
28 | + outputs = mlp(features, [1024, 1024, self.num_output_points * 3]) | ||
29 | + outputs = tf.reshape(outputs, [-1, self.num_output_points, 3]) | ||
30 | + return outputs | ||
31 | + | ||
32 | + def create_loss(self, outputs, gt): | ||
33 | + loss = chamfer(outputs, gt) | ||
34 | + add_train_summary('train/loss', loss) | ||
35 | + update_loss = add_valid_summary('valid/loss', loss) | ||
36 | + return loss, update_loss |
code/pcn_modify/pcn/models/folding.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import tensorflow as tf | ||
4 | +from tf_util import mlp_conv, chamfer, add_train_summary, add_valid_summary | ||
5 | + | ||
6 | + | ||
7 | +class Model: | ||
8 | + def __init__(self, inputs, gt, alpha): | ||
9 | + self.grid_size = 128 | ||
10 | + self.grid_scale = 0.5 | ||
11 | + self.num_output_points = 16384 | ||
12 | + self.features = self.create_encoder(inputs) | ||
13 | + fold1, fold2 = self.create_decoder(self.features) | ||
14 | + self.outputs = fold2 | ||
15 | + self.loss, self.update = self.create_loss(self.outputs, gt) | ||
16 | + self.visualize_ops = [inputs[0], fold1[0], fold2[0], gt[0]] | ||
17 | + self.visualize_titles = ['input', '1st folding', '2nd folding', 'ground truth'] | ||
18 | + | ||
19 | + def create_encoder(self, inputs): | ||
20 | + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE): | ||
21 | + features = mlp_conv(inputs, [128, 256]) | ||
22 | + features_global = tf.reduce_max(features, axis=1, keep_dims=True, name='maxpool_0') | ||
23 | + features = tf.concat([features, tf.tile(features_global, [1, tf.shape(inputs)[1], 1])], axis=2) | ||
24 | + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE): | ||
25 | + features = mlp_conv(features, [512, 1024]) | ||
26 | + features = tf.reduce_max(features, axis=1, name='maxpool_1') | ||
27 | + return features | ||
28 | + | ||
29 | + def create_decoder(self, features): | ||
30 | + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE): | ||
31 | + x = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size) | ||
32 | + y = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size) | ||
33 | + grid = tf.meshgrid(x, y) | ||
34 | + grid = tf.reshape(tf.stack(grid, axis=2), [-1, 2]) | ||
35 | + grid = tf.tile(tf.expand_dims(grid, 0), [features.shape[0], 1, 1]) | ||
36 | + features = tf.tile(tf.expand_dims(features, 1), [1, self.num_output_points, 1]) | ||
37 | + with tf.variable_scope('folding_1'): | ||
38 | + fold1 = mlp_conv(tf.concat([features, grid], axis=2), [512, 512, 3]) | ||
39 | + with tf.variable_scope('folding_2'): | ||
40 | + fold2 = mlp_conv(tf.concat([features, fold1], axis=2), [512, 512, 3]) | ||
41 | + return fold1, fold2 | ||
42 | + | ||
43 | + def create_loss(self, outputs, gt): | ||
44 | + loss = chamfer(outputs, gt) | ||
45 | + add_train_summary('train/loss', loss) | ||
46 | + update_loss = add_valid_summary('valid/loss', loss) | ||
47 | + return loss, update_loss |
code/pcn_modify/pcn/models/pcn_cd.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import tensorflow as tf | ||
4 | +from tf_util import * | ||
5 | + | ||
6 | + | ||
7 | +class Model: | ||
8 | + def __init__(self, inputs, npts, gt, alpha): | ||
9 | + self.num_coarse = 1024 | ||
10 | + self.grid_size = 4 | ||
11 | + self.grid_scale = 0.05 | ||
12 | + self.num_fine = self.grid_size ** 2 * self.num_coarse | ||
13 | + self.features = self.create_encoder(inputs, npts) | ||
14 | + self.coarse, self.fine = self.create_decoder(self.features) | ||
15 | + self.loss, self.update = self.create_loss(self.coarse, self.fine, gt, alpha) | ||
16 | + self.outputs = self.fine | ||
17 | + self.visualize_ops = [tf.split(inputs[0], npts, axis=0), self.coarse, self.fine, gt] | ||
18 | + self.visualize_titles = ['input', 'coarse output', 'fine output', 'ground truth'] | ||
19 | + | ||
20 | + def create_encoder(self, inputs, npts): | ||
21 | + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE): | ||
22 | + features = mlp_conv(inputs, [128, 256]) | ||
23 | + features_global = point_unpool(point_maxpool(features, npts, keepdims=True), npts) | ||
24 | + features = tf.concat([features, features_global], axis=2) | ||
25 | + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE): | ||
26 | + features = mlp_conv(features, [512, 1024]) | ||
27 | + features = point_maxpool(features, npts) | ||
28 | + return features | ||
29 | + | ||
30 | + def create_decoder(self, features): | ||
31 | + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE): | ||
32 | + coarse = mlp(features, [1024, 1024, self.num_coarse * 3]) | ||
33 | + coarse = tf.reshape(coarse, [-1, self.num_coarse, 3]) | ||
34 | + | ||
35 | + with tf.variable_scope('folding', reuse=tf.AUTO_REUSE): | ||
36 | + grid = tf.meshgrid(tf.linspace(-0.05, 0.05, self.grid_size), tf.linspace(-0.05, 0.05, self.grid_size)) | ||
37 | + grid = tf.expand_dims(tf.reshape(tf.stack(grid, axis=2), [-1, 2]), 0) | ||
38 | + grid_feat = tf.tile(grid, [features.shape[0], self.num_coarse, 1]) | ||
39 | + | ||
40 | + point_feat = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1]) | ||
41 | + point_feat = tf.reshape(point_feat, [-1, self.num_fine, 3]) | ||
42 | + | ||
43 | + global_feat = tf.tile(tf.expand_dims(features, 1), [1, self.num_fine, 1]) | ||
44 | + | ||
45 | + feat = tf.concat([grid_feat, point_feat, global_feat], axis=2) | ||
46 | + | ||
47 | + center = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1]) | ||
48 | + center = tf.reshape(center, [-1, self.num_fine, 3]) | ||
49 | + | ||
50 | + fine = mlp_conv(feat, [512, 512, 3]) + center | ||
51 | + return coarse, fine | ||
52 | + | ||
53 | + def create_loss(self, coarse, fine, gt, alpha): | ||
54 | + loss_coarse = chamfer(coarse, gt) | ||
55 | + add_train_summary('train/coarse_loss', loss_coarse) | ||
56 | + update_coarse = add_valid_summary('valid/coarse_loss', loss_coarse) | ||
57 | + | ||
58 | + loss_fine = chamfer(fine, gt) | ||
59 | + add_train_summary('train/fine_loss', loss_fine) | ||
60 | + update_fine = add_valid_summary('valid/fine_loss', loss_fine) | ||
61 | + | ||
62 | + loss = loss_coarse + alpha * loss_fine | ||
63 | + add_train_summary('train/loss', loss) | ||
64 | + update_loss = add_valid_summary('valid/loss', loss) | ||
65 | + | ||
66 | + return loss, [update_coarse, update_fine, update_loss] |
code/pcn_modify/pcn/models/pcn_emd.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import tensorflow as tf | ||
4 | +from tf_util import * | ||
5 | + | ||
6 | + | ||
7 | +class Model: | ||
8 | + def __init__(self, inputs,my_inputs, npts, gt, alpha,beta): | ||
9 | + self.num_coarse = 1024 | ||
10 | + self.grid_size = 4 | ||
11 | + self.grid_scale = 0.05 | ||
12 | + self.num_fine = self.grid_size ** 2 * self.num_coarse | ||
13 | + self.features = self.create_encoder(inputs, npts) | ||
14 | + self.coarse, self.fine = self.create_decoder(self.features) | ||
15 | + self.loss, self.update = self.create_loss(my_inputs,self.coarse, self.fine, gt, alpha,beta) | ||
16 | + self.outputs = self.fine | ||
17 | + self.visualize_ops = [tf.split(inputs[0], npts, axis=0), self.coarse, self.fine, gt] | ||
18 | + self.visualize_titles = ['input', 'coarse output', 'fine output', 'ground truth'] | ||
19 | + | ||
20 | + def create_encoder(self, inputs, npts): | ||
21 | + with tf.variable_scope('encoder_0', reuse=tf.AUTO_REUSE): | ||
22 | + features = mlp_conv(inputs, [128, 256]) | ||
23 | + features_global = point_unpool(point_maxpool(features, npts, keepdims=True), npts) | ||
24 | + features = tf.concat([features, features_global], axis=2) | ||
25 | + with tf.variable_scope('encoder_1', reuse=tf.AUTO_REUSE): | ||
26 | + features = mlp_conv(features, [512, 1024]) | ||
27 | + features = point_maxpool(features, npts) | ||
28 | + return features | ||
29 | + | ||
30 | + def create_decoder(self, features): | ||
31 | + with tf.variable_scope('decoder', reuse=tf.AUTO_REUSE): | ||
32 | + coarse = mlp(features, [1024, 1024, self.num_coarse * 3]) | ||
33 | + coarse = tf.reshape(coarse, [-1, self.num_coarse, 3]) | ||
34 | + | ||
35 | + with tf.variable_scope('folding', reuse=tf.AUTO_REUSE): | ||
36 | + x = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size) | ||
37 | + y = tf.linspace(-self.grid_scale, self.grid_scale, self.grid_size) | ||
38 | + grid = tf.meshgrid(x, y) | ||
39 | + grid = tf.expand_dims(tf.reshape(tf.stack(grid, axis=2), [-1, 2]), 0) | ||
40 | + grid_feat = tf.tile(grid, [features.shape[0], self.num_coarse, 1]) | ||
41 | + | ||
42 | + point_feat = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1]) | ||
43 | + point_feat = tf.reshape(point_feat, [-1, self.num_fine, 3]) | ||
44 | + | ||
45 | + global_feat = tf.tile(tf.expand_dims(features, 1), [1, self.num_fine, 1]) | ||
46 | + | ||
47 | + feat = tf.concat([grid_feat, point_feat, global_feat], axis=2) | ||
48 | + | ||
49 | + center = tf.tile(tf.expand_dims(coarse, 2), [1, 1, self.grid_size ** 2, 1]) | ||
50 | + center = tf.reshape(center, [-1, self.num_fine, 3]) | ||
51 | + | ||
52 | + fine = mlp_conv(feat, [512, 512, 3]) + center | ||
53 | + return coarse, fine | ||
54 | + | ||
55 | + def create_loss(self,my_inputs,coarse, fine, gt, alpha,beta): | ||
56 | + gt_ds = gt[:, :coarse.shape[1], :] | ||
57 | + loss_coarse = earth_mover(coarse, gt_ds) | ||
58 | + add_train_summary('train/coarse_loss', loss_coarse) | ||
59 | + update_coarse = add_valid_summary('valid/coarse_loss', loss_coarse) | ||
60 | + | ||
61 | + loss_fine = chamfer(fine, gt) | ||
62 | + add_train_summary('train/fine_loss', loss_fine) | ||
63 | + update_fine = add_valid_summary('valid/fine_loss', loss_fine) | ||
64 | + | ||
65 | + loss_input = my_chamfer(my_inputs,fine) | ||
66 | + add_train_summary('train/input_loss',loss_input) | ||
67 | + update_input = add_valid_summary('valid/input_loss',loss_input) | ||
68 | + | ||
69 | + loss = loss_coarse + alpha * loss_fine + beta*loss_input | ||
70 | + add_train_summary('train/loss', loss) | ||
71 | + update_loss = add_valid_summary('valid/loss', loss) | ||
72 | + | ||
73 | + return loss, [update_coarse, update_fine, update_input,update_loss] |
code/pcn_modify/pcn/pc_distance/__init__.py
0 → 100755
File mode changed
code/pcn_modify/pcn/pc_distance/makefile
0 → 100644
1 | +cuda_inc = /usr/local/cuda-10.0/include/ | ||
2 | +cuda_lib = /usr/local/cuda-10.0/lib64/ | ||
3 | +nvcc = /usr/local/cuda-10.0/bin/nvcc | ||
4 | +tf_inc = /usr/local/lib/python3.6/dist-packages/tensorflow_core/include | ||
5 | +tf_lib = /usr/local/lib/python3.6/dist-packages/tensorflow_core | ||
6 | + | ||
7 | +all: tf_nndistance_so.so tf_approxmatch_so.so | ||
8 | + | ||
9 | +tf_nndistance.cu.o: tf_nndistance.cu | ||
10 | + $(nvcc) tf_nndistance.cu -o tf_nndistance.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC | ||
11 | + | ||
12 | +tf_nndistance_so.so: tf_nndistance.cpp tf_nndistance.cu.o | ||
13 | + g++ tf_nndistance.cpp tf_nndistance.cu.o -o tf_nndistance_so.so \ | ||
14 | + -I $(cuda_inc) -I $(tf_inc) -L $(cuda_lib) -lcudart -L $(tf_lib) -ltensorflow_framework \ | ||
15 | + -shared -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -fPIC -O2 | ||
16 | + | ||
17 | +tf_approxmatch.cu.o: tf_approxmatch.cu | ||
18 | + $(nvcc) tf_approxmatch.cu -o tf_approxmatch.cu.o -c -O2 -DGOOGLE_CUDA=1 -x cu -Xcompiler -fPIC | ||
19 | + | ||
20 | +tf_approxmatch_so.so: tf_approxmatch.cpp tf_approxmatch.cu.o | ||
21 | + g++ -shared $(CPPFLAGS) tf_approxmatch.cpp tf_approxmatch.cu.o -o tf_approxmatch_so.so \ | ||
22 | + -I $(cuda_inc) -I $(tf_inc) -L $(cuda_lib) -lcudart -L $(tf_lib) -ltensorflow_framework \ | ||
23 | + -shared -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -fPIC -O2 | ||
24 | + | ||
25 | +clean: | ||
26 | + rm -rf *.o *.so |
1 | +#include "tensorflow/core/framework/op.h" | ||
2 | +#include "tensorflow/core/framework/op_kernel.h" | ||
3 | +#include <algorithm> | ||
4 | +#include <vector> | ||
5 | +#include <math.h> | ||
6 | +using namespace tensorflow; | ||
7 | +REGISTER_OP("ApproxMatch") | ||
8 | + .Input("xyz1: float32") | ||
9 | + .Input("xyz2: float32") | ||
10 | + .Output("match: float32"); | ||
11 | +REGISTER_OP("MatchCost") | ||
12 | + .Input("xyz1: float32") | ||
13 | + .Input("xyz2: float32") | ||
14 | + .Input("match: float32") | ||
15 | + .Output("cost: float32"); | ||
16 | +REGISTER_OP("MatchCostGrad") | ||
17 | + .Input("xyz1: float32") | ||
18 | + .Input("xyz2: float32") | ||
19 | + .Input("match: float32") | ||
20 | + .Output("grad1: float32") | ||
21 | + .Output("grad2: float32"); | ||
22 | + | ||
23 | +void approxmatch_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,float * match){ | ||
24 | + for (int i=0;i<b;i++){ | ||
25 | + int factorl=std::max(n,m)/n; | ||
26 | + int factorr=std::max(n,m)/m; | ||
27 | + std::vector<double> saturatedl(n,double(factorl)),saturatedr(m,double(factorr)); | ||
28 | + std::vector<double> weight(n*m); | ||
29 | + for (int j=0;j<n*m;j++) | ||
30 | + match[j]=0; | ||
31 | + for (int j=8;j>=-2;j--){ | ||
32 | + //printf("i=%d j=%d\n",i,j); | ||
33 | + double level=-powf(4.0,j); | ||
34 | + if (j==-2) | ||
35 | + level=0; | ||
36 | + for (int k=0;k<n;k++){ | ||
37 | + double x1=xyz1[k*3+0]; | ||
38 | + double y1=xyz1[k*3+1]; | ||
39 | + double z1=xyz1[k*3+2]; | ||
40 | + for (int l=0;l<m;l++){ | ||
41 | + double x2=xyz2[l*3+0]; | ||
42 | + double y2=xyz2[l*3+1]; | ||
43 | + double z2=xyz2[l*3+2]; | ||
44 | + weight[k*m+l]=expf(level*((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2)))*saturatedr[l]; | ||
45 | + } | ||
46 | + } | ||
47 | + std::vector<double> ss(m,1e-9); | ||
48 | + for (int k=0;k<n;k++){ | ||
49 | + double s=1e-9; | ||
50 | + for (int l=0;l<m;l++){ | ||
51 | + s+=weight[k*m+l]; | ||
52 | + } | ||
53 | + for (int l=0;l<m;l++){ | ||
54 | + weight[k*m+l]=weight[k*m+l]/s*saturatedl[k]; | ||
55 | + } | ||
56 | + for (int l=0;l<m;l++) | ||
57 | + ss[l]+=weight[k*m+l]; | ||
58 | + } | ||
59 | + for (int l=0;l<m;l++){ | ||
60 | + double s=ss[l]; | ||
61 | + double r=std::min(saturatedr[l]/s,1.0); | ||
62 | + ss[l]=r; | ||
63 | + } | ||
64 | + std::vector<double> ss2(m,0); | ||
65 | + for (int k=0;k<n;k++){ | ||
66 | + double s=0; | ||
67 | + for (int l=0;l<m;l++){ | ||
68 | + weight[k*m+l]*=ss[l]; | ||
69 | + s+=weight[k*m+l]; | ||
70 | + ss2[l]+=weight[k*m+l]; | ||
71 | + } | ||
72 | + saturatedl[k]=std::max(saturatedl[k]-s,0.0); | ||
73 | + } | ||
74 | + for (int k=0;k<n*m;k++) | ||
75 | + match[k]+=weight[k]; | ||
76 | + for (int l=0;l<m;l++){ | ||
77 | + saturatedr[l]=std::max(saturatedr[l]-ss2[l],0.0); | ||
78 | + } | ||
79 | + } | ||
80 | + xyz1+=n*3; | ||
81 | + xyz2+=m*3; | ||
82 | + match+=n*m; | ||
83 | + } | ||
84 | +} | ||
85 | +void matchcost_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * cost){ | ||
86 | + for (int i=0;i<b;i++){ | ||
87 | + double s=0; | ||
88 | + for (int j=0;j<n;j++) | ||
89 | + for (int k=0;k<m;k++){ | ||
90 | + float x1=xyz1[j*3+0]; | ||
91 | + float y1=xyz1[j*3+1]; | ||
92 | + float z1=xyz1[j*3+2]; | ||
93 | + float x2=xyz2[k*3+0]; | ||
94 | + float y2=xyz2[k*3+1]; | ||
95 | + float z2=xyz2[k*3+2]; | ||
96 | + float d=sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1))*match[j*m+k]; | ||
97 | + s+=d; | ||
98 | + } | ||
99 | + cost[0]=s; | ||
100 | + xyz1+=n*3; | ||
101 | + xyz2+=m*3; | ||
102 | + match+=n*m; | ||
103 | + cost+=1; | ||
104 | + } | ||
105 | +} | ||
106 | +void matchcostgrad_cpu(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2){ | ||
107 | + for (int i=0;i<b;i++){ | ||
108 | + for (int j=0;j<n;j++) | ||
109 | + grad1[j*3+0]=0; | ||
110 | + for (int j=0;j<m;j++){ | ||
111 | + float sx=0,sy=0,sz=0; | ||
112 | + for (int k=0;k<n;k++){ | ||
113 | + float x2=xyz2[j*3+0]; | ||
114 | + float y2=xyz2[j*3+1]; | ||
115 | + float z2=xyz2[j*3+2]; | ||
116 | + float x1=xyz1[k*3+0]; | ||
117 | + float y1=xyz1[k*3+1]; | ||
118 | + float z1=xyz1[k*3+2]; | ||
119 | + float d=std::max(sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)),1e-20f); | ||
120 | + float dx=match[k*m+j]*((x2-x1)/d); | ||
121 | + float dy=match[k*m+j]*((y2-y1)/d); | ||
122 | + float dz=match[k*m+j]*((z2-z1)/d); | ||
123 | + grad1[k*3+0]-=dx; | ||
124 | + grad1[k*3+1]-=dy; | ||
125 | + grad1[k*3+2]-=dz; | ||
126 | + sx+=dx; | ||
127 | + sy+=dy; | ||
128 | + sz+=dz; | ||
129 | + } | ||
130 | + grad2[j*3+0]=sx; | ||
131 | + grad2[j*3+1]=sy; | ||
132 | + grad2[j*3+2]=sz; | ||
133 | + } | ||
134 | + xyz1+=n*3; | ||
135 | + xyz2+=m*3; | ||
136 | + match+=n*m; | ||
137 | + grad1+=n*3; | ||
138 | + grad2+=m*3; | ||
139 | + } | ||
140 | +} | ||
141 | +void approxmatchLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp); | ||
142 | +void matchcostLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * out); | ||
143 | +void matchcostgradLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2); | ||
144 | + | ||
145 | +class ApproxMatchGpuOp: public OpKernel{ | ||
146 | + public: | ||
147 | + explicit ApproxMatchGpuOp(OpKernelConstruction* context):OpKernel(context){} | ||
148 | + void Compute(OpKernelContext * context)override{ | ||
149 | + const Tensor& xyz1_tensor=context->input(0); | ||
150 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz1 shape")); | ||
151 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
152 | + const float * xyz1=&(xyz1_flat(0)); | ||
153 | + int b=xyz1_tensor.shape().dim_size(0); | ||
154 | + int n=xyz1_tensor.shape().dim_size(1); | ||
155 | + //OP_REQUIRES(context,n<=4096,errors::InvalidArgument("ApproxMatch handles at most 4096 dataset points")); | ||
156 | + | ||
157 | + const Tensor& xyz2_tensor=context->input(1); | ||
158 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
159 | + int m=xyz2_tensor.shape().dim_size(1); | ||
160 | + //OP_REQUIRES(context,m<=1024,errors::InvalidArgument("ApproxMatch handles at most 1024 query points")); | ||
161 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
162 | + const float * xyz2=&(xyz2_flat(0)); | ||
163 | + Tensor * match_tensor=NULL; | ||
164 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,n},&match_tensor)); | ||
165 | + auto match_flat=match_tensor->flat<float>(); | ||
166 | + float * match=&(match_flat(0)); | ||
167 | + Tensor temp_tensor; | ||
168 | + OP_REQUIRES_OK(context,context->allocate_temp(DataTypeToEnum<float>::value,TensorShape{b,(n+m)*2},&temp_tensor)); | ||
169 | + auto temp_flat=temp_tensor.flat<float>(); | ||
170 | + float * temp=&(temp_flat(0)); | ||
171 | + approxmatchLauncher(b,n,m,xyz1,xyz2,match,temp); | ||
172 | + } | ||
173 | +}; | ||
174 | +REGISTER_KERNEL_BUILDER(Name("ApproxMatch").Device(DEVICE_GPU), ApproxMatchGpuOp); | ||
175 | +class ApproxMatchOp: public OpKernel{ | ||
176 | + public: | ||
177 | + explicit ApproxMatchOp(OpKernelConstruction* context):OpKernel(context){} | ||
178 | + void Compute(OpKernelContext * context)override{ | ||
179 | + const Tensor& xyz1_tensor=context->input(0); | ||
180 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz1 shape")); | ||
181 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
182 | + const float * xyz1=&(xyz1_flat(0)); | ||
183 | + int b=xyz1_tensor.shape().dim_size(0); | ||
184 | + int n=xyz1_tensor.shape().dim_size(1); | ||
185 | + //OP_REQUIRES(context,n<=4096,errors::InvalidArgument("ApproxMatch handles at most 4096 dataset points")); | ||
186 | + | ||
187 | + const Tensor& xyz2_tensor=context->input(1); | ||
188 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("ApproxMatch expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
189 | + int m=xyz2_tensor.shape().dim_size(1); | ||
190 | + //OP_REQUIRES(context,m<=1024,errors::InvalidArgument("ApproxMatch handles at most 1024 query points")); | ||
191 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
192 | + const float * xyz2=&(xyz2_flat(0)); | ||
193 | + Tensor * match_tensor=NULL; | ||
194 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,m,n},&match_tensor)); | ||
195 | + auto match_flat=match_tensor->flat<float>(); | ||
196 | + float * match=&(match_flat(0)); | ||
197 | + approxmatch_cpu(b,n,m,xyz1,xyz2,match); | ||
198 | + } | ||
199 | +}; | ||
200 | +REGISTER_KERNEL_BUILDER(Name("ApproxMatch").Device(DEVICE_CPU), ApproxMatchOp); | ||
201 | +class MatchCostGpuOp: public OpKernel{ | ||
202 | + public: | ||
203 | + explicit MatchCostGpuOp(OpKernelConstruction* context):OpKernel(context){} | ||
204 | + void Compute(OpKernelContext * context)override{ | ||
205 | + const Tensor& xyz1_tensor=context->input(0); | ||
206 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape")); | ||
207 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
208 | + const float * xyz1=&(xyz1_flat(0)); | ||
209 | + int b=xyz1_tensor.shape().dim_size(0); | ||
210 | + int n=xyz1_tensor.shape().dim_size(1); | ||
211 | + | ||
212 | + const Tensor& xyz2_tensor=context->input(1); | ||
213 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
214 | + int m=xyz2_tensor.shape().dim_size(1); | ||
215 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
216 | + const float * xyz2=&(xyz2_flat(0)); | ||
217 | + | ||
218 | + const Tensor& match_tensor=context->input(2); | ||
219 | + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape")); | ||
220 | + auto match_flat=match_tensor.flat<float>(); | ||
221 | + const float * match=&(match_flat(0)); | ||
222 | + | ||
223 | + Tensor * cost_tensor=NULL; | ||
224 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b},&cost_tensor)); | ||
225 | + auto cost_flat=cost_tensor->flat<float>(); | ||
226 | + float * cost=&(cost_flat(0)); | ||
227 | + matchcostLauncher(b,n,m,xyz1,xyz2,match,cost); | ||
228 | + } | ||
229 | +}; | ||
230 | +REGISTER_KERNEL_BUILDER(Name("MatchCost").Device(DEVICE_GPU), MatchCostGpuOp); | ||
231 | +class MatchCostOp: public OpKernel{ | ||
232 | + public: | ||
233 | + explicit MatchCostOp(OpKernelConstruction* context):OpKernel(context){} | ||
234 | + void Compute(OpKernelContext * context)override{ | ||
235 | + const Tensor& xyz1_tensor=context->input(0); | ||
236 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape")); | ||
237 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
238 | + const float * xyz1=&(xyz1_flat(0)); | ||
239 | + int b=xyz1_tensor.shape().dim_size(0); | ||
240 | + int n=xyz1_tensor.shape().dim_size(1); | ||
241 | + | ||
242 | + const Tensor& xyz2_tensor=context->input(1); | ||
243 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
244 | + int m=xyz2_tensor.shape().dim_size(1); | ||
245 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
246 | + const float * xyz2=&(xyz2_flat(0)); | ||
247 | + | ||
248 | + const Tensor& match_tensor=context->input(2); | ||
249 | + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape")); | ||
250 | + auto match_flat=match_tensor.flat<float>(); | ||
251 | + const float * match=&(match_flat(0)); | ||
252 | + | ||
253 | + Tensor * cost_tensor=NULL; | ||
254 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b},&cost_tensor)); | ||
255 | + auto cost_flat=cost_tensor->flat<float>(); | ||
256 | + float * cost=&(cost_flat(0)); | ||
257 | + matchcost_cpu(b,n,m,xyz1,xyz2,match,cost); | ||
258 | + } | ||
259 | +}; | ||
260 | +REGISTER_KERNEL_BUILDER(Name("MatchCost").Device(DEVICE_CPU), MatchCostOp); | ||
261 | + | ||
262 | +class MatchCostGradGpuOp: public OpKernel{ | ||
263 | + public: | ||
264 | + explicit MatchCostGradGpuOp(OpKernelConstruction* context):OpKernel(context){} | ||
265 | + void Compute(OpKernelContext * context)override{ | ||
266 | + const Tensor& xyz1_tensor=context->input(0); | ||
267 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCostGrad expects (batch_size,num_points,3) xyz1 shape")); | ||
268 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
269 | + const float * xyz1=&(xyz1_flat(0)); | ||
270 | + int b=xyz1_tensor.shape().dim_size(0); | ||
271 | + int n=xyz1_tensor.shape().dim_size(1); | ||
272 | + | ||
273 | + const Tensor& xyz2_tensor=context->input(1); | ||
274 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCostGrad expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
275 | + int m=xyz2_tensor.shape().dim_size(1); | ||
276 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
277 | + const float * xyz2=&(xyz2_flat(0)); | ||
278 | + | ||
279 | + const Tensor& match_tensor=context->input(2); | ||
280 | + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape")); | ||
281 | + auto match_flat=match_tensor.flat<float>(); | ||
282 | + const float * match=&(match_flat(0)); | ||
283 | + | ||
284 | + Tensor * grad1_tensor=NULL; | ||
285 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad1_tensor)); | ||
286 | + auto grad1_flat=grad1_tensor->flat<float>(); | ||
287 | + float * grad1=&(grad1_flat(0)); | ||
288 | + Tensor * grad2_tensor=NULL; | ||
289 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad2_tensor)); | ||
290 | + auto grad2_flat=grad2_tensor->flat<float>(); | ||
291 | + float * grad2=&(grad2_flat(0)); | ||
292 | + matchcostgradLauncher(b,n,m,xyz1,xyz2,match,grad1,grad2); | ||
293 | + } | ||
294 | +}; | ||
295 | +REGISTER_KERNEL_BUILDER(Name("MatchCostGrad").Device(DEVICE_GPU), MatchCostGradGpuOp); | ||
296 | +class MatchCostGradOp: public OpKernel{ | ||
297 | + public: | ||
298 | + explicit MatchCostGradOp(OpKernelConstruction* context):OpKernel(context){} | ||
299 | + void Compute(OpKernelContext * context)override{ | ||
300 | + const Tensor& xyz1_tensor=context->input(0); | ||
301 | + OP_REQUIRES(context,xyz1_tensor.dims()==3 && xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz1 shape")); | ||
302 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
303 | + const float * xyz1=&(xyz1_flat(0)); | ||
304 | + int b=xyz1_tensor.shape().dim_size(0); | ||
305 | + int n=xyz1_tensor.shape().dim_size(1); | ||
306 | + | ||
307 | + const Tensor& xyz2_tensor=context->input(1); | ||
308 | + OP_REQUIRES(context,xyz2_tensor.dims()==3 && xyz2_tensor.shape().dim_size(2)==3 && xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("MatchCost expects (batch_size,num_points,3) xyz2 shape, and batch_size must match")); | ||
309 | + int m=xyz2_tensor.shape().dim_size(1); | ||
310 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
311 | + const float * xyz2=&(xyz2_flat(0)); | ||
312 | + | ||
313 | + const Tensor& match_tensor=context->input(2); | ||
314 | + OP_REQUIRES(context,match_tensor.dims()==3 && match_tensor.shape().dim_size(0)==b && match_tensor.shape().dim_size(1)==m && match_tensor.shape().dim_size(2)==n,errors::InvalidArgument("MatchCost expects (batch_size,#query,#dataset) match shape")); | ||
315 | + auto match_flat=match_tensor.flat<float>(); | ||
316 | + const float * match=&(match_flat(0)); | ||
317 | + | ||
318 | + Tensor * grad1_tensor=NULL; | ||
319 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad1_tensor)); | ||
320 | + auto grad1_flat=grad1_tensor->flat<float>(); | ||
321 | + float * grad1=&(grad1_flat(0)); | ||
322 | + Tensor * grad2_tensor=NULL; | ||
323 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad2_tensor)); | ||
324 | + auto grad2_flat=grad2_tensor->flat<float>(); | ||
325 | + float * grad2=&(grad2_flat(0)); | ||
326 | + matchcostgrad_cpu(b,n,m,xyz1,xyz2,match,grad1,grad2); | ||
327 | + } | ||
328 | +}; | ||
329 | +REGISTER_KERNEL_BUILDER(Name("MatchCostGrad").Device(DEVICE_CPU), MatchCostGradOp); |
1 | +__global__ void approxmatch(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,float * __restrict__ match,float * temp){ | ||
2 | + float * remainL=temp+blockIdx.x*(n+m)*2, * remainR=temp+blockIdx.x*(n+m)*2+n,*ratioL=temp+blockIdx.x*(n+m)*2+n+m,*ratioR=temp+blockIdx.x*(n+m)*2+n+m+n; | ||
3 | + float multiL,multiR; | ||
4 | + if (n>=m){ | ||
5 | + multiL=1; | ||
6 | + multiR=n/m; | ||
7 | + }else{ | ||
8 | + multiL=m/n; | ||
9 | + multiR=1; | ||
10 | + } | ||
11 | + const int Block=1024; | ||
12 | + __shared__ float buf[Block*4]; | ||
13 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
14 | + for (int j=threadIdx.x;j<n*m;j+=blockDim.x) | ||
15 | + match[i*n*m+j]=0; | ||
16 | + for (int j=threadIdx.x;j<n;j+=blockDim.x) | ||
17 | + remainL[j]=multiL; | ||
18 | + for (int j=threadIdx.x;j<m;j+=blockDim.x) | ||
19 | + remainR[j]=multiR; | ||
20 | + __syncthreads(); | ||
21 | + for (int j=7;j>=-2;j--){ | ||
22 | + float level=-powf(4.0f,j); | ||
23 | + if (j==-2){ | ||
24 | + level=0; | ||
25 | + } | ||
26 | + for (int k0=0;k0<n;k0+=blockDim.x){ | ||
27 | + int k=k0+threadIdx.x; | ||
28 | + float x1=0,y1=0,z1=0; | ||
29 | + if (k<n){ | ||
30 | + x1=xyz1[i*n*3+k*3+0]; | ||
31 | + y1=xyz1[i*n*3+k*3+1]; | ||
32 | + z1=xyz1[i*n*3+k*3+2]; | ||
33 | + } | ||
34 | + float suml=1e-9f; | ||
35 | + for (int l0=0;l0<m;l0+=Block){ | ||
36 | + int lend=min(m,l0+Block)-l0; | ||
37 | + for (int l=threadIdx.x;l<lend;l+=blockDim.x){ | ||
38 | + float x2=xyz2[i*m*3+l0*3+l*3+0]; | ||
39 | + float y2=xyz2[i*m*3+l0*3+l*3+1]; | ||
40 | + float z2=xyz2[i*m*3+l0*3+l*3+2]; | ||
41 | + buf[l*4+0]=x2; | ||
42 | + buf[l*4+1]=y2; | ||
43 | + buf[l*4+2]=z2; | ||
44 | + buf[l*4+3]=remainR[l0+l]; | ||
45 | + } | ||
46 | + __syncthreads(); | ||
47 | + for (int l=0;l<lend;l++){ | ||
48 | + float x2=buf[l*4+0]; | ||
49 | + float y2=buf[l*4+1]; | ||
50 | + float z2=buf[l*4+2]; | ||
51 | + float d=level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)); | ||
52 | + float w=__expf(d)*buf[l*4+3]; | ||
53 | + suml+=w; | ||
54 | + } | ||
55 | + __syncthreads(); | ||
56 | + } | ||
57 | + if (k<n) | ||
58 | + ratioL[k]=remainL[k]/suml; | ||
59 | + } | ||
60 | + /*for (int k=threadIdx.x;k<n;k+=gridDim.x){ | ||
61 | + float x1=xyz1[i*n*3+k*3+0]; | ||
62 | + float y1=xyz1[i*n*3+k*3+1]; | ||
63 | + float z1=xyz1[i*n*3+k*3+2]; | ||
64 | + float suml=1e-9f; | ||
65 | + for (int l=0;l<m;l++){ | ||
66 | + float x2=xyz2[i*m*3+l*3+0]; | ||
67 | + float y2=xyz2[i*m*3+l*3+1]; | ||
68 | + float z2=xyz2[i*m*3+l*3+2]; | ||
69 | + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*remainR[l]; | ||
70 | + suml+=w; | ||
71 | + } | ||
72 | + ratioL[k]=remainL[k]/suml; | ||
73 | + }*/ | ||
74 | + __syncthreads(); | ||
75 | + for (int l0=0;l0<m;l0+=blockDim.x){ | ||
76 | + int l=l0+threadIdx.x; | ||
77 | + float x2=0,y2=0,z2=0; | ||
78 | + if (l<m){ | ||
79 | + x2=xyz2[i*m*3+l*3+0]; | ||
80 | + y2=xyz2[i*m*3+l*3+1]; | ||
81 | + z2=xyz2[i*m*3+l*3+2]; | ||
82 | + } | ||
83 | + float sumr=0; | ||
84 | + for (int k0=0;k0<n;k0+=Block){ | ||
85 | + int kend=min(n,k0+Block)-k0; | ||
86 | + for (int k=threadIdx.x;k<kend;k+=blockDim.x){ | ||
87 | + buf[k*4+0]=xyz1[i*n*3+k0*3+k*3+0]; | ||
88 | + buf[k*4+1]=xyz1[i*n*3+k0*3+k*3+1]; | ||
89 | + buf[k*4+2]=xyz1[i*n*3+k0*3+k*3+2]; | ||
90 | + buf[k*4+3]=ratioL[k0+k]; | ||
91 | + } | ||
92 | + __syncthreads(); | ||
93 | + for (int k=0;k<kend;k++){ | ||
94 | + float x1=buf[k*4+0]; | ||
95 | + float y1=buf[k*4+1]; | ||
96 | + float z1=buf[k*4+2]; | ||
97 | + float w=__expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*buf[k*4+3]; | ||
98 | + sumr+=w; | ||
99 | + } | ||
100 | + __syncthreads(); | ||
101 | + } | ||
102 | + if (l<m){ | ||
103 | + sumr*=remainR[l]; | ||
104 | + float consumption=fminf(remainR[l]/(sumr+1e-9f),1.0f); | ||
105 | + ratioR[l]=consumption*remainR[l]; | ||
106 | + remainR[l]=fmaxf(0.0f,remainR[l]-sumr); | ||
107 | + } | ||
108 | + } | ||
109 | + /*for (int l=threadIdx.x;l<m;l+=blockDim.x){ | ||
110 | + float x2=xyz2[i*m*3+l*3+0]; | ||
111 | + float y2=xyz2[i*m*3+l*3+1]; | ||
112 | + float z2=xyz2[i*m*3+l*3+2]; | ||
113 | + float sumr=0; | ||
114 | + for (int k=0;k<n;k++){ | ||
115 | + float x1=xyz1[i*n*3+k*3+0]; | ||
116 | + float y1=xyz1[i*n*3+k*3+1]; | ||
117 | + float z1=xyz1[i*n*3+k*3+2]; | ||
118 | + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*ratioL[k]; | ||
119 | + sumr+=w; | ||
120 | + } | ||
121 | + sumr*=remainR[l]; | ||
122 | + float consumption=fminf(remainR[l]/(sumr+1e-9f),1.0f); | ||
123 | + ratioR[l]=consumption*remainR[l]; | ||
124 | + remainR[l]=fmaxf(0.0f,remainR[l]-sumr); | ||
125 | + }*/ | ||
126 | + __syncthreads(); | ||
127 | + for (int k0=0;k0<n;k0+=blockDim.x){ | ||
128 | + int k=k0+threadIdx.x; | ||
129 | + float x1=0,y1=0,z1=0; | ||
130 | + if (k<n){ | ||
131 | + x1=xyz1[i*n*3+k*3+0]; | ||
132 | + y1=xyz1[i*n*3+k*3+1]; | ||
133 | + z1=xyz1[i*n*3+k*3+2]; | ||
134 | + } | ||
135 | + float suml=0; | ||
136 | + for (int l0=0;l0<m;l0+=Block){ | ||
137 | + int lend=min(m,l0+Block)-l0; | ||
138 | + for (int l=threadIdx.x;l<lend;l+=blockDim.x){ | ||
139 | + buf[l*4+0]=xyz2[i*m*3+l0*3+l*3+0]; | ||
140 | + buf[l*4+1]=xyz2[i*m*3+l0*3+l*3+1]; | ||
141 | + buf[l*4+2]=xyz2[i*m*3+l0*3+l*3+2]; | ||
142 | + buf[l*4+3]=ratioR[l0+l]; | ||
143 | + } | ||
144 | + __syncthreads(); | ||
145 | + float rl=ratioL[k]; | ||
146 | + if (k<n){ | ||
147 | + for (int l=0;l<lend;l++){ | ||
148 | + float x2=buf[l*4+0]; | ||
149 | + float y2=buf[l*4+1]; | ||
150 | + float z2=buf[l*4+2]; | ||
151 | + float w=__expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*rl*buf[l*4+3]; | ||
152 | + match[i*n*m+(l0+l)*n+k]+=w; | ||
153 | + suml+=w; | ||
154 | + } | ||
155 | + } | ||
156 | + __syncthreads(); | ||
157 | + } | ||
158 | + if (k<n) | ||
159 | + remainL[k]=fmaxf(0.0f,remainL[k]-suml); | ||
160 | + } | ||
161 | + /*for (int k=threadIdx.x;k<n;k+=blockDim.x){ | ||
162 | + float x1=xyz1[i*n*3+k*3+0]; | ||
163 | + float y1=xyz1[i*n*3+k*3+1]; | ||
164 | + float z1=xyz1[i*n*3+k*3+2]; | ||
165 | + float suml=0; | ||
166 | + for (int l=0;l<m;l++){ | ||
167 | + float x2=xyz2[i*m*3+l*3+0]; | ||
168 | + float y2=xyz2[i*m*3+l*3+1]; | ||
169 | + float z2=xyz2[i*m*3+l*3+2]; | ||
170 | + float w=expf(level*((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)))*ratioL[k]*ratioR[l]; | ||
171 | + match[i*n*m+l*n+k]+=w; | ||
172 | + suml+=w; | ||
173 | + } | ||
174 | + remainL[k]=fmaxf(0.0f,remainL[k]-suml); | ||
175 | + }*/ | ||
176 | + __syncthreads(); | ||
177 | + } | ||
178 | + } | ||
179 | +} | ||
180 | +void approxmatchLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,float * match,float * temp){ | ||
181 | + approxmatch<<<32,512>>>(b,n,m,xyz1,xyz2,match,temp); | ||
182 | +} | ||
183 | +__global__ void matchcost(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ out){ | ||
184 | + __shared__ float allsum[512]; | ||
185 | + const int Block=1024; | ||
186 | + __shared__ float buf[Block*3]; | ||
187 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
188 | + float subsum=0; | ||
189 | + for (int k0=0;k0<n;k0+=blockDim.x){ | ||
190 | + int k=k0+threadIdx.x; | ||
191 | + float x1=0,y1=0,z1=0; | ||
192 | + if (k<n){ | ||
193 | + x1=xyz1[i*n*3+k*3+0]; | ||
194 | + y1=xyz1[i*n*3+k*3+1]; | ||
195 | + z1=xyz1[i*n*3+k*3+2]; | ||
196 | + } | ||
197 | + for (int l0=0;l0<m;l0+=Block){ | ||
198 | + int lend=min(m,l0+Block)-l0; | ||
199 | + for (int l=threadIdx.x;l<lend*3;l+=blockDim.x) | ||
200 | + buf[l]=xyz2[i*m*3+l0*3+l]; | ||
201 | + __syncthreads(); | ||
202 | + if (k<n){ | ||
203 | + for (int l=0;l<lend;l++){ | ||
204 | + float x2=buf[l*3+0]; | ||
205 | + float y2=buf[l*3+1]; | ||
206 | + float z2=buf[l*3+2]; | ||
207 | + float d=sqrtf((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)+(z2-z1)*(z2-z1)); | ||
208 | + subsum+=d*match[i*n*m+(l0+l)*n+k]; | ||
209 | + } | ||
210 | + } | ||
211 | + __syncthreads(); | ||
212 | + } | ||
213 | + } | ||
214 | + allsum[threadIdx.x]=subsum; | ||
215 | + for (int j=1;j<blockDim.x;j<<=1){ | ||
216 | + __syncthreads(); | ||
217 | + if ((threadIdx.x&j)==0 && threadIdx.x+j<blockDim.x){ | ||
218 | + allsum[threadIdx.x]+=allsum[threadIdx.x+j]; | ||
219 | + } | ||
220 | + } | ||
221 | + if (threadIdx.x==0) | ||
222 | + out[i]=allsum[0]; | ||
223 | + __syncthreads(); | ||
224 | + } | ||
225 | +} | ||
226 | +void matchcostLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * out){ | ||
227 | + matchcost<<<32,512>>>(b,n,m,xyz1,xyz2,match,out); | ||
228 | +} | ||
229 | +__global__ void matchcostgrad2(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ grad2){ | ||
230 | + __shared__ float sum_grad[256*3]; | ||
231 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
232 | + int kbeg=m*blockIdx.y/gridDim.y; | ||
233 | + int kend=m*(blockIdx.y+1)/gridDim.y; | ||
234 | + for (int k=kbeg;k<kend;k++){ | ||
235 | + float x2=xyz2[(i*m+k)*3+0]; | ||
236 | + float y2=xyz2[(i*m+k)*3+1]; | ||
237 | + float z2=xyz2[(i*m+k)*3+2]; | ||
238 | + float subsumx=0,subsumy=0,subsumz=0; | ||
239 | + for (int j=threadIdx.x;j<n;j+=blockDim.x){ | ||
240 | + float x1=x2-xyz1[(i*n+j)*3+0]; | ||
241 | + float y1=y2-xyz1[(i*n+j)*3+1]; | ||
242 | + float z1=z2-xyz1[(i*n+j)*3+2]; | ||
243 | + float d=match[i*n*m+k*n+j]*rsqrtf(fmaxf(x1*x1+y1*y1+z1*z1,1e-20f)); | ||
244 | + subsumx+=x1*d; | ||
245 | + subsumy+=y1*d; | ||
246 | + subsumz+=z1*d; | ||
247 | + } | ||
248 | + sum_grad[threadIdx.x*3+0]=subsumx; | ||
249 | + sum_grad[threadIdx.x*3+1]=subsumy; | ||
250 | + sum_grad[threadIdx.x*3+2]=subsumz; | ||
251 | + for (int j=1;j<blockDim.x;j<<=1){ | ||
252 | + __syncthreads(); | ||
253 | + int j1=threadIdx.x; | ||
254 | + int j2=threadIdx.x+j; | ||
255 | + if ((j1&j)==0 && j2<blockDim.x){ | ||
256 | + sum_grad[j1*3+0]+=sum_grad[j2*3+0]; | ||
257 | + sum_grad[j1*3+1]+=sum_grad[j2*3+1]; | ||
258 | + sum_grad[j1*3+2]+=sum_grad[j2*3+2]; | ||
259 | + } | ||
260 | + } | ||
261 | + if (threadIdx.x==0){ | ||
262 | + grad2[(i*m+k)*3+0]=sum_grad[0]; | ||
263 | + grad2[(i*m+k)*3+1]=sum_grad[1]; | ||
264 | + grad2[(i*m+k)*3+2]=sum_grad[2]; | ||
265 | + } | ||
266 | + __syncthreads(); | ||
267 | + } | ||
268 | + } | ||
269 | +} | ||
270 | +__global__ void matchcostgrad1(int b,int n,int m,const float * __restrict__ xyz1,const float * __restrict__ xyz2,const float * __restrict__ match,float * __restrict__ grad1){ | ||
271 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
272 | + for (int l=threadIdx.x;l<n;l+=blockDim.x){ | ||
273 | + float x1=xyz1[i*n*3+l*3+0]; | ||
274 | + float y1=xyz1[i*n*3+l*3+1]; | ||
275 | + float z1=xyz1[i*n*3+l*3+2]; | ||
276 | + float dx=0,dy=0,dz=0; | ||
277 | + for (int k=0;k<m;k++){ | ||
278 | + float x2=xyz2[i*m*3+k*3+0]; | ||
279 | + float y2=xyz2[i*m*3+k*3+1]; | ||
280 | + float z2=xyz2[i*m*3+k*3+2]; | ||
281 | + float d=match[i*n*m+k*n+l]*rsqrtf(fmaxf((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2)+(z1-z2)*(z1-z2),1e-20f)); | ||
282 | + dx+=(x1-x2)*d; | ||
283 | + dy+=(y1-y2)*d; | ||
284 | + dz+=(z1-z2)*d; | ||
285 | + } | ||
286 | + grad1[i*n*3+l*3+0]=dx; | ||
287 | + grad1[i*n*3+l*3+1]=dy; | ||
288 | + grad1[i*n*3+l*3+2]=dz; | ||
289 | + } | ||
290 | + } | ||
291 | +} | ||
292 | +void matchcostgradLauncher(int b,int n,int m,const float * xyz1,const float * xyz2,const float * match,float * grad1,float * grad2){ | ||
293 | + matchcostgrad1<<<32,512>>>(b,n,m,xyz1,xyz2,match,grad1); | ||
294 | + matchcostgrad2<<<dim3(32,32),256>>>(b,n,m,xyz1,xyz2,match,grad2); | ||
295 | +} | ||
296 | + |
1 | +import tensorflow as tf | ||
2 | +from tensorflow.python.framework import ops | ||
3 | +import os.path as osp | ||
4 | + | ||
5 | +base_dir = osp.dirname(osp.abspath(__file__)) | ||
6 | + | ||
7 | +approxmatch_module = tf.load_op_library(osp.join(base_dir, 'tf_approxmatch_so.so')) | ||
8 | + | ||
9 | + | ||
10 | +def approx_match(xyz1,xyz2): | ||
11 | + ''' | ||
12 | +input: | ||
13 | + xyz1 : batch_size * #dataset_points * 3 | ||
14 | + xyz2 : batch_size * #query_points * 3 | ||
15 | +returns: | ||
16 | + match : batch_size * #query_points * #dataset_points | ||
17 | + ''' | ||
18 | + return approxmatch_module.approx_match(xyz1,xyz2) | ||
19 | +ops.NoGradient('ApproxMatch') | ||
20 | +#@tf.RegisterShape('ApproxMatch') | ||
21 | +@ops.RegisterShape('ApproxMatch') | ||
22 | +def _approx_match_shape(op): | ||
23 | + shape1=op.inputs[0].get_shape().with_rank(3) | ||
24 | + shape2=op.inputs[1].get_shape().with_rank(3) | ||
25 | + return [tf.TensorShape([shape1.dims[0],shape2.dims[1],shape1.dims[1]])] | ||
26 | + | ||
27 | +def match_cost(xyz1,xyz2,match): | ||
28 | + ''' | ||
29 | +input: | ||
30 | + xyz1 : batch_size * #dataset_points * 3 | ||
31 | + xyz2 : batch_size * #query_points * 3 | ||
32 | + match : batch_size * #query_points * #dataset_points | ||
33 | +returns: | ||
34 | + cost : batch_size | ||
35 | + ''' | ||
36 | + return approxmatch_module.match_cost(xyz1,xyz2,match) | ||
37 | +#@tf.RegisterShape('MatchCost') | ||
38 | +@ops.RegisterShape('MatchCost') | ||
39 | +def _match_cost_shape(op): | ||
40 | + shape1=op.inputs[0].get_shape().with_rank(3) | ||
41 | + shape2=op.inputs[1].get_shape().with_rank(3) | ||
42 | + shape3=op.inputs[2].get_shape().with_rank(3) | ||
43 | + return [tf.TensorShape([shape1.dims[0]])] | ||
44 | +@tf.RegisterGradient('MatchCost') | ||
45 | +def _match_cost_grad(op,grad_cost): | ||
46 | + xyz1=op.inputs[0] | ||
47 | + xyz2=op.inputs[1] | ||
48 | + match=op.inputs[2] | ||
49 | + grad_1,grad_2=approxmatch_module.match_cost_grad(xyz1,xyz2,match) | ||
50 | + return [grad_1*tf.expand_dims(tf.expand_dims(grad_cost,1),2),grad_2*tf.expand_dims(tf.expand_dims(grad_cost,1),2),None] | ||
51 | + | ||
52 | +if __name__=='__main__': | ||
53 | + alpha=0.5 | ||
54 | + beta=2.0 | ||
55 | + import bestmatch | ||
56 | + import numpy as np | ||
57 | + import math | ||
58 | + import random | ||
59 | + import cv2 | ||
60 | + | ||
61 | + import tf_nndistance | ||
62 | + | ||
63 | + npoint=100 | ||
64 | + | ||
65 | + with tf.device('/gpu:2'): | ||
66 | + pt_in=tf.placeholder(tf.float32,shape=(1,npoint*4,3)) | ||
67 | + mypoints=tf.Variable(np.random.randn(1,npoint,3).astype('float32')) | ||
68 | + match=approx_match(pt_in,mypoints) | ||
69 | + loss=tf.reduce_sum(match_cost(pt_in,mypoints,match)) | ||
70 | + #match=approx_match(mypoints,pt_in) | ||
71 | + #loss=tf.reduce_sum(match_cost(mypoints,pt_in,match)) | ||
72 | + #distf,_,distb,_=tf_nndistance.nn_distance(pt_in,mypoints) | ||
73 | + #loss=tf.reduce_sum((distf+1e-9)**0.5)*0.5+tf.reduce_sum((distb+1e-9)**0.5)*0.5 | ||
74 | + #loss=tf.reduce_max((distf+1e-9)**0.5)*0.5*npoint+tf.reduce_max((distb+1e-9)**0.5)*0.5*npoint | ||
75 | + | ||
76 | + optimizer=tf.train.GradientDescentOptimizer(1e-4).minimize(loss) | ||
77 | + with tf.Session('') as sess: | ||
78 | + sess.run(tf.initialize_all_variables()) | ||
79 | + while True: | ||
80 | + meanloss=0 | ||
81 | + meantrueloss=0 | ||
82 | + for i in xrange(1001): | ||
83 | + #phi=np.random.rand(4*npoint)*math.pi*2 | ||
84 | + #tpoints=(np.hstack([np.cos(phi)[:,None],np.sin(phi)[:,None],(phi*0)[:,None]])*random.random())[None,:,:] | ||
85 | + #tpoints=((np.random.rand(400)-0.5)[:,None]*[0,2,0]+[(random.random()-0.5)*2,0,0]).astype('float32')[None,:,:] | ||
86 | + tpoints=np.hstack([np.linspace(-1,1,400)[:,None],(random.random()*2*np.linspace(1,0,400)**2)[:,None],np.zeros((400,1))])[None,:,:] | ||
87 | + trainloss,_=sess.run([loss,optimizer],feed_dict={pt_in:tpoints.astype('float32')}) | ||
88 | + trainloss,trainmatch=sess.run([loss,match],feed_dict={pt_in:tpoints.astype('float32')}) | ||
89 | + #trainmatch=trainmatch.transpose((0,2,1)) | ||
90 | + show=np.zeros((400,400,3),dtype='uint8')^255 | ||
91 | + trainmypoints=sess.run(mypoints) | ||
92 | + for i in xrange(len(tpoints[0])): | ||
93 | + u=np.random.choice(range(len(trainmypoints[0])),p=trainmatch[0].T[i]) | ||
94 | + cv2.line(show, | ||
95 | + (int(tpoints[0][i,1]*100+200),int(tpoints[0][i,0]*100+200)), | ||
96 | + (int(trainmypoints[0][u,1]*100+200),int(trainmypoints[0][u,0]*100+200)), | ||
97 | + cv2.cv.CV_RGB(0,255,0)) | ||
98 | + for x,y,z in tpoints[0]: | ||
99 | + cv2.circle(show,(int(y*100+200),int(x*100+200)),2,cv2.cv.CV_RGB(255,0,0)) | ||
100 | + for x,y,z in trainmypoints[0]: | ||
101 | + cv2.circle(show,(int(y*100+200),int(x*100+200)),3,cv2.cv.CV_RGB(0,0,255)) | ||
102 | + cost=((tpoints[0][:,None,:]-np.repeat(trainmypoints[0][None,:,:],4,axis=1))**2).sum(axis=2)**0.5 | ||
103 | + #trueloss=bestmatch.bestmatch(cost)[0] | ||
104 | + print(trainloss) #,trueloss | ||
105 | + cv2.imshow('show',show) | ||
106 | + cmd=cv2.waitKey(10)%256 | ||
107 | + if cmd==ord('q'): | ||
108 | + break |
1 | +#include "tensorflow/core/framework/op.h" | ||
2 | +#include "tensorflow/core/framework/op_kernel.h" | ||
3 | +REGISTER_OP("NnDistance") | ||
4 | + .Input("xyz1: float32") | ||
5 | + .Input("xyz2: float32") | ||
6 | + .Output("dist1: float32") | ||
7 | + .Output("idx1: int32") | ||
8 | + .Output("dist2: float32") | ||
9 | + .Output("idx2: int32"); | ||
10 | +REGISTER_OP("NnDistanceGrad") | ||
11 | + .Input("xyz1: float32") | ||
12 | + .Input("xyz2: float32") | ||
13 | + .Input("grad_dist1: float32") | ||
14 | + .Input("idx1: int32") | ||
15 | + .Input("grad_dist2: float32") | ||
16 | + .Input("idx2: int32") | ||
17 | + .Output("grad_xyz1: float32") | ||
18 | + .Output("grad_xyz2: float32"); | ||
19 | +using namespace tensorflow; | ||
20 | + | ||
21 | +static void nnsearch(int b,int n,int m,const float * xyz1,const float * xyz2,float * dist,int * idx){ | ||
22 | + for (int i=0;i<b;i++){ | ||
23 | + for (int j=0;j<n;j++){ | ||
24 | + float x1=xyz1[(i*n+j)*3+0]; | ||
25 | + float y1=xyz1[(i*n+j)*3+1]; | ||
26 | + float z1=xyz1[(i*n+j)*3+2]; | ||
27 | + double best=0; | ||
28 | + int besti=0; | ||
29 | + for (int k=0;k<m;k++){ | ||
30 | + float x2=xyz2[(i*m+k)*3+0]-x1; | ||
31 | + float y2=xyz2[(i*m+k)*3+1]-y1; | ||
32 | + float z2=xyz2[(i*m+k)*3+2]-z1; | ||
33 | + double d=x2*x2+y2*y2+z2*z2; | ||
34 | + if (k==0 || d<best){ | ||
35 | + best=d; | ||
36 | + besti=k; | ||
37 | + } | ||
38 | + } | ||
39 | + dist[i*n+j]=best; | ||
40 | + idx[i*n+j]=besti; | ||
41 | + } | ||
42 | + } | ||
43 | +} | ||
44 | + | ||
45 | +class NnDistanceOp : public OpKernel{ | ||
46 | + public: | ||
47 | + explicit NnDistanceOp(OpKernelConstruction* context):OpKernel(context){} | ||
48 | + void Compute(OpKernelContext * context)override{ | ||
49 | + const Tensor& xyz1_tensor=context->input(0); | ||
50 | + const Tensor& xyz2_tensor=context->input(1); | ||
51 | + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz1 be of shape (batch,#points,3)")); | ||
52 | + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz1")); | ||
53 | + int b=xyz1_tensor.shape().dim_size(0); | ||
54 | + int n=xyz1_tensor.shape().dim_size(1); | ||
55 | + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz2 be of shape (batch,#points,3)")); | ||
56 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz2")); | ||
57 | + int m=xyz2_tensor.shape().dim_size(1); | ||
58 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistance expects xyz1 and xyz2 have same batch size")); | ||
59 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
60 | + const float * xyz1=&xyz1_flat(0); | ||
61 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
62 | + const float * xyz2=&xyz2_flat(0); | ||
63 | + Tensor * dist1_tensor=NULL; | ||
64 | + Tensor * idx1_tensor=NULL; | ||
65 | + Tensor * dist2_tensor=NULL; | ||
66 | + Tensor * idx2_tensor=NULL; | ||
67 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n},&dist1_tensor)); | ||
68 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,n},&idx1_tensor)); | ||
69 | + auto dist1_flat=dist1_tensor->flat<float>(); | ||
70 | + auto idx1_flat=idx1_tensor->flat<int>(); | ||
71 | + OP_REQUIRES_OK(context,context->allocate_output(2,TensorShape{b,m},&dist2_tensor)); | ||
72 | + OP_REQUIRES_OK(context,context->allocate_output(3,TensorShape{b,m},&idx2_tensor)); | ||
73 | + auto dist2_flat=dist2_tensor->flat<float>(); | ||
74 | + auto idx2_flat=idx2_tensor->flat<int>(); | ||
75 | + float * dist1=&(dist1_flat(0)); | ||
76 | + int * idx1=&(idx1_flat(0)); | ||
77 | + float * dist2=&(dist2_flat(0)); | ||
78 | + int * idx2=&(idx2_flat(0)); | ||
79 | + nnsearch(b,n,m,xyz1,xyz2,dist1,idx1); | ||
80 | + nnsearch(b,m,n,xyz2,xyz1,dist2,idx2); | ||
81 | + } | ||
82 | +}; | ||
83 | +REGISTER_KERNEL_BUILDER(Name("NnDistance").Device(DEVICE_CPU), NnDistanceOp); | ||
84 | +class NnDistanceGradOp : public OpKernel{ | ||
85 | + public: | ||
86 | + explicit NnDistanceGradOp(OpKernelConstruction* context):OpKernel(context){} | ||
87 | + void Compute(OpKernelContext * context)override{ | ||
88 | + const Tensor& xyz1_tensor=context->input(0); | ||
89 | + const Tensor& xyz2_tensor=context->input(1); | ||
90 | + const Tensor& grad_dist1_tensor=context->input(2); | ||
91 | + const Tensor& idx1_tensor=context->input(3); | ||
92 | + const Tensor& grad_dist2_tensor=context->input(4); | ||
93 | + const Tensor& idx2_tensor=context->input(5); | ||
94 | + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz1 be of shape (batch,#points,3)")); | ||
95 | + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz1")); | ||
96 | + int b=xyz1_tensor.shape().dim_size(0); | ||
97 | + int n=xyz1_tensor.shape().dim_size(1); | ||
98 | + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz2 be of shape (batch,#points,3)")); | ||
99 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz2")); | ||
100 | + int m=xyz2_tensor.shape().dim_size(1); | ||
101 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistanceGrad expects xyz1 and xyz2 have same batch size")); | ||
102 | + OP_REQUIRES(context,grad_dist1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires grad_dist1 be of shape(batch,#points)")); | ||
103 | + OP_REQUIRES(context,idx1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires idx1 be of shape(batch,#points)")); | ||
104 | + OP_REQUIRES(context,grad_dist2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires grad_dist2 be of shape(batch,#points)")); | ||
105 | + OP_REQUIRES(context,idx2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires idx2 be of shape(batch,#points)")); | ||
106 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
107 | + const float * xyz1=&xyz1_flat(0); | ||
108 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
109 | + const float * xyz2=&xyz2_flat(0); | ||
110 | + auto idx1_flat=idx1_tensor.flat<int>(); | ||
111 | + const int * idx1=&idx1_flat(0); | ||
112 | + auto idx2_flat=idx2_tensor.flat<int>(); | ||
113 | + const int * idx2=&idx2_flat(0); | ||
114 | + auto grad_dist1_flat=grad_dist1_tensor.flat<float>(); | ||
115 | + const float * grad_dist1=&grad_dist1_flat(0); | ||
116 | + auto grad_dist2_flat=grad_dist2_tensor.flat<float>(); | ||
117 | + const float * grad_dist2=&grad_dist2_flat(0); | ||
118 | + Tensor * grad_xyz1_tensor=NULL; | ||
119 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad_xyz1_tensor)); | ||
120 | + Tensor * grad_xyz2_tensor=NULL; | ||
121 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad_xyz2_tensor)); | ||
122 | + auto grad_xyz1_flat=grad_xyz1_tensor->flat<float>(); | ||
123 | + float * grad_xyz1=&grad_xyz1_flat(0); | ||
124 | + auto grad_xyz2_flat=grad_xyz2_tensor->flat<float>(); | ||
125 | + float * grad_xyz2=&grad_xyz2_flat(0); | ||
126 | + for (int i=0;i<b*n*3;i++) | ||
127 | + grad_xyz1[i]=0; | ||
128 | + for (int i=0;i<b*m*3;i++) | ||
129 | + grad_xyz2[i]=0; | ||
130 | + for (int i=0;i<b;i++){ | ||
131 | + for (int j=0;j<n;j++){ | ||
132 | + float x1=xyz1[(i*n+j)*3+0]; | ||
133 | + float y1=xyz1[(i*n+j)*3+1]; | ||
134 | + float z1=xyz1[(i*n+j)*3+2]; | ||
135 | + int j2=idx1[i*n+j]; | ||
136 | + float x2=xyz2[(i*m+j2)*3+0]; | ||
137 | + float y2=xyz2[(i*m+j2)*3+1]; | ||
138 | + float z2=xyz2[(i*m+j2)*3+2]; | ||
139 | + float g=grad_dist1[i*n+j]*2; | ||
140 | + grad_xyz1[(i*n+j)*3+0]+=g*(x1-x2); | ||
141 | + grad_xyz1[(i*n+j)*3+1]+=g*(y1-y2); | ||
142 | + grad_xyz1[(i*n+j)*3+2]+=g*(z1-z2); | ||
143 | + grad_xyz2[(i*m+j2)*3+0]-=(g*(x1-x2)); | ||
144 | + grad_xyz2[(i*m+j2)*3+1]-=(g*(y1-y2)); | ||
145 | + grad_xyz2[(i*m+j2)*3+2]-=(g*(z1-z2)); | ||
146 | + } | ||
147 | + for (int j=0;j<m;j++){ | ||
148 | + float x1=xyz2[(i*m+j)*3+0]; | ||
149 | + float y1=xyz2[(i*m+j)*3+1]; | ||
150 | + float z1=xyz2[(i*m+j)*3+2]; | ||
151 | + int j2=idx2[i*m+j]; | ||
152 | + float x2=xyz1[(i*n+j2)*3+0]; | ||
153 | + float y2=xyz1[(i*n+j2)*3+1]; | ||
154 | + float z2=xyz1[(i*n+j2)*3+2]; | ||
155 | + float g=grad_dist2[i*m+j]*2; | ||
156 | + grad_xyz2[(i*m+j)*3+0]+=g*(x1-x2); | ||
157 | + grad_xyz2[(i*m+j)*3+1]+=g*(y1-y2); | ||
158 | + grad_xyz2[(i*m+j)*3+2]+=g*(z1-z2); | ||
159 | + grad_xyz1[(i*n+j2)*3+0]-=(g*(x1-x2)); | ||
160 | + grad_xyz1[(i*n+j2)*3+1]-=(g*(y1-y2)); | ||
161 | + grad_xyz1[(i*n+j2)*3+2]-=(g*(z1-z2)); | ||
162 | + } | ||
163 | + } | ||
164 | + } | ||
165 | +}; | ||
166 | +REGISTER_KERNEL_BUILDER(Name("NnDistanceGrad").Device(DEVICE_CPU), NnDistanceGradOp); | ||
167 | + | ||
168 | +void NmDistanceKernelLauncher(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i); | ||
169 | +class NnDistanceGpuOp : public OpKernel{ | ||
170 | + public: | ||
171 | + explicit NnDistanceGpuOp(OpKernelConstruction* context):OpKernel(context){} | ||
172 | + void Compute(OpKernelContext * context)override{ | ||
173 | + const Tensor& xyz1_tensor=context->input(0); | ||
174 | + const Tensor& xyz2_tensor=context->input(1); | ||
175 | + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz1 be of shape (batch,#points,3)")); | ||
176 | + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz1")); | ||
177 | + int b=xyz1_tensor.shape().dim_size(0); | ||
178 | + int n=xyz1_tensor.shape().dim_size(1); | ||
179 | + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistance requires xyz2 be of shape (batch,#points,3)")); | ||
180 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistance only accepts 3d point set xyz2")); | ||
181 | + int m=xyz2_tensor.shape().dim_size(1); | ||
182 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistance expects xyz1 and xyz2 have same batch size")); | ||
183 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
184 | + const float * xyz1=&xyz1_flat(0); | ||
185 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
186 | + const float * xyz2=&xyz2_flat(0); | ||
187 | + Tensor * dist1_tensor=NULL; | ||
188 | + Tensor * idx1_tensor=NULL; | ||
189 | + Tensor * dist2_tensor=NULL; | ||
190 | + Tensor * idx2_tensor=NULL; | ||
191 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n},&dist1_tensor)); | ||
192 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,n},&idx1_tensor)); | ||
193 | + auto dist1_flat=dist1_tensor->flat<float>(); | ||
194 | + auto idx1_flat=idx1_tensor->flat<int>(); | ||
195 | + OP_REQUIRES_OK(context,context->allocate_output(2,TensorShape{b,m},&dist2_tensor)); | ||
196 | + OP_REQUIRES_OK(context,context->allocate_output(3,TensorShape{b,m},&idx2_tensor)); | ||
197 | + auto dist2_flat=dist2_tensor->flat<float>(); | ||
198 | + auto idx2_flat=idx2_tensor->flat<int>(); | ||
199 | + float * dist1=&(dist1_flat(0)); | ||
200 | + int * idx1=&(idx1_flat(0)); | ||
201 | + float * dist2=&(dist2_flat(0)); | ||
202 | + int * idx2=&(idx2_flat(0)); | ||
203 | + NmDistanceKernelLauncher(b,n,xyz1,m,xyz2,dist1,idx1,dist2,idx2); | ||
204 | + } | ||
205 | +}; | ||
206 | +REGISTER_KERNEL_BUILDER(Name("NnDistance").Device(DEVICE_GPU), NnDistanceGpuOp); | ||
207 | + | ||
208 | +void NmDistanceGradKernelLauncher(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2); | ||
209 | +class NnDistanceGradGpuOp : public OpKernel{ | ||
210 | + public: | ||
211 | + explicit NnDistanceGradGpuOp(OpKernelConstruction* context):OpKernel(context){} | ||
212 | + void Compute(OpKernelContext * context)override{ | ||
213 | + const Tensor& xyz1_tensor=context->input(0); | ||
214 | + const Tensor& xyz2_tensor=context->input(1); | ||
215 | + const Tensor& grad_dist1_tensor=context->input(2); | ||
216 | + const Tensor& idx1_tensor=context->input(3); | ||
217 | + const Tensor& grad_dist2_tensor=context->input(4); | ||
218 | + const Tensor& idx2_tensor=context->input(5); | ||
219 | + OP_REQUIRES(context,xyz1_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz1 be of shape (batch,#points,3)")); | ||
220 | + OP_REQUIRES(context,xyz1_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz1")); | ||
221 | + int b=xyz1_tensor.shape().dim_size(0); | ||
222 | + int n=xyz1_tensor.shape().dim_size(1); | ||
223 | + OP_REQUIRES(context,xyz2_tensor.dims()==3,errors::InvalidArgument("NnDistanceGrad requires xyz2 be of shape (batch,#points,3)")); | ||
224 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(2)==3,errors::InvalidArgument("NnDistanceGrad only accepts 3d point set xyz2")); | ||
225 | + int m=xyz2_tensor.shape().dim_size(1); | ||
226 | + OP_REQUIRES(context,xyz2_tensor.shape().dim_size(0)==b,errors::InvalidArgument("NnDistanceGrad expects xyz1 and xyz2 have same batch size")); | ||
227 | + OP_REQUIRES(context,grad_dist1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires grad_dist1 be of shape(batch,#points)")); | ||
228 | + OP_REQUIRES(context,idx1_tensor.shape()==(TensorShape{b,n}),errors::InvalidArgument("NnDistanceGrad requires idx1 be of shape(batch,#points)")); | ||
229 | + OP_REQUIRES(context,grad_dist2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires grad_dist2 be of shape(batch,#points)")); | ||
230 | + OP_REQUIRES(context,idx2_tensor.shape()==(TensorShape{b,m}),errors::InvalidArgument("NnDistanceGrad requires idx2 be of shape(batch,#points)")); | ||
231 | + auto xyz1_flat=xyz1_tensor.flat<float>(); | ||
232 | + const float * xyz1=&xyz1_flat(0); | ||
233 | + auto xyz2_flat=xyz2_tensor.flat<float>(); | ||
234 | + const float * xyz2=&xyz2_flat(0); | ||
235 | + auto idx1_flat=idx1_tensor.flat<int>(); | ||
236 | + const int * idx1=&idx1_flat(0); | ||
237 | + auto idx2_flat=idx2_tensor.flat<int>(); | ||
238 | + const int * idx2=&idx2_flat(0); | ||
239 | + auto grad_dist1_flat=grad_dist1_tensor.flat<float>(); | ||
240 | + const float * grad_dist1=&grad_dist1_flat(0); | ||
241 | + auto grad_dist2_flat=grad_dist2_tensor.flat<float>(); | ||
242 | + const float * grad_dist2=&grad_dist2_flat(0); | ||
243 | + Tensor * grad_xyz1_tensor=NULL; | ||
244 | + OP_REQUIRES_OK(context,context->allocate_output(0,TensorShape{b,n,3},&grad_xyz1_tensor)); | ||
245 | + Tensor * grad_xyz2_tensor=NULL; | ||
246 | + OP_REQUIRES_OK(context,context->allocate_output(1,TensorShape{b,m,3},&grad_xyz2_tensor)); | ||
247 | + auto grad_xyz1_flat=grad_xyz1_tensor->flat<float>(); | ||
248 | + float * grad_xyz1=&grad_xyz1_flat(0); | ||
249 | + auto grad_xyz2_flat=grad_xyz2_tensor->flat<float>(); | ||
250 | + float * grad_xyz2=&grad_xyz2_flat(0); | ||
251 | + NmDistanceGradKernelLauncher(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_dist2,idx2,grad_xyz1,grad_xyz2); | ||
252 | + } | ||
253 | +}; | ||
254 | +REGISTER_KERNEL_BUILDER(Name("NnDistanceGrad").Device(DEVICE_GPU), NnDistanceGradGpuOp); |
1 | +#if GOOGLE_CUDA | ||
2 | +#define EIGEN_USE_GPU | ||
3 | +// #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" | ||
4 | + | ||
5 | +__global__ void NmDistanceKernel(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i){ | ||
6 | + const int batch=512; | ||
7 | + __shared__ float buf[batch*3]; | ||
8 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
9 | + for (int k2=0;k2<m;k2+=batch){ | ||
10 | + int end_k=min(m,k2+batch)-k2; | ||
11 | + for (int j=threadIdx.x;j<end_k*3;j+=blockDim.x){ | ||
12 | + buf[j]=xyz2[(i*m+k2)*3+j]; | ||
13 | + } | ||
14 | + __syncthreads(); | ||
15 | + for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){ | ||
16 | + float x1=xyz[(i*n+j)*3+0]; | ||
17 | + float y1=xyz[(i*n+j)*3+1]; | ||
18 | + float z1=xyz[(i*n+j)*3+2]; | ||
19 | + int best_i=0; | ||
20 | + float best=0; | ||
21 | + int end_ka=end_k-(end_k&3); | ||
22 | + if (end_ka==batch){ | ||
23 | + for (int k=0;k<batch;k+=4){ | ||
24 | + { | ||
25 | + float x2=buf[k*3+0]-x1; | ||
26 | + float y2=buf[k*3+1]-y1; | ||
27 | + float z2=buf[k*3+2]-z1; | ||
28 | + float d=x2*x2+y2*y2+z2*z2; | ||
29 | + if (k==0 || d<best){ | ||
30 | + best=d; | ||
31 | + best_i=k+k2; | ||
32 | + } | ||
33 | + } | ||
34 | + { | ||
35 | + float x2=buf[k*3+3]-x1; | ||
36 | + float y2=buf[k*3+4]-y1; | ||
37 | + float z2=buf[k*3+5]-z1; | ||
38 | + float d=x2*x2+y2*y2+z2*z2; | ||
39 | + if (d<best){ | ||
40 | + best=d; | ||
41 | + best_i=k+k2+1; | ||
42 | + } | ||
43 | + } | ||
44 | + { | ||
45 | + float x2=buf[k*3+6]-x1; | ||
46 | + float y2=buf[k*3+7]-y1; | ||
47 | + float z2=buf[k*3+8]-z1; | ||
48 | + float d=x2*x2+y2*y2+z2*z2; | ||
49 | + if (d<best){ | ||
50 | + best=d; | ||
51 | + best_i=k+k2+2; | ||
52 | + } | ||
53 | + } | ||
54 | + { | ||
55 | + float x2=buf[k*3+9]-x1; | ||
56 | + float y2=buf[k*3+10]-y1; | ||
57 | + float z2=buf[k*3+11]-z1; | ||
58 | + float d=x2*x2+y2*y2+z2*z2; | ||
59 | + if (d<best){ | ||
60 | + best=d; | ||
61 | + best_i=k+k2+3; | ||
62 | + } | ||
63 | + } | ||
64 | + } | ||
65 | + }else{ | ||
66 | + for (int k=0;k<end_ka;k+=4){ | ||
67 | + { | ||
68 | + float x2=buf[k*3+0]-x1; | ||
69 | + float y2=buf[k*3+1]-y1; | ||
70 | + float z2=buf[k*3+2]-z1; | ||
71 | + float d=x2*x2+y2*y2+z2*z2; | ||
72 | + if (k==0 || d<best){ | ||
73 | + best=d; | ||
74 | + best_i=k+k2; | ||
75 | + } | ||
76 | + } | ||
77 | + { | ||
78 | + float x2=buf[k*3+3]-x1; | ||
79 | + float y2=buf[k*3+4]-y1; | ||
80 | + float z2=buf[k*3+5]-z1; | ||
81 | + float d=x2*x2+y2*y2+z2*z2; | ||
82 | + if (d<best){ | ||
83 | + best=d; | ||
84 | + best_i=k+k2+1; | ||
85 | + } | ||
86 | + } | ||
87 | + { | ||
88 | + float x2=buf[k*3+6]-x1; | ||
89 | + float y2=buf[k*3+7]-y1; | ||
90 | + float z2=buf[k*3+8]-z1; | ||
91 | + float d=x2*x2+y2*y2+z2*z2; | ||
92 | + if (d<best){ | ||
93 | + best=d; | ||
94 | + best_i=k+k2+2; | ||
95 | + } | ||
96 | + } | ||
97 | + { | ||
98 | + float x2=buf[k*3+9]-x1; | ||
99 | + float y2=buf[k*3+10]-y1; | ||
100 | + float z2=buf[k*3+11]-z1; | ||
101 | + float d=x2*x2+y2*y2+z2*z2; | ||
102 | + if (d<best){ | ||
103 | + best=d; | ||
104 | + best_i=k+k2+3; | ||
105 | + } | ||
106 | + } | ||
107 | + } | ||
108 | + } | ||
109 | + for (int k=end_ka;k<end_k;k++){ | ||
110 | + float x2=buf[k*3+0]-x1; | ||
111 | + float y2=buf[k*3+1]-y1; | ||
112 | + float z2=buf[k*3+2]-z1; | ||
113 | + float d=x2*x2+y2*y2+z2*z2; | ||
114 | + if (k==0 || d<best){ | ||
115 | + best=d; | ||
116 | + best_i=k+k2; | ||
117 | + } | ||
118 | + } | ||
119 | + if (k2==0 || result[(i*n+j)]>best){ | ||
120 | + result[(i*n+j)]=best; | ||
121 | + result_i[(i*n+j)]=best_i; | ||
122 | + } | ||
123 | + } | ||
124 | + __syncthreads(); | ||
125 | + } | ||
126 | + } | ||
127 | +} | ||
128 | +void NmDistanceKernelLauncher(int b,int n,const float * xyz,int m,const float * xyz2,float * result,int * result_i,float * result2,int * result2_i){ | ||
129 | + NmDistanceKernel<<<dim3(32,16,1),512>>>(b,n,xyz,m,xyz2,result,result_i); | ||
130 | + NmDistanceKernel<<<dim3(32,16,1),512>>>(b,m,xyz2,n,xyz,result2,result2_i); | ||
131 | +} | ||
132 | +__global__ void NmDistanceGradKernel(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,float * grad_xyz1,float * grad_xyz2){ | ||
133 | + for (int i=blockIdx.x;i<b;i+=gridDim.x){ | ||
134 | + for (int j=threadIdx.x+blockIdx.y*blockDim.x;j<n;j+=blockDim.x*gridDim.y){ | ||
135 | + float x1=xyz1[(i*n+j)*3+0]; | ||
136 | + float y1=xyz1[(i*n+j)*3+1]; | ||
137 | + float z1=xyz1[(i*n+j)*3+2]; | ||
138 | + int j2=idx1[i*n+j]; | ||
139 | + float x2=xyz2[(i*m+j2)*3+0]; | ||
140 | + float y2=xyz2[(i*m+j2)*3+1]; | ||
141 | + float z2=xyz2[(i*m+j2)*3+2]; | ||
142 | + float g=grad_dist1[i*n+j]*2; | ||
143 | + atomicAdd(&(grad_xyz1[(i*n+j)*3+0]),g*(x1-x2)); | ||
144 | + atomicAdd(&(grad_xyz1[(i*n+j)*3+1]),g*(y1-y2)); | ||
145 | + atomicAdd(&(grad_xyz1[(i*n+j)*3+2]),g*(z1-z2)); | ||
146 | + atomicAdd(&(grad_xyz2[(i*m+j2)*3+0]),-(g*(x1-x2))); | ||
147 | + atomicAdd(&(grad_xyz2[(i*m+j2)*3+1]),-(g*(y1-y2))); | ||
148 | + atomicAdd(&(grad_xyz2[(i*m+j2)*3+2]),-(g*(z1-z2))); | ||
149 | + } | ||
150 | + } | ||
151 | +} | ||
152 | +void NmDistanceGradKernelLauncher(int b,int n,const float * xyz1,int m,const float * xyz2,const float * grad_dist1,const int * idx1,const float * grad_dist2,const int * idx2,float * grad_xyz1,float * grad_xyz2){ | ||
153 | + cudaMemset(grad_xyz1,0,b*n*3*4); | ||
154 | + cudaMemset(grad_xyz2,0,b*m*3*4); | ||
155 | + NmDistanceGradKernel<<<dim3(1,16,1),256>>>(b,n,xyz1,m,xyz2,grad_dist1,idx1,grad_xyz1,grad_xyz2); | ||
156 | + NmDistanceGradKernel<<<dim3(1,16,1),256>>>(b,m,xyz2,n,xyz1,grad_dist2,idx2,grad_xyz2,grad_xyz1); | ||
157 | +} | ||
158 | + | ||
159 | +#endif |
1 | +import os, sys | ||
2 | +import tensorflow as tf | ||
3 | +from tensorflow.python.framework import ops | ||
4 | +BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | ||
5 | +nn_distance_module=tf.load_op_library(os.path.join(BASE_DIR, 'tf_nndistance_so.so')) | ||
6 | + | ||
7 | +def nn_distance(xyz1,xyz2): | ||
8 | + ''' | ||
9 | +Computes the distance of nearest neighbors for a pair of point clouds | ||
10 | +input: xyz1: (batch_size,#points_1,3) the first point cloud | ||
11 | +input: xyz2: (batch_size,#points_2,3) the second point cloud | ||
12 | +output: dist1: (batch_size,#point_1) distance from first to second | ||
13 | +output: idx1: (batch_size,#point_1) nearest neighbor from first to second | ||
14 | +output: dist2: (batch_size,#point_2) distance from second to first | ||
15 | +output: idx2: (batch_size,#point_2) nearest neighbor from second to first | ||
16 | + ''' | ||
17 | + return nn_distance_module.nn_distance(xyz1,xyz2) | ||
18 | +#@tf.RegisterShape('NnDistance') | ||
19 | +#def _nn_distance_shape(op): | ||
20 | + #shape1=op.inputs[0].get_shape().with_rank(3) | ||
21 | + #shape2=op.inputs[1].get_shape().with_rank(3) | ||
22 | + #return [tf.TensorShape([shape1.dims[0],shape1.dims[1]]),tf.TensorShape([shape1.dims[0],shape1.dims[1]]), | ||
23 | + #tf.TensorShape([shape2.dims[0],shape2.dims[1]]),tf.TensorShape([shape2.dims[0],shape2.dims[1]])] | ||
24 | +@ops.RegisterGradient('NnDistance') | ||
25 | +def _nn_distance_grad(op,grad_dist1,grad_idx1,grad_dist2,grad_idx2): | ||
26 | + xyz1=op.inputs[0] | ||
27 | + xyz2=op.inputs[1] | ||
28 | + idx1=op.outputs[1] | ||
29 | + idx2=op.outputs[3] | ||
30 | + return nn_distance_module.nn_distance_grad(xyz1,xyz2,grad_dist1,idx1,grad_dist2,idx2) | ||
31 | + | ||
32 | + | ||
33 | +if __name__=='__main__': | ||
34 | + import numpy as np | ||
35 | + import random | ||
36 | + import time | ||
37 | + from tensorflow.python.ops.gradient_checker import compute_gradient | ||
38 | + random.seed(100) | ||
39 | + np.random.seed(100) | ||
40 | + with tf.Session('') as sess: | ||
41 | + xyz1=np.random.randn(32,16384,3).astype('float32') | ||
42 | + xyz2=np.random.randn(32,1024,3).astype('float32') | ||
43 | + #with tf.device('/gpu:0'): | ||
44 | + if True: | ||
45 | + inp1=tf.Variable(xyz1) | ||
46 | + inp2=tf.constant(xyz2) | ||
47 | + reta,retb,retc,retd=nn_distance(inp1,inp2) | ||
48 | + loss=tf.reduce_sum(reta)+tf.reduce_sum(retc) | ||
49 | + train=tf.train.GradientDescentOptimizer(learning_rate=0.05).minimize(loss) | ||
50 | + sess.run(tf.initialize_all_variables()) | ||
51 | + t0=time.time() | ||
52 | + t1=t0 | ||
53 | + best=1e100 | ||
54 | + for i in xrange(100): | ||
55 | + trainloss,_=sess.run([loss,train]) | ||
56 | + newt=time.time() | ||
57 | + best=min(best,newt-t1) | ||
58 | + print(i,trainloss,(newt-t0)/(i+1),best) | ||
59 | + t1=newt | ||
60 | + #print sess.run([inp1,retb,inp2,retd]) | ||
61 | + #grads=compute_gradient([inp1,inp2],[(16,32,3),(16,32,3)],loss,(1,),[xyz1,xyz2]) | ||
62 | + #for i,j in grads: | ||
63 | + #print i.shape,j.shape,np.mean(np.abs(i-j)),np.mean(np.abs(i)),np.mean(np.abs(j)) | ||
64 | + #for i in xrange(10): | ||
65 | + #t0=time.time() | ||
66 | + #a,b,c,d=sess.run([reta,retb,retc,retd],feed_dict={inp1:xyz1,inp2:xyz2}) | ||
67 | + #print 'time',time.time()-t0 | ||
68 | + #print a.shape,b.shape,c.shape,d.shape | ||
69 | + #print a.dtype,b.dtype,c.dtype,d.dtype | ||
70 | + #samples=np.array(random.sample(range(xyz2.shape[1]),100),dtype='int32') | ||
71 | + #dist1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).min(axis=-1) | ||
72 | + #idx1=((xyz1[:,samples,None,:]-xyz2[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1) | ||
73 | + #print np.abs(dist1-a[:,samples]).max() | ||
74 | + #print np.abs(idx1-b[:,samples]).max() | ||
75 | + #dist2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).min(axis=-1) | ||
76 | + #idx2=((xyz2[:,samples,None,:]-xyz1[:,None,:,:])**2).sum(axis=-1).argmin(axis=-1) | ||
77 | + #print np.abs(dist2-c[:,samples]).max() | ||
78 | + #print np.abs(idx2-d[:,samples]).max() | ||
79 | + |
code/pcn_modify/pcn/render/.gitignore
0 → 100644
code/pcn_modify/pcn/render/README.md
0 → 100644
1 | +This directory contains code that generates partial point clouds from ShapeNet models. To use it: | ||
2 | +1. Install [Blender](https://blender.org/download/). | ||
3 | +2. Create a model list. Each line of the model list should be in the format `[synset_id]/[model_id]`. | ||
4 | +3. Run `blender -b -P render_depth.py [ShapeNet directory] [model list] [output directory] [num scans per model]` to render the depth images. The images will be stored in OpenEXR format. | ||
5 | +4. Run `python3 process_exr.py [model list] [intrinsics file] [output directory] [num scans per model]` to convert the `.exr` files into 16 bit PNG depth images and point clouds in the model's coordinate frame. | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
code/pcn_modify/pcn/render/process_exr.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import Imath | ||
4 | +import OpenEXR | ||
5 | +import argparse | ||
6 | +import array | ||
7 | +import numpy as np | ||
8 | +import os | ||
9 | +from open3d import * | ||
10 | + | ||
11 | + | ||
12 | +def read_exr(exr_path, height, width): | ||
13 | + file = OpenEXR.InputFile(exr_path) | ||
14 | + depth_arr = array.array('f', file.channel('R', Imath.PixelType(Imath.PixelType.FLOAT))) | ||
15 | + depth = np.array(depth_arr).reshape((height, width)) | ||
16 | + depth[depth < 0] = 0 | ||
17 | + depth[np.isinf(depth)] = 0 | ||
18 | + return depth | ||
19 | + | ||
20 | + | ||
21 | +def depth2pcd(depth, intrinsics, pose): | ||
22 | + inv_K = np.linalg.inv(intrinsics) | ||
23 | + inv_K[2, 2] = -1 | ||
24 | + depth = np.flipud(depth) | ||
25 | + y, x = np.where(depth > 0) | ||
26 | + # image coordinates -> camera coordinates | ||
27 | + points = np.dot(inv_K, np.stack([x, y, np.ones_like(x)] * depth[y, x], 0)) | ||
28 | + # camera coordinates -> world coordinates | ||
29 | + points = np.dot(pose, np.concatenate([points, np.ones((1, points.shape[1]))], 0)).T[:, :3] | ||
30 | + return points | ||
31 | + | ||
32 | + | ||
33 | +if __name__ == '__main__': | ||
34 | + parser = argparse.ArgumentParser() | ||
35 | + parser.add_argument('list_file') | ||
36 | + parser.add_argument('intrinsics_file') | ||
37 | + parser.add_argument('output_dir') | ||
38 | + parser.add_argument('num_scans', type=int) | ||
39 | + args = parser.parse_args() | ||
40 | + | ||
41 | + with open(args.list_file) as file: | ||
42 | + model_list = file.read().splitlines() | ||
43 | + intrinsics = np.loadtxt(args.intrinsics_file) | ||
44 | + width = int(intrinsics[0, 2] * 2) | ||
45 | + height = int(intrinsics[1, 2] * 2) | ||
46 | + | ||
47 | + for model_id in model_list: | ||
48 | + depth_dir = os.path.join(args.output_dir, 'depth', model_id) | ||
49 | + pcd_dir = os.path.join(args.output_dir, 'pcd', model_id) | ||
50 | + os.makedirs(depth_dir, exist_ok=True) | ||
51 | + os.makedirs(pcd_dir, exist_ok=True) | ||
52 | + for i in range(args.num_scans): | ||
53 | + exr_path = os.path.join(args.output_dir, 'exr', model_id, '%d.exr' % i) | ||
54 | + pose_path = os.path.join(args.output_dir, 'pose', model_id, '%d.txt' % i) | ||
55 | + | ||
56 | + depth = read_exr(exr_path, height, width) | ||
57 | + depth_img = Image(np.uint16(depth * 1000)) | ||
58 | + write_image(os.path.join(depth_dir, '%d.png' % i), depth_img) | ||
59 | + | ||
60 | + pose = np.loadtxt(pose_path) | ||
61 | + points = depth2pcd(depth, intrinsics, pose) | ||
62 | + pcd = PointCloud() | ||
63 | + pcd.points = Vector3dVector(points) | ||
64 | + write_point_cloud(os.path.join(pcd_dir, '%d.pcd' % i), pcd) |
code/pcn_modify/pcn/render/render_depth.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import bpy | ||
4 | +import mathutils | ||
5 | +import numpy as np | ||
6 | +import os | ||
7 | +import sys | ||
8 | +import time | ||
9 | + | ||
10 | +# Usage: blender -b -P render_depth.py [ShapeNet directory] [model list] [output directory] [num scans per model] | ||
11 | + | ||
12 | + | ||
13 | +def random_pose(): | ||
14 | + angle_x = np.random.uniform() * 2 * np.pi | ||
15 | + angle_y = np.random.uniform() * 2 * np.pi | ||
16 | + angle_z = np.random.uniform() * 2 * np.pi | ||
17 | + Rx = np.array([[1, 0, 0], | ||
18 | + [0, np.cos(angle_x), -np.sin(angle_x)], | ||
19 | + [0, np.sin(angle_x), np.cos(angle_x)]]) | ||
20 | + Ry = np.array([[np.cos(angle_y), 0, np.sin(angle_y)], | ||
21 | + [0, 1, 0], | ||
22 | + [-np.sin(angle_y), 0, np.cos(angle_y)]]) | ||
23 | + Rz = np.array([[np.cos(angle_z), -np.sin(angle_z), 0], | ||
24 | + [np.sin(angle_z), np.cos(angle_z), 0], | ||
25 | + [0, 0, 1]]) | ||
26 | + R = np.dot(Rz, np.dot(Ry, Rx)) | ||
27 | + # Set camera pointing to the origin and 1 unit away from the origin | ||
28 | + t = np.expand_dims(R[:, 2], 1) | ||
29 | + pose = np.concatenate([np.concatenate([R, t], 1), [[0, 0, 0, 1]]], 0) | ||
30 | + return pose | ||
31 | + | ||
32 | + | ||
33 | +def setup_blender(width, height, focal_length): | ||
34 | + # camera | ||
35 | + camera = bpy.data.objects['Camera'] | ||
36 | + camera.data.angle = np.arctan(width / 2 / focal_length) * 2 | ||
37 | + | ||
38 | + # render layer | ||
39 | + scene = bpy.context.scene | ||
40 | + scene.render.filepath = 'buffer' | ||
41 | + scene.render.image_settings.color_depth = '16' | ||
42 | + scene.render.resolution_percentage = 100 | ||
43 | + scene.render.resolution_x = width | ||
44 | + scene.render.resolution_y = height | ||
45 | + | ||
46 | + # compositor nodes | ||
47 | + scene.use_nodes = True | ||
48 | + tree = scene.node_tree | ||
49 | + rl = tree.nodes.new('CompositorNodeRLayers') | ||
50 | + output = tree.nodes.new('CompositorNodeOutputFile') | ||
51 | + output.base_path = '' | ||
52 | + output.format.file_format = 'OPEN_EXR' | ||
53 | + tree.links.new(rl.outputs['Depth'], output.inputs[0]) | ||
54 | + | ||
55 | + # remove default cube | ||
56 | + bpy.data.objects['Cube'].select = True | ||
57 | + bpy.ops.object.delete() | ||
58 | + | ||
59 | + return scene, camera, output | ||
60 | + | ||
61 | + | ||
62 | +if __name__ == '__main__': | ||
63 | + model_dir = sys.argv[-4] | ||
64 | + list_path = sys.argv[-3] | ||
65 | + output_dir = sys.argv[-2] | ||
66 | + num_scans = int(sys.argv[-1]) | ||
67 | + | ||
68 | + width = 160 | ||
69 | + height = 120 | ||
70 | + focal = 100 | ||
71 | + scene, camera, output = setup_blender(width, height, focal) | ||
72 | + intrinsics = np.array([[focal, 0, width / 2], [0, focal, height / 2], [0, 0, 1]]) | ||
73 | + | ||
74 | + with open(os.path.join(list_path)) as file: | ||
75 | + model_list = [line.strip() for line in file] | ||
76 | + open('blender.log', 'w+').close() | ||
77 | + os.system('rm -rf %s' % output_dir) | ||
78 | + os.makedirs(output_dir) | ||
79 | + np.savetxt(os.path.join(output_dir, 'intrinsics.txt'), intrinsics, '%f') | ||
80 | + | ||
81 | + for model_id in model_list: | ||
82 | + start = time.time() | ||
83 | + exr_dir = os.path.join(output_dir, 'exr', model_id) | ||
84 | + pose_dir = os.path.join(output_dir, 'pose', model_id) | ||
85 | + os.makedirs(exr_dir) | ||
86 | + os.makedirs(pose_dir) | ||
87 | + | ||
88 | + # Redirect output to log file | ||
89 | + old_os_out = os.dup(1) | ||
90 | + os.close(1) | ||
91 | + os.open('blender.log', os.O_WRONLY) | ||
92 | + | ||
93 | + # Import mesh model | ||
94 | + model_path = os.path.join(model_dir, model_id, 'model.obj') | ||
95 | + bpy.ops.import_scene.obj(filepath=model_path) | ||
96 | + | ||
97 | + # Rotate model by 90 degrees around x-axis (z-up => y-up) to match ShapeNet's coordinates | ||
98 | + bpy.ops.transform.rotate(value=-np.pi / 2, axis=(1, 0, 0)) | ||
99 | + | ||
100 | + # Render | ||
101 | + for i in range(num_scans): | ||
102 | + scene.frame_set(i) | ||
103 | + pose = random_pose() | ||
104 | + camera.matrix_world = mathutils.Matrix(pose) | ||
105 | + output.file_slots[0].path = os.path.join(exr_dir, '#.exr') | ||
106 | + bpy.ops.render.render(write_still=True) | ||
107 | + np.savetxt(os.path.join(pose_dir, '%d.txt' % i), pose, '%f') | ||
108 | + | ||
109 | + # Clean up | ||
110 | + bpy.ops.object.delete() | ||
111 | + for m in bpy.data.meshes: | ||
112 | + bpy.data.meshes.remove(m) | ||
113 | + for m in bpy.data.materials: | ||
114 | + m.user_clear() | ||
115 | + bpy.data.materials.remove(m) | ||
116 | + | ||
117 | + # Show time | ||
118 | + os.close(1) | ||
119 | + os.dup(old_os_out) | ||
120 | + os.close(old_os_out) | ||
121 | + print('%s done, time=%.4f sec' % (model_id, time.time() - start)) |
code/pcn_modify/pcn/requirements.txt
0 → 100644
code/pcn_modify/pcn/test_shapenet.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import argparse | ||
4 | +import csv | ||
5 | +import importlib | ||
6 | +import models | ||
7 | +import numpy as np | ||
8 | +import os | ||
9 | +import tensorflow as tf | ||
10 | +import time | ||
11 | +from io_util import read_pcd, save_pcd | ||
12 | +from tf_util import chamfer, earth_mover | ||
13 | +from visu_util import plot_pcd_three_views | ||
14 | + | ||
15 | + | ||
16 | +def test(args): | ||
17 | + inputs = tf.placeholder(tf.float32, (1, None, 3)) | ||
18 | + npts = tf.placeholder(tf.int32, (1,)) | ||
19 | + gt = tf.placeholder(tf.float32, (1, args.num_gt_points, 3)) | ||
20 | + model_module = importlib.import_module('.%s' % args.model_type, 'models') | ||
21 | + model = model_module.Model(inputs, npts, gt, tf.constant(1.0)) | ||
22 | + | ||
23 | + output = tf.placeholder(tf.float32, (1, args.num_gt_points, 3)) | ||
24 | + cd_op = chamfer(output, gt) | ||
25 | + emd_op = earth_mover(output, gt) | ||
26 | + | ||
27 | + config = tf.ConfigProto() | ||
28 | + config.gpu_options.allow_growth = True | ||
29 | + config.allow_soft_placement = True | ||
30 | + sess = tf.Session(config=config) | ||
31 | + | ||
32 | + saver = tf.train.Saver() | ||
33 | + saver.restore(sess, args.checkpoint) | ||
34 | + | ||
35 | + os.makedirs(args.results_dir, exist_ok=True) | ||
36 | + csv_file = open(os.path.join(args.results_dir, 'results.csv'), 'w') | ||
37 | + writer = csv.writer(csv_file) | ||
38 | + writer.writerow(['id', 'cd', 'emd']) | ||
39 | + | ||
40 | + with open(args.list_path) as file: | ||
41 | + model_list = file.read().splitlines() | ||
42 | + total_time = 0 | ||
43 | + total_cd = 0 | ||
44 | + total_emd = 0 | ||
45 | + cd_per_cat = {} | ||
46 | + emd_per_cat = {} | ||
47 | + for i, model_id in enumerate(model_list): | ||
48 | + partial = read_pcd(os.path.join(args.data_dir, 'partial', '%s.pcd' % model_id)) | ||
49 | + complete = read_pcd(os.path.join(args.data_dir, 'complete', '%s.pcd' % model_id)) | ||
50 | + start = time.time() | ||
51 | + completion = sess.run(model.outputs, feed_dict={inputs: [partial], npts: [partial.shape[0]]}) | ||
52 | + total_time += time.time() - start | ||
53 | + cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]}) | ||
54 | + total_cd += cd | ||
55 | + total_emd += emd | ||
56 | + writer.writerow([model_id, cd, emd]) | ||
57 | + | ||
58 | + synset_id, model_id = model_id.split('/') | ||
59 | + if not cd_per_cat.get(synset_id): | ||
60 | + cd_per_cat[synset_id] = [] | ||
61 | + if not emd_per_cat.get(synset_id): | ||
62 | + emd_per_cat[synset_id] = [] | ||
63 | + cd_per_cat[synset_id].append(cd) | ||
64 | + emd_per_cat[synset_id].append(emd) | ||
65 | + | ||
66 | + if i % args.plot_freq == 0: | ||
67 | + os.makedirs(os.path.join(args.results_dir, 'plots', synset_id), exist_ok=True) | ||
68 | + plot_path = os.path.join(args.results_dir, 'plots', synset_id, '%s.png' % model_id) | ||
69 | + plot_pcd_three_views(plot_path, [partial, completion[0], complete], | ||
70 | + ['input', 'output', 'ground truth'], | ||
71 | + 'CD %.4f EMD %.4f' % (cd, emd), | ||
72 | + [5, 0.5, 0.5]) | ||
73 | + if args.save_pcd: | ||
74 | + os.makedirs(os.path.join(args.results_dir, 'pcds', synset_id), exist_ok=True) | ||
75 | + save_pcd(os.path.join(args.results_dir, 'pcds', '%s.pcd' % model_id), completion[0]) | ||
76 | + csv_file.close() | ||
77 | + sess.close() | ||
78 | + | ||
79 | + print('Average time: %f' % (total_time / len(model_list))) | ||
80 | + print('Average Chamfer distance: %f' % (total_cd / len(model_list))) | ||
81 | + print('Average Earth mover distance: %f' % (total_emd / len(model_list))) | ||
82 | + print('Chamfer distance per category') | ||
83 | + for synset_id in cd_per_cat.keys(): | ||
84 | + print(synset_id, '%f' % np.mean(cd_per_cat[synset_id])) | ||
85 | + print('Earth mover distance per category') | ||
86 | + for synset_id in emd_per_cat.keys(): | ||
87 | + print(synset_id, '%f' % np.mean(emd_per_cat[synset_id])) | ||
88 | + | ||
89 | + | ||
90 | +if __name__ == '__main__': | ||
91 | + parser = argparse.ArgumentParser() | ||
92 | + parser.add_argument('--list_path', default='data/shapenet/test.list') | ||
93 | + parser.add_argument('--data_dir', default='data/shapenet/test') | ||
94 | + parser.add_argument('--model_type', default='pcn_emd') | ||
95 | + parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd') | ||
96 | + parser.add_argument('--results_dir', default='results/shapenet_pcn_emd') | ||
97 | + parser.add_argument('--num_gt_points', type=int, default=16384) | ||
98 | + parser.add_argument('--plot_freq', type=int, default=100) | ||
99 | + parser.add_argument('--save_pcd', action='store_true') | ||
100 | + args = parser.parse_args() | ||
101 | + | ||
102 | + test(args) |
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "markdown", | ||
5 | + "metadata": {}, | ||
6 | + "source": [ | ||
7 | + "valid.lmdb를 대상으로 \n", | ||
8 | + "1. cd, emd cost function 값 확인\n", | ||
9 | + "2. 각 표본에 대한 결과 출력\n", | ||
10 | + "3. pcd값 저장해둬서 어떤 결과인지 직접 확인하자 - (이게 발표자료로서 의미가 있을것 같음)" | ||
11 | + ] | ||
12 | + }, | ||
13 | + { | ||
14 | + "cell_type": "code", | ||
15 | + "execution_count": null, | ||
16 | + "metadata": {}, | ||
17 | + "outputs": [], | ||
18 | + "source": [ | ||
19 | + " num_eval_steps = num_valid // args.batch_size\n", | ||
20 | + " total_loss = 0\n", | ||
21 | + " total_time = 0\n", | ||
22 | + " sess.run(tf.local_variables_initializer())\n", | ||
23 | + " for i in range(num_eval_steps):\n", | ||
24 | + " start = time.time()\n", | ||
25 | + " ids, inputs, npts, gt = next(valid_gen)\n", | ||
26 | + " feed_dict = {inputs_pl: inputs,my_inputs_pl:my_inputs, npts_pl: npts, gt_pl: gt, is_training_pl: False}\n", | ||
27 | + " loss, _ = sess.run([model.loss, model.update], feed_dict=feed_dict)\n", | ||
28 | + " total_loss += loss\n", | ||
29 | + " total_time += time.time() - start\n", | ||
30 | + " summary = sess.run(valid_summary, feed_dict={is_training_pl: False})\n", | ||
31 | + " writer.add_summary(summary, step)\n", | ||
32 | + " print(colored('epoch %d step %d loss %.8f - time per batch %.4f' %\n", | ||
33 | + " (epoch, step, total_loss / num_eval_steps, total_time / num_eval_steps),\n", | ||
34 | + " 'grey', 'on_green'))\n", | ||
35 | + " total_time = 0\n", | ||
36 | + " if step % args.steps_per_visu == 0:\n", | ||
37 | + " all_pcds = sess.run(model.visualize_ops, feed_dict=feed_dict)\n", | ||
38 | + " for i in range(0, args.batch_size, args.visu_freq):\n", | ||
39 | + " plot_path = os.path.join(args.log_dir, 'plots',\n", | ||
40 | + " 'epoch_%d_step_%d_%s.png' % (epoch, step, ids[i]))\n", | ||
41 | + " pcds = [x[i] for x in all_pcds]\n", | ||
42 | + " plot_pcd_three_views(plot_path, pcds, model.visualize_titles)" | ||
43 | + ] | ||
44 | + }, | ||
45 | + { | ||
46 | + "cell_type": "code", | ||
47 | + "execution_count": 2, | ||
48 | + "metadata": {}, | ||
49 | + "outputs": [ | ||
50 | + { | ||
51 | + "name": "stdout", | ||
52 | + "output_type": "stream", | ||
53 | + "text": [ | ||
54 | + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/models/pcn_emd.py:21: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.\n", | ||
55 | + "\n", | ||
56 | + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/models/pcn_emd.py:21: The name tf.AUTO_REUSE is deprecated. Please use tf.compat.v1.AUTO_REUSE instead.\n", | ||
57 | + "\n", | ||
58 | + "WARNING:tensorflow:\n", | ||
59 | + "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n", | ||
60 | + "For more information, please see:\n", | ||
61 | + " * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n", | ||
62 | + " * https://github.com/tensorflow/addons\n", | ||
63 | + " * https://github.com/tensorflow/io (for I/O related ops)\n", | ||
64 | + "If you depend on functionality not listed there, please file an issue.\n", | ||
65 | + "\n", | ||
66 | + "WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1057: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n", | ||
67 | + "Instructions for updating:\n", | ||
68 | + "Please use `layer.__call__` method instead.\n", | ||
69 | + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/tf_util.py:71: The name tf.summary.scalar is deprecated. Please use tf.compat.v1.summary.scalar instead.\n", | ||
70 | + "\n", | ||
71 | + "WARNING:tensorflow:From /tf/tensorflow-tutorials/pcn_modify/pcn/tf_util.py:75: The name tf.metrics.mean is deprecated. Please use tf.compat.v1.metrics.mean instead.\n", | ||
72 | + "\n", | ||
73 | + "INFO:tensorflow:Restoring parameters from ./log/pcn_emd_car_modify/model-26000\n", | ||
74 | + "Average time: 0.049774\n", | ||
75 | + "Average Chamfer distance: 0.009361\n", | ||
76 | + "Average Earth mover distance: 0.051862\n", | ||
77 | + "Chamfer distance per category\n", | ||
78 | + "02958343 0.009361\n", | ||
79 | + "Earth mover distance per category\n", | ||
80 | + "02958343 0.051862\n" | ||
81 | + ] | ||
82 | + } | ||
83 | + ], | ||
84 | + "source": [ | ||
85 | + "# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018\n", | ||
86 | + "\n", | ||
87 | + "import argparse\n", | ||
88 | + "import csv\n", | ||
89 | + "import importlib\n", | ||
90 | + "import models\n", | ||
91 | + "import numpy as np\n", | ||
92 | + "import os\n", | ||
93 | + "import tensorflow as tf\n", | ||
94 | + "import time\n", | ||
95 | + "\n", | ||
96 | + "from tf_util import chamfer, earth_mover\n", | ||
97 | + "from visu_util import plot_pcd_three_views\n", | ||
98 | + "from data_util import lmdb_dataflow, get_queued_data, resample_pcd\n", | ||
99 | + "\n", | ||
100 | + "##################\n", | ||
101 | + "#from io_util import read_pcd, save_pcd 이부분 그냥 포함시켜버렸다...ㅎㅎ\n", | ||
102 | + "import numpy as np\n", | ||
103 | + "#from open3d import *\n", | ||
104 | + "import open3d as o3d\n", | ||
105 | + "\n", | ||
106 | + "\n", | ||
107 | + "def read_pcd(filename):\n", | ||
108 | + " pcd = o3d.io.read_point_cloud(filename)\n", | ||
109 | + " return np.array(pcd.points)\n", | ||
110 | + "\n", | ||
111 | + "\n", | ||
112 | + "def save_pcd(filename, points):\n", | ||
113 | + " pcd = o3d.geometry.PointCloud()\n", | ||
114 | + " pcd.points = o3d.utility.Vector3dVector(points)\n", | ||
115 | + " o3d.io.write_point_cloud(filename, pcd)\n", | ||
116 | + "##################\n", | ||
117 | + "\n", | ||
118 | + "list_path ='../../data/shapenet/car_test.list' \n", | ||
119 | + "data_dir = '../../data/shapenet/test'\n", | ||
120 | + "model_type = 'pcn_emd'\n", | ||
121 | + "checkpoint = './log/pcn_emd_car_modify'\n", | ||
122 | + "results_dir ='results/shapenet_pcn_emd_car_modify'\n", | ||
123 | + "num_gt_points = 16384\n", | ||
124 | + "plot_freq = 1\n", | ||
125 | + "_save_pcd = True\n", | ||
126 | + "lmdb_valid = ''\n", | ||
127 | + "num_input_points=3000\n", | ||
128 | + "\n", | ||
129 | + "def test():\n", | ||
130 | + " inputs = tf.placeholder(tf.float32, (1, None, 3))\n", | ||
131 | + " my_inputs = tf.placeholder(tf.float32, (1, None, 3))\n", | ||
132 | + " npts = tf.placeholder(tf.int32, (1,))\n", | ||
133 | + " gt = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n", | ||
134 | + " model_module = importlib.import_module('.%s' % model_type, 'models')\n", | ||
135 | + " model = model_module.Model(inputs,my_inputs, npts, gt, tf.constant(1.0))\n", | ||
136 | + "\n", | ||
137 | + " output = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n", | ||
138 | + " cd_op = chamfer(output, gt)\n", | ||
139 | + " emd_op = earth_mover(output, gt)\n", | ||
140 | + "\n", | ||
141 | + " #####\n", | ||
142 | + " df_valid, num_valid = lmdb_dataflow(\n", | ||
143 | + " lmdb_valid, 1, num_input_points, num_gt_points, is_training=False)\n", | ||
144 | + " valid_gen = df_valid.get_data()\n", | ||
145 | + " #####\n", | ||
146 | + " \n", | ||
147 | + " config = tf.ConfigProto()\n", | ||
148 | + " config.gpu_options.allow_growth = True\n", | ||
149 | + " config.allow_soft_placement = True\n", | ||
150 | + " sess = tf.Session(config=config)\n", | ||
151 | + "\n", | ||
152 | + " saver = tf.train.Saver()\n", | ||
153 | + " saver.restore(sess, tf.train.latest_checkpoint(checkpoint))\n", | ||
154 | + "\n", | ||
155 | + " os.makedirs(results_dir, exist_ok=True)\n", | ||
156 | + " csv_file = open(os.path.join(results_dir, 'results.csv'), 'w') # 각 항목별로 cd, emd 구해줌.\n", | ||
157 | + " writer = csv.writer(csv_file)\n", | ||
158 | + " writer.writerow(['id', 'cd', 'emd'])\n", | ||
159 | + "\n", | ||
160 | + " with open(list_path) as file:\n", | ||
161 | + " model_list = file.read().splitlines()\n", | ||
162 | + " total_time = 0\n", | ||
163 | + " total_cd = 0\n", | ||
164 | + " total_emd = 0\n", | ||
165 | + " cd_per_cat = {}\n", | ||
166 | + " emd_per_cat = {}\n", | ||
167 | + " for i, model_id in enumerate(model_list):\n", | ||
168 | + " partial = read_pcd(os.path.join(data_dir, 'partial', '%s.pcd' % model_id))\n", | ||
169 | + " complete = read_pcd(os.path.join(data_dir, 'complete', '%s.pcd' % model_id))\n", | ||
170 | + " start = time.time()\n", | ||
171 | + " completion = sess.run(model.outputs, feed_dict={inputs: [partial],my_inputs:[partial], npts: [partial.shape[0]]})\n", | ||
172 | + " total_time += time.time() - start\n", | ||
173 | + " cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]})\n", | ||
174 | + " total_cd += cd\n", | ||
175 | + " total_emd += emd\n", | ||
176 | + " writer.writerow([model_id, cd, emd]) #항목별 cd,emd 써줌\n", | ||
177 | + "\n", | ||
178 | + " # 카테고리별 cd,emd 얻음\n", | ||
179 | + " synset_id, model_id = model_id.split('/')\n", | ||
180 | + " if not cd_per_cat.get(synset_id):\n", | ||
181 | + " cd_per_cat[synset_id] = []\n", | ||
182 | + " if not emd_per_cat.get(synset_id):\n", | ||
183 | + " emd_per_cat[synset_id] = []\n", | ||
184 | + " cd_per_cat[synset_id].append(cd)\n", | ||
185 | + " emd_per_cat[synset_id].append(emd)\n", | ||
186 | + " \n", | ||
187 | + " # 3가지 view에서 모델 input,gt,output보여줌.\n", | ||
188 | + " if i % plot_freq == 0:\n", | ||
189 | + " os.makedirs(os.path.join(results_dir, 'plots', synset_id), exist_ok=True)\n", | ||
190 | + " plot_path = os.path.join(results_dir, 'plots', synset_id, '%s.png' % model_id)\n", | ||
191 | + " plot_pcd_three_views(plot_path, [partial, completion[0], complete],\n", | ||
192 | + " ['input', 'output', 'ground truth'],\n", | ||
193 | + " 'CD %.4f EMD %.4f' % (cd, emd),\n", | ||
194 | + " [5, 0.5, 0.5])\n", | ||
195 | + " if _save_pcd:\n", | ||
196 | + " os.makedirs(os.path.join(results_dir, 'pcds', synset_id), exist_ok=True)\n", | ||
197 | + " save_pcd(os.path.join(results_dir, 'pcds', '%s.pcd' % model_id), completion[0])\n", | ||
198 | + " csv_file.close()\n", | ||
199 | + " sess.close()\n", | ||
200 | + "\n", | ||
201 | + " print('Average time: %f' % (total_time / len(model_list)))\n", | ||
202 | + " print('Average Chamfer distance: %f' % (total_cd / len(model_list)))\n", | ||
203 | + " print('Average Earth mover distance: %f' % (total_emd / len(model_list)))\n", | ||
204 | + " print('Chamfer distance per category')\n", | ||
205 | + " for synset_id in cd_per_cat.keys():\n", | ||
206 | + " print(synset_id, '%f' % np.mean(cd_per_cat[synset_id]))\n", | ||
207 | + " print('Earth mover distance per category')\n", | ||
208 | + " for synset_id in emd_per_cat.keys():\n", | ||
209 | + " print(synset_id, '%f' % np.mean(emd_per_cat[synset_id]))\n", | ||
210 | + "'''\n", | ||
211 | + "\n", | ||
212 | + "if __name__ == '__main__':\n", | ||
213 | + " parser = argparse.ArgumentParser()\n", | ||
214 | + " parser.add_argument('--list_path', default='data/shapenet/test.list')\n", | ||
215 | + " parser.add_argument('--data_dir', default='data/shapenet/test')\n", | ||
216 | + " parser.add_argument('--model_type', default='pcn_emd')\n", | ||
217 | + " parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd')\n", | ||
218 | + " parser.add_argument('--results_dir', default='results/shapenet_pcn_emd')\n", | ||
219 | + " parser.add_argument('--num_gt_points', type=int, default=16384)\n", | ||
220 | + " parser.add_argument('--plot_freq', type=int, default=100)\n", | ||
221 | + " parser.add_argument('--save_pcd', action='store_true')\n", | ||
222 | + " args = parser.parse_args()\n", | ||
223 | + "\n", | ||
224 | + " test(args)\n", | ||
225 | + "'''\n", | ||
226 | + "test()" | ||
227 | + ] | ||
228 | + }, | ||
229 | + { | ||
230 | + "cell_type": "code", | ||
231 | + "execution_count": 9, | ||
232 | + "metadata": {}, | ||
233 | + "outputs": [ | ||
234 | + { | ||
235 | + "ename": "TypeError", | ||
236 | + "evalue": "read_point_cloud(): incompatible function arguments. The following argument types are supported:\n 1. (filename: str, format: str = 'auto', remove_nan_points: bool = True, remove_infinite_points: bool = True, print_progress: bool = False) -> open3d.open3d.geometry.PointCloud\n\nInvoked with: ", | ||
237 | + "output_type": "error", | ||
238 | + "traceback": [ | ||
239 | + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
240 | + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", | ||
241 | + "\u001b[0;32m<ipython-input-9-77d1fb1ef881>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mopen3d\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_point_cloud\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | ||
242 | + "\u001b[0;31mTypeError\u001b[0m: read_point_cloud(): incompatible function arguments. The following argument types are supported:\n 1. (filename: str, format: str = 'auto', remove_nan_points: bool = True, remove_infinite_points: bool = True, print_progress: bool = False) -> open3d.open3d.geometry.PointCloud\n\nInvoked with: " | ||
243 | + ] | ||
244 | + } | ||
245 | + ], | ||
246 | + "source": [ | ||
247 | + "from open3d import *\n", | ||
248 | + "io.read_point_cloud()" | ||
249 | + ] | ||
250 | + } | ||
251 | + ], | ||
252 | + "metadata": { | ||
253 | + "kernelspec": { | ||
254 | + "display_name": "Python 3", | ||
255 | + "language": "python", | ||
256 | + "name": "python3" | ||
257 | + }, | ||
258 | + "language_info": { | ||
259 | + "codemirror_mode": { | ||
260 | + "name": "ipython", | ||
261 | + "version": 3 | ||
262 | + }, | ||
263 | + "file_extension": ".py", | ||
264 | + "mimetype": "text/x-python", | ||
265 | + "name": "python", | ||
266 | + "nbconvert_exporter": "python", | ||
267 | + "pygments_lexer": "ipython3", | ||
268 | + "version": "3.6.9" | ||
269 | + } | ||
270 | + }, | ||
271 | + "nbformat": 4, | ||
272 | + "nbformat_minor": 4 | ||
273 | +} |
code/pcn_modify/pcn/test_with_lmdb.ipynb
0 → 100644
1 | +{ | ||
2 | + "cells": [ | ||
3 | + { | ||
4 | + "cell_type": "code", | ||
5 | + "execution_count": 15, | ||
6 | + "metadata": {}, | ||
7 | + "outputs": [ | ||
8 | + { | ||
9 | + "name": "stdout", | ||
10 | + "output_type": "stream", | ||
11 | + "text": [ | ||
12 | + "\u001b[32m[0603 12:20:31 @format.py:92]\u001b[0m Found 100 entries in ../../../data/shapenet-car/valid.lmdb\n", | ||
13 | + "INFO:tensorflow:Restoring parameters from ./log/pcn_addbeta_lr/model-81500\n", | ||
14 | + "Average Chamfer distance: 0.009029\n", | ||
15 | + "Average Earth mover distance: 0.053403\n" | ||
16 | + ] | ||
17 | + } | ||
18 | + ], | ||
19 | + "source": [ | ||
20 | + "# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018\n", | ||
21 | + "\n", | ||
22 | + "import argparse\n", | ||
23 | + "import csv\n", | ||
24 | + "import importlib\n", | ||
25 | + "import models\n", | ||
26 | + "import numpy as np\n", | ||
27 | + "import os\n", | ||
28 | + "import tensorflow as tf\n", | ||
29 | + "import time\n", | ||
30 | + "\n", | ||
31 | + "from tf_util import chamfer, earth_mover\n", | ||
32 | + "from visu_util import plot_pcd_three_views\n", | ||
33 | + "from data_util import lmdb_dataflow, get_queued_data\n", | ||
34 | + "\n", | ||
35 | + "##################\n", | ||
36 | + "#from io_util import read_pcd, save_pcd 이부분 그냥 포함시켜버렸다...ㅎㅎ\n", | ||
37 | + "import numpy as np\n", | ||
38 | + "#from open3d import *\n", | ||
39 | + "import open3d as o3d\n", | ||
40 | + "\n", | ||
41 | + "\n", | ||
42 | + "def read_pcd(filename):\n", | ||
43 | + " pcd = o3d.io.read_point_cloud(filename)\n", | ||
44 | + " return np.array(pcd.points)\n", | ||
45 | + "\n", | ||
46 | + "\n", | ||
47 | + "def save_pcd(filename, points):\n", | ||
48 | + " pcd = o3d.geometry.PointCloud()\n", | ||
49 | + " pcd.points = o3d.utility.Vector3dVector(points)\n", | ||
50 | + " o3d.io.write_point_cloud(filename, pcd)\n", | ||
51 | + "##################\n", | ||
52 | + "\n", | ||
53 | + "#list_path ='../../data/shapenet/car_test.list' \n", | ||
54 | + "#data_dir = '../../data/shapenet/test'\n", | ||
55 | + "model_type = 'pcn_emd'\n", | ||
56 | + "checkpoint = './log/pcn_addbeta_lr'\n", | ||
57 | + "results_dir ='results/pcn_addbeta_lr'\n", | ||
58 | + "num_gt_points = 16384\n", | ||
59 | + "plot_freq = 1\n", | ||
60 | + "_save_pcd = True\n", | ||
61 | + "lmdb_valid='../../../data/shapenet-car/valid.lmdb'\n", | ||
62 | + "num_input_points=3000\n", | ||
63 | + "\n", | ||
64 | + "def test():\n", | ||
65 | + " inputs = tf.placeholder(tf.float32, (1, None, 3))\n", | ||
66 | + " my_inputs = tf.placeholder(tf.float32, (1, None, 3))\n", | ||
67 | + " npts = tf.placeholder(tf.int32, (1,))\n", | ||
68 | + " gt = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n", | ||
69 | + " model_module = importlib.import_module('.%s' % model_type, 'models')\n", | ||
70 | + " model = model_module.Model(inputs,my_inputs, npts, gt, tf.constant(1.0),tf.constant(1.0))\n", | ||
71 | + "\n", | ||
72 | + " output = tf.placeholder(tf.float32, (1, num_gt_points, 3))\n", | ||
73 | + " cd_op = chamfer(output, gt)\n", | ||
74 | + " emd_op = earth_mover(output, gt)\n", | ||
75 | + "\n", | ||
76 | + " ###\n", | ||
77 | + " df_valid, num_valid = lmdb_dataflow(\n", | ||
78 | + " lmdb_valid, 1, num_input_points, num_gt_points, is_training=False)\n", | ||
79 | + " valid_gen = df_valid.get_data()\n", | ||
80 | + " \n", | ||
81 | + " config = tf.ConfigProto()\n", | ||
82 | + " config.gpu_options.allow_growth = True\n", | ||
83 | + " config.allow_soft_placement = True\n", | ||
84 | + " sess = tf.Session(config=config)\n", | ||
85 | + "\n", | ||
86 | + " saver = tf.train.Saver()\n", | ||
87 | + " saver.restore(sess, tf.train.latest_checkpoint(checkpoint))\n", | ||
88 | + "\n", | ||
89 | + " os.makedirs(results_dir, exist_ok=True)\n", | ||
90 | + " csv_file = open(os.path.join(results_dir, 'results.csv'), 'w') # 각 항목별로 cd, emd 구해줌.\n", | ||
91 | + " writer = csv.writer(csv_file)\n", | ||
92 | + " writer.writerow(['id', 'cd', 'emd'])\n", | ||
93 | + "\n", | ||
94 | + " ###\n", | ||
95 | + " total_time = 0\n", | ||
96 | + " total_cd = 0\n", | ||
97 | + " total_emd = 0\n", | ||
98 | + " for i in range(num_valid):\n", | ||
99 | + " ids,iinputs,inpts,igt = next(valid_gen)\n", | ||
100 | + "\n", | ||
101 | + " completion = sess.run(model.outputs, feed_dict={inputs:iinputs, my_inputs:iinputs, npts:inpts})\n", | ||
102 | + " cd,emd = sess.run([cd_op,emd_op],feed_dict={output: completion, gt:igt})\n", | ||
103 | + " total_cd +=cd\n", | ||
104 | + " total_emd +=emd\n", | ||
105 | + " writer.writerow([ids,cd,emd]) #항목별 cd,emd\n", | ||
106 | + " \n", | ||
107 | + " if i % plot_freq == 0:\n", | ||
108 | + " os.makedirs(os.path.join(results_dir, 'plots'), exist_ok=True)\n", | ||
109 | + " plot_path = os.path.join(results_dir, 'plots', '%s.png' % ids)\n", | ||
110 | + "# print(iinputs.shape,completion[0].shape,igt.shape)###\n", | ||
111 | + "\n", | ||
112 | + " plot_pcd_three_views(plot_path, [iinputs.reshape(iinputs.shape[1],iinputs.shape[2]), completion[0], igt.reshape(igt.shape[1],igt.shape[2])],\n", | ||
113 | + " ['input', 'output', 'ground truth'],\n", | ||
114 | + " 'CD %.4f EMD %.4f' % (cd, emd),\n", | ||
115 | + " [5, 0.5, 0.5])\n", | ||
116 | + " if _save_pcd:\n", | ||
117 | + " os.makedirs(os.path.join(results_dir, 'pcds'), exist_ok=True)\n", | ||
118 | + " save_pcd(os.path.join(results_dir, 'pcds', '%s.pcd' % ids), completion[0])\n", | ||
119 | + " csv_file.close()\n", | ||
120 | + " sess.close()\n", | ||
121 | + "\n", | ||
122 | + " print('Average Chamfer distance: %f' % (total_cd / num_valid))\n", | ||
123 | + " print('Average Earth mover distance: %f' % (total_emd / num_valid))\n", | ||
124 | + "######################\n", | ||
125 | + "''' \n", | ||
126 | + " with open(list_path) as file:\n", | ||
127 | + " model_list = file.read().splitlines()\n", | ||
128 | + " total_time = 0\n", | ||
129 | + " total_cd = 0\n", | ||
130 | + " total_emd = 0\n", | ||
131 | + " cd_per_cat = {}\n", | ||
132 | + " emd_per_cat = {}\n", | ||
133 | + " for i, model_id in enumerate(model_list):\n", | ||
134 | + " partial = read_pcd(os.path.join(data_dir, 'partial', '%s.pcd' % model_id))\n", | ||
135 | + " complete = read_pcd(os.path.join(data_dir, 'complete', '%s.pcd' % model_id))\n", | ||
136 | + " start = time.time()\n", | ||
137 | + " completion = sess.run(model.outputs, feed_dict={inputs: [partial],my_inputs:[partial], npts: [partial.shape[0]]})\n", | ||
138 | + " total_time += time.time() - start\n", | ||
139 | + " cd, emd = sess.run([cd_op, emd_op], feed_dict={output: completion, gt: [complete]})\n", | ||
140 | + " total_cd += cd\n", | ||
141 | + " total_emd += emd\n", | ||
142 | + " writer.writerow([model_id, cd, emd]) #항목별 cd,emd 써줌\n", | ||
143 | + "\n", | ||
144 | + " # 카테고리별 cd,emd 얻음\n", | ||
145 | + " synset_id, model_id = model_id.split('/')\n", | ||
146 | + " if not cd_per_cat.get(synset_id):\n", | ||
147 | + " cd_per_cat[synset_id] = []\n", | ||
148 | + " if not emd_per_cat.get(synset_id):\n", | ||
149 | + " emd_per_cat[synset_id] = []\n", | ||
150 | + " cd_per_cat[synset_id].append(cd)\n", | ||
151 | + " emd_per_cat[synset_id].append(emd)\n", | ||
152 | + " \n", | ||
153 | + " # 3가지 view에서 모델 input,gt,output보여줌.\n", | ||
154 | + " if i % plot_freq == 0:\n", | ||
155 | + " os.makedirs(os.path.join(results_dir, 'plots', synset_id), exist_ok=True)\n", | ||
156 | + " plot_path = os.path.join(results_dir, 'plots', synset_id, '%s.png' % model_id)\n", | ||
157 | + " plot_pcd_three_views(plot_path, [partial, completion[0], complete],\n", | ||
158 | + " ['input', 'output', 'ground truth'],\n", | ||
159 | + " 'CD %.4f EMD %.4f' % (cd, emd),\n", | ||
160 | + " [5, 0.5, 0.5])\n", | ||
161 | + " if _save_pcd:\n", | ||
162 | + " os.makedirs(os.path.join(results_dir, 'pcds', synset_id), exist_ok=True)\n", | ||
163 | + " save_pcd(os.path.join(results_dir, 'pcds', '%s.pcd' % model_id), completion[0])\n", | ||
164 | + " csv_file.close()\n", | ||
165 | + " sess.close()\n", | ||
166 | + "\n", | ||
167 | + " print('Average time: %f' % (total_time / len(model_list)))\n", | ||
168 | + " print('Average Chamfer distance: %f' % (total_cd / len(model_list)))\n", | ||
169 | + " print('Average Earth mover distance: %f' % (total_emd / len(model_list)))\n", | ||
170 | + " print('Chamfer distance per category')\n", | ||
171 | + " for synset_id in cd_per_cat.keys():\n", | ||
172 | + " print(synset_id, '%f' % np.mean(cd_per_cat[synset_id]))\n", | ||
173 | + " print('Earth mover distance per category')\n", | ||
174 | + " for synset_id in emd_per_cat.keys():\n", | ||
175 | + " print(synset_id, '%f' % np.mean(emd_per_cat[synset_id]))\n", | ||
176 | + " '''\n", | ||
177 | + "'''\n", | ||
178 | + "\n", | ||
179 | + "if __name__ == '__main__':\n", | ||
180 | + " parser = argparse.ArgumentParser()\n", | ||
181 | + " parser.add_argument('--list_path', default='data/shapenet/test.list')\n", | ||
182 | + " parser.add_argument('--data_dir', default='data/shapenet/test')\n", | ||
183 | + " parser.add_argument('--model_type', default='pcn_emd')\n", | ||
184 | + " parser.add_argument('--checkpoint', default='data/trained_models/pcn_emd')\n", | ||
185 | + " parser.add_argument('--results_dir', default='results/shapenet_pcn_emd')\n", | ||
186 | + " parser.add_argument('--num_gt_points', type=int, default=16384)\n", | ||
187 | + " parser.add_argument('--plot_freq', type=int, default=100)\n", | ||
188 | + " parser.add_argument('--save_pcd', action='store_true')\n", | ||
189 | + " args = parser.parse_args()\n", | ||
190 | + "\n", | ||
191 | + " test(args)\n", | ||
192 | + "'''\n", | ||
193 | + "test()" | ||
194 | + ] | ||
195 | + }, | ||
196 | + { | ||
197 | + "cell_type": "code", | ||
198 | + "execution_count": null, | ||
199 | + "metadata": {}, | ||
200 | + "outputs": [], | ||
201 | + "source": [] | ||
202 | + } | ||
203 | + ], | ||
204 | + "metadata": { | ||
205 | + "kernelspec": { | ||
206 | + "display_name": "Python 3", | ||
207 | + "language": "python", | ||
208 | + "name": "python3" | ||
209 | + }, | ||
210 | + "language_info": { | ||
211 | + "codemirror_mode": { | ||
212 | + "name": "ipython", | ||
213 | + "version": 3 | ||
214 | + }, | ||
215 | + "file_extension": ".py", | ||
216 | + "mimetype": "text/x-python", | ||
217 | + "name": "python", | ||
218 | + "nbconvert_exporter": "python", | ||
219 | + "pygments_lexer": "ipython3", | ||
220 | + "version": "3.6.9" | ||
221 | + } | ||
222 | + }, | ||
223 | + "nbformat": 4, | ||
224 | + "nbformat_minor": 4 | ||
225 | +} |
code/pcn_modify/pcn/tf_util.py
0 → 100755
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +import tensorflow as tf | ||
4 | +from pc_distance import tf_nndistance, tf_approxmatch | ||
5 | + | ||
6 | + | ||
7 | +def mlp(features, layer_dims, bn=None, bn_params=None): | ||
8 | + for i, num_outputs in enumerate(layer_dims[:-1]): | ||
9 | + features = tf.contrib.layers.fully_connected( | ||
10 | + features, num_outputs, | ||
11 | + normalizer_fn=bn, | ||
12 | + normalizer_params=bn_params, | ||
13 | + scope='fc_%d' % i) | ||
14 | + outputs = tf.contrib.layers.fully_connected( | ||
15 | + features, layer_dims[-1], | ||
16 | + activation_fn=None, | ||
17 | + scope='fc_%d' % (len(layer_dims) - 1)) | ||
18 | + return outputs | ||
19 | + | ||
20 | + | ||
21 | +def mlp_conv(inputs, layer_dims, bn=None, bn_params=None): | ||
22 | + for i, num_out_channel in enumerate(layer_dims[:-1]): | ||
23 | + inputs = tf.contrib.layers.conv1d( | ||
24 | + inputs, num_out_channel, | ||
25 | + kernel_size=1, | ||
26 | + normalizer_fn=bn, | ||
27 | + normalizer_params=bn_params, | ||
28 | + scope='conv_%d' % i) | ||
29 | + outputs = tf.contrib.layers.conv1d( | ||
30 | + inputs, layer_dims[-1], | ||
31 | + kernel_size=1, | ||
32 | + activation_fn=None, | ||
33 | + scope='conv_%d' % (len(layer_dims) - 1)) | ||
34 | + return outputs | ||
35 | + | ||
36 | + | ||
37 | +def point_maxpool(inputs, npts, keepdims=False): | ||
38 | + outputs = [tf.reduce_max(f, axis=1, keepdims=keepdims) | ||
39 | + for f in tf.split(inputs, npts, axis=1)] | ||
40 | + return tf.concat(outputs, axis=0) | ||
41 | + | ||
42 | + | ||
43 | +def point_unpool(inputs, npts): | ||
44 | + inputs = tf.split(inputs, inputs.shape[0], axis=0) | ||
45 | + outputs = [tf.tile(f, [1, npts[i], 1]) for i,f in enumerate(inputs)] | ||
46 | + return tf.concat(outputs, axis=1) | ||
47 | + | ||
48 | + | ||
49 | +def chamfer(pcd1, pcd2): | ||
50 | + dist1, _, dist2, _ = tf_nndistance.nn_distance(pcd1, pcd2) | ||
51 | + dist1 = tf.reduce_mean(tf.sqrt(dist1)) | ||
52 | + dist2 = tf.reduce_mean(tf.sqrt(dist2)) | ||
53 | + return (dist1 + dist2) / 2 | ||
54 | + | ||
55 | +def my_chamfer(pcd1, pcd2): | ||
56 | + dist1, _, dist2, _ = tf_nndistance.nn_distance(pcd1, pcd2) | ||
57 | + dist1 = tf.reduce_mean(tf.sqrt(dist1)) | ||
58 | + #dist2 = tf.reduce_mean(tf.sqrt(dist2)) | ||
59 | + return dist1 | ||
60 | + | ||
61 | + | ||
62 | +def earth_mover(pcd1, pcd2): | ||
63 | + assert pcd1.shape[1] == pcd2.shape[1] | ||
64 | + num_points = tf.cast(pcd1.shape[1], tf.float32) | ||
65 | + match = tf_approxmatch.approx_match(pcd1, pcd2) | ||
66 | + cost = tf_approxmatch.match_cost(pcd1, pcd2, match) | ||
67 | + return tf.reduce_mean(cost / num_points) | ||
68 | + | ||
69 | + | ||
70 | +def add_train_summary(name, value): | ||
71 | + tf.summary.scalar(name, value, collections=['train_summary']) | ||
72 | + | ||
73 | + | ||
74 | +def add_valid_summary(name, value): | ||
75 | + avg, update = tf.metrics.mean(value) | ||
76 | + tf.summary.scalar(name, avg, collections=['valid_summary']) | ||
77 | + return update |
code/pcn_modify/pcn/visu_util.py
0 → 100644
1 | +# Author: Wentao Yuan (wyuan1@cs.cmu.edu) 05/31/2018 | ||
2 | + | ||
3 | +from matplotlib import pyplot as plt | ||
4 | +from mpl_toolkits.mplot3d import Axes3D | ||
5 | + | ||
6 | + | ||
7 | +def plot_pcd_three_views(filename, pcds, titles, suptitle='', sizes=None, cmap='Reds', zdir='y', | ||
8 | + xlim=(-0.3, 0.3), ylim=(-0.3, 0.3), zlim=(-0.3, 0.3)): | ||
9 | + if sizes is None: | ||
10 | + sizes = [0.5 for i in range(len(pcds))] | ||
11 | + fig = plt.figure(figsize=(len(pcds) * 3, 9)) | ||
12 | + for i in range(3): | ||
13 | + elev = 30 | ||
14 | + azim = -45 + 90 * i | ||
15 | + for j, (pcd, size) in enumerate(zip(pcds, sizes)): | ||
16 | + color = pcd[:, 0] | ||
17 | + ax = fig.add_subplot(3, len(pcds), i * len(pcds) + j + 1, projection='3d') | ||
18 | + ax.view_init(elev, azim) | ||
19 | + ax.scatter(pcd[:, 0], pcd[:, 1], pcd[:, 2], zdir=zdir, c=color, s=size, cmap=cmap, vmin=-1, vmax=0.5) | ||
20 | + ax.set_title(titles[j]) | ||
21 | + ax.set_axis_off() | ||
22 | + ax.set_xlim(xlim) | ||
23 | + ax.set_ylim(ylim) | ||
24 | + ax.set_zlim(zlim) | ||
25 | + plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.9, wspace=0.1, hspace=0.1) | ||
26 | + plt.suptitle(suptitle) | ||
27 | + fig.savefig(filename) | ||
28 | + plt.close(fig) |
-
Please register or login to post a comment