장재혁

add server

1 +from flask import Flask, request, send_file
2 +from extract_feature4 import extract
3 +from verification4_merge import load_model, load_enroll_embeddings,perform_verification
4 +from identification4 import perform_identification
5 +from enroll4_merge import split_enroll_and_test,enroll_per_spk
6 +import os
7 +import shutil
8 +
app = Flask(__name__)

# Locations on disk (relative to the working directory).
log_dir = '../new_model4_merge'  # checkpoints
embedding_dir = '../enroll_embeddings4_merge'  # enrolled speaker embeddings
test_dir = '../feat_logfbank_nfilt40/test/'  # per-speaker feature files

# Model / inference settings.
use_cuda = True  # run the model on the GPU
embedding_size = 128  # dimension of the speaker embeddings
cp_num = 50  # number of the checkpoint to load
n_classes = 348  # number of speakers in the training data
test_frames = 100  # frames per segment when splitting an utterance

# The model and the enrolled embeddings are loaded once, at import time.
model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)
embeddings = load_enroll_embeddings(embedding_dir)

# Feature file scored by the verification / identification routes.
test_path = './test.p'

# Speakers considered by identification; /enroll appends to this list.
spk_list = [
    '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    '229M2031', '230M4087', '233F4013', '236M3043', '240M3063',
]
26 +
27 +
def enrollment():
    """Re-run enrollment over ``test_dir`` and refresh the cached embeddings.

    Splits the feature directory into enroll/test tables, computes and saves
    one embedding per speaker into ``embedding_dir``, then reloads the
    module-level ``embeddings`` dict from that directory.

    Raises:
        Exception: any error from the enrollment pipeline is printed and then
            re-raised. Swallowing it here would make the caller's own
            ``try/except`` unreachable and report a failed enrollment as
            successful.
    """
    global embeddings
    try:
        enroll_DB, _ = split_enroll_and_test(test_dir)
        enroll_per_spk(use_cuda, test_frames, model, enroll_DB, embedding_dir)
        embeddings = load_enroll_embeddings(embedding_dir)
    except Exception as e:
        print(e)
        raise
37 +
def verification(enroll_speaker, thres=0.95):
    """Verify the features in ``test_path`` against an enrolled speaker.

    Args:
        enroll_speaker: Key of the enrolled speaker in ``embeddings``.
        thres: Score threshold forwarded to ``perform_verification``
            (defaults to the previously hard-coded 0.95).

    Returns:
        Whatever ``perform_verification`` returns for this request.
    """
    return perform_verification(use_cuda, model, embeddings, enroll_speaker,
                                test_path, test_frames, thres)
43 +
def identification():
    """Return the speaker from ``spk_list`` that best matches ``test_path``."""
    return perform_identification(use_cuda, model, embeddings, test_path,
                                  test_frames, spk_list)
47 +
48 +
49 +
50 +
@app.route('/enroll', methods=['POST', "GET"])
def enroll_controller():
    """Enroll a new speaker from an uploaded recording.

    POST form fields:
        file: the uploaded audio file.
        enroll_speaker: speaker name; used as the feature directory name.

    Returns:
        'enroll_complete' on success, 'failed' otherwise (with status 400
        when the speaker name is not usable as a directory name), or 'get'
        for a GET request.
    """
    if request.method != 'POST':
        return 'get'

    f = request.files['file']
    enroll_speaker = request.form['enroll_speaker']
    print(f.name)

    # The name comes from the client and becomes part of a filesystem path:
    # refuse anything that could escape the feature directory.
    if (not enroll_speaker or enroll_speaker in ('.', '..')
            or '/' in enroll_speaker or '\\' in enroll_speaker):
        return 'failed', 400

    f.save('./myrequest_enroll.wav')
    extract('./myrequest_enroll.wav', enroll_speaker)

    # Same directory that enrollment() scans; exist_ok lets a speaker
    # re-enroll instead of crashing on the existing directory.
    new_path = os.path.join(test_dir, enroll_speaker)
    os.makedirs(new_path, exist_ok=True)
    shutil.move('./enroll.p', os.path.join(new_path, 'enroll.p'))

    try:
        enrollment()
    except Exception as e:
        print(e)
        return 'failed'

    # enrollment() may report errors only by printing them, so confirm that
    # an embedding for this speaker is actually available before advertising
    # the speaker to the identification route.
    if enroll_speaker not in embeddings:
        return 'failed'
    if enroll_speaker not in spk_list:
        spk_list.append(enroll_speaker)
    return 'enroll_complete'
73 +
74 +
75 +
@app.route('/verification', methods=['POST', "GET"])
def verfication_controller():
    """Score an uploaded recording against a claimed enrolled speaker.

    POST form fields:
        file: the uploaded audio file.
        enroll_speaker: name of the enrolled speaker to verify against.

    Returns:
        The score string produced by ``verification`` for a POST,
        ('unknown speaker', 404) when the speaker has no enrolled embedding,
        or 'get' for a GET request.
    """
    if request.method != 'POST':
        return 'get'

    f = request.files['file']
    enroll_speaker = request.form['enroll_speaker']
    print(f.name)

    # Answer with a clear client error instead of a KeyError (HTTP 500)
    # deep inside the verification code.
    if enroll_speaker not in embeddings:
        return 'unknown speaker', 404

    f.save('./myrequest.wav')
    extract('./myrequest.wav')
    _, score = verification(enroll_speaker)
    return score
89 +
@app.route('/identification', methods=['POST', "GET"])
def identification_controller():
    """Identify the speaker of an uploaded recording.

    A POST saves the uploaded ``file``, extracts its features and returns the
    best-matching speaker from ``identification``; a GET returns 'get'.
    """
    if request.method != 'POST':
        return 'get'

    upload = request.files['file']
    print(upload.name)
    wav_path = './myrequest.wav'
    upload.save(wav_path)
    extract(wav_path)
    return identification()
102 +
@app.route('/debugger', methods=['GET'])
def debugger():
    """Fail on purpose.

    The previous body returned an undefined name, so every request raised
    NameError. The failure is kept, but made explicit.

    NOTE(review): this route presumably exists to open the interactive
    debugger when the app runs with debug=True -- confirm, and remove the
    route if it is not needed.
    """
    raise RuntimeError('/debugger: deliberate failure, no debug payload is defined')
106 +
@app.route('/robots.txt',methods=['GET'])
def antirobot():
    """Serve the ``robots.txt`` file."""
    robots_file = 'robots.txt'
    return send_file(robots_file)
110 +
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and
    # host='0.0.0.0' listens on every interface; together they let any client
    # that can reach the port execute code on this machine. Confirm this is
    # only ever started on a trusted network.
    # The port is passed as an integer rather than as the string "7777".
    app.run(host='0.0.0.0', port=7777, debug=True)
113 +
1 +import torch
2 +import torch.nn.functional as F
3 +from torch.autograd import Variable
4 +
5 +import pandas as pd
6 +import math
7 +import os
8 +import sys
9 +sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
10 +
11 +
12 +import configure as c
13 +
14 +from DB_wav_reader import read_feats_structure
15 +from SR_Dataset import read_MFB, ToTensorTestInput
16 +from model.model4 import background_resnet
17 +
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build the embedding network and restore a saved checkpoint.

    Args:
        use_cuda: If true, move the model to the GPU before loading weights.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet`` as ``embedding_size``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, in evaluation mode, with the checkpoint's ``state_dict``
        loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    # When CUDA is not used, remap the stored tensors to the CPU; otherwise a
    # checkpoint written from a GPU cannot be loaded on a CPU-only host.
    map_location = None if use_cuda else 'cpu'
    checkpoint = torch.load(log_dir + '/checkpoint_' + str(cp_num) + '.pth',
                            map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
29 +
def split_enroll_and_test(dataroot_dir):
    """Split the feature table under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False matches the literal text; as a regular expression the '.'
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
42 +
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for a feature file.

    The features are cut into consecutive segments of ``test_frames`` frames
    (the last one may be shorter), each segment is passed through ``model``,
    and the summed activations are normalised with ``l2_norm``.

    Args:
        use_cuda: If true, move each segment to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Network returning ``(activation, _)`` for a segment.
        test_frames: Number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: if the file contains no frames (previously this surfaced
            as an AttributeError inside ``l2_norm``).
    """
    features, _ = read_MFB(filename)  # input size:(n_frames, n_dims)

    # total number of segments with 'test_frames'
    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        raise ValueError('no feature frames found in %s' % filename)

    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = features[start:start + test_frames]

            TT = ToTensorTestInput()
            segment = TT(segment)  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                segment = segment.cuda()
            temp_activation, _ = model(segment)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
63 +
def l2_norm(input, alpha):
    """Scale every row of ``input`` to unit L2 norm, then multiply by ``alpha``.

    A small constant (1e-10) is added to the squared sum before the square
    root so that an all-zero row does not divide by zero.
    """
    original_shape = input.size()
    squared_sum = input.pow(2).sum(1) + 1e-10
    row_norm = squared_sum.sqrt().view(-1, 1)
    unit_rows = torch.div(input, row_norm.expand_as(input)).view(original_shape)
    return unit_rows * alpha
74 +
def enroll_per_spk(use_cuda, test_frames, model, DB, embedding_dir):
    """
    Accumulate one d-vector per speaker from the enrollment files and save it.

    The embedding of every file (from ``get_embeddings``) is summed into its
    speaker's entry; the sum is not divided by the number of files. Each
    speaker's result is written to ``<embedding_dir>/<speaker_id>.pth``.

    Args:
        use_cuda, test_frames, model: forwarded to ``get_embeddings``.
        DB: table with ``filename`` and ``speaker_id`` columns.
        embedding_dir: output directory, created if missing.

    Returns:
        Dictionary mapping speaker id to its accumulated embedding.
    """
    embeddings = {}

    # Aggregates all the activations
    print("Start to aggregate all the d-vectors per enroll speaker")

    # Walk the two columns together instead of label-indexing with
    # range(len(DB)), which only works when the index is exactly 0..n-1.
    for filename, spk in zip(DB['filename'], DB['speaker_id']):
        activation = get_embeddings(use_cuda, filename, model, test_frames)
        if spk in embeddings:
            embeddings[spk] += activation
        else:
            embeddings[spk] = activation

        print("Aggregates the activation (spk : %s)" % (spk))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(embedding_dir, exist_ok=True)

    # Save the embeddings
    for spk_index in sorted(embeddings):
        embedding_path = os.path.join(embedding_dir, spk_index+'.pth')
        torch.save(embeddings[spk_index], embedding_path)
        print("Save the embeddings for %s" % (spk_index))
    return embeddings
109 +
def main():
    """Enroll every speaker found under ``c.TEST_FEAT_DIR``."""
    # Settings
    use_cuda = True
    log_dir = 'new_model4_merge'
    embedding_size = 128
    cp_num = 50  # checkpoint number to load
    n_classes = 348
    test_frames = 200
    embedding_dir = 'enroll_embeddings4_merge'  # where embeddings are saved

    # Restore the model, collect the enrollment rows, then enroll and save.
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)
    enroll_per_spk(use_cuda, test_frames, model, enroll_DB, embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'


if __name__ == '__main__':
    main()
1 +import librosa
2 +import numpy as np
3 +from python_speech_features import fbank
4 +import pickle
sample_rate = 16000


def normalize_frames(m, Scale=True):
    """Subtract the per-column mean of ``m``; optionally divide by the std.

    With ``Scale`` true the centred values are divided by the per-column
    standard deviation plus 2e-12; otherwise only the mean is removed.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    return centered / (np.std(m, axis=0) + 2e-12)
13 +
def extract(filename,savename='test.p'):
    """Extract mean-normalised log filterbank features and pickle them.

    Args:
        filename: Audio file, loaded as mono at ``sample_rate``.
        savename: Path of the pickle to write. The stored label is the file
            name up to its first '.', ignoring any directory part.

    The pickle holds ``{'feat': <features>, 'label': <label>}``.
    """
    import os  # local import: this module does not import os at the top

    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    filter_banks, _ = fbank(audio, samplerate=sample_rate, nfilt=40, winlen=0.025)
    # Floor at 1e-5 before the logarithm so zero energies do not give -inf.
    filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    feature = normalize_frames(filter_banks, Scale=False)
    # Take the label from the base name only: splitting the whole path on '.'
    # gives '' for './test.p' and picks up directory text for nested paths.
    label = os.path.basename(savename).split('.')[0]
    todump = {'feat': feature, 'label': label}
    with open(savename,'wb') as f:
        pickle.dump(todump,f)
23 +
24 +
1 +import librosa
2 +import numpy as np
3 +from python_speech_features import fbank
4 +import pickle
sample_rate = 16000


def normalize_frames(m, Scale=True):
    """Centre ``m`` column-wise and, when ``Scale`` is true, also scale it.

    Scaling divides the centred values by the per-column standard deviation
    plus 2e-12.
    """
    column_mean = np.mean(m, axis=0)
    if Scale:
        column_std = np.std(m, axis=0)
        return (m - column_mean) / (column_std + 2e-12)
    return m - column_mean
14 +
15 +
def extract(filename, label='test.p'):
    """Extract mean-normalised log filterbank features and pickle them.

    The output file depends on ``label``: the default ``'test.p'`` writes
    ``test.p``; any other label writes ``enroll.p``. In both cases the pickle
    holds ``{'feat': <features>, 'label': label}``.
    """
    audio, sr = librosa.load(filename, sr=sample_rate, mono=True)
    filter_banks, energies = fbank(audio, samplerate=sample_rate,
                                   nfilt=40, winlen=0.025)
    log_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    feature = normalize_frames(log_banks, Scale=False)

    savename = 'test.p' if label == 'test.p' else 'enroll.p'
    with open(savename, 'wb') as f:
        pickle.dump({'feat': feature, 'label': label}, f)
30 +
1 +import torch
2 +import torch.nn.functional as F
3 +from torch.autograd import Variable
4 +
5 +import pandas as pd
6 +import math
7 +import os
8 +import sys
9 +sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
10 +
11 +import configure as c
12 +
13 +from DB_wav_reader import read_feats_structure
14 +from SR_Dataset import read_MFB, ToTensorTestInput
15 +from model.model4 import background_resnet
16 +
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build the embedding network and restore a saved checkpoint.

    Args:
        use_cuda: If true, move the model to the GPU before loading weights.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet`` as ``embedding_size``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, in evaluation mode, with the checkpoint's ``state_dict``
        loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    # When CUDA is not used, remap the stored tensors to the CPU; otherwise a
    # checkpoint written from a GPU cannot be loaded on a CPU-only host.
    map_location = None if use_cuda else 'cpu'
    checkpoint = torch.load(log_dir + '/checkpoint_' + str(cp_num) + '.pth',
                            map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
28 +
def split_enroll_and_test(dataroot_dir):
    """Split the feature table under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False matches the literal text; as a regular expression the '.'
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
41 +
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every saved speaker embedding from ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``<speaker>.pth`` file per speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (e.g. ``'cpu'`` to read GPU-saved embeddings on a CPU-only host).

    Returns:
        Dictionary mapping the speaker name (file name without the ``.pth``
        suffix) to the loaded embedding.
    """
    suffix = '.pth'
    embeddings = {}
    for f in sorted(os.listdir(embedding_dir)):
        # Ignore anything that is not an embedding file; passing a stray file
        # to torch.load would abort the whole load.
        if not f.endswith(suffix):
            continue
        # Strip only the trailing suffix, so a name that contains '.pth'
        # elsewhere is left intact.
        spk = f[:-len(suffix)]
        embedding_path = os.path.join(embedding_dir, f)
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
52 +
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for a feature file.

    The features are cut into consecutive segments of ``test_frames`` frames
    (the last one may be shorter), each segment is passed through ``model``,
    and the summed activations are normalised with ``l2_norm``.

    Args:
        use_cuda: If true, move each segment to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Network returning ``(activation, _)`` for a segment.
        test_frames: Number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: if the file contains no frames (previously this surfaced
            as an AttributeError inside ``l2_norm``).
    """
    features, _ = read_MFB(filename)  # input size:(n_frames, n_dims)

    # total number of segments with 'test_frames'
    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        raise ValueError('no feature frames found in %s' % filename)

    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = features[start:start + test_frames]

            TT = ToTensorTestInput()
            segment = TT(segment)  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                segment = segment.cuda()
            temp_activation, _ = model(segment)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
73 +
def l2_norm(input, alpha):
    """Return ``input`` with each row L2-normalised and scaled by ``alpha``.

    1e-10 is added to each row's squared sum before the square root, which
    keeps the division defined for an all-zero row.
    """
    shape = input.size()
    norm = torch.sqrt(torch.sum(input * input, 1) + 1e-10)
    normalised = torch.div(input, norm.view(-1, 1).expand_as(input))
    return normalised.view(shape) * alpha
84 +
def perform_identification(use_cuda, model, embeddings, test_filename, test_frames, spk_list):
    """Pick the enrolled speaker whose embedding is closest to a test file.

    Args:
        use_cuda, model, test_frames: forwarded to ``get_embeddings``.
        embeddings: Dictionary mapping speaker name to enrolled embedding.
        test_filename: Feature file of the utterance to identify.
        spk_list: Candidate speaker names to score.

    Returns:
        The candidate with the highest cosine similarity, or ``None`` when no
        candidate has an enrolled embedding.
    """
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)
    max_score = -10**8
    best_spk = None
    for spk in spk_list:
        # A candidate without an enrolled embedding cannot be scored; skip it
        # rather than aborting the whole identification with a KeyError.
        if spk not in embeddings:
            print("No enrolled embedding for speaker %s; skipped" % spk)
            continue
        # .item() gives a plain Python float to compare.
        score = F.cosine_similarity(test_embedding, embeddings[spk]).item()
        if score > max_score:
            max_score = score
            best_spk = spk

    # The true speaker is read from the name of the directory holding the
    # file; os.path also copes with a bare file name that has no directory.
    true_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker identification ===")
    print("True speaker : %s\nPredicted speaker : %s\nResult : %s\n" %(true_spk, best_spk, true_spk==best_spk))
    return best_spk
100 +
def main():
    """Identify one hard-coded test speaker against the enrolled embeddings."""
    # Paths
    log_dir = 'new_model4'  # checkpoints
    embedding_dir = 'enroll_embeddings4'  # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'  # test features

    # Settings
    use_cuda = True  # run on the GPU
    embedding_size = 128  # dimension of the speaker embeddings
    cp_num = 25  # checkpoint number to load
    n_classes = 241  # number of speakers in the training data
    test_frames = 100  # frames per segment of the test utterance

    # Restore the model, read the feature tables, load the enrolled speakers.
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)
    embeddings = load_enroll_embeddings(embedding_dir)

    # Candidate speakers: the ten test speakers plus three extra ids.
    spk_list = [
        '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
        '229M2031', '230M4087', '233F4013', '236M3043', '240M3063',
        '777M7777', '778M8777', 'sunghwan1',
    ]

    # Utterance to identify.
    test_speaker = '207F2088'
    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    best_spk = perform_identification(use_cuda, model, embeddings, test_path,
                                      test_frames, spk_list)


if __name__ == '__main__':
    main()
No preview for this file type
No preview for this file type
1 +User-agent: *
2 +Disallow: /
No preview for this file type
1 +import torch
2 +import torch.nn.functional as F
3 +from torch.autograd import Variable
4 +
5 +import pandas as pd
6 +import math
7 +import os
8 +import sys
9 +sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
10 +import configure as c
11 +
12 +from DB_wav_reader import read_feats_structure
13 +from SR_Dataset import read_MFB, ToTensorTestInput
14 +from model.model4 import background_resnet
15 +
def load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes):
    """Build the embedding network and restore a saved checkpoint.

    Args:
        use_cuda: If true, move the model to the GPU before loading weights.
        log_dir: Directory containing ``checkpoint_<cp_num>.pth``.
        cp_num: Number of the checkpoint to load.
        embedding_size: Forwarded to ``background_resnet`` as ``embedding_size``.
        n_classes: Forwarded to ``background_resnet`` as ``num_classes``.

    Returns:
        The model, in evaluation mode, with the checkpoint's ``state_dict``
        loaded.
    """
    model = background_resnet(embedding_size=embedding_size, num_classes=n_classes)
    if use_cuda:
        model.cuda()
    print('=> loading checkpoint')
    # When CUDA is not used, remap the stored tensors to the CPU; otherwise a
    # checkpoint written from a GPU cannot be loaded on a CPU-only host.
    map_location = None if use_cuda else 'cpu'
    checkpoint = torch.load(log_dir + '/checkpoint_' + str(cp_num) + '.pth',
                            map_location=map_location)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    return model
27 +
def split_enroll_and_test(dataroot_dir):
    """Split the feature table under ``dataroot_dir`` into enroll and test rows.

    Args:
        dataroot_dir: Directory handed to ``read_feats_structure``.

    Returns:
        ``(enroll_DB, test_DB)``: the rows whose ``filename`` contains
        ``'enroll.p'`` and ``'test.p'`` respectively, each re-indexed from 0.
    """
    DB_all = read_feats_structure(dataroot_dir)
    filenames = DB_all['filename']

    # regex=False matches the literal text; as a regular expression the '.'
    # would match any character (e.g. 'enroll_p').
    enroll_DB = DB_all[filenames.str.contains('enroll.p', regex=False)]
    test_DB = DB_all[filenames.str.contains('test.p', regex=False)]

    # Reset the index
    enroll_DB = enroll_DB.reset_index(drop=True)
    test_DB = test_DB.reset_index(drop=True)
    return enroll_DB, test_DB
40 +
def load_enroll_embeddings(embedding_dir, map_location=None):
    """Load every saved speaker embedding from ``embedding_dir``.

    Args:
        embedding_dir: Directory holding one ``<speaker>.pth`` file per speaker.
        map_location: Optional device remapping forwarded to ``torch.load``
            (e.g. ``'cpu'`` to read GPU-saved embeddings on a CPU-only host).

    Returns:
        Dictionary mapping the speaker name (file name without the ``.pth``
        suffix) to the loaded embedding.
    """
    suffix = '.pth'
    embeddings = {}
    for f in sorted(os.listdir(embedding_dir)):
        # Ignore anything that is not an embedding file; passing a stray file
        # to torch.load would abort the whole load.
        if not f.endswith(suffix):
            continue
        # Strip only the trailing suffix, so a name that contains '.pth'
        # elsewhere is left intact.
        spk = f[:-len(suffix)]
        embedding_path = os.path.join(embedding_dir, f)
        embeddings[spk] = torch.load(embedding_path, map_location=map_location)

    return embeddings
51 +
def get_embeddings(use_cuda, filename, model, test_frames):
    """Compute one L2-normalised embedding for a feature file.

    The features are cut into consecutive segments of ``test_frames`` frames
    (the last one may be shorter), each segment is passed through ``model``,
    and the summed activations are normalised with ``l2_norm``.

    Args:
        use_cuda: If true, move each segment to the GPU before the forward pass.
        filename: Feature file read with ``read_MFB``.
        model: Network returning ``(activation, _)`` for a segment.
        test_frames: Number of frames per segment.

    Returns:
        The normalised sum of the segment activations.

    Raises:
        ValueError: if the file contains no frames (previously this surfaced
            as an AttributeError inside ``l2_norm``).
    """
    features, _ = read_MFB(filename)  # input size:(n_frames, n_dims)

    # total number of segments with 'test_frames'
    tot_segments = math.ceil(len(features) / test_frames)
    if tot_segments == 0:
        raise ValueError('no feature frames found in %s' % filename)

    activation = 0
    with torch.no_grad():
        for i in range(tot_segments):
            start = i * test_frames
            segment = features[start:start + test_frames]

            TT = ToTensorTestInput()
            segment = TT(segment)  # size:(1, 1, n_dims, n_frames)

            if use_cuda:
                segment = segment.cuda()
            temp_activation, _ = model(segment)
            activation += torch.sum(temp_activation, dim=0, keepdim=True)

    return l2_norm(activation, 1)
72 +
def l2_norm(input, alpha):
    """L2-normalise ``input`` row by row and multiply the result by ``alpha``.

    The squared row sums are offset by 1e-10 before taking the square root,
    so an all-zero row is divided by a tiny number instead of zero.
    """
    input_size = input.size()
    row_energy = torch.pow(input, 2).sum(1).add(1e-10)
    divisor = row_energy.sqrt().view(-1, 1).expand_as(input)
    scaled = (input / divisor).view(input_size)
    return scaled * alpha
83 +
def perform_verification(use_cuda, model, embeddings, enroll_speaker, test_filename, test_frames, thres):
    """Compare a test utterance with one enrolled speaker.

    Args:
        use_cuda, model, test_frames: forwarded to ``get_embeddings``.
        embeddings: Dictionary mapping speaker name to enrolled embedding.
        enroll_speaker: Key of the enrolled speaker to compare against.
        test_filename: Feature file of the test utterance.
        thres: The result is 'Accept' when the score is above this value.

    Returns:
        ``(enroll_speaker, score)`` with the cosine similarity formatted as a
        string with four decimals. The Accept/Reject result is only printed.

    Raises:
        KeyError: if ``enroll_speaker`` has no entry in ``embeddings``.
    """
    enroll_embedding = embeddings[enroll_speaker]
    test_embedding = get_embeddings(use_cuda, test_filename, model, test_frames)

    # .item() gives a plain Python float; comparing and %-formatting the
    # one-element array relied on implicit array-to-scalar conversion.
    score = F.cosine_similarity(test_embedding, enroll_embedding).item()

    result = 'Accept' if score > thres else 'Reject'

    # The test speaker is read from the name of the directory holding the
    # file; os.path also copes with a bare file name that has no directory.
    test_spk = os.path.basename(os.path.dirname(test_filename)).split('_')[0]
    print("\n=== Speaker verification ===")
    print("True speaker: %s\nClaimed speaker : %s\n\nResult : %s\n" %(enroll_speaker, test_spk, result))
    print("Score : %0.4f\nThreshold : %0.2f\n" %(score, thres))
    return (enroll_speaker,'%0.4f'%score)
101 +
def main():
    """Verify one hard-coded test speaker against one enrolled speaker."""
    # Paths
    log_dir = 'new_model4_merge'  # checkpoints
    embedding_dir = 'enroll_embeddings4_merge'  # enrolled embeddings
    test_dir = 'feat_logfbank_nfilt40/test/'  # test features

    # Settings
    use_cuda = True  # run on the GPU
    embedding_size = 128  # dimension of the speaker embeddings
    cp_num = 50  # checkpoint number to load
    n_classes = 348  # number of speakers in the training data
    test_frames = 100  # frames per segment of the test utterance

    # Restore the model, read the feature tables, load the enrolled speakers.
    model = load_model(use_cuda, log_dir, cp_num, embedding_size, n_classes)
    enroll_DB, test_DB = split_enroll_and_test(c.TEST_FEAT_DIR)
    embeddings = load_enroll_embeddings(embedding_dir)

    # Test speaker list:
    #   '103F3021', '207F2088', '213F5100', '217F3038', '225M4062',
    #   '229M2031', '230M4087', '233F4013', '236M3043', '240M3063'

    enroll_speaker = '213F5100'  # the true speaker
    test_speaker = '207F2088'  # the claimed speaker
    thres = 0.95  # decision threshold

    test_path = os.path.join(test_dir, test_speaker, 'test.p')

    perform_verification(use_cuda, model, embeddings, enroll_speaker,
                         test_path, test_frames, thres)


if __name__ == '__main__':
    main()