# extract_feature4.py
# 933 Bytes
import librosa
import numpy as np
from python_speech_features import fbank
import pickle
# Sampling rate shared by the whole pipeline: audio is resampled to this rate
# when loaded, and the same value is handed to the filter-bank computation.
sample_rate = 16000
#filename='./sunghwan/8sec2.wav'
def normalize_frames(m, Scale=True):
    """Centre ``m`` along axis 0, optionally dividing by the axis-0 spread.

    Args:
        m: Array-like input; statistics are taken along axis 0.
        Scale: When true, the centred values are additionally divided by
            the axis-0 standard deviation (plus a tiny constant). When
            false, only the mean is removed.

    Returns:
        The normalized array, same shape as ``m``.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    # The small offset keeps the division finite when a column is constant.
    spread = np.std(m, axis=0) + 2e-12
    return centered / spread
def extract(filename, label='test.p'):
    """Compute log filter-bank features for one audio file and pickle them.

    The audio is loaded as mono at ``sample_rate``, converted to 40
    filter-bank values per frame, log-compressed, mean-normalized along
    axis 0 (no variance scaling) and written to disk together with
    ``label``.

    Args:
        filename: Path of the audio file to load.
        label: Value stored under the ``'label'`` key. It also selects the
            output file: the default ``'test.p'`` writes to ``test.p``;
            any other value writes to ``enroll.p``.

    Returns:
        None. The result is a pickle file, written relative to the current
        working directory, holding ``{'feat': ..., 'label': label}``.
    """
    # The rate returned by librosa is discarded: the load call already
    # requests `sample_rate`, so it carries no extra information.
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)

    # fbank returns a (features, energies) pair; the energies are not used.
    filter_banks, _ = fbank(audio,
                            samplerate=sample_rate,
                            nfilt=40,
                            winlen=0.025)

    # Floor at 1e-5 before the logarithm so zero-valued bins cannot yield -inf.
    filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    feature = normalize_frames(filter_banks, Scale=False)

    # Any label other than the default is routed to the enrollment file.
    savename = 'test.p' if label == 'test.p' else 'enroll.p'

    todump = {'feat': feature, 'label': label}
    with open(savename, 'wb') as f:
        pickle.dump(todump, f)