extract_feature.py
764 Bytes
import os
import pickle

import librosa
import numpy as np
from python_speech_features import fbank
# Sampling rate handed to both librosa.load (as ``sr``) and fbank (as
# ``samplerate``) so the audio and the filter-bank analysis agree.
sample_rate = 16000
# Example input path kept from the original author:
# filename = './sunghwan/8sec2.wav'
def normalize_frames(m, Scale=True):
    """Centre ``m`` along axis 0 and optionally scale it to unit deviation.

    Args:
        m: Array-like of values; statistics are taken along axis 0
            (presumably one row per frame -- confirm against callers).
        Scale: When truthy, the centred values are also divided by the
            axis-0 standard deviation.

    Returns:
        ``m`` minus its axis-0 mean, divided by ``std + 2e-12`` when
        ``Scale`` is truthy.
    """
    centered = m - np.mean(m, axis=0)
    if not Scale:
        return centered
    # The small constant keeps the division finite for constant columns.
    return centered / (np.std(m, axis=0) + 2e-12)
def extract(filename, savename='test.p'):
    """Compute log filter-bank features for one audio file and pickle them.

    The audio is loaded as mono at ``sample_rate``, passed through ``fbank``
    with 40 filters and a 0.025 s window, converted to a log scale, centred
    with ``normalize_frames(..., Scale=False)`` and written to ``savename``
    as a pickled dict ``{'feat': <features>, 'label': <label>}``.

    Args:
        filename: Path of the audio file to read.
        savename: Path of the pickle file to write. The stored label is this
            path with everything from the first ``.`` of its final component
            removed, e.g. ``'test.p'`` -> ``'test'`` and
            ``'./out/spk1.p'`` -> ``'./out/spk1'``.

    Returns:
        None. The result is only written to ``savename``.
    """
    # The rate returned by librosa.load is not needed; sample_rate is reused.
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    # fbank returns a (features, energies) pair; only the features are kept.
    filter_banks, _ = fbank(audio, samplerate=sample_rate, nfilt=40, winlen=0.025)
    # Floor the values at 1e-5 so log10 never receives zero.
    filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    feature = normalize_frames(filter_banks, Scale=False)

    # Search for the '.' only in the last path component. Splitting the whole
    # path turned './out/spk1.p' or '../spk1.p' into an empty label.
    base = os.path.basename(savename)
    label = savename[:len(savename) - len(base)] + base.split('.')[0]

    todump = {'feat': feature, 'label': label}
    with open(savename, 'wb') as f:
        pickle.dump(todump, f)