Demo.py
import cv2
import numpy as np
import pyaudio
import librosa
import librosa.display
import matplotlib.pyplot as plt
import time
import tensorflow as tf
import tensorflow.keras as keras

CATEGORIES = ["angry", "happy", "lonely", "sad"]
model = keras.models.load_model("your model path")

rate = 16000
chunk_size = rate // 4  # 0.25 s of audio per read
"""
생성된 데이터를 모델에 맞게 변경함
@param filepath(str) : path where your image file is
@return array that has reshaped for predict
"""
def prepare(filepath):
img_array = cv2.imread(filepath, cv2.IMREAD_COLOR)
new_array = cv2.resize(img_array, (62, 78))
return new_array.reshape(-1, 62, 78, 1)
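
# Optional sketch, not part of the original Demo.py: the stream below is opened
# with a hard-coded input_device_index=1, which may not match your machine.
# This helper uses the standard PyAudio calls get_device_count() and
# get_device_info_by_index() to list the available input devices so the right
# index can be chosen.
def list_input_devices():
    pa = pyaudio.PyAudio()
    for i in range(pa.get_device_count()):
        info = pa.get_device_info_by_index(i)
        if info.get("maxInputChannels", 0) > 0:
            print(i, info.get("name"))
    pa.terminate()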

p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paFloat32,
                channels=1,
                rate=rate,
                input=True,
                input_device_index=1,
                frames_per_buffer=chunk_size)

frames = []
plt.figure(figsize=(10, 4))

do_melspec = librosa.feature.melspectrogram
pwr_to_db = librosa.power_to_db

while True:
    start = time.time()

    # Read 0.25 s of audio and convert the raw bytes to float32 samples.
    data = stream.read(chunk_size)
    data = np.frombuffer(data, dtype=np.float32)

    # Mel spectrogram of the current chunk, normalised to decibels.
    melspec = do_melspec(y=data, sr=rate, n_mels=128, fmax=4000)
    norm_melspec = pwr_to_db(melspec, ref=np.max)
    frames.append(norm_melspec)

    # Once 20 chunks (5 s of audio) have accumulated, render the stacked
    # spectrogram, save it as an image and run the emotion classifier on it.
    if len(frames) == 20:
        stack = np.hstack(frames)
        librosa.display.specshow(stack, fmax=4000)
        plt.savefig('your save path' + '.jpg', dpi=300)

        prediction = model.predict(prepare('path where your image is'))
        print(CATEGORIES[int(np.argmax(prediction[0]))])

        plt.draw()
        plt.pause(0.0001)
        plt.clf()
        # break
        frames.pop(0)  # slide the 20-chunk window forward by one chunk

    t = time.time() - start  # per-iteration processing time (seconds)
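
# Not in the original Demo.py: if the commented-out `break` above is enabled,
# the loop exits and the audio resources should be released. These are the
# standard PyAudio teardown calls.
stream.stop_stream()
stream.close()
p.terminate()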