visualize.py
2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import librosa
import threading
import sys
# Rebind `print` to the raw stdout writer. sys.stdout.write takes a single
# string and appends no newline, so every call below adds its own "\n".
print = sys.stdout.write
# Define constants for audio parameters
# Sample format handed to the input stream.
FORMAT = pyaudio.paFloat32
# Number of channels handed to the input stream.
CHANNELS = 1
# Sample rate in Hz; also passed to the analysis code as the sampling rate.
RATE = 44100
# Buffer size (in frames) handed to the stream; reads request 5x this many.
FRAMES_PER_BUFFER = 1024
# Echo delay in seconds: RATE * DELAY gives the echo buffer length in samples.
DELAY = 0.1
# Factor applied to the delayed signal before it is mixed in by add_echo().
GAIN = 0.5
# Open an input-only audio stream with the parameters above.
# NOTE(review): the PyAudio instance is not kept in a name, so this file has
# no handle on which to call terminate() later.
stream = pyaudio.PyAudio().open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=FRAMES_PER_BUFFER)
# Chunks returned by stream.read(), appended by get_stream_data(); the
# analysis functions read the last element.
sound = []
def get_stream_data():
    """Read one chunk from the input stream and append it to ``sound``.

    Each call requests ``FRAMES_PER_BUFFER * 5`` frames. Input overflows are
    ignored rather than raised, so a slow consumer drops audio instead of
    crashing the capture thread.

    Only the newest entries of ``sound`` are kept: the analysis functions
    read ``sound[-1]`` only, and the capture loop never ends, so an untrimmed
    list would grow for as long as the script runs.
    """
    # Number of recent chunks to retain; anything older is discarded.
    max_chunks = 16

    chunk = stream.read(FRAMES_PER_BUFFER * 5, exception_on_overflow=False)
    # `sound` is mutated in place, so no `global` declaration is needed.
    sound.append(chunk)
    if len(sound) > max_chunks:
        # Trim from the front so sound[-1] stays valid for concurrent readers.
        del sound[:-max_chunks]
# Read one chunk at import time; this leaves `sound` non-empty for the
# functions below that read sound[-1].
get_stream_data()
# Create buffer for delayed audio data: RATE * DELAY samples, initially zero.
# NOTE(review): the buffer is int16 while FORMAT is paFloat32 -- confirm which
# sample format add_echo() is meant to process.
buffer_size = int(RATE * DELAY)
buffer = np.zeros(buffer_size, dtype=np.int16)
def add_echo(in_data, frame_count, time_info, status_flags):
    """Mix a delayed, attenuated copy of the signal into one block of audio.

    The signature and return value have the shape of a PyAudio stream
    callback; nothing in this file registers it on ``stream``.

    The module-level ``buffer`` acts as a delay line holding the most recent
    ``len(buffer)`` samples. Each output sample is the input sample plus
    ``GAIN`` times the sample seen ``len(buffer)`` samples earlier.

    Args:
        in_data: Raw bytes interpreted as int16 samples.
        frame_count: Unused; the block length is taken from ``in_data``.
        time_info: Unused.
        status_flags: Unused.

    Returns:
        A ``(bytes, pyaudio.paContinue)`` tuple with the mixed int16 samples.
    """
    global buffer
    data = np.frombuffer(in_data, dtype=np.int16)
    block_len = len(data)

    # Old samples followed by the new block: element i of `history` is the
    # sample that occurred len(buffer) samples before data[i]. This also
    # works when the block is longer than the delay line.
    history = np.concatenate((buffer, data))
    delayed = history[:block_len]
    # Drop the oldest block_len samples so the delay line keeps its length.
    buffer = history[block_len:]

    mixed = data + GAIN * delayed
    # Saturate instead of letting the int16 cast wrap around on loud input.
    limits = np.iinfo(np.int16)
    output = np.clip(mixed, limits.min, limits.max).astype(np.int16)
    # tobytes() replaces the deprecated ndarray.tostring().
    return (output.tobytes(), pyaudio.paContinue)
def get_max_average_db():
    """Print the mean and maximum dB level of the most recent audio chunk.

    The last entry of ``sound`` is interpreted as float32 samples, turned
    into a power spectrogram, and converted to decibels relative to the
    chunk's own peak. Does nothing if no chunk has been captured yet.

    NOTE(review): because the reference is the chunk's peak (``ref=np.max``),
    the reported maximum is 0 dB by construction -- confirm whether an
    absolute reference was intended.
    """
    if not sound:
        return
    samples = np.frombuffer(sound[-1], dtype=np.float32)
    # Compute the power spectrogram of the data
    S = librosa.stft(samples, n_fft=2048, hop_length=512)
    S_power = np.abs(S) ** 2
    # The input is power (magnitude squared), so use power_to_db;
    # amplitude_to_db would square it again and double every dB value.
    S_dB = librosa.power_to_db(S_power, ref=np.max)
    avg_dB = np.mean(S_dB)
    # Write straight to stdout; the trailing newline is explicit.
    sys.stdout.write(
        "Average dB: {:.2f} Max dB: {:.2f}\n".format(avg_dB, np.max(S_dB))
    )
def print_dominant_freq():
    """Print the dominant frequency, in Hz, of the most recent audio chunk.

    The last entry of ``sound`` is interpreted as float32 samples and turned
    into a mel spectrogram. For each frame the strongest mel band is found
    and mapped to that band's centre frequency; the median over frames is
    printed. Does nothing if no chunk has been captured yet.
    """
    if not sound:
        return
    samples = np.frombuffer(sound[-1], dtype=np.float32)
    n_mels = 512
    # Compute the power spectrogram on the mel scale
    S = librosa.feature.melspectrogram(
        y=samples, sr=RATE, n_fft=2048, hop_length=1024, n_mels=n_mels
    )
    # Centre frequency of each mel band. The filter bank is built from
    # n_mels + 2 mel-spaced points between 0 and RATE / 2; the inner points
    # are the band centres. Mel-band indices must be mapped through these,
    # not through linear FFT bin frequencies.
    band_freqs = librosa.mel_frequencies(
        n_mels=n_mels + 2, fmin=0.0, fmax=RATE / 2
    )[1:-1]
    # Find the mel band with the maximum energy in each frame
    max_bin = np.argmax(S, axis=0)
    dominant_freqs = band_freqs[max_bin]
    # Median over frames gives the overall dominant frequency
    dominant_freq = np.median(dominant_freqs)
    # Write straight to stdout; the trailing newline is explicit.
    sys.stdout.write("Dominant frequency: {:.2f} Hz\n".format(dominant_freq))
# Capture one more chunk on a worker thread, and wait for it so that two
# threads never read from the stream at the same time.
primer = threading.Thread(target=get_stream_data)
primer.start()
primer.join()

# Main loop: capture the next chunk while analysing the latest one.
# get_max_average_db can be added as a further worker target if the dB
# read-out is wanted.
get_data = None
calc_data = None
try:
    while True:
        get_data = threading.Thread(target=get_stream_data)
        calc_data = threading.Thread(target=print_dominant_freq)
        get_data.start()
        calc_data.start()
        get_data.join()
        # Join the analysis thread too, so threads cannot pile up when
        # analysis is slower than capture.
        calc_data.join()
        # Output goes through sys.stdout.write, so flush stdout (not stdin)
        # to make each result visible immediately.
        sys.stdout.flush()
except KeyboardInterrupt:
    # Ctrl-C is the normal way to stop the script; exit without a traceback.
    pass
finally:
    # Let any in-flight worker finish before the stream is closed under it.
    for worker in (get_data, calc_data):
        if worker is not None and worker.is_alive():
            worker.join()
    stream.stop_stream()
    stream.close()