노현욱

feat : finish dev

...@@ -6,7 +6,12 @@ import time ...@@ -6,7 +6,12 @@ import time
6 import librosa 6 import librosa
7 import tkinter as tk 7 import tkinter as tk
8 8
9 -ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/s1.mp3").raw_data, dtype=np.int16) 9 +ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/wooAk.mp3").raw_data, dtype=np.int16)
10 +
11 +# 상수 설정
12 +CUSTOM_AUDIO_DURATION = 1 # 사용자의 소리를 녹음할 시간
13 +SOURCE_MODE = "frequency" # height, decibel or frequency
14 +MODE = "pitch_shift" # low_filter, echo or pitch_shift
10 15
11 # 파라미터 설정 16 # 파라미터 설정
12 RATE = 44100 # 샘플링 주파수 17 RATE = 44100 # 샘플링 주파수
...@@ -14,14 +19,13 @@ CHUNK = 1024 # 읽을 샘플의 수 ...@@ -14,14 +19,13 @@ CHUNK = 1024 # 읽을 샘플의 수
14 THRESHOLD = 128 # 피크를 검출하기 위한 threshold 값 19 THRESHOLD = 128 # 피크를 검출하기 위한 threshold 값
15 WIN_SIZE = 1024 # STFT를 적용할 윈도우 사이즈 20 WIN_SIZE = 1024 # STFT를 적용할 윈도우 사이즈
16 HOP_SIZE = 512 # STFT에서 윈도우 사이의 거리 (오버랩 사이즈) 21 HOP_SIZE = 512 # STFT에서 윈도우 사이의 거리 (오버랩 사이즈)
17 -DELAY = 0.1 # Delay time in seconds 22 +DELAY = 0.1 # Delay time in seconds for echo
18 MAX_FREQ = 10000 # max freq for pitch shifting 23 MAX_FREQ = 10000 # max freq for pitch shifting
19 MAX_HEIGHT = 10000 # max height for pitch shifting 24 MAX_HEIGHT = 10000 # max height for pitch shifting
20 MAX_DECIBEL = 50 # max decibel for decibel shifting 25 MAX_DECIBEL = 50 # max decibel for decibel shifting
21 -SOURCE_MODE = "decibel" # height, decibel or frequency
22 -MODE = "low_filter" # low_filter, echo or pitch_shift
23 SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이 26 SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이
24 27
28 +
25 sound_idx = 0 29 sound_idx = 0
26 30
27 31
...@@ -46,7 +50,7 @@ def get_user_audio(duration): ...@@ -46,7 +50,7 @@ def get_user_audio(duration):
46 return (in_data, pyaudio.paComplete) 50 return (in_data, pyaudio.paComplete)
47 51
48 # 녹음 진행 52 # 녹음 진행
49 - stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=1, stream_callback=add_to_frame) 53 + stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=0, stream_callback=add_to_frame)
50 54
51 time.sleep(1) 55 time.sleep(1)
52 stream.start_stream() 56 stream.start_stream()
...@@ -61,7 +65,7 @@ def get_user_audio(duration): ...@@ -61,7 +65,7 @@ def get_user_audio(duration):
61 return sound 65 return sound
62 66
63 if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "): 67 if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "):
64 - ORIGIN_SOUND = get_user_audio(0.2) 68 + ORIGIN_SOUND = get_user_audio(CUSTOM_AUDIO_DURATION)
65 69
66 sound = ORIGIN_SOUND.copy() 70 sound = ORIGIN_SOUND.copy()
67 71
...@@ -71,6 +75,9 @@ p = pyaudio.PyAudio() ...@@ -71,6 +75,9 @@ p = pyaudio.PyAudio()
71 75
72 last_time = 0 76 last_time = 0
73 77
78 +print("current input : ", SOURCE_MODE)
79 +print("current distortion : ", MODE)
80 +
74 # 콜백 함수 정의 81 # 콜백 함수 정의
75 def process_audio(in_data, frame_count, time_info, status): 82 def process_audio(in_data, frame_count, time_info, status):
76 global buffer 83 global buffer
...@@ -117,6 +124,7 @@ def process_audio(in_data, frame_count, time_info, status): ...@@ -117,6 +124,7 @@ def process_audio(in_data, frame_count, time_info, status):
117 def shift_pitch(frequency): 124 def shift_pitch(frequency):
118 pitch_shift_factor = frequency * 3 125 pitch_shift_factor = frequency * 3
119 audio_array = ORIGIN_SOUND.copy() 126 audio_array = ORIGIN_SOUND.copy()
127 + print("pitch shift over : ", pitch_shift_factor, "octave")
120 # Resample the audio array to change the pitch 128 # Resample the audio array to change the pitch
121 resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1) 129 resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1)
122 return np.array(resampled_array, dtype=np.int16) 130 return np.array(resampled_array, dtype=np.int16)
...@@ -124,8 +132,8 @@ def process_audio(in_data, frame_count, time_info, status): ...@@ -124,8 +132,8 @@ def process_audio(in_data, frame_count, time_info, status):
124 def low_filter(param): 132 def low_filter(param):
125 audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32) 133 audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32)
126 # Define the filter parameters 134 # Define the filter parameters
127 - cutoff_freq = param * RATE # Frequency cutoff for the low-pass filter (in Hz) 135 + cutoff_freq = param * RATE / 8 # Frequency cutoff for the low-pass filter (in Hz)
128 - print("cut of below : ", cutoff_freq) 136 + print("cut off over : ", cutoff_freq)
129 nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate) 137 nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate)
130 normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency 138 normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency
131 139
...@@ -153,7 +161,7 @@ def process_audio(in_data, frame_count, time_info, status): ...@@ -153,7 +161,7 @@ def process_audio(in_data, frame_count, time_info, status):
153 amp = np.max(np.abs(data)) # 신호의 진폭 추정 161 amp = np.max(np.abs(data)) # 신호의 진폭 추정
154 decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # 진폭을 데시벨로 변환 162 decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # 진폭을 데시벨로 변환
155 163
156 - if(decibel > 10) and height > 100: 164 + if(decibel > 10) and height > 100 :
157 last_time = time_info['current_time'] 165 last_time = time_info['current_time']
158 print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time'])) 166 print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time']))
159 new_sound = get_distortion(height, freq, decibel) 167 new_sound = get_distortion(height, freq, decibel)
......
1 -import pyaudio
2 -import numpy as np
3 -
4 -pa = pyaudio.PyAudio()
5 -delay_buffer = np.zeros((44100, 2), dtype=np.float32)
6 -
7 -def callback(in_data, frame_count, time_info, status):
8 - global delay_buffer
9 - audio_data = np.frombuffer(in_data, dtype=np.float32).reshape(frame_count, 2)
10 - delayed_data = np.concatenate((delay_buffer, audio_data))
11 - delay_buffer = delayed_data[frame_count:]
12 - return (audio_data + 0.5 * delay_buffer).tobytes(), pyaudio.paContinue
13 -RATE = 44100
14 -CHUNK = 1024
15 -
16 -stream = pa.open(format=pyaudio.paFloat32,
17 - channels=1,
18 - rate=RATE,
19 - input=True,
20 - output=True,
21 - frames_per_buffer=CHUNK,
22 - stream_callback=callback)
23 -
24 -stream.start_stream()
25 -# keep the stream running for a few seconds
26 -
27 -for i in range(0, int(RATE / CHUNK * 30)):
28 - stream.write(stream.read(CHUNK))
29 -
30 -stream.stop()
31 -stream.close()
32 -pa.terminate()
1 +from pydub import AudioSegment
2 +import pyaudio
3 +import numpy as np
# Usage: decode the MP3 exactly once, so that the stream parameters printed
# below (rate, sample width, channels) are guaranteed to describe the very
# samples that get played. Previously the metadata came from one file and the
# samples from another.
file_path = "./sounds/wooAk.mp3"

audio_file = AudioSegment.from_mp3(file_path)
print(audio_file.frame_rate)
print(audio_file.sample_width)
print(audio_file.channels)

# Decoded PCM payload viewed as 16-bit samples.
# NOTE(review): int16 assumes audio_file.sample_width == 2 -- confirm for
# other inputs.
raw_audio_data = np.frombuffer(audio_file.raw_data, dtype=np.int16)

p = pyaudio.PyAudio()
idx = 0  # read position (in samples) of the next chunk handed to the stream
17 +
def callback(in_data, frame_count, time_info, status):
    """Feed the next slice of ``raw_audio_data`` to the output stream.

    The slice is taken at the current read position *before* the position is
    advanced, so playback starts at sample 0 (and restarts at sample 0 after
    wrapping) instead of skipping the first buffer.

    Args:
        in_data: Unused; this is an output-only stream.
        frame_count: Number of frames requested by the stream.
        time_info: Unused.
        status: Unused.

    Returns:
        A ``(bytes, pyaudio.paContinue)`` tuple; the final, short slice is
        zero-padded and the read position is rewound so the sound loops.
    """
    global idx
    # One frame holds one sample per channel, so the number of samples to
    # hand back is frame_count * channels.
    wanted = frame_count * audio_file.channels
    chunk = raw_audio_data[idx:idx + wanted]
    idx += wanted
    if len(chunk) < wanted:
        padding = np.zeros(wanted - len(chunk), dtype=np.int16)
        chunk = np.concatenate((chunk, padding))
        idx = 0
    return (chunk.tobytes(), pyaudio.paContinue)
27 +
import time

# Open the output stream, let the callback drive playback, and always release
# the stream and the PyAudio instance, even on an exception or Ctrl-C.
try:
    stream = p.open(format=p.get_format_from_width(audio_file.sample_width),
                    channels=audio_file.channels,
                    rate=audio_file.frame_rate,
                    output_device_index=2,
                    output=True,
                    stream_callback=callback)
    try:
        stream.start_stream()

        # Sleep between polls instead of spinning, so the wait does not pin a
        # CPU core while audio is being produced by the callback.
        while stream.is_active():
            time.sleep(0.1)
    finally:
        stream.stop_stream()
        stream.close()
finally:
    p.terminate()
1 +import pyaudio
2 +import numpy as np
3 +import tkinter as tk
4 +from threading import Thread
5 +from queue import Queue
6 +from mutagen.mp3 import MP3
7 +
class MP3Player:
    """Small Tk front-end that plays queued audio buffers through PyAudio.

    Audio data is supplied with ``enqueue_data`` and consumed by the stream
    callback; the MP3 file itself is only inspected for its sample rate and
    channel count.
    """

    def __init__(self):
        self.filename = "./sounds/s4.mp3"
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.isPlaying = False
        self.chunk = 1024
        self.queue = Queue()
        # Filled in by load_mp3_file(); defined here so the attributes always
        # exist on the instance.
        self.sample_rate = None
        self.channels = None

    def load_mp3_file(self):
        """Read the sample rate and channel count from the MP3 header."""
        audio_info = MP3(self.filename)
        self.sample_rate = audio_info.info.sample_rate
        self.channels = audio_info.info.channels

    def start_stream(self):
        """Open and start the callback-driven output stream."""
        # Close any stream that is still open so it is not leaked.
        self.stop_stream()
        self.stream = self.audio.open(format=pyaudio.paFloat32,
                                      channels=self.channels,
                                      rate=self.sample_rate,
                                      output=True,
                                      output_device_index=2,
                                      stream_callback=self.callback)

        self.stream.start_stream()

    def stop_stream(self):
        """Stop and close the current stream, if there is one."""
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    def play(self):
        """Start playback; does nothing when playback is already running."""
        if self.isPlaying:
            return
        self.load_mp3_file()
        self.start_stream()
        self.isPlaying = True

    def pause(self):
        """Stop playback if it is running."""
        if self.isPlaying:
            self.stop_stream()
            self.isPlaying = False

    def callback(self, in_data, frame_count, time_info, status):
        """Return the next queued buffer, or silence when the queue is empty.

        A blocking ``get()`` here would stall the audio callback forever
        whenever nothing has been enqueued, so the queue is polled instead.
        """
        from queue import Empty

        try:
            data = self.queue.get_nowait()
        except Empty:
            # paFloat32 uses 4 bytes per sample; all-zero bytes are silence.
            data = bytes(frame_count * self.channels * 4)
        return (data, pyaudio.paContinue)

    def enqueue_data(self, data):
        """Append one audio buffer for the stream callback to play."""
        self.queue.put(data)

    def create_gui(self):
        """Build the Play/Pause window and run the Tk main loop."""
        self.root = tk.Tk()
        self.root.title("MP3 Player")

        self.play_button = tk.Button(self.root, text="Play", command=self.play)
        self.play_button.pack(pady=10)

        self.pause_button = tk.Button(self.root, text="Pause", command=self.pause)
        self.pause_button.pack(pady=10)

        try:
            self.root.mainloop()
        finally:
            # Release the audio device once the window is closed.
            self.pause()
            self.audio.terminate()
66 +
if __name__ == "__main__":
    # Script entry point: build the player and hand control to its Tk loop.
    MP3Player().create_gui()
1 +import pyaudio
2 +import numpy as np
3 +import librosa
4 +import queue
5 +import pydub
6 +from scipy import signal
7 +from scipy.fft import fft, fftfreq
8 +import math
9 +import time
10 +
11 +RATE = 44100
12 +CHUNK = 1024
13 +MAX_FREQ = 40 # max freq for pitch shifting
14 +MAX_AMPLITUDE = 10000 # max amplitude for pitch shifting
15 +MAX_DECIBEL = 50 # max decibel for decibel shifting
16 +EFFECT_LEVEL = 20 # number of effect level
17 +
18 +INPUT_DEVICE_INDEX = 0
19 +OUTPUT_DEVICE_INDEX = 1
20 +
21 +CUSTOM_AUDIO_DURATION = 0.5 # seconds
22 +
23 +# make sound queue, chunk 단위로 들어감
24 +Q = queue.Queue()
25 +Q.put(np.zeros(CHUNK, dtype=np.int16))
26 +
27 +sound_idx = 0
28 +
29 +
def get_user_audio(duration):
    """Record the user's voice for ``duration`` seconds.

    Prints a three-second countdown, then captures mono 16-bit audio at
    ``RATE`` through a callback stream and blocks until the callback reports
    that enough buffers were collected.

    Args:
        duration: Recording length in seconds.

    Returns:
        A 1-D ``np.int16`` array with the recorded samples.
    """
    frames = []
    # Number of CHUNK-sized buffers that cover `duration` seconds.
    buffers_needed = RATE / CHUNK * duration
    p = pyaudio.PyAudio()

    try:
        # Countdown before the recording starts.
        print("ready for recording...")
        for remaining in range(3, 0, -1):
            print(remaining)
            time.sleep(1)
        print("start...")

        # Recording callback: collect buffers until enough were captured.
        def add_to_frame(in_data, frame_count, time_info, status):
            frames.append(in_data)
            if len(frames) < buffers_needed:
                return (in_data, pyaudio.paContinue)
            return (in_data, pyaudio.paComplete)

        # start=False so that capturing begins exactly at start_stream().
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                        input=True, frames_per_buffer=CHUNK,
                        input_device_index=INPUT_DEVICE_INDEX,
                        stream_callback=add_to_frame, start=False)
        try:
            stream.start_stream()
            # Wait for the callback to return paComplete; without this the
            # frames would be joined before anything was recorded.
            while stream.is_active():
                time.sleep(0.01)
        finally:
            # Release the stream even if recording failed.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    return np.frombuffer(b''.join(frames), dtype=np.int16)
64 +
class Effector:
    """Pre-render every effect level of a sound and queue the matching one.

    On construction the sound is rendered at ``EFFECT_LEVEL`` strengths for
    each effect ("echo", "pitch_shift", "low_filter"). ``add_to_queue`` then
    analyses a trigger sound, maps the selected target measure ("decibel",
    "frequency" or "amplitude") to a level and pushes that rendering onto the
    playback queue ``Q`` in ``CHUNK``-sized slices.
    """

    def __init__(self, sound):
        """Store ``sound`` padded to whole chunks and pre-render all effects.

        Args:
            sound: 1-D array of int16 samples.
        """
        np.set_printoptions(threshold=np.inf)
        # Pad with silence up to a whole number of CHUNKs. An already aligned
        # sound gets no padding (a full silent chunk would only add latency);
        # an empty sound still becomes one silent chunk.
        padding = (-len(sound)) % CHUNK if len(sound) else CHUNK
        self.sound = np.concatenate((sound, np.zeros(padding, dtype=np.int16)))

        self.target = "frequency"
        self.effect = "pitch_shift"

        levels = [i / EFFECT_LEVEL for i in range(EFFECT_LEVEL)]
        self.echoed_sounds = [self.make_echo(level) for level in levels]
        self.pitch_shifted_sounds = [self.pitch_shift(level) for level in levels]
        self.low_filtered_sounds = [self.low_filter(level) for level in levels]

        self.augumented_sound = {
            "echo": self.echoed_sounds,
            "pitch_shift": self.pitch_shifted_sounds,
            "low_filter": self.low_filtered_sounds
        }

    @staticmethod
    def _to_int16(samples):
        """Convert to int16, saturating at the limits instead of wrapping."""
        limits = np.iinfo(np.int16)
        return np.clip(samples, limits.min, limits.max).astype(np.int16)

    def change_target(self, target):
        """Select which measure of the trigger sound drives the effect level.

        Raises:
            ValueError: If ``target`` is not a supported measure.
        """
        if target not in ["decibel", "frequency", "amplitude"]:
            raise ValueError("Invalid target")
        self.target = target

    def change_effect(self, effect):
        """Select which pre-rendered effect is queued.

        Raises:
            ValueError: If ``effect`` is not a supported effect.
        """
        if effect not in ["echo", "pitch_shift", "low_filter"]:
            raise ValueError("Invalid effect")
        self.effect = effect

    def make_echo(self, decay):
        """Return the sound mixed with one delayed copy scaled by ``decay``."""
        delay = int(0.01 * RATE)  # Echo delay in samples
        dry = self.sound.astype(np.float32)
        echoed_audio = dry.copy()
        # out[n] = x[n] + decay * x[n - delay]: the copy trails the original.
        if 0 < delay < len(dry):
            echoed_audio[delay:] += decay * dry[:-delay]
        return self._to_int16(echoed_audio)

    def pitch_shift(self, shift):
        """Return the sound shifted up by ``shift * 3`` octaves."""
        # bins_per_octave=1 makes one step equal to one octave.
        shifted = librosa.effects.pitch_shift(y=self.sound.astype(np.float32), sr=RATE, n_steps=shift*3, bins_per_octave=1)
        return self._to_int16(shifted)

    def low_filter(self, param):
        """Return the sound after a 4th-order Butterworth low-pass filter.

        Args:
            param: Level in [0, 1); values below 0.1 are raised to 0.1. The
                cutoff frequency is ``param * RATE / 8`` Hz.
        """
        param = max(param, 0.1)
        # Define the filter parameters
        cutoff_freq = param * RATE / 8  # Cutoff for the low-pass filter (Hz)
        nyquist_freq = 0.5 * RATE  # Nyquist frequency (half the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq  # Normalized cutoff

        # Design the low-pass filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')

        # Apply the low-pass filter; filter overshoot is saturated on return.
        filtered_audio = signal.lfilter(b, a, self.sound.astype(np.float64))

        return self._to_int16(filtered_audio)

    def get_distortion_rate(self, db, main_frequency, amplitude):
        """Map the selected target measure to an effect strength in [0, 1)."""
        print("current target is ", self.target)
        param = 0
        print(MAX_FREQ, main_frequency)
        if self.target == "frequency":
            param = min(MAX_FREQ-1, main_frequency) / MAX_FREQ
        elif self.target == "amplitude":
            param = min(MAX_AMPLITUDE-1, amplitude) / MAX_AMPLITUDE
        elif self.target == "decibel":
            param = min(MAX_DECIBEL-1, db) / MAX_DECIBEL

        return max(param, 0)

    def get_decibel_freq_amplitude(self, active_sound):
        """Analyse the dominant spectral component of ``active_sound``.

        Args:
            active_sound: 1-D array of samples.

        Returns:
            ``(db, main_frequency, amplitude)`` of the strongest non-DC bin in
            the lower half of the spectrum. ``main_frequency`` is the
            normalised frequency (cycles per sample) times 1000. Inputs too
            short to have such a bin yield ``(-inf, 0.0, 0.0)``.
        """
        sample_count = len(active_sound)
        half = sample_count // 2
        if half <= 1:
            return float("-inf"), 0.0, 0.0

        magnitudes = np.abs(fft(np.asarray(active_sound)))
        frequencies = fftfreq(sample_count)

        # The search slice starts at bin 1 (skipping DC), so add 1 to turn the
        # slice position back into a spectrum index.
        main_freq_index = int(np.argmax(magnitudes[1:half])) + 1

        # NOTE(review): the * 1000 scale is kept as-is; MAX_FREQ appears to be
        # tuned to it -- confirm before converting to Hz.
        main_frequency = float(abs(frequencies[main_freq_index]) * 1000)

        # Amplitude of the main frequency component
        amplitude = float(magnitudes[main_freq_index])

        # Convert amplitude to decibels (dB); silence maps to -inf.
        db = 20 * math.log10(amplitude) if amplitude > 0 else float("-inf")

        return db, main_frequency, amplitude

    def add_to_queue(self, active_sound):
        """Queue the rendering whose level matches the trigger sound."""
        db, main_frequency, amplitude = self.get_decibel_freq_amplitude(active_sound)
        print("touched, db {}, main_frequency {}, amplitude {}".format(db, main_frequency, amplitude))
        param = self.get_distortion_rate(db, main_frequency, amplitude)
        print("param : ", param)
        # Resolve the level once and keep it inside the rendered range.
        level = min(int(param * EFFECT_LEVEL), EFFECT_LEVEL - 1)
        rendered = self.augumented_sound[self.effect][level]
        for start in range(0, len(self.sound), CHUNK):
            Q.put(rendered[start:start + CHUNK])
177 +
178 +
179 +
# Default sound: decode the bundled MP3 into int16 samples.
audio_file_path = "./sounds/short_wooAk.mp3"
audio = np.frombuffer(
    pydub.AudioSegment.from_mp3(audio_file_path).raw_data,
    dtype=np.int16,
)

# Optionally replace it with a fresh microphone recording.
if input("직접 녹음을 하시겠습니까? (y/n) : ") == "y":
    audio = get_user_audio(CUSTOM_AUDIO_DURATION)

# Pre-render every effect variant of the chosen sound.
effector = Effector(audio)

# State shared with the stream callback.
pa = pyaudio.PyAudio()
idx = 0
prev_touched = False
acitve_sound_buffer = np.zeros(0, dtype=np.int16)
196 +
def callback(in_data, frame_count, time_info, status):
    """Detect loud input bursts and return the next queued output chunk.

    Consecutive input buffers whose summed absolute amplitude exceeds
    ``2**20`` are collected; when the burst ends, the collected samples are
    handed to ``effector.add_to_queue``. Every call returns one chunk taken
    from ``Q``.

    Args:
        in_data: Raw int16 input samples.
        frame_count: Unused.
        time_info: Unused.
        status: Unused.

    Returns:
        A ``(chunk, pyaudio.paContinue)`` tuple.
    """
    global prev_touched, acitve_sound_buffer
    audio_data = np.frombuffer(in_data, dtype=np.int16)

    # Widen before abs/sum: abs(-32768) does not fit in int16, and a sum that
    # accumulates in int16 wraps long before it can reach the threshold. The
    # vectorised form also avoids a per-sample Python loop in the callback.
    raw_power = int(np.abs(audio_data.astype(np.int64)).sum())
    touched = raw_power > 2**20

    if touched:
        if prev_touched:
            # The burst continues: keep collecting.
            acitve_sound_buffer = np.concatenate((acitve_sound_buffer, audio_data))
        else:
            # A new burst starts with this buffer.
            acitve_sound_buffer = audio_data
    else:
        if prev_touched:
            # The burst just ended: analyse it and queue the effect.
            effector.add_to_queue(acitve_sound_buffer)
        acitve_sound_buffer = np.zeros(0, dtype=np.int16)
    prev_touched = touched

    # Always leave at least one chunk behind so the blocking get() below can
    # never stall the audio callback.
    if Q.qsize() <= 1:
        Q.put(np.zeros(CHUNK, dtype=np.int16))
    else:
        print(Q.qsize())
    return (Q.get(), pyaudio.paContinue)
224 +
# Open the duplex stream and keep it running until it becomes inactive,
# releasing the stream and the PyAudio instance even on an exception or
# Ctrl-C.
try:
    stream = pa.open(format=pa.get_format_from_width(2),
                     channels=1,
                     rate=RATE,
                     input_device_index=INPUT_DEVICE_INDEX,
                     output_device_index=OUTPUT_DEVICE_INDEX,
                     input=True,
                     output=True,
                     frames_per_buffer=CHUNK,
                     stream_callback=callback
                     )
    try:
        stream.start_stream()

        # Sleep between polls instead of spinning, so the main thread does
        # not compete with the audio callback for CPU time.
        while stream.is_active():
            time.sleep(0.1)
    finally:
        stream.stop_stream()
        stream.close()
finally:
    pa.terminate()
1 +# Capstone Design 2
2 +# Sound Augmentation을 이용한 악기
3 +
4 +
5 +1. peak 추정방법 다르게 만들기
6 +2. 피크 추정 시에만 fft를 돌릴 수 있도록
7 +3. delay 최소화를 진행
...\ No newline at end of file ...\ No newline at end of file
......
No preview for this file type
No preview for this file type