노현욱

feat: finish dev

@@ -6,7 +6,12 @@ import time
import librosa
import tkinter as tk
-ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/s1.mp3").raw_data, dtype=np.int16)
+ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/wooAk.mp3").raw_data, dtype=np.int16)
# constant settings
+CUSTOM_AUDIO_DURATION = 1 # seconds of the user's sound to record
+SOURCE_MODE = "frequency" # height, decibel or frequency
+MODE = "pitch_shift" # low_filter, echo or pitch_shift
# parameter settings
RATE = 44100 # sampling rate (Hz)
@@ -14,14 +19,13 @@ CHUNK = 1024 # number of samples to read at a time
THRESHOLD = 128 # threshold for peak detection
WIN_SIZE = 1024 # window size for the STFT
HOP_SIZE = 512 # hop size between STFT windows (overlap)
-DELAY = 0.1 # Delay time in seconds
+DELAY = 0.1 # Delay time in seconds for echo
MAX_FREQ = 10000 # max freq for pitch shifting
MAX_HEIGHT = 10000 # max height for pitch shifting
MAX_DECIBEL = 50 # max decibel for decibel shifting
SOURCE_MODE = "decibel" # height, decibel or frequency
MODE = "low_filter" # low_filter, echo or pitch_shift
SOUND_SIZE = len(ORIGIN_SOUND) # length of the source sound
sound_idx = 0
@@ -46,7 +50,7 @@ def get_user_audio(duration):
return (in_data, pyaudio.paComplete)
# run the recording
-stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=1, stream_callback=add_to_frame)
+stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=0, stream_callback=add_to_frame)
time.sleep(1)
stream.start_stream()
@@ -61,7 +65,7 @@ def get_user_audio(duration):
return sound
if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "):
-ORIGIN_SOUND = get_user_audio(0.2)
+ORIGIN_SOUND = get_user_audio(CUSTOM_AUDIO_DURATION)
sound = ORIGIN_SOUND.copy()
@@ -71,6 +75,9 @@ p = pyaudio.PyAudio()
last_time = 0
print("current input : ", SOURCE_MODE)
print("current distortion : ", MODE)
# define the audio callback
def process_audio(in_data, frame_count, time_info, status):
global buffer
@@ -117,6 +124,7 @@ def process_audio(in_data, frame_count, time_info, status):
def shift_pitch(frequency):
pitch_shift_factor = frequency * 3
audio_array = ORIGIN_SOUND.copy()
print("pitch shift over : ", pitch_shift_factor, "octave")
# Resample the audio array to change the pitch
resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1)
return np.array(resampled_array, dtype=np.int16)
@@ -124,8 +132,8 @@ def process_audio(in_data, frame_count, time_info, status):
def low_filter(param):
audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32)
# Define the filter parameters
-cutoff_freq = param * RATE # Frequency cutoff for the low-pass filter (in Hz)
-print("cut of below : ", cutoff_freq)
+cutoff_freq = param * RATE / 8 # Frequency cutoff for the low-pass filter (in Hz)
+print("cut off over : ", cutoff_freq)
nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate)
normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency
@@ -153,7 +161,7 @@ def process_audio(in_data, frame_count, time_info, status):
amp = np.max(np.abs(data)) # estimate the signal amplitude
decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # convert amplitude to decibels
-if(decibel > 10) and height > 100:
+if(decibel > 10) and height > 100 :
last_time = time_info['current_time']
print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time']))
new_sound = get_distortion(height, freq, decibel)
......
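For context on the `pitch_shift` call in the hunk above: with `bins_per_octave=1`, librosa treats each unit of `n_steps` as a whole octave rather than a semitone, so `pitch_shift_factor` is effectively an octave count. A minimal standalone sketch of that behavior (the 440 Hz test tone is illustrative, not part of the project):

```python
import librosa

sr = 44100
tone = librosa.tone(440, sr=sr, duration=0.5)  # A4 test tone

# with bins_per_octave=1, n_steps=1 shifts by a full octave (440 Hz -> ~880 Hz)
up_one_octave = librosa.effects.pitch_shift(tone, sr=sr, n_steps=1, bins_per_octave=1)

# with the default bins_per_octave=12, n_steps counts semitones instead
up_one_semitone = librosa.effects.pitch_shift(tone, sr=sr, n_steps=1)
```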
import pyaudio
import numpy as np
import time

RATE = 44100
CHUNK = 1024

pa = pyaudio.PyAudio()
delay_buffer = np.zeros(RATE, dtype=np.float32)  # one second of delayed mono samples

def callback(in_data, frame_count, time_info, status):
    global delay_buffer
    audio_data = np.frombuffer(in_data, dtype=np.float32)
    # mix the live input with the oldest samples in the delay buffer
    out = audio_data + 0.5 * delay_buffer[:frame_count]
    # slide the buffer forward and append the fresh input at the end
    delay_buffer = np.concatenate((delay_buffer[frame_count:], audio_data))
    return (out.tobytes(), pyaudio.paContinue)

stream = pa.open(format=pyaudio.paFloat32,
                 channels=1,
                 rate=RATE,
                 input=True,
                 output=True,
                 frames_per_buffer=CHUNK,
                 stream_callback=callback)
stream.start_stream()

# keep the stream running for 30 seconds; the callback handles all I/O
time.sleep(30)

stream.stop_stream()
stream.close()
pa.terminate()
from pydub import AudioSegment
import pyaudio
import numpy as np
# Usage:
file_path = "./sounds/s1.mp3"
audio_file = AudioSegment.from_mp3(file_path)
print(audio_file.frame_rate)
print(audio_file.sample_width)
print(audio_file.channels)
raw_audio_data = np.frombuffer(audio_file.raw_data, dtype=np.int16)  # decode the same file the metadata came from
p = pyaudio.PyAudio()
idx = 0
def callback(in_data, frame_count, time_info, status):
    global raw_audio_data
    global idx
    # frame_count frames correspond to frame_count * channels int16 samples
    samples = frame_count * audio_file.channels
    chunk = raw_audio_data[idx:idx + samples]
    idx += samples
    if len(chunk) < samples:
        # pad the tail with silence and loop back to the start
        chunk = np.concatenate((chunk, np.zeros(samples - len(chunk), dtype=np.int16)))
        idx = 0
    return (chunk.tobytes(), pyaudio.paContinue)
stream = p.open(format=p.get_format_from_width(audio_file.sample_width),
                channels=audio_file.channels,
                rate=audio_file.frame_rate,
                output_device_index=2,
                output=True,
                stream_callback=callback)
stream.start_stream()
while stream.is_active():
    pass
stream.stop_stream()
stream.close()
p.terminate()
import pyaudio
import numpy as np
import tkinter as tk
from queue import Queue, Empty
from pydub import AudioSegment

class MP3Player:
    def __init__(self):
        self.filename = "./sounds/s4.mp3"
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.isPlaying = False
        self.chunk = 1024
        self.queue = Queue()

    def load_mp3_file(self):
        # decode the MP3 and enqueue it chunk by chunk for the callback
        segment = AudioSegment.from_mp3(self.filename)
        self.sample_rate = segment.frame_rate
        self.channels = segment.channels
        samples = np.frombuffer(segment.raw_data, dtype=np.int16)
        step = self.chunk * self.channels
        # pad to a whole number of chunks so every queue entry is full-size
        samples = np.concatenate((samples, np.zeros((-len(samples)) % step, dtype=np.int16)))
        for i in range(0, len(samples), step):
            self.enqueue_data(samples[i:i + step])

    def start_stream(self):
        self.stream = self.audio.open(format=pyaudio.paInt16,
                                      channels=self.channels,
                                      rate=self.sample_rate,
                                      output=True,
                                      output_device_index=2,
                                      frames_per_buffer=self.chunk,
                                      stream_callback=self.callback)
        self.stream.start_stream()

    def stop_stream(self):
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    def play(self):
        self.load_mp3_file()
        self.start_stream()
        self.isPlaying = True

    def pause(self):
        if self.isPlaying:
            self.stop_stream()
            self.isPlaying = False

    def callback(self, in_data, frame_count, time_info, status):
        try:
            data = self.queue.get_nowait()
        except Empty:
            # play silence when the queue runs dry
            data = np.zeros(frame_count * self.channels, dtype=np.int16)
        return (data.tobytes(), pyaudio.paContinue)

    def enqueue_data(self, data):
        self.queue.put(data)

    def create_gui(self):
        self.root = tk.Tk()
        self.root.title("MP3 Player")
        self.play_button = tk.Button(self.root, text="Play", command=self.play)
        self.play_button.pack(pady=10)
        self.pause_button = tk.Button(self.root, text="Pause", command=self.pause)
        self.pause_button.pack(pady=10)
        self.root.mainloop()

if __name__ == "__main__":
    player = MP3Player()
    player.create_gui()
import pyaudio
import numpy as np
import librosa
import queue
import pydub
from scipy import signal
from scipy.fft import fft, fftfreq
import math
import time
RATE = 44100
CHUNK = 1024
MAX_FREQ = 40 # max freq for pitch shifting
MAX_AMPLITUDE = 10000 # max amplitude for pitch shifting
MAX_DECIBEL = 50 # max decibel for decibel shifting
EFFECT_LEVEL = 20 # number of effect level
INPUT_DEVICE_INDEX = 0
OUTPUT_DEVICE_INDEX = 1
CUSTOM_AUDIO_DURATION = 0.5 # seconds
# sound queue; audio is enqueued one CHUNK at a time
Q = queue.Queue()
Q.put(np.zeros(CHUNK, dtype=np.int16))
sound_idx = 0
# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    frames = []
    p = pyaudio.PyAudio()

    # countdown before the recording starts
    print("ready for recording...")
    for _ in range(3, 0, -1):
        print(_)
        time.sleep(1)
    print("start...")

    # recording callback: collect chunks until `duration` seconds are buffered
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # run the recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=0,
                    stream_callback=add_to_frame, start=False)
    stream.start_stream()
    time.sleep(duration + 0.5)  # let the callback collect enough frames
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound
class Effector:
    def __init__(self, sound):
        self.sound = sound.copy()
        np.set_printoptions(threshold=np.inf)
        # pad the sound to a whole number of chunks
        self.sound = np.concatenate((self.sound, np.zeros((-len(sound)) % CHUNK, dtype=np.int16)))
        self.target = "frequency"
        self.effect = "pitch_shift"

        # precompute every effect at EFFECT_LEVEL intensities so the audio
        # callback only has to index into a table instead of running DSP
        self.echoed_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.echoed_sounds.append(self.make_echo(i/EFFECT_LEVEL))
        self.pitch_shifted_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.pitch_shifted_sounds.append(self.pitch_shift(i/EFFECT_LEVEL))
        self.low_filtered_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.low_filtered_sounds.append(self.low_filter(i/EFFECT_LEVEL))
        self.augmented_sound = {
            "echo": self.echoed_sounds,
            "pitch_shift": self.pitch_shifted_sounds,
            "low_filter": self.low_filtered_sounds
        }
    def change_target(self, target):
        if target not in ["decibel", "frequency", "amplitude"]:
            raise Exception("Invalid target")
        self.target = target

    def change_effect(self, effect):
        if effect not in ["echo", "pitch_shift", "low_filter"]:
            raise Exception("Invalid effect")
        self.effect = effect
    def make_echo(self, decay):
        delay = int(0.01 * RATE)  # echo delay in samples
        sound = self.sound.astype(np.float32)
        echoed_audio = sound.copy()
        # add a decayed copy of the signal, delayed by `delay` samples
        echoed_audio[delay:] += decay * sound[:-delay]
        return np.array(np.clip(echoed_audio, -32768, 32767), dtype=np.int16)

    def pitch_shift(self, shift):
        # with bins_per_octave=1, n_steps counts whole octaves (0 to 3 here)
        sound = librosa.effects.pitch_shift(y=np.array(self.sound, np.float32), sr=RATE, n_steps=shift*3, bins_per_octave=1)
        return np.array(sound, dtype=np.int16)
    def low_filter(self, param):
        param = max(param, 0.1)
        audio_data = np.array(self.sound.copy(), dtype=np.float32)
        # Define the filter parameters
        cutoff_freq = param * RATE / 8  # frequency cutoff for the low-pass filter (in Hz)
        nyquist_freq = 0.5 * RATE  # Nyquist frequency (half of the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq  # normalized cutoff frequency
        # Design the low-pass filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the low-pass filter to the audio data
        filtered_audio = signal.lfilter(b, a, audio_data)
        return np.array(filtered_audio, dtype=np.int16)
    def get_distortion_rate(self, db, main_frequency, amplitude):
        print("current target is", self.target)
        param = 0
        print(MAX_FREQ, main_frequency)
        if self.target == "frequency":
            param = min(MAX_FREQ - 1, main_frequency) / MAX_FREQ
        elif self.target == "amplitude":
            param = min(MAX_AMPLITUDE - 1, amplitude) / MAX_AMPLITUDE
        elif self.target == "decibel":
            param = min(MAX_DECIBEL - 1, db) / MAX_DECIBEL
        # clamp so the table index into the precomputed effects stays in [0, EFFECT_LEVEL)
        param = max(param, 0)
        return param
    def get_decibel_freq_amplitude(self, active_sound):
        samples = active_sound.copy()
        fft_data = fft(samples)
        frequencies = fftfreq(len(samples))
        # Find the index of the main frequency component; the +1 compensates
        # for slicing off the DC component before the argmax
        main_freq_index = np.argmax(np.abs(fft_data[1:len(samples)//2])) + 1
        # fftfreq without a sample spacing returns cycles/sample; the x1000 is
        # an ad-hoc scale chosen to match MAX_FREQ above
        main_frequency = abs(frequencies[main_freq_index]) * 1000
        # Calculate the amplitude of the main frequency component
        amplitude = abs(fft_data[main_freq_index])
        # Convert amplitude to decibels (dB)
        db = 20 * math.log10(amplitude)
        return db, main_frequency, amplitude
    def add_to_queue(self, active_sound):
        global Q
        db, main_frequency, amplitude = self.get_decibel_freq_amplitude(active_sound)
        print("touched, db {}, main_frequency {}, amplitude {}".format(db, main_frequency, amplitude))
        param = self.get_distortion_rate(db, main_frequency, amplitude)
        print("param : ", param)
        # enqueue the precomputed variant whose level matches the measured input
        for i in range(0, len(self.sound), CHUNK):
            temp_chunk = self.augmented_sound[self.effect][int(param*EFFECT_LEVEL)][i:i+CHUNK]
            Q.put(temp_chunk)
# get file
audio_file_path = "./sounds/short_wooAk.mp3"
audio = np.frombuffer(pydub.AudioSegment.from_mp3(audio_file_path).raw_data, dtype=np.int16)

if "y" == input("Record your own sound? (y/n): "):
    audio = get_user_audio(CUSTOM_AUDIO_DURATION)

# make effect
effector = Effector(audio)

idx = 0
prev_touched = False
pa = pyaudio.PyAudio()
active_sound_buffer = np.zeros(0, dtype=np.int16)
def callback(in_data, frame_count, time_info, status):
    global idx, effector, Q, prev_touched, active_sound_buffer
    audio_data = np.frombuffer(in_data, dtype=np.int16)
    # cheap O(n) energy estimate used to detect a "touch" on the input
    raw_power = np.sum(np.abs(audio_data.astype(np.int64)))
    if raw_power > 2**20 and not prev_touched:
        # touch started: begin buffering the active sound
        prev_touched = True
        active_sound_buffer = audio_data
    elif raw_power > 2**20 and prev_touched:
        active_sound_buffer = np.concatenate((active_sound_buffer, audio_data))
    elif prev_touched and raw_power <= 2**20:
        # touch ended: analyze the buffered sound and enqueue the effect
        effector.add_to_queue(active_sound_buffer)
        active_sound_buffer = np.zeros(0, dtype=np.int16)
        prev_touched = False
    elif not prev_touched and raw_power <= 2**20:
        prev_touched = False
        active_sound_buffer = np.zeros(0, dtype=np.int16)
    if Q.qsize() == 1:
        # keep one chunk of silence queued so the output never starves
        Q.put(np.zeros(CHUNK, dtype=np.int16))
    else:
        print(Q.qsize())
    return (Q.get().tobytes(), pyaudio.paContinue)
stream = pa.open(format=pa.get_format_from_width(2),
                 channels=1,
                 rate=RATE,
                 input_device_index=INPUT_DEVICE_INDEX,
                 output_device_index=OUTPUT_DEVICE_INDEX,
                 input=True,
                 output=True,
                 frames_per_buffer=CHUNK,
                 stream_callback=callback)
stream.start_stream()
# keep the stream running; the callback does all the work
while stream.is_active():
    pass
stream.close()
pa.terminate()
# Capstone Design 2
# An instrument using Sound Augmentation
1. Try a different peak-estimation method
2. Run the FFT only while a peak is being estimated (see the sketch below)
3. Minimize the processing delay
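As a rough sketch of item 2, the FFT can be gated behind a cheap energy check so spectral analysis only runs on chunks that might contain a peak. The helper name is hypothetical, and the threshold is an assumption that mirrors the `2**20` raw-power gate in the callback above:

```python
import numpy as np
from scipy.fft import fft, fftfreq

RATE = 44100
CHUNK = 1024
ENERGY_THRESHOLD = 2**20  # assumed gate level, mirroring the raw_power check in the callback

def analyze_if_peaked(chunk: np.ndarray):
    """Run the FFT only when a cheap energy test suggests a peak (illustrative helper)."""
    # O(n) energy estimate; much cheaper than the O(n log n) FFT
    raw_power = np.sum(np.abs(chunk.astype(np.int64)))
    if raw_power <= ENERGY_THRESHOLD:
        return None  # quiet chunk: skip the FFT entirely
    spectrum = fft(chunk.astype(np.float32))
    freqs = fftfreq(len(chunk), d=1.0 / RATE)
    main_bin = np.argmax(np.abs(spectrum[1:len(chunk) // 2])) + 1  # skip the DC bin
    return freqs[main_bin], np.abs(spectrum[main_bin])
```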