feat : finish dev

노현욱
Commit b25076ad72a9f620ac720faa9133d33313c3dc71 b25076ad 1 parent 24c1fff7
Showing 10 changed files with 378 additions and 41 deletions
cli_version.py → extras/cli_version.py
extras/echo.py
get_device_check.py → extras/get_device_check.py
gui_version.py → extras/gui_version.py
extras/play_sound.py
extras/stream_queue.py
low_latency.py
readme.md
sounds/short_wooAk.mp3
sounds/wooAk.mp3
--- a/cli_version.py → extras/cli_version.py
View file @b25076a
+++ b/cli_version.py → extras/cli_version.py
View file @b25076a
@@ -6,7 +6,12 @@ import time
 import librosa
 import tkinter as tk
-ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/s1.mp3").raw_data, dtype=np.int16)
+ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/wooAk.mp3").raw_data, dtype=np.int16)
+
+# 상수 설정
+CUSTOM_AUDIO_DURATION = 1 # 사용자의 소리를 녹음할 시간
+SOURCE_MODE = "frequency" # height, decibel or frequency
+MODE = "pitch_shift" # low_filter, echo or pitch_shift
 # 파라미터 설정
 RATE = 44100  # 샘플링 주파수
@@ -14,14 +19,13 @@ CHUNK = 1024  # 읽을 샘플의 수
 THRESHOLD = 128  # 피크를 검출하기 위한 threshold 값
 WIN_SIZE = 1024  # STFT를 적용할 윈도우 사이즈
 HOP_SIZE = 512  # STFT에서 윈도우 사이의 거리 (오버랩 사이즈)
-DELAY = 0.1  # Delay time in seconds
+DELAY = 0.1  # Delay time in seconds for echo
 MAX_FREQ = 10000 # max freq for pitch shifting
 MAX_HEIGHT = 10000 # max height for pitch shifting
 MAX_DECIBEL = 50 # max decibel for decibel shifting
-SOURCE_MODE = "decibel" # height, decibel or frequency
-MODE = "low_filter" # low_filter, echo or pitch_shift
 SOUND_SIZE = len(ORIGIN_SOUND)  # 음원 길이
+
 sound_idx = 0
@@ -46,7 +50,7 @@ def get_user_audio(duration):
         return (in_data, pyaudio.paComplete)
     # 녹음 진행
-    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=1, stream_callback=add_to_frame)
+    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=0, stream_callback=add_to_frame)
     time.sleep(1)    
     stream.start_stream()
@@ -61,7 +65,7 @@ def get_user_audio(duration):
     return sound
 if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "):
-    ORIGIN_SOUND = get_user_audio(0.2)
+    ORIGIN_SOUND = get_user_audio(CUSTOM_AUDIO_DURATION)
 sound = ORIGIN_SOUND.copy()
@@ -71,6 +75,9 @@ p = pyaudio.PyAudio()
 last_time = 0
+print("current input : ", SOURCE_MODE)
+print("current distortion : ", MODE)
+
 # 콜백 함수 정의
 def process_audio(in_data, frame_count, time_info, status):
     global buffer
@@ -117,6 +124,7 @@ def process_audio(in_data, frame_count, time_info, status):
     def shift_pitch(frequency):
         pitch_shift_factor = frequency * 3
         audio_array = ORIGIN_SOUND.copy()
+        print("pitch shift over : ", pitch_shift_factor, "octave")
         # Resample the audio array to change the pitch
         resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1)
         return np.array(resampled_array, dtype=np.int16)
@@ -124,8 +132,8 @@ def process_audio(in_data, frame_count, time_info, status):
     def low_filter(param):
         audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32)
         # Define the filter parameters
-        cutoff_freq = param * RATE  # Frequency cutoff for the low-pass filter (in Hz)
+        cutoff_freq = param * RATE / 8 # Frequency cutoff for the low-pass filter (in Hz)
-        print("cut of below : ", cutoff_freq)
+        print("cut off over : ", cutoff_freq)
         nyquist_freq = 0.5 * RATE  # Nyquist frequency (half of the sampling rate)
         normalized_cutoff = cutoff_freq / nyquist_freq  # Normalized cutoff frequency
@@ -153,7 +161,7 @@ def process_audio(in_data, frame_count, time_info, status):
         amp = np.max(np.abs(data))  # 신호의 진폭 추정
         decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx)))  # 진폭을 데시벨로 변환
-        if(decibel > 10) and height > 100:
+        if(decibel > 10) and height > 100 :
             last_time = time_info['current_time']
             print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time']))
             new_sound = get_distortion(height, freq, decibel)
--- a/extras/echo.py deleted 100644 → 0
View file @24c1fff
+++ b/extras/echo.py deleted 100644 → 0
View file @24c1fff
-import pyaudio
-import numpy as np
-
-pa = pyaudio.PyAudio()
-delay_buffer = np.zeros((44100, 2), dtype=np.float32)
-
-def callback(in_data, frame_count, time_info, status):
-    global delay_buffer
-    audio_data = np.frombuffer(in_data, dtype=np.float32).reshape(frame_count, 2)
-    delayed_data = np.concatenate((delay_buffer, audio_data))
-    delay_buffer = delayed_data[frame_count:]
-    return (audio_data + 0.5 * delay_buffer).tobytes(), pyaudio.paContinue
-RATE = 44100
-CHUNK = 1024
-
-stream = pa.open(format=pyaudio.paFloat32,
-                 channels=1,
-                 rate=RATE,
-                 input=True,
-                 output=True,
-                 frames_per_buffer=CHUNK,
-                 stream_callback=callback)
-
-stream.start_stream()
-# keep the stream running for a few seconds
-
-for i in range(0, int(RATE / CHUNK * 30)):
-    stream.write(stream.read(CHUNK))
-
-stream.stop()
-stream.close()
-pa.terminate()
--- a/get_device_check.py → extras/get_device_check.py
View file @b25076a
+++ b/get_device_check.py → extras/get_device_check.py
View file @b25076a
--- a/gui_version.py → extras/gui_version.py
View file @b25076a
+++ b/gui_version.py → extras/gui_version.py
View file @b25076a
--- a/extras/play_sound.py 0 → 100644
View file @b25076a
+++ b/extras/play_sound.py 0 → 100644
View file @b25076a
+from pydub import AudioSegment
+import pyaudio
+import numpy as np
+# Usage: point file_path at the MP3 to play.
+# The stream parameters (rate, sample width, channels) and the samples are taken
+# from the SAME decoded file, so the output stream always matches the data it is fed.
+file_path = "./sounds/wooAk.mp3"
+
+audio_file = AudioSegment.from_mp3(file_path)
+print(audio_file.frame_rate)
+print(audio_file.sample_width)
+print(audio_file.channels)
+
+# Interpreting the decoded bytes as int16 assumes sample_width == 2 (printed above)
+# -- TODO confirm before using files with a different sample width.
+raw_audio_data = np.frombuffer(audio_file.raw_data, dtype=np.int16)
+
+
+p = pyaudio.PyAudio()
+idx = 0  # read position (in samples) into raw_audio_data, advanced by callback()
+
+def callback(in_data, frame_count, time_info, status):
+    """Feed the next slice of raw_audio_data to the output stream, looping at the end.
+
+    Args:
+        in_data: Unused (output-only stream).
+        frame_count: Number of frames requested for this buffer.
+        time_info: Unused.
+        status: Unused.
+
+    Returns:
+        ``(bytes, pyaudio.paContinue)`` -- the stream is never completed, so playback
+        loops until the process is interrupted.
+    """
+    global idx
+    # The decoded buffer is interleaved: one frame holds one sample per channel.
+    samples_needed = frame_count * audio_file.channels
+    # Slice first, advance afterwards; advancing first would skip the opening chunk
+    # on every pass through the file.
+    chunk = raw_audio_data[idx:idx + samples_needed]
+    idx += samples_needed
+    if len(chunk) < samples_needed:
+        # End of data: pad the last buffer with silence and restart from the top.
+        chunk = np.concatenate((chunk, np.zeros(samples_needed - len(chunk), dtype=np.int16)))
+        idx = 0
+    return (chunk.tobytes(), pyaudio.paContinue)
+
+import time
+
+# Output stream configured from the decoded file's own parameters.
+# NOTE(review): output_device_index=2 is machine-specific -- confirm with get_device_check.py.
+stream = p.open(format=p.get_format_from_width(audio_file.sample_width),
+                channels=audio_file.channels,
+                rate=audio_file.frame_rate,
+                output_device_index=2,
+                output=True,
+                stream_callback=callback)
+
+try:
+    stream.start_stream()
+
+    # callback() always returns paContinue, so this loop only ends when the process
+    # is interrupted; sleep instead of spinning so the wait does not burn a CPU core.
+    while stream.is_active():
+        time.sleep(0.1)
+finally:
+    # Release the device even when the wait is aborted (e.g. Ctrl+C).
+    stream.stop_stream()
+    stream.close()
+
+    p.terminate()
--- a/extras/stream_queue.py 0 → 100644
View file @b25076a
+++ b/extras/stream_queue.py 0 → 100644
View file @b25076a
+import pyaudio
+import numpy as np
+import tkinter as tk
+from threading import Thread
+from queue import Queue
+from mutagen.mp3 import MP3
+
+class MP3Player:
+    """Small Tk front end that plays queued audio buffers through a PyAudio stream.
+
+    The MP3 file is only inspected for its sample rate and channel count; audio
+    data itself must be supplied by the caller through enqueue_data(). The output
+    stream is opened as paFloat32, so enqueued buffers are presumably float32
+    bytes -- TODO confirm against the producer.
+    """
+
+    # Bytes per sample for pyaudio.paFloat32, the format used by start_stream().
+    SAMPLE_WIDTH = 4
+
+    def __init__(self):
+        self.filename = "./sounds/s4.mp3"
+        self.audio = pyaudio.PyAudio()
+        self.stream = None
+        self.isPlaying = False
+        self.chunk = 1024
+        self.queue = Queue()
+
+    def load_mp3_file(self):
+        """Read sample rate and channel count from the MP3 header via mutagen."""
+        audio_info = MP3(self.filename)
+        self.sample_rate = audio_info.info.sample_rate
+        self.channels = audio_info.info.channels
+
+    def start_stream(self):
+        """Open and start the callback-driven output stream.
+
+        Any stream that is still open is closed first so it cannot leak.
+        Requires load_mp3_file() to have set sample_rate and channels.
+        """
+        self.stop_stream()
+        # NOTE(review): output_device_index=2 is machine-specific -- confirm.
+        self.stream = self.audio.open(format=pyaudio.paFloat32,
+                                      channels=self.channels,
+                                      rate=self.sample_rate,
+                                      output=True,
+                                      output_device_index=2,
+                                      stream_callback=self.callback)
+
+        self.stream.start_stream()
+
+    def stop_stream(self):
+        """Stop and close the output stream if one is open."""
+        if self.stream:
+            self.stream.stop_stream()
+            self.stream.close()
+            self.stream = None
+
+    def play(self):
+        """Start playback; pressing Play while already playing is a no-op."""
+        if self.isPlaying:
+            return
+        self.load_mp3_file()
+        self.start_stream()
+        self.isPlaying = True
+
+    def pause(self):
+        """Stop playback if it is running."""
+        if self.isPlaying:
+            self.stop_stream()
+            self.isPlaying = False
+
+    def callback(self, in_data, frame_count, time_info, status):
+        """Hand the next queued buffer to the stream, or silence when none is ready.
+
+        A blocking Queue.get() here would stall the audio thread (and hang
+        stop_stream()) whenever the queue runs dry, so the queue is polled instead.
+        This callback is the only consumer of the queue, which makes the
+        empty()/get_nowait() pair safe.
+        """
+        if self.queue.empty():
+            silence = bytes(frame_count * self.channels * self.SAMPLE_WIDTH)
+            return (silence, pyaudio.paContinue)
+        return (self.queue.get_nowait(), pyaudio.paContinue)
+
+    def enqueue_data(self, data):
+        """Append one audio buffer to be played by a later callback."""
+        self.queue.put(data)
+
+    def create_gui(self):
+        """Build the Play/Pause window and run the Tk main loop (blocks)."""
+        self.root = tk.Tk()
+        self.root.title("MP3 Player")
+
+        self.play_button = tk.Button(self.root, text="Play", command=self.play)
+        self.play_button.pack(pady=10)
+
+        self.pause_button = tk.Button(self.root, text="Pause", command=self.pause)
+        self.pause_button.pack(pady=10)
+
+        self.root.mainloop()
+
+if __name__ == "__main__":
+    # Script entry point: build the player and hand control to its Tk main loop.
+    MP3Player().create_gui()
--- a/low_latency.py 0 → 100644
View file @b25076a
+++ b/low_latency.py 0 → 100644
View file @b25076a
+import pyaudio
+import numpy as np
+import librosa
+import queue
+import pydub
+from scipy import signal
+from scipy.fft import fft, fftfreq
+import math
+import time
+
+RATE = 44100  # sampling rate in Hz used for recording, playback and the effects
+CHUNK = 1024  # samples per audio buffer; also the unit in which sound is queued
+MAX_FREQ = 40 # upper bound for normalising the detected main frequency (same scale as Effector.get_decibel_freq_amplitude returns)
+MAX_AMPLITUDE = 10000 # upper bound for normalising the detected amplitude
+MAX_DECIBEL = 50 # upper bound for normalising the detected decibel value
+EFFECT_LEVEL = 20 # number of pre-rendered intensity levels per effect
+
+# Audio device indices passed to PyAudio.
+# NOTE(review): these are machine-specific -- confirm with get_device_check.py.
+INPUT_DEVICE_INDEX = 0
+OUTPUT_DEVICE_INDEX = 1
+
+CUSTOM_AUDIO_DURATION = 0.5 # seconds
+
+# Playback queue; sound is pushed one CHUNK at a time.
+# It is seeded with one chunk of silence.
+Q = queue.Queue()
+Q.put(np.zeros(CHUNK, dtype=np.int16))
+
+# Not referenced by any code visible in this file.
+sound_idx = 0
+
+
+# Record the user's voice for `duration` seconds.
+def get_user_audio(duration):
+    """Record mono 16-bit audio from INPUT_DEVICE_INDEX and return it as an array.
+
+    Prints a three-second countdown, then collects CHUNK-sized buffers until
+    RATE / CHUNK * duration buffers have been captured.
+
+    Args:
+        duration: Recording length in seconds.
+
+    Returns:
+        1-D ``np.int16`` array with the recorded samples (empty if no buffer
+        was captured).
+    """
+    frames = []
+    p = pyaudio.PyAudio()
+
+    # Countdown before recording starts.
+    print("ready for recording...")
+    for remaining in range(3, 0, -1):
+        print(remaining)
+        time.sleep(1)
+    print("start...")
+
+    chunks_needed = RATE / CHUNK * duration
+
+    # Recording callback: keep every buffer and finish once enough were collected.
+    def add_to_frame(in_data, frame_count, time_info, status):
+        frames.append(np.frombuffer(in_data, dtype=np.int16))
+        if len(frames) < chunks_needed:
+            return (in_data, pyaudio.paContinue)
+        return (in_data, pyaudio.paComplete)
+
+    try:
+        # open() starts a callback stream right away (PyAudio's default start=True).
+        stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=INPUT_DEVICE_INDEX, stream_callback=add_to_frame)
+        try:
+            # Wait until the callback reports paComplete instead of sleeping a fixed
+            # second, so recordings longer than one second are not cut short.
+            while stream.is_active():
+                time.sleep(0.01)
+        finally:
+            # Clean up the stream even if the wait is interrupted.
+            stream.stop_stream()
+            stream.close()
+    finally:
+        p.terminate()
+
+    if not frames:
+        return np.zeros(0, dtype=np.int16)
+    return np.concatenate(frames)
+
+class Effector:
+    """Pre-renders effect variants of one sound and queues the variant matching an input.
+
+    On construction the sound is padded to a whole number of CHUNKs and rendered
+    at EFFECT_LEVEL intensity levels for each effect ("echo", "pitch_shift",
+    "low_filter"). add_to_queue() then analyses a captured input buffer, maps the
+    selected target measure ("frequency", "amplitude" or "decibel") to a level
+    and pushes that pre-rendered variant onto the global queue Q in CHUNK pieces.
+    """
+
+    def __init__(self, sound):
+        """Pad ``sound`` (int16 samples) and pre-render every effect level."""
+        self.sound = sound.copy()
+        np.set_printoptions(threshold=np.inf)
+        # Pad with silence up to a multiple of CHUNK. An already aligned sound gets
+        # no extra chunk; an empty sound becomes one chunk of silence.
+        padding = -len(self.sound) % CHUNK
+        if len(self.sound) == 0:
+            padding = CHUNK
+        self.sound = np.concatenate((self.sound, np.zeros(padding, dtype=np.int16)))
+
+        self.target = "frequency"
+        self.effect = "pitch_shift"
+
+        # Intensities 0, 1/EFFECT_LEVEL, ..., (EFFECT_LEVEL-1)/EFFECT_LEVEL.
+        levels = [i / EFFECT_LEVEL for i in range(EFFECT_LEVEL)]
+        self.echoed_sounds = [self.make_echo(level) for level in levels]
+        self.pitch_shifted_sounds = [self.pitch_shift(level) for level in levels]
+        self.low_filtered_sounds = [self.low_filter(level) for level in levels]
+
+        self.augumented_sound = {
+            "echo": self.echoed_sounds,
+            "pitch_shift": self.pitch_shifted_sounds,
+            "low_filter": self.low_filtered_sounds
+        }
+
+    @staticmethod
+    def _to_int16(samples):
+        """Clip to the int16 range before casting so loud peaks saturate instead of wrapping."""
+        limits = np.iinfo(np.int16)
+        return np.clip(samples, limits.min, limits.max).astype(np.int16)
+
+    def change_target(self, target):
+        """Select which input measure drives the effect level.
+
+        Raises:
+            Exception: if ``target`` is not "decibel", "frequency" or "amplitude".
+        """
+        if target not in ["decibel", "frequency", "amplitude"]:
+            raise Exception("Invalid target")
+        self.target = target
+
+    def change_effect(self, effect):
+        """Select which pre-rendered effect is queued.
+
+        Raises:
+            Exception: if ``effect`` is not "echo", "pitch_shift" or "low_filter".
+        """
+        if effect not in ["echo", "pitch_shift", "low_filter"]:
+            raise Exception("Invalid effect")
+        self.effect = effect
+
+    def make_echo(self, decay):
+        """Return the sound mixed with a copy of itself delayed by 10 ms and scaled by ``decay``.
+
+        With ``decay == 0`` the sound is returned unchanged.
+        """
+        delay = int(0.01 * RATE)  # Echo delay in samples
+        mixed = self.sound.astype(np.float64)
+        if 0 < delay < len(mixed):
+            # The echo is the EARLIER signal arriving late: sample n receives
+            # decay * sample (n - delay).
+            mixed[delay:] += decay * self.sound[:-delay]
+        return self._to_int16(mixed)
+
+    def pitch_shift(self, shift):
+        """Return the sound pitch-shifted by ``shift * 3`` steps at one bin per octave."""
+        sound = librosa.effects.pitch_shift(y=np.array(self.sound, np.float32), sr=RATE, n_steps=shift*3, bins_per_octave=1)
+        return self._to_int16(sound)
+
+    def low_filter(self, param):
+        """Return the sound after a 4th-order Butterworth low-pass filter.
+
+        ``param`` is floored at 0.1 and scales the cutoff: cutoff = param * RATE / 8.
+        """
+        param = max(param, 0.1)
+        audio_data = np.array(self.sound.copy(), dtype=np.int16)
+        # Define the filter parameters
+        cutoff_freq = param * RATE / 8 # Frequency cutoff for the low-pass filter (in Hz)
+        nyquist_freq = 0.5 * RATE  # Nyquist frequency (half of the sampling rate)
+        normalized_cutoff = cutoff_freq / nyquist_freq  # Normalized cutoff frequency
+
+        # Design the low-pass filter
+        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
+
+        # Apply the low-pass filter to the audio data
+        filtered_audio = signal.lfilter(b, a, audio_data)
+
+        return self._to_int16(filtered_audio)
+
+    def get_distortion_rate(self, db, main_frequency, amplitude):
+        """Map the measure selected by ``self.target`` to an effect intensity in [0, 1).
+
+        The measure is capped just below its MAX_* constant, divided by that
+        constant and floored at 0. An unknown target yields 0.
+        """
+        print("current target is ", self.target)
+        param = 0
+        print(MAX_FREQ, main_frequency)
+        if self.target == "frequency":
+            param = min(MAX_FREQ-1, main_frequency) / MAX_FREQ
+        elif self.target == "amplitude":
+            param = min(MAX_AMPLITUDE-1, amplitude) / MAX_AMPLITUDE
+        elif self.target == "decibel":
+            param = min(MAX_DECIBEL-1, db) / MAX_DECIBEL
+
+        param = max(param, 0)
+        return param
+
+    def get_decibel_freq_amplitude(self, active_sound):
+        """Analyse a buffer and return ``(db, main_frequency, amplitude)`` of its strongest bin.
+
+        ``main_frequency`` is the normalised FFT frequency (cycles per sample, as
+        returned by fftfreq with its default spacing) multiplied by 1000 -- it is
+        NOT in Hz. ``amplitude`` is the FFT magnitude of that bin and ``db`` is
+        20 * log10(amplitude).
+
+        Buffers too short to have a non-DC bin below Nyquist, or a zero-magnitude
+        peak, yield ``db == -inf`` rather than raising.
+        """
+        samples = np.asarray(active_sound, dtype=np.float64)
+        half = len(samples) // 2
+        if half <= 1:
+            return float("-inf"), 0.0, 0.0
+
+        spectrum = np.abs(fft(samples))
+        frequencies = fftfreq(len(samples))
+
+        # The search skips the DC bin, so the argmax is relative to index 1;
+        # add 1 to address the full spectrum/frequency arrays.
+        main_freq_index = int(np.argmax(spectrum[1:half])) + 1
+
+        main_frequency = abs(frequencies[main_freq_index])*1000
+
+        # Magnitude of the main frequency component
+        amplitude = spectrum[main_freq_index]
+
+        # Convert amplitude to decibels (dB); log10(0) is undefined.
+        db = 20 * math.log10(amplitude) if amplitude > 0 else float("-inf")
+
+        return db, main_frequency, amplitude
+
+    def add_to_queue(self, active_sound):
+        """Analyse ``active_sound`` and queue the matching effect variant in CHUNK pieces."""
+        db, main_frequency, amplitude = self.get_decibel_freq_amplitude(active_sound)
+        print("touched, db {}, main_frequency {}, amplitude {}".format(db, main_frequency, amplitude))
+        param = self.get_distortion_rate(db, main_frequency, amplitude)
+        print("param : ", param)
+        # param is < 1, so the level index stays within the EFFECT_LEVEL variants.
+        variant = self.augumented_sound[self.effect][int(param*EFFECT_LEVEL)]
+        for start in range(0, len(self.sound), CHUNK):
+            Q.put(variant[start:start+CHUNK])
+
+        
+
+# Load the default sound: decode the MP3 and view its raw bytes as int16 samples.
+audio_file_path = "./sounds/short_wooAk.mp3"
+audio = np.frombuffer(pydub.AudioSegment.from_mp3(audio_file_path).raw_data, dtype=np.int16)
+
+
+# Optionally replace the default sound with a fresh microphone recording.
+if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "):
+    audio = get_user_audio(CUSTOM_AUDIO_DURATION)
+
+# Pre-render every effect level for the chosen sound.
+effector = Effector(audio)
+
+# State shared with callback().
+idx = 0  # declared global in callback() but not otherwise used there
+prev_touched = False  # True while the input has been above the loudness threshold
+pa = pyaudio.PyAudio()
+
+# Input samples accumulated during the current loud stretch.
+acitve_sound_buffer = np.zeros(0, dtype=np.int16)
+
+def callback(in_data, frame_count, time_info, status):
+    """Duplex stream callback: detect loud input stretches and play queued sound.
+
+    Input buffers whose summed absolute amplitude exceeds 2**20 are accumulated.
+    When the input drops back below the threshold, the accumulated stretch is
+    handed to ``effector.add_to_queue`` and the accumulator is reset. Every call
+    returns the next chunk from Q for playback.
+
+    Args:
+        in_data: Raw int16 input bytes for this buffer.
+        frame_count: Unused.
+        time_info: Unused.
+        status: Unused.
+
+    Returns:
+        ``(chunk, pyaudio.paContinue)``.
+    """
+    global prev_touched, acitve_sound_buffer
+    audio_data = np.frombuffer(in_data, dtype=np.int16)
+
+    # Vectorised and accumulated in int64: a per-sample Python sum is slow for a
+    # real-time callback, and summing np.int16 scalars can wrap around int16
+    # (abs(-32768) also wraps in int16).
+    raw_power = int(np.abs(audio_data.astype(np.int64)).sum())
+    is_loud = raw_power > 2**20
+
+    if is_loud:
+        if prev_touched:
+            acitve_sound_buffer = np.concatenate((acitve_sound_buffer, audio_data))
+        else:
+            # First loud buffer of a new stretch.
+            acitve_sound_buffer = audio_data
+            prev_touched = True
+    elif prev_touched:
+        # The loud stretch just ended: analyse it and queue the matching sound.
+        effector.add_to_queue(acitve_sound_buffer)
+        acitve_sound_buffer = np.zeros(0, dtype=np.int16)
+        prev_touched = False
+
+    # Keep one chunk of silence in reserve so Q.get() below never blocks.
+    if Q.qsize() == 1:
+        Q.put(np.zeros(CHUNK, dtype=np.int16))
+    else:
+        print(Q.qsize())
+    return (Q.get(), pyaudio.paContinue)
+
+# Duplex stream: 16-bit mono input and output, driven by callback().
+stream = pa.open(format=pa.get_format_from_width(2),
+                channels=1,
+                rate=RATE,
+                input_device_index=INPUT_DEVICE_INDEX,
+                output_device_index=OUTPUT_DEVICE_INDEX,
+                input=True,
+                output=True,
+                frames_per_buffer=CHUNK,
+                stream_callback=callback
+                )
+try:
+    stream.start_stream()
+
+    # callback() always returns paContinue, so the stream runs until the process
+    # is interrupted. Sleep between checks so the main thread does not spin and
+    # compete with the audio callback for CPU time.
+    while stream.is_active():
+        time.sleep(0.1)
+finally:
+    # Release the audio devices even when the wait is aborted (e.g. Ctrl+C).
+    stream.stop_stream()
+    stream.close()
+    pa.terminate()
--- a/readme.md
View file @b25076a
+++ b/readme.md
View file @b25076a
+# Capstone Design 2
+# Sound Augmentation을 이용한 악기
+
+
+1. peak 추정 방법 다르게 만들기
+2. 피크 추정 시에만 FFT를 돌릴 수 있도록 하기
+3. delay 최소화 진행
\ No newline at end of file
--- a/sounds/short_wooAk.mp3 0 → 100644
View file @b25076a
+++ b/sounds/short_wooAk.mp3 0 → 100644
View file @b25076a
--- a/sounds/wooAk.mp3 0 → 100644
View file @b25076a
+++ b/sounds/wooAk.mp3 0 → 100644
View file @b25076a