노현욱

Implemented most of the functions and features

import pyaudio
import numpy as np
import scipy.signal as signal
import pydub
import time
import librosa
import tkinter as tk

ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16)

# Parameter settings
RATE = 44100       # sampling rate
CHUNK = 1024       # number of samples to read at a time
THRESHOLD = 256    # threshold for peak detection
WIN_SIZE = 1024    # STFT window size
HOP_SIZE = 512     # STFT hop size (overlap between windows)
DELAY = 0.1        # delay time in seconds
MAX_FREQ = 3000    # max frequency for pitch shifting
MAX_HEIGHT = 2000  # max peak height for pitch shifting
sound_idx = 0

window = tk.Tk()
window.title("Sound Effect")
window.geometry("640x400+100+100")
window.resizable(False, False)

info_text = tk.StringVar()
info_text.set("Welcome! Please press the record button.")
# A Label is the right widget for a status display (the original used a Button).
info_label = tk.Label(window, textvariable=info_text, foreground="black", background="white")
info_label.pack()
def set_source_mode(mode):
    global SOURCE_MODE
    SOURCE_MODE = mode

# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    global info_text, info_label
    frames = []
    p = pyaudio.PyAudio()

    # Countdown before recording; window.update() forces the label to redraw.
    info_text.set("ready for recording...")
    for i in range(3, 0, -1):
        info_text.set(str(i))
        window.update()
        time.sleep(1)
    info_text.set("start...")
    window.update()

    # Recording callback: append chunks until `duration` seconds are collected.
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # Start recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=1,
                    stream_callback=add_to_frame)
    stream.start_stream()
    # Wait until the callback has collected all frames; the original joined the
    # frames immediately after start_stream(), before recording had finished.
    while stream.is_active():
        time.sleep(0.01)
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # Clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound
def record():
    global ORIGIN_SOUND
    global SOURCE_MODE
    ORIGIN_SOUND = get_user_audio(0.5)
    SOURCE_MODE = "decibel"  # "decibel" or "frequency"

def start():
    global MODE, SOUND_SIZE, sound_idx, sound
    MODE = "high_filter"  # "echo", "pitch_shift", or "low_filter"; anything else plays the original sound
    SOUND_SIZE = len(ORIGIN_SOUND)  # length of the source sound
    sound = ORIGIN_SOUND.copy()
    print(type(sound), len(sound))

    p = pyaudio.PyAudio()
    last_frame = 0

    # Stream callback definition
    def process_audio(in_data, frame_count, time_info, status):
        nonlocal last_frame       # defined in start(), written below
        global sound, sound_idx   # shared playback state

        def get_distortion(height, frequency):
            height = min(height, MAX_HEIGHT) / MAX_HEIGHT
            frequency = min(frequency, MAX_FREQ) / MAX_FREQ
            if SOURCE_MODE == "decibel":
                param = height
            elif SOURCE_MODE == "frequency":
                param = frequency
            else:
                return ORIGIN_SOUND
            if MODE == "pitch_shift":
                return shift_pitch(param)
            elif MODE == "echo":
                return add_echo(param)
            elif MODE == "low_filter":
                return low_filter(param)
            else:
                return ORIGIN_SOUND

        def add_echo(decay):
            # Create an empty array to store the echoed audio samples
            echoed_samples = np.zeros_like(ORIGIN_SOUND, dtype=np.int16)
            # Calculate the delay in samples
            delay_samples = int(DELAY * RATE)
            # Apply the echo effect sample by sample
            for i in range(delay_samples, len(ORIGIN_SOUND)):
                echoed_samples[i] = ORIGIN_SOUND[i] + int(decay * echoed_samples[i - delay_samples])
            return echoed_samples

        def shift_pitch(frequency):
            pitch_shift_factor = frequency
            audio_array = ORIGIN_SOUND.copy()
            # Resample the audio array to change the pitch
            resampled_array = librosa.effects.pitch_shift(
                np.array(audio_array, dtype=np.float32), sr=RATE,
                n_steps=pitch_shift_factor, bins_per_octave=1)
            return np.array(resampled_array, dtype=np.int16)

        def low_filter(param):
            audio_data = data
            # Filter parameters
            cutoff_freq = param * MAX_FREQ   # low-pass cutoff frequency (Hz)
            nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
            normalized_cutoff = cutoff_freq / nyquist_freq
            # Design the low-pass Butterworth filter
            b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
            # Apply the filter and cast back to int16 for playback
            filtered_audio = signal.lfilter(b, a, audio_data)
            return filtered_audio.astype(np.int16)

        # Convert the raw audio bytes to a NumPy array
        data = np.frombuffer(in_data, dtype=np.int16)
        # Perform the STFT
        f, t, Zxx = signal.stft(data, RATE, nperseg=WIN_SIZE, noverlap=HOP_SIZE)
        # Peak detection
        peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE)
        # Parameter estimation
        if len(peaks) > 0 and last_frame + 1 != frame_count:
            last_frame = frame_count
            peak_idx = peaks[0]                # take the first peak
            height = np.abs(Zxx[peak_idx, 0])  # estimated peak height
            freq = f[peak_idx]                 # estimated peak frequency
            amp = np.max(np.abs(data))         # estimated signal amplitude
            decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx)))  # amplitude converted to decibels
            if decibel > 20:
                print("Height: {:.2f}, Frequency: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}".format(height, freq, amp, decibel))
                new_sound = get_distortion(height, freq)
                if sound_idx > len(sound):
                    sound_idx = 0
                else:
                    # Mix the distorted sound into the playback buffer (int16 addition can overflow)
                    mixed_end = min(len(sound), sound_idx + len(new_sound))
                    print(mixed_end, sound_idx)
                    sound[sound_idx:mixed_end] = new_sound[:mixed_end - sound_idx] + sound[sound_idx:mixed_end]
                    if mixed_end - sound_idx < len(new_sound):
                        # Append whatever did not fit at the end of the buffer
                        sound = np.concatenate((sound, new_sound[mixed_end - sound_idx:]), axis=0)
        elif len(peaks) > 0:
            last_frame = frame_count

        sound_idx += CHUNK
        if sound_idx > len(sound):
            sound_idx = 0  # restart playback from the beginning (the original never reset this)
            sound = ORIGIN_SOUND.copy()
            return (np.zeros(data.shape), pyaudio.paContinue)
        return (sound[sound_idx - CHUNK:sound_idx], pyaudio.paContinue)
    # Open the input/output stream
    stream = p.open(format=p.get_format_from_width(2),
                    channels=1,
                    rate=RATE,
                    input_device_index=1,
                    output_device_index=2,
                    input=True,
                    output=True,
                    frames_per_buffer=CHUNK,
                    stream_callback=process_audio)

    # Start the stream
    stream.start_stream()

    # Block until the stream stops
    while stream.is_active():
        time.sleep(0.1)

    # Close the stream and terminate the PyAudio object
    stream.stop_stream()
    stream.close()
    p.terminate()
record_button = tk.Button(window, text="Record", width=10, height=2, command=record)
record_button.pack()
decibel_button = tk.Button(window, text="Decibel", width=10, height=2, command=lambda: set_source_mode("decibel"))
decibel_button.pack()
frequency_button = tk.Button(window, text="Frequency", width=10, height=2, command=lambda: set_source_mode("frequency"))
frequency_button.pack()
start_button = tk.Button(window, text="Start", width=10, height=2, command=start)
start_button.pack()
window.mainloop()
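The per-sample echo loop in add_echo is easy to follow but slow in pure Python. As a minimal alternative sketch (assuming the same int16 buffers and DELAY/RATE constants as above), the same feedback equation y[n] = x[n] + decay * y[n - delay] can be expressed as an IIR filter and run through scipy.signal.lfilter:

import numpy as np
import scipy.signal as signal

def add_echo_vectorized(sound, decay, delay_samples):
    # y[n] = x[n] + decay * y[n - delay_samples] is an IIR filter with
    # numerator b = [1] and denominator a = [1, 0, ..., 0, -decay].
    a = np.zeros(delay_samples + 1)
    a[0] = 1.0
    a[-1] = -decay
    echoed = signal.lfilter([1.0], a, sound.astype(np.float32))
    # Clip before casting back so overflow cannot wrap around.
    return np.clip(echoed, -32768, 32767).astype(np.int16)

Usage would mirror the original call site, e.g. echoed = add_echo_vectorized(ORIGIN_SOUND, 0.5, int(DELAY * RATE)).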
import pyaudio
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt
import pydub
import time
import librosa
import tkinter as tk

ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16)

# Parameter settings
RATE = 44100       # sampling rate
CHUNK = 1024       # number of samples to read at a time
THRESHOLD = 256    # threshold for peak detection
WIN_SIZE = 1024    # STFT window size
HOP_SIZE = 512     # STFT hop size (overlap between windows)
DELAY = 0.1        # delay time in seconds
MAX_FREQ = 3000    # max frequency for pitch shifting
MAX_HEIGHT = 2000  # max peak height for pitch shifting
sound_idx = 0
# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    frames = []
    p = pyaudio.PyAudio()

    # Countdown before recording
    print("ready for recording...")
    for i in range(3, 0, -1):
        print(i)
        time.sleep(1)
    print("start...")

    # Recording callback: append chunks until `duration` seconds are collected.
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # Start recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=1,
                    stream_callback=add_to_frame)
    stream.start_stream()
    # Wait until the callback has collected all frames before joining them.
    while stream.is_active():
        time.sleep(0.01)
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # Clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound

ORIGIN_SOUND = get_user_audio(0.5)
SOURCE_MODE = "decibel"  # "decibel" or "frequency"
MODE = "high_filter"     # "echo", "pitch_shift", or "low_filter"; anything else plays the original sound
SOUND_SIZE = len(ORIGIN_SOUND)  # length of the source sound
sound = ORIGIN_SOUND.copy()
print(type(sound), len(sound))

# Create the PyAudio object
p = pyaudio.PyAudio()
last_frame = 0

# Stream callback definition
def process_audio(in_data, frame_count, time_info, status):
    global buffer
    global sound
    global sound_idx
    global last_frame

    def get_distortion(height, frequency):
        height = min(height, MAX_HEIGHT) / MAX_HEIGHT
        frequency = min(frequency, MAX_FREQ) / MAX_FREQ
        if SOURCE_MODE == "decibel":
            param = height
        elif SOURCE_MODE == "frequency":
            param = frequency
        else:
            return ORIGIN_SOUND
        if MODE == "pitch_shift":
            return shift_pitch(param)
        elif MODE == "echo":
            return add_echo(param)
        elif MODE == "low_filter":
            return low_filter(param)
        return ORIGIN_SOUND

    def add_echo(decay):
        # Create an empty array to store the echoed audio samples
        echoed_samples = np.zeros_like(ORIGIN_SOUND, dtype=np.int16)
        # Calculate the delay in samples
        delay_samples = int(DELAY * RATE)
        # Apply the echo effect sample by sample
        for i in range(delay_samples, len(ORIGIN_SOUND)):
            echoed_samples[i] = ORIGIN_SOUND[i] + int(decay * echoed_samples[i - delay_samples])
        return echoed_samples

    def shift_pitch(frequency):
        pitch_shift_factor = frequency
        audio_array = ORIGIN_SOUND.copy()
        # Resample the audio array to change the pitch
        resampled_array = librosa.effects.pitch_shift(
            np.array(audio_array, dtype=np.float32), sr=RATE,
            n_steps=pitch_shift_factor, bins_per_octave=1)
        return np.array(resampled_array, dtype=np.int16)

    def low_filter(param):
        audio_data = data
        # Filter parameters
        cutoff_freq = param * MAX_FREQ   # low-pass cutoff frequency (Hz)
        nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq
        # Design the low-pass Butterworth filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the filter and cast back to int16 for playback
        filtered_audio = signal.lfilter(b, a, audio_data)
        return filtered_audio.astype(np.int16)

    # Convert the raw audio bytes to a NumPy array
    data = np.frombuffer(in_data, dtype=np.int16)
    # Perform the STFT
    f, t, Zxx = signal.stft(data, RATE, nperseg=WIN_SIZE, noverlap=HOP_SIZE)
    # Peak detection
    peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE)
    # Parameter estimation
    if len(peaks) > 0 and last_frame + 1 != frame_count:
        last_frame = frame_count
        peak_idx = peaks[0]                # take the first peak
        height = np.abs(Zxx[peak_idx, 0])  # estimated peak height
        freq = f[peak_idx]                 # estimated peak frequency
        amp = np.max(np.abs(data))         # estimated signal amplitude
        decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx)))  # amplitude converted to decibels
        # Print the estimated parameters
        if decibel > 20:
            print("Height: {:.2f}, Frequency: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}".format(height, freq, amp, decibel))
            new_sound = get_distortion(height, freq)
            if sound_idx > len(sound):
                sound_idx = 0
            else:
                # Mix the distorted sound into the playback buffer (int16 addition can overflow)
                mixed_end = min(len(sound), sound_idx + len(new_sound))
                print(mixed_end, sound_idx)
                sound[sound_idx:mixed_end] = new_sound[:mixed_end - sound_idx] + sound[sound_idx:mixed_end]
                if mixed_end - sound_idx < len(new_sound):
                    # Append whatever did not fit at the end of the buffer
                    sound = np.concatenate((sound, new_sound[mixed_end - sound_idx:]), axis=0)
    elif len(peaks) > 0:
        last_frame = frame_count

    sound_idx += CHUNK
    if sound_idx > len(sound):
        sound_idx = 0  # restart playback from the beginning
        sound = ORIGIN_SOUND.copy()
        return (np.zeros(data.shape), pyaudio.paContinue)
    return (sound[sound_idx - CHUNK:sound_idx], pyaudio.paContinue)
# Open the input/output stream
stream = p.open(format=p.get_format_from_width(2),
                channels=1,
                rate=RATE,
                input_device_index=1,
                output_device_index=2,
                input=True,
                output=True,
                frames_per_buffer=CHUNK,
                stream_callback=process_audio)

# Start the stream
stream.start_stream()

# Block until the stream stops
while stream.is_active():
    time.sleep(0.1)

# Close the stream and terminate the PyAudio object
stream.stop_stream()
stream.close()
p.terminate()
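Before wiring callbacks to live devices, the filter path can be sanity-checked offline with synthetic input. A minimal sketch, assuming an illustrative 1 kHz test tone and a 500 Hz cutoff (neither value is from the original code):

import numpy as np
import scipy.signal as signal

RATE = 44100
t = np.arange(RATE) / RATE
# 1 kHz test tone, int16 like the live input
tone = (0.5 * 32767 * np.sin(2 * np.pi * 1000 * t)).astype(np.int16)

# 4th-order Butterworth low-pass at 500 Hz, as in low_filter()
b, a = signal.butter(4, 500 / (0.5 * RATE), btype='low')
filtered = signal.lfilter(b, a, tone)

# The 1 kHz tone lies above the cutoff, so its mean magnitude should drop noticeably.
print(np.abs(tone.astype(np.float64)).mean(), np.abs(filtered).mean())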
import tkinter as tk

def clear_row(button, buttons):
    # Turn off every button in the same row as `button`.
    button_row = buttons[:3] if button in buttons[:3] else buttons[3:]
    for b in button_row:
        b.config(text='OFF')

def toggle_button(button):
    global buttons
    if button['text'] == 'ON':
        button.config(text='OFF')
    else:
        clear_row(button, buttons)
        button.config(text='ON')

def get_states():
    global buttons
    # Default each row to 0 when none of its buttons is ON.
    mode = 0
    for i in range(3):
        if buttons[i].cget('text') == 'ON':
            mode = i
    distortion = 0
    for i in range(3, 6):
        if buttons[i].cget('text') == 'ON':
            distortion = i - 3
    return mode, distortion
# Create the Tkinter window
window = tk.Tk()
window.title("Toggle Buttons")

# Create two frames for two rows
frame1 = tk.Frame(window)
frame1.pack(side=tk.TOP)
frame2 = tk.Frame(window)
frame2.pack(side=tk.TOP)

# Create six toggle buttons in two rows
buttons = []
for i in range(6):
    frame = frame1 if i < 3 else frame2
    button = tk.Button(frame, text='OFF', width=10)
    button.config(command=lambda button=button: toggle_button(button))
    button.pack(side=tk.LEFT)
    buttons.append(button)

# Start the Tkinter event loop
window.mainloop()
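get_states() returns two row indices, but nothing in this file consumes them yet. One plausible wiring to the audio scripts above, where the mode lists and apply_states helper are assumptions rather than part of the original:

# Hypothetical mapping from button indices to the audio scripts' mode strings.
SOURCE_MODES = ["decibel", "frequency", "none"]
DISTORTIONS = ["echo", "pitch_shift", "low_filter"]

def apply_states():
    mode, distortion = get_states()
    source_mode = SOURCE_MODES[mode]
    effect = DISTORTIONS[distortion]
    print(f"source={source_mode}, effect={effect}")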
import pyaudio
import numpy as np

# Create an instance of the PyAudio class
audio = pyaudio.PyAudio()

# Define the desired pitch shift factor
pitch_shift_factor = 1.5  # increase by 50%

# Choose the desired input and output devices
input_device_index = 1
output_device_index = 1

# Open input stream
input_stream = audio.open(input_device_index=input_device_index, format=pyaudio.paFloat32,
                          channels=1, rate=44100, input=True, frames_per_buffer=1024)

# Open output stream; raising the playback rate is what shifts the pitch up
output_stream = audio.open(output_device_index=output_device_index, format=pyaudio.paFloat32,
                           channels=2, rate=int(44100 * pitch_shift_factor), output=True)

# Read input audio and apply the pitch shift until interrupted
try:
    frames = []
    while True:
        data = input_stream.read(1024)
        frames.append(data)
        # Buffer 1024 chunks of 1024 samples (about 24 s at 44.1 kHz) before processing
        if len(frames) >= 1024:
            audio_data = b''.join(frames)
            audio_array = np.frombuffer(audio_data, dtype=np.float32)
            # Shorten the array to change the pitch (np.resize truncates here)
            resampled_array = np.resize(audio_array, int(len(audio_array) / pitch_shift_factor))
            # Convert the resampled array back to bytes
            resampled_data = resampled_array.astype(np.float32).tobytes()
            # Play the resampled audio
            output_stream.write(resampled_data)
            frames = []
except KeyboardInterrupt:
    pass  # Ctrl+C falls through to the cleanup below, which was unreachable in the original loop

# Stop and close the streams
input_stream.stop_stream()
input_stream.close()
output_stream.stop_stream()
output_stream.close()

# Terminate PyAudio
audio.terminate()
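Note that np.resize simply truncates the buffer, so this approach shortens the audio as much as it shifts the pitch. A sketch of a duration-preserving alternative using librosa (already used elsewhere in this commit); the 7-semitone value is only an example:

import numpy as np
import librosa

def pitch_shift_block(block: np.ndarray, sr: int = 44100, n_steps: float = 7.0) -> np.ndarray:
    # Shift pitch without changing duration; expects a float32 mono block.
    return librosa.effects.pitch_shift(block.astype(np.float32), sr=sr, n_steps=n_steps)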
task_list = []

def display_menu():
    print("Task Manager")
    print("1. Add a task")
    print("2. View tasks")
    print("3. Mark a task as complete")
    print("4. Exit")

def add_task():
    title = input("Enter the task title: ")
    description = input("Enter the task description: ")
    status = "in progress"
    task = {"title": title, "description": description, "status": status}
    task_list.append(task)
    print("The task has been added.")

def view_tasks():
    if not task_list:
        print("The task list is empty.")
    else:
        print()
        print("Task list:")
        print("----------------")
        for task in task_list:
            print(f"Title: {task['title']}")
            print(f"Description: {task['description']}")
            print(f"Status: {task['status']}")
            print("----------------")

def mark_task_complete():
    if not task_list:
        print("The task list is empty.")
        return
    title = input("Enter the title of the task to mark as complete: ")
    for task in task_list:
        if task['title'] == title:
            task['status'] = "complete"
            print("The task has been marked as complete.")
            return
    print("No task matching that title was found.")

while True:
    display_menu()
    choice = input("Choice: ")
    if choice == "1":
        add_task()
    elif choice == "2":
        view_tasks()
    elif choice == "3":
        mark_task_complete()
    elif choice == "4":
        print("Exiting the program.")
        break
    else:
        print("Please enter a valid option.")
    print()
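The task list lives only in memory and is lost on exit. A minimal persistence sketch; the tasks.json path and both helpers are hypothetical additions, not part of the original:

import json

TASKS_FILE = "tasks.json"  # hypothetical path

def save_tasks():
    # Write the in-memory task list out as UTF-8 JSON.
    with open(TASKS_FILE, "w", encoding="utf-8") as f:
        json.dump(task_list, f, ensure_ascii=False, indent=2)

def load_tasks():
    global task_list
    try:
        with open(TASKS_FILE, encoding="utf-8") as f:
            task_list = json.load(f)
    except FileNotFoundError:
        task_list = []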
import sys
import numpy as np
import pyaudio
import librosa
import scipy.signal as signal

RECORD_SECONDS = 5
CHUNK = 1024
RATE = 44100  # elided as unchanged context in the diff view; 44100 Hz is assumed, matching the rest of this commit
DELAY = 0.1   # Delay time in seconds
GAIN = 1      # Echo gain (0 to 1)
MAX_FREQ = 3000
input_device_index = 1
output_device_index = 4

# Create buffer for delayed audio data
buffer_size = int(RATE * DELAY)
buffer = np.zeros(buffer_size, dtype=np.int16)
def do_process(in_data, frame_count, time_info, status_flags):
    global buffer
    data = np.frombuffer(in_data, dtype=np.int16)

    def get_max_average_db(data):  # header elided in the diff view; name and signature taken from the call site below
        # ... (unchanged lines elided in the diff view) ...
        data_float = data.astype(np.float32)
        # Compute the power spectrogram of the data
        S = librosa.stft(data_float, n_fft=256, hop_length=512)
        S_power = np.abs(S)**2
        # Convert power spectrogram to dB scale
        # ... (unchanged lines elided in the diff view) ...

    def get_dominant_freq(data):  # header elided in the diff view; name and signature taken from the call site below
        # ... (unchanged lines elided in the diff view) ...
        freqs = np.fft.fftfreq(len(psd_data), d=1/RATE)
        # Compute the power spectrogram on the mel scale
        S = librosa.feature.melspectrogram(y=data, sr=RATE, n_fft=256, hop_length=1024, n_mels=64)
        # Find the frequency bin with the maximum energy in each frame
        max_bin = np.argmax(S, axis=0)
        # ... (unchanged lines elided in the diff view) ...
        return dominant_freq

    def add_echo(gain):
        global buffer
        # Shift the delay buffer and append the newest block, then mix it in.
        buffer = np.roll(buffer, len(data))
        buffer[-len(data):] = data
        return data + gain * buffer[:len(data)]
    def shift_pitch(pitch_shift_factor):
        audio_array = data
        # Resample (truncate) the audio array to change the pitch
        resampled_array = np.resize(audio_array, int(len(audio_array) / pitch_shift_factor))
        return resampled_array

    def high_filter(param):
        # NOTE: despite the name, this designs a low-pass filter (btype='low').
        audio_data = data
        # Filter parameters
        cutoff_freq = param * MAX_FREQ   # cutoff frequency (Hz)
        nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq
        # Design the filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the filter to the audio data
        filtered_audio = signal.lfilter(b, a, audio_data)
        return filtered_audio

    try:
        freq = get_dominant_freq(data)
        # avg_db, max_db = get_max_average_db(data)
        # temp_gain = freq / MAX_FREQ
        # output = add_echo(temp_gain)
        output = shift_pitch(0.5 + freq / MAX_FREQ)
        # output = high_filter(0.5)
        # print(int(freq), int(avg_db), int(max_db))
        return (output.astype(np.int16).tobytes(), pyaudio.paContinue)
    except Exception as e:
        # Returning the raw input keeps the stream alive; the original's bare
        # `return data` was not a valid callback return value.
        print("exception occurred:", e)
        return (in_data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(2),
                channels=1 if sys.platform == 'darwin' else 2,
                rate=RATE,
                input=True,
                input_device_index=input_device_index,
                output_device_index=output_device_index,
                output=True,
                frames_per_buffer=CHUNK,
                stream_callback=do_process)

print('* recording')
# ... (remainder of the file elided in the diff view) ...
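Most of get_dominant_freq's body is elided in the diff above. A self-contained sketch of the same idea, consistent with the surviving lines (mel power spectrogram, per-frame argmax) but not necessarily identical to the original:

import numpy as np
import librosa

def estimate_dominant_freq(block: np.ndarray, rate: int = 44100) -> float:
    # Mel power spectrogram, matching the surviving melspectrogram call
    S = librosa.feature.melspectrogram(y=block.astype(np.float32), sr=rate,
                                       n_fft=256, hop_length=1024, n_mels=64)
    # Index of the most energetic mel bin in each frame
    max_bin = np.argmax(S, axis=0)
    # Convert mel-bin indices to Hz and average over frames
    mel_freqs = librosa.mel_frequencies(n_mels=64, fmax=rate / 2)
    return float(np.mean(mel_freqs[max_bin]))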
1. Build something with actual sound transformation
   - decide whether the actual sound should be audible or not
2. Do identification only, and optionally play back a different sound.
   - proceed with sound identification?