노현욱

Implemented most of the functions and features

import pyaudio
import numpy as np
import scipy.signal as signal
import pydub
import time
import librosa
import tkinter as tk

ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16)

# Parameter settings
RATE = 44100       # sampling rate
CHUNK = 1024       # number of samples to read at a time
THRESHOLD = 256    # threshold for peak detection
WIN_SIZE = 1024    # STFT window size
HOP_SIZE = 512     # STFT hop size (overlap between windows)
DELAY = 0.1        # delay time in seconds
MAX_FREQ = 3000    # max frequency for pitch shifting
MAX_HEIGHT = 2000  # max peak height for pitch shifting
sound_idx = 0

window = tk.Tk()
window.title("Sound Effect")
window.geometry("640x400+100+100")
window.resizable(False, False)

info_text = tk.StringVar()
info_text.set("Welcome! Please press the record button.")
# A Label is the right widget for a status display (the original used a Button).
info_label = tk.Label(window, textvariable=info_text, foreground="black", background="white")
info_label.pack()
def set_source_mode(mode):
    global SOURCE_MODE
    SOURCE_MODE = mode

# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    global info_text, info_label
    frames = []
    p = pyaudio.PyAudio()

    # Countdown before recording; window.update() forces the label to redraw.
    info_text.set("ready for recording...")
    for i in range(3, 0, -1):
        info_text.set(str(i))
        window.update()
        time.sleep(1)
    info_text.set("start...")
    window.update()

    # Recording callback: append chunks until `duration` seconds are collected.
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # Start recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=1,
                    stream_callback=add_to_frame)
    stream.start_stream()
    # Wait until the callback has collected all frames; the original joined the
    # frames immediately after start_stream(), before recording had finished.
    while stream.is_active():
        time.sleep(0.01)
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # Clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound
def record():
    global ORIGIN_SOUND
    global SOURCE_MODE
    ORIGIN_SOUND = get_user_audio(0.5)
    SOURCE_MODE = "decibel"  # "decibel" or "frequency"

def start():
    global MODE, SOUND_SIZE, sound_idx, sound
    MODE = "high_filter"  # "echo", "pitch_shift", or "low_filter"; anything else plays the original sound
    SOUND_SIZE = len(ORIGIN_SOUND)  # length of the source sound
    sound = ORIGIN_SOUND.copy()
    print(type(sound), len(sound))

    p = pyaudio.PyAudio()
    last_frame = 0

    # Stream callback definition
    def process_audio(in_data, frame_count, time_info, status):
        nonlocal last_frame       # defined in start(), written below
        global sound, sound_idx   # shared playback state

        def get_distortion(height, frequency):
            height = min(height, MAX_HEIGHT) / MAX_HEIGHT
            frequency = min(frequency, MAX_FREQ) / MAX_FREQ
            if SOURCE_MODE == "decibel":
                param = height
            elif SOURCE_MODE == "frequency":
                param = frequency
            else:
                return ORIGIN_SOUND
            if MODE == "pitch_shift":
                return shift_pitch(param)
            elif MODE == "echo":
                return add_echo(param)
            elif MODE == "low_filter":
                return low_filter(param)
            else:
                return ORIGIN_SOUND

        def add_echo(decay):
            # Create an empty array to store the echoed audio samples
            echoed_samples = np.zeros_like(ORIGIN_SOUND, dtype=np.int16)
            # Calculate the delay in samples
            delay_samples = int(DELAY * RATE)
            # Apply the echo effect sample by sample
            for i in range(delay_samples, len(ORIGIN_SOUND)):
                echoed_samples[i] = ORIGIN_SOUND[i] + int(decay * echoed_samples[i - delay_samples])
            return echoed_samples

        def shift_pitch(frequency):
            pitch_shift_factor = frequency
            audio_array = ORIGIN_SOUND.copy()
            # Resample the audio array to change the pitch
            resampled_array = librosa.effects.pitch_shift(
                np.array(audio_array, dtype=np.float32), sr=RATE,
                n_steps=pitch_shift_factor, bins_per_octave=1)
            return np.array(resampled_array, dtype=np.int16)

        def low_filter(param):
            audio_data = data
            # Filter parameters
            cutoff_freq = param * MAX_FREQ   # low-pass cutoff frequency (Hz)
            nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
            normalized_cutoff = cutoff_freq / nyquist_freq
            # Design the low-pass Butterworth filter
            b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
            # Apply the filter and cast back to int16 for playback
            filtered_audio = signal.lfilter(b, a, audio_data)
            return filtered_audio.astype(np.int16)

        # Convert the raw audio bytes to a NumPy array
        data = np.frombuffer(in_data, dtype=np.int16)
        # Perform the STFT
        f, t, Zxx = signal.stft(data, RATE, nperseg=WIN_SIZE, noverlap=HOP_SIZE)
        # Peak detection
        peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE)
        # Parameter estimation
        if len(peaks) > 0 and last_frame + 1 != frame_count:
            last_frame = frame_count
            peak_idx = peaks[0]                # take the first peak
            height = np.abs(Zxx[peak_idx, 0])  # estimated peak height
            freq = f[peak_idx]                 # estimated peak frequency
            amp = np.max(np.abs(data))         # estimated signal amplitude
            decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx)))  # amplitude converted to decibels
            if decibel > 20:
                print("Height: {:.2f}, Frequency: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}".format(height, freq, amp, decibel))
                new_sound = get_distortion(height, freq)
                if sound_idx > len(sound):
                    sound_idx = 0
                else:
                    # Mix the distorted sound into the playback buffer (int16 addition can overflow)
                    mixed_end = min(len(sound), sound_idx + len(new_sound))
                    print(mixed_end, sound_idx)
                    sound[sound_idx:mixed_end] = new_sound[:mixed_end - sound_idx] + sound[sound_idx:mixed_end]
                    if mixed_end - sound_idx < len(new_sound):
                        # Append whatever did not fit at the end of the buffer
                        sound = np.concatenate((sound, new_sound[mixed_end - sound_idx:]), axis=0)
        elif len(peaks) > 0:
            last_frame = frame_count

        sound_idx += CHUNK
        if sound_idx > len(sound):
            sound_idx = 0  # restart playback from the beginning (the original never reset this)
            sound = ORIGIN_SOUND.copy()
            return (np.zeros(data.shape), pyaudio.paContinue)
        return (sound[sound_idx - CHUNK:sound_idx], pyaudio.paContinue)
    # Open the input/output stream
    stream = p.open(format=p.get_format_from_width(2),
                    channels=1,
                    rate=RATE,
                    input_device_index=1,
                    output_device_index=2,
                    input=True,
                    output=True,
                    frames_per_buffer=CHUNK,
                    stream_callback=process_audio)

    # Start the stream
    stream.start_stream()

    # Block until the stream stops
    while stream.is_active():
        time.sleep(0.1)

    # Close the stream and terminate the PyAudio object
    stream.stop_stream()
    stream.close()
    p.terminate()
record_button = tk.Button(window, text="Record", width=10, height=2, command=record)
record_button.pack()
decibel_button = tk.Button(window, text="Decibel", width=10, height=2, command=lambda: set_source_mode("decibel"))
decibel_button.pack()
frequency_button = tk.Button(window, text="Frequency", width=10, height=2, command=lambda: set_source_mode("frequency"))
frequency_button.pack()
start_button = tk.Button(window, text="Start", width=10, height=2, command=start)
start_button.pack()
window.mainloop()
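The per-sample echo loop in add_echo is easy to follow but slow in pure Python. As a minimal alternative sketch (assuming the same int16 buffers and DELAY/RATE constants as above), the same feedback equation y[n] = x[n] + decay * y[n - delay] can be expressed as an IIR filter and run through scipy.signal.lfilter:

import numpy as np
import scipy.signal as signal

def add_echo_vectorized(sound, decay, delay_samples):
    # y[n] = x[n] + decay * y[n - delay_samples] is an IIR filter with
    # numerator b = [1] and denominator a = [1, 0, ..., 0, -decay].
    a = np.zeros(delay_samples + 1)
    a[0] = 1.0
    a[-1] = -decay
    echoed = signal.lfilter([1.0], a, sound.astype(np.float32))
    # Clip before casting back so overflow cannot wrap around.
    return np.clip(echoed, -32768, 32767).astype(np.int16)

Usage would mirror the original call site, e.g. echoed = add_echo_vectorized(ORIGIN_SOUND, 0.5, int(DELAY * RATE)).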
import pyaudio
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt
import pydub
import time
import librosa
import tkinter as tk

ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16)

# Parameter settings
RATE = 44100       # sampling rate
CHUNK = 1024       # number of samples to read at a time
THRESHOLD = 256    # threshold for peak detection
WIN_SIZE = 1024    # STFT window size
HOP_SIZE = 512     # STFT hop size (overlap between windows)
DELAY = 0.1        # delay time in seconds
MAX_FREQ = 3000    # max frequency for pitch shifting
MAX_HEIGHT = 2000  # max peak height for pitch shifting
sound_idx = 0
# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    frames = []
    p = pyaudio.PyAudio()

    # Countdown before recording
    print("ready for recording...")
    for i in range(3, 0, -1):
        print(i)
        time.sleep(1)
    print("start...")

    # Recording callback: append chunks until `duration` seconds are collected.
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # Start recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=1,
                    stream_callback=add_to_frame)
    stream.start_stream()
    # Wait until the callback has collected all frames before joining them.
    while stream.is_active():
        time.sleep(0.01)
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # Clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound

ORIGIN_SOUND = get_user_audio(0.5)
SOURCE_MODE = "decibel"  # "decibel" or "frequency"
MODE = "high_filter"     # "echo", "pitch_shift", or "low_filter"; anything else plays the original sound
SOUND_SIZE = len(ORIGIN_SOUND)  # length of the source sound
sound = ORIGIN_SOUND.copy()
print(type(sound), len(sound))

# Create the PyAudio object
p = pyaudio.PyAudio()
last_frame = 0

# Stream callback definition
def process_audio(in_data, frame_count, time_info, status):
    global buffer
    global sound
    global sound_idx
    global last_frame

    def get_distortion(height, frequency):
        height = min(height, MAX_HEIGHT) / MAX_HEIGHT
        frequency = min(frequency, MAX_FREQ) / MAX_FREQ
        if SOURCE_MODE == "decibel":
            param = height
        elif SOURCE_MODE == "frequency":
            param = frequency
        else:
            return ORIGIN_SOUND
        if MODE == "pitch_shift":
            return shift_pitch(param)
        elif MODE == "echo":
            return add_echo(param)
        elif MODE == "low_filter":
            return low_filter(param)
        return ORIGIN_SOUND

    def add_echo(decay):
        # Create an empty array to store the echoed audio samples
        echoed_samples = np.zeros_like(ORIGIN_SOUND, dtype=np.int16)
        # Calculate the delay in samples
        delay_samples = int(DELAY * RATE)
        # Apply the echo effect sample by sample
        for i in range(delay_samples, len(ORIGIN_SOUND)):
            echoed_samples[i] = ORIGIN_SOUND[i] + int(decay * echoed_samples[i - delay_samples])
        return echoed_samples

    def shift_pitch(frequency):
        pitch_shift_factor = frequency
        audio_array = ORIGIN_SOUND.copy()
        # Resample the audio array to change the pitch
        resampled_array = librosa.effects.pitch_shift(
            np.array(audio_array, dtype=np.float32), sr=RATE,
            n_steps=pitch_shift_factor, bins_per_octave=1)
        return np.array(resampled_array, dtype=np.int16)

    def low_filter(param):
        audio_data = data
        # Filter parameters
        cutoff_freq = param * MAX_FREQ   # low-pass cutoff frequency (Hz)
        nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq
        # Design the low-pass Butterworth filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the filter and cast back to int16 for playback
        filtered_audio = signal.lfilter(b, a, audio_data)
        return filtered_audio.astype(np.int16)

    # Convert the raw audio bytes to a NumPy array
    data = np.frombuffer(in_data, dtype=np.int16)
    # Perform the STFT
    f, t, Zxx = signal.stft(data, RATE, nperseg=WIN_SIZE, noverlap=HOP_SIZE)
    # Peak detection
    peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE)
    # Parameter estimation
    if len(peaks) > 0 and last_frame + 1 != frame_count:
        last_frame = frame_count
        peak_idx = peaks[0]                # take the first peak
        height = np.abs(Zxx[peak_idx, 0])  # estimated peak height
        freq = f[peak_idx]                 # estimated peak frequency
        amp = np.max(np.abs(data))         # estimated signal amplitude
        decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx)))  # amplitude converted to decibels
        # Print the estimated parameters
        if decibel > 20:
            print("Height: {:.2f}, Frequency: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}".format(height, freq, amp, decibel))
            new_sound = get_distortion(height, freq)
            if sound_idx > len(sound):
                sound_idx = 0
            else:
                # Mix the distorted sound into the playback buffer (int16 addition can overflow)
                mixed_end = min(len(sound), sound_idx + len(new_sound))
                print(mixed_end, sound_idx)
                sound[sound_idx:mixed_end] = new_sound[:mixed_end - sound_idx] + sound[sound_idx:mixed_end]
                if mixed_end - sound_idx < len(new_sound):
                    # Append whatever did not fit at the end of the buffer
                    sound = np.concatenate((sound, new_sound[mixed_end - sound_idx:]), axis=0)
    elif len(peaks) > 0:
        last_frame = frame_count

    sound_idx += CHUNK
    if sound_idx > len(sound):
        sound_idx = 0  # restart playback from the beginning
        sound = ORIGIN_SOUND.copy()
        return (np.zeros(data.shape), pyaudio.paContinue)
    return (sound[sound_idx - CHUNK:sound_idx], pyaudio.paContinue)
# Open the input/output stream
stream = p.open(format=p.get_format_from_width(2),
                channels=1,
                rate=RATE,
                input_device_index=1,
                output_device_index=2,
                input=True,
                output=True,
                frames_per_buffer=CHUNK,
                stream_callback=process_audio)

# Start the stream
stream.start_stream()

# Block until the stream stops
while stream.is_active():
    time.sleep(0.1)

# Close the stream and terminate the PyAudio object
stream.stop_stream()
stream.close()
p.terminate()
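Before wiring callbacks to live devices, the filter path can be sanity-checked offline with synthetic input. A minimal sketch, assuming an illustrative 1 kHz test tone and a 500 Hz cutoff (neither value is from the original code):

import numpy as np
import scipy.signal as signal

RATE = 44100
t = np.arange(RATE) / RATE
# 1 kHz test tone, int16 like the live input
tone = (0.5 * 32767 * np.sin(2 * np.pi * 1000 * t)).astype(np.int16)

# 4th-order Butterworth low-pass at 500 Hz, as in low_filter()
b, a = signal.butter(4, 500 / (0.5 * RATE), btype='low')
filtered = signal.lfilter(b, a, tone)

# The 1 kHz tone lies above the cutoff, so its mean magnitude should drop noticeably.
print(np.abs(tone.astype(np.float64)).mean(), np.abs(filtered).mean())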
import tkinter as tk

def clear_row(button, buttons):
    # Turn off every button in the same row as `button`.
    button_row = buttons[:3] if button in buttons[:3] else buttons[3:]
    for b in button_row:
        b.config(text='OFF')

def toggle_button(button):
    global buttons
    if button['text'] == 'ON':
        button.config(text='OFF')
    else:
        clear_row(button, buttons)
        button.config(text='ON')

def get_states():
    global buttons
    # Default each row to 0 when none of its buttons is ON.
    mode = 0
    for i in range(3):
        if buttons[i].cget('text') == 'ON':
            mode = i
    distortion = 0
    for i in range(3, 6):
        if buttons[i].cget('text') == 'ON':
            distortion = i - 3
    return mode, distortion
# Create the Tkinter window
window = tk.Tk()
window.title("Toggle Buttons")

# Create two frames for two rows
frame1 = tk.Frame(window)
frame1.pack(side=tk.TOP)
frame2 = tk.Frame(window)
frame2.pack(side=tk.TOP)

# Create six toggle buttons in two rows
buttons = []
for i in range(6):
    frame = frame1 if i < 3 else frame2
    button = tk.Button(frame, text='OFF', width=10)
    button.config(command=lambda button=button: toggle_button(button))
    button.pack(side=tk.LEFT)
    buttons.append(button)

# Start the Tkinter event loop
window.mainloop()
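get_states() returns two row indices, but nothing in this file consumes them yet. One plausible wiring to the audio scripts above, where the mode lists and apply_states helper are assumptions rather than part of the original:

# Hypothetical mapping from button indices to the audio scripts' mode strings.
SOURCE_MODES = ["decibel", "frequency", "none"]
DISTORTIONS = ["echo", "pitch_shift", "low_filter"]

def apply_states():
    mode, distortion = get_states()
    source_mode = SOURCE_MODES[mode]
    effect = DISTORTIONS[distortion]
    print(f"source={source_mode}, effect={effect}")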
import pyaudio
import numpy as np

# Create an instance of the PyAudio class
audio = pyaudio.PyAudio()

# Define the desired pitch shift factor
pitch_shift_factor = 1.5  # increase by 50%

# Choose the desired input and output devices
input_device_index = 1
output_device_index = 1

# Open input stream
input_stream = audio.open(input_device_index=input_device_index, format=pyaudio.paFloat32,
                          channels=1, rate=44100, input=True, frames_per_buffer=1024)

# Open output stream; raising the playback rate is what shifts the pitch up
output_stream = audio.open(output_device_index=output_device_index, format=pyaudio.paFloat32,
                           channels=2, rate=int(44100 * pitch_shift_factor), output=True)

# Read input audio and apply the pitch shift until interrupted
try:
    frames = []
    while True:
        data = input_stream.read(1024)
        frames.append(data)
        # Buffer 1024 chunks of 1024 samples (about 24 s at 44.1 kHz) before processing
        if len(frames) >= 1024:
            audio_data = b''.join(frames)
            audio_array = np.frombuffer(audio_data, dtype=np.float32)
            # Shorten the array to change the pitch (np.resize truncates here)
            resampled_array = np.resize(audio_array, int(len(audio_array) / pitch_shift_factor))
            # Convert the resampled array back to bytes
            resampled_data = resampled_array.astype(np.float32).tobytes()
            # Play the resampled audio
            output_stream.write(resampled_data)
            frames = []
except KeyboardInterrupt:
    pass  # Ctrl+C falls through to the cleanup below, which was unreachable in the original loop

# Stop and close the streams
input_stream.stop_stream()
input_stream.close()
output_stream.stop_stream()
output_stream.close()

# Terminate PyAudio
audio.terminate()
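Note that np.resize simply truncates the buffer, so this approach shortens the audio as much as it shifts the pitch. A sketch of a duration-preserving alternative using librosa (already used elsewhere in this commit); the 7-semitone value is only an example:

import numpy as np
import librosa

def pitch_shift_block(block: np.ndarray, sr: int = 44100, n_steps: float = 7.0) -> np.ndarray:
    # Shift pitch without changing duration; expects a float32 mono block.
    return librosa.effects.pitch_shift(block.astype(np.float32), sr=sr, n_steps=n_steps)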
task_list = []

def display_menu():
    print("Task Manager")
    print("1. Add a task")
    print("2. View tasks")
    print("3. Mark a task as complete")
    print("4. Exit")

def add_task():
    title = input("Enter the task title: ")
    description = input("Enter the task description: ")
    status = "in progress"
    task = {"title": title, "description": description, "status": status}
    task_list.append(task)
    print("The task has been added.")

def view_tasks():
    if not task_list:
        print("The task list is empty.")
    else:
        print()
        print("Task list:")
        print("----------------")
        for task in task_list:
            print(f"Title: {task['title']}")
            print(f"Description: {task['description']}")
            print(f"Status: {task['status']}")
            print("----------------")

def mark_task_complete():
    if not task_list:
        print("The task list is empty.")
        return
    title = input("Enter the title of the task to mark as complete: ")
    for task in task_list:
        if task['title'] == title:
            task['status'] = "complete"
            print("The task has been marked as complete.")
            return
    print("No task matching that title was found.")

while True:
    display_menu()
    choice = input("Choice: ")
    if choice == "1":
        add_task()
    elif choice == "2":
        view_tasks()
    elif choice == "3":
        mark_task_complete()
    elif choice == "4":
        print("Exiting the program.")
        break
    else:
        print("Please enter a valid option.")
    print()
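The task list lives only in memory and is lost on exit. A minimal persistence sketch; the tasks.json path and both helpers are hypothetical additions, not part of the original:

import json

TASKS_FILE = "tasks.json"  # hypothetical path

def save_tasks():
    # Write the in-memory task list out as UTF-8 JSON.
    with open(TASKS_FILE, "w", encoding="utf-8") as f:
        json.dump(task_list, f, ensure_ascii=False, indent=2)

def load_tasks():
    global task_list
    try:
        with open(TASKS_FILE, encoding="utf-8") as f:
            task_list = json.load(f)
    except FileNotFoundError:
        task_list = []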
import sys
import numpy as np
import pyaudio
import librosa
import scipy.signal as signal

RECORD_SECONDS = 5
CHUNK = 1024
RATE = 44100  # elided as unchanged context in the diff view; 44100 Hz is assumed, matching the rest of this commit
DELAY = 0.1   # Delay time in seconds
GAIN = 1      # Echo gain (0 to 1)
MAX_FREQ = 3000
input_device_index = 1
output_device_index = 4

# Create buffer for delayed audio data
buffer_size = int(RATE * DELAY)
buffer = np.zeros(buffer_size, dtype=np.int16)
def do_process(in_data, frame_count, time_info, status_flags):
    global buffer
    data = np.frombuffer(in_data, dtype=np.int16)

    def get_max_average_db(data):  # header elided in the diff view; name and signature taken from the call site below
        # ... (unchanged lines elided in the diff view) ...
        data_float = data.astype(np.float32)
        # Compute the power spectrogram of the data
        S = librosa.stft(data_float, n_fft=256, hop_length=512)
        S_power = np.abs(S)**2
        # Convert power spectrogram to dB scale
        # ... (unchanged lines elided in the diff view) ...

    def get_dominant_freq(data):  # header elided in the diff view; name and signature taken from the call site below
        # ... (unchanged lines elided in the diff view) ...
        freqs = np.fft.fftfreq(len(psd_data), d=1/RATE)
        # Compute the power spectrogram on the mel scale
        S = librosa.feature.melspectrogram(y=data, sr=RATE, n_fft=256, hop_length=1024, n_mels=64)
        # Find the frequency bin with the maximum energy in each frame
        max_bin = np.argmax(S, axis=0)
        # ... (unchanged lines elided in the diff view) ...
        return dominant_freq

    def add_echo(gain):
        global buffer
        # Shift the delay buffer and append the newest block, then mix it in.
        buffer = np.roll(buffer, len(data))
        buffer[-len(data):] = data
        return data + gain * buffer[:len(data)]
    def shift_pitch(pitch_shift_factor):
        audio_array = data
        # Resample (truncate) the audio array to change the pitch
        resampled_array = np.resize(audio_array, int(len(audio_array) / pitch_shift_factor))
        return resampled_array

    def high_filter(param):
        # NOTE: despite the name, this designs a low-pass filter (btype='low').
        audio_data = data
        # Filter parameters
        cutoff_freq = param * MAX_FREQ   # cutoff frequency (Hz)
        nyquist_freq = 0.5 * RATE        # Nyquist frequency (half the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq
        # Design the filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the filter to the audio data
        filtered_audio = signal.lfilter(b, a, audio_data)
        return filtered_audio

    try:
        freq = get_dominant_freq(data)
        # avg_db, max_db = get_max_average_db(data)
        # temp_gain = freq / MAX_FREQ
        # output = add_echo(temp_gain)
        output = shift_pitch(0.5 + freq / MAX_FREQ)
        # output = high_filter(0.5)
        # print(int(freq), int(avg_db), int(max_db))
        return (output.astype(np.int16).tobytes(), pyaudio.paContinue)
    except Exception as e:
        # Returning the raw input keeps the stream alive; the original's bare
        # `return data` was not a valid callback return value.
        print("exception occurred:", e)
        return (in_data, pyaudio.paContinue)

p = pyaudio.PyAudio()
stream = p.open(format=p.get_format_from_width(2),
                channels=1 if sys.platform == 'darwin' else 2,
                rate=RATE,
                input=True,
                input_device_index=input_device_index,
                output_device_index=output_device_index,
                output=True,
                frames_per_buffer=CHUNK,
                stream_callback=do_process)

print('* recording')
# ... (remainder of the file elided in the diff view) ...
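Most of get_dominant_freq's body is elided in the diff above. A self-contained sketch of the same idea, consistent with the surviving lines (mel power spectrogram, per-frame argmax) but not necessarily identical to the original:

import numpy as np
import librosa

def estimate_dominant_freq(block: np.ndarray, rate: int = 44100) -> float:
    # Mel power spectrogram, matching the surviving melspectrogram call
    S = librosa.feature.melspectrogram(y=block.astype(np.float32), sr=rate,
                                       n_fft=256, hop_length=1024, n_mels=64)
    # Index of the most energetic mel bin in each frame
    max_bin = np.argmax(S, axis=0)
    # Convert mel-bin indices to Hz and average over frames
    mel_freqs = librosa.mel_frequencies(n_mels=64, fmax=rate / 2)
    return float(np.mean(mel_freqs[max_bin]))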
1. Build something with actual sound transformation
   - decide whether the actual sound should be audible or not
2. Do identification only, and optionally play back a different sound.
   - proceed with sound identification?