노현욱

feat: finish dev

@@ -6,7 +6,12 @@ import time
import librosa
import tkinter as tk
-ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/s1.mp3").raw_data, dtype=np.int16)
+ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/wooAk.mp3").raw_data, dtype=np.int16)
# constant settings
+CUSTOM_AUDIO_DURATION = 1 # seconds of the user's sound to record
+SOURCE_MODE = "frequency" # height, decibel or frequency
+MODE = "pitch_shift" # low_filter, echo or pitch_shift
# parameter settings
RATE = 44100 # sampling rate (Hz)
@@ -14,14 +19,13 @@ CHUNK = 1024 # number of samples to read at a time
THRESHOLD = 128 # threshold for peak detection
WIN_SIZE = 1024 # window size for the STFT
HOP_SIZE = 512 # hop size between STFT windows (overlap)
-DELAY = 0.1 # Delay time in seconds
+DELAY = 0.1 # Delay time in seconds for echo
MAX_FREQ = 10000 # max freq for pitch shifting
MAX_HEIGHT = 10000 # max height for pitch shifting
MAX_DECIBEL = 50 # max decibel for decibel shifting
SOURCE_MODE = "decibel" # height, decibel or frequency
MODE = "low_filter" # low_filter, echo or pitch_shift
SOUND_SIZE = len(ORIGIN_SOUND) # length of the source sound
sound_idx = 0
@@ -46,7 +50,7 @@ def get_user_audio(duration):
return (in_data, pyaudio.paComplete)
# run the recording
-stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=1, stream_callback=add_to_frame)
+stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK, input_device_index=0, stream_callback=add_to_frame)
time.sleep(1)
stream.start_stream()
@@ -61,7 +65,7 @@ def get_user_audio(duration):
return sound
if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "):
-ORIGIN_SOUND = get_user_audio(0.2)
+ORIGIN_SOUND = get_user_audio(CUSTOM_AUDIO_DURATION)
sound = ORIGIN_SOUND.copy()
@@ -71,6 +75,9 @@ p = pyaudio.PyAudio()
last_time = 0
print("current input : ", SOURCE_MODE)
print("current distortion : ", MODE)
# define the audio callback
def process_audio(in_data, frame_count, time_info, status):
global buffer
@@ -117,6 +124,7 @@ def process_audio(in_data, frame_count, time_info, status):
def shift_pitch(frequency):
pitch_shift_factor = frequency * 3
audio_array = ORIGIN_SOUND.copy()
print("pitch shift over : ", pitch_shift_factor, "octave")
# Resample the audio array to change the pitch
resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1)
return np.array(resampled_array, dtype=np.int16)
@@ -124,8 +132,8 @@ def process_audio(in_data, frame_count, time_info, status):
def low_filter(param):
audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32)
# Define the filter parameters
-cutoff_freq = param * RATE # Frequency cutoff for the low-pass filter (in Hz)
-print("cut of below : ", cutoff_freq)
+cutoff_freq = param * RATE / 8 # Frequency cutoff for the low-pass filter (in Hz)
+print("cut off over : ", cutoff_freq)
nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate)
normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency
@@ -153,7 +161,7 @@ def process_audio(in_data, frame_count, time_info, status):
amp = np.max(np.abs(data)) # estimate the signal amplitude
decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # convert amplitude to decibels
-if(decibel > 10) and height > 100:
+if(decibel > 10) and height > 100 :
last_time = time_info['current_time']
print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time']))
new_sound = get_distortion(height, freq, decibel)
......
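For context on the `pitch_shift` call in the hunk above: with `bins_per_octave=1`, librosa treats each unit of `n_steps` as a whole octave rather than a semitone, so `pitch_shift_factor` is effectively an octave count. A minimal standalone sketch of that behavior (the 440 Hz test tone is illustrative, not part of the project):

```python
import librosa

sr = 44100
tone = librosa.tone(440, sr=sr, duration=0.5)  # A4 test tone

# with bins_per_octave=1, n_steps=1 shifts by a full octave (440 Hz -> ~880 Hz)
up_one_octave = librosa.effects.pitch_shift(tone, sr=sr, n_steps=1, bins_per_octave=1)

# with the default bins_per_octave=12, n_steps counts semitones instead
up_one_semitone = librosa.effects.pitch_shift(tone, sr=sr, n_steps=1)
```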
import pyaudio
import numpy as np
import time

RATE = 44100
CHUNK = 1024

pa = pyaudio.PyAudio()
delay_buffer = np.zeros(RATE, dtype=np.float32)  # one second of delayed mono samples

def callback(in_data, frame_count, time_info, status):
    global delay_buffer
    audio_data = np.frombuffer(in_data, dtype=np.float32)
    # mix the live input with the oldest samples in the delay buffer
    out = audio_data + 0.5 * delay_buffer[:frame_count]
    # slide the buffer forward and append the fresh input at the end
    delay_buffer = np.concatenate((delay_buffer[frame_count:], audio_data))
    return (out.tobytes(), pyaudio.paContinue)

stream = pa.open(format=pyaudio.paFloat32,
                 channels=1,
                 rate=RATE,
                 input=True,
                 output=True,
                 frames_per_buffer=CHUNK,
                 stream_callback=callback)
stream.start_stream()

# keep the stream running for 30 seconds; the callback handles all I/O
time.sleep(30)

stream.stop_stream()
stream.close()
pa.terminate()
from pydub import AudioSegment
import pyaudio
import numpy as np
# Usage:
file_path = "./sounds/s1.mp3"
audio_file = AudioSegment.from_mp3(file_path)
print(audio_file.frame_rate)
print(audio_file.sample_width)
print(audio_file.channels)
raw_audio_data = np.frombuffer(audio_file.raw_data, dtype=np.int16)  # decode the same file the metadata came from
p = pyaudio.PyAudio()
idx = 0
def callback(in_data, frame_count, time_info, status):
    global raw_audio_data
    global idx
    # frame_count frames correspond to frame_count * channels int16 samples
    samples = frame_count * audio_file.channels
    chunk = raw_audio_data[idx:idx + samples]
    idx += samples
    if len(chunk) < samples:
        # pad the tail with silence and loop back to the start
        chunk = np.concatenate((chunk, np.zeros(samples - len(chunk), dtype=np.int16)))
        idx = 0
    return (chunk.tobytes(), pyaudio.paContinue)
stream = p.open(format=p.get_format_from_width(audio_file.sample_width),
                channels=audio_file.channels,
                rate=audio_file.frame_rate,
                output_device_index=2,
                output=True,
                stream_callback=callback)
stream.start_stream()
while stream.is_active():
    pass
stream.stop_stream()
stream.close()
p.terminate()
import pyaudio
import numpy as np
import tkinter as tk
from queue import Queue, Empty
from pydub import AudioSegment

class MP3Player:
    def __init__(self):
        self.filename = "./sounds/s4.mp3"
        self.audio = pyaudio.PyAudio()
        self.stream = None
        self.isPlaying = False
        self.chunk = 1024
        self.queue = Queue()

    def load_mp3_file(self):
        # decode the MP3 and enqueue it chunk by chunk for the callback
        segment = AudioSegment.from_mp3(self.filename)
        self.sample_rate = segment.frame_rate
        self.channels = segment.channels
        samples = np.frombuffer(segment.raw_data, dtype=np.int16)
        step = self.chunk * self.channels
        # pad to a whole number of chunks so every queue entry is full-size
        samples = np.concatenate((samples, np.zeros((-len(samples)) % step, dtype=np.int16)))
        for i in range(0, len(samples), step):
            self.enqueue_data(samples[i:i + step])

    def start_stream(self):
        self.stream = self.audio.open(format=pyaudio.paInt16,
                                      channels=self.channels,
                                      rate=self.sample_rate,
                                      output=True,
                                      output_device_index=2,
                                      frames_per_buffer=self.chunk,
                                      stream_callback=self.callback)
        self.stream.start_stream()

    def stop_stream(self):
        if self.stream:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    def play(self):
        self.load_mp3_file()
        self.start_stream()
        self.isPlaying = True

    def pause(self):
        if self.isPlaying:
            self.stop_stream()
            self.isPlaying = False

    def callback(self, in_data, frame_count, time_info, status):
        try:
            data = self.queue.get_nowait()
        except Empty:
            # play silence when the queue runs dry
            data = np.zeros(frame_count * self.channels, dtype=np.int16)
        return (data.tobytes(), pyaudio.paContinue)

    def enqueue_data(self, data):
        self.queue.put(data)

    def create_gui(self):
        self.root = tk.Tk()
        self.root.title("MP3 Player")
        self.play_button = tk.Button(self.root, text="Play", command=self.play)
        self.play_button.pack(pady=10)
        self.pause_button = tk.Button(self.root, text="Pause", command=self.pause)
        self.pause_button.pack(pady=10)
        self.root.mainloop()

if __name__ == "__main__":
    player = MP3Player()
    player.create_gui()
import pyaudio
import numpy as np
import librosa
import queue
import pydub
from scipy import signal
from scipy.fft import fft, fftfreq
import math
import time
RATE = 44100
CHUNK = 1024
MAX_FREQ = 40 # max freq for pitch shifting
MAX_AMPLITUDE = 10000 # max amplitude for pitch shifting
MAX_DECIBEL = 50 # max decibel for decibel shifting
EFFECT_LEVEL = 20 # number of effect level
INPUT_DEVICE_INDEX = 0
OUTPUT_DEVICE_INDEX = 1
CUSTOM_AUDIO_DURATION = 0.5 # seconds
# sound queue; audio is enqueued one CHUNK at a time
Q = queue.Queue()
Q.put(np.zeros(CHUNK, dtype=np.int16))
sound_idx = 0
# Record the user's voice for `duration` seconds.
def get_user_audio(duration):
    frames = []
    p = pyaudio.PyAudio()

    # countdown before the recording starts
    print("ready for recording...")
    for _ in range(3, 0, -1):
        print(_)
        time.sleep(1)
    print("start...")

    # recording callback: collect chunks until `duration` seconds are buffered
    def add_to_frame(in_data, frame_count, time_info, status):
        frames.append(np.frombuffer(in_data, dtype=np.int16))
        if len(frames) < RATE / CHUNK * duration:
            return (in_data, pyaudio.paContinue)
        return (in_data, pyaudio.paComplete)

    # run the recording
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, input_device_index=0,
                    stream_callback=add_to_frame, start=False)
    stream.start_stream()
    time.sleep(duration + 0.5)  # let the callback collect enough frames
    sound = np.frombuffer(b''.join(frames), dtype=np.int16)

    # clean up the stream and PyAudio objects
    stream.stop_stream()
    stream.close()
    p.terminate()
    return sound
class Effector:
    def __init__(self, sound):
        self.sound = sound.copy()
        np.set_printoptions(threshold=np.inf)
        # pad the sound to a whole number of chunks
        self.sound = np.concatenate((self.sound, np.zeros((-len(sound)) % CHUNK, dtype=np.int16)))
        self.target = "frequency"
        self.effect = "pitch_shift"

        # precompute every effect at EFFECT_LEVEL intensities so the audio
        # callback only has to index into a table instead of running DSP
        self.echoed_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.echoed_sounds.append(self.make_echo(i/EFFECT_LEVEL))
        self.pitch_shifted_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.pitch_shifted_sounds.append(self.pitch_shift(i/EFFECT_LEVEL))
        self.low_filtered_sounds = list()
        for i in range(EFFECT_LEVEL):
            self.low_filtered_sounds.append(self.low_filter(i/EFFECT_LEVEL))
        self.augmented_sound = {
            "echo": self.echoed_sounds,
            "pitch_shift": self.pitch_shifted_sounds,
            "low_filter": self.low_filtered_sounds
        }
    def change_target(self, target):
        if target not in ["decibel", "frequency", "amplitude"]:
            raise Exception("Invalid target")
        self.target = target

    def change_effect(self, effect):
        if effect not in ["echo", "pitch_shift", "low_filter"]:
            raise Exception("Invalid effect")
        self.effect = effect
    def make_echo(self, decay):
        delay = int(0.01 * RATE)  # echo delay in samples
        sound = self.sound.astype(np.float32)
        echoed_audio = sound.copy()
        # add a decayed copy of the signal, delayed by `delay` samples
        echoed_audio[delay:] += decay * sound[:-delay]
        return np.array(np.clip(echoed_audio, -32768, 32767), dtype=np.int16)

    def pitch_shift(self, shift):
        # with bins_per_octave=1, n_steps counts whole octaves (0 to 3 here)
        sound = librosa.effects.pitch_shift(y=np.array(self.sound, np.float32), sr=RATE, n_steps=shift*3, bins_per_octave=1)
        return np.array(sound, dtype=np.int16)
    def low_filter(self, param):
        param = max(param, 0.1)
        audio_data = np.array(self.sound.copy(), dtype=np.float32)
        # Define the filter parameters
        cutoff_freq = param * RATE / 8  # frequency cutoff for the low-pass filter (in Hz)
        nyquist_freq = 0.5 * RATE  # Nyquist frequency (half of the sampling rate)
        normalized_cutoff = cutoff_freq / nyquist_freq  # normalized cutoff frequency
        # Design the low-pass filter
        b, a = signal.butter(4, normalized_cutoff, btype='low', analog=False, output='ba')
        # Apply the low-pass filter to the audio data
        filtered_audio = signal.lfilter(b, a, audio_data)
        return np.array(filtered_audio, dtype=np.int16)
    def get_distortion_rate(self, db, main_frequency, amplitude):
        print("current target is", self.target)
        param = 0
        print(MAX_FREQ, main_frequency)
        if self.target == "frequency":
            param = min(MAX_FREQ - 1, main_frequency) / MAX_FREQ
        elif self.target == "amplitude":
            param = min(MAX_AMPLITUDE - 1, amplitude) / MAX_AMPLITUDE
        elif self.target == "decibel":
            param = min(MAX_DECIBEL - 1, db) / MAX_DECIBEL
        # clamp so the table index into the precomputed effects stays in [0, EFFECT_LEVEL)
        param = max(param, 0)
        return param
    def get_decibel_freq_amplitude(self, active_sound):
        samples = active_sound.copy()
        fft_data = fft(samples)
        frequencies = fftfreq(len(samples))
        # Find the index of the main frequency component; the +1 compensates
        # for slicing off the DC component before the argmax
        main_freq_index = np.argmax(np.abs(fft_data[1:len(samples)//2])) + 1
        # fftfreq without a sample spacing returns cycles/sample; the x1000 is
        # an ad-hoc scale chosen to match MAX_FREQ above
        main_frequency = abs(frequencies[main_freq_index]) * 1000
        # Calculate the amplitude of the main frequency component
        amplitude = abs(fft_data[main_freq_index])
        # Convert amplitude to decibels (dB)
        db = 20 * math.log10(amplitude)
        return db, main_frequency, amplitude
    def add_to_queue(self, active_sound):
        global Q
        db, main_frequency, amplitude = self.get_decibel_freq_amplitude(active_sound)
        print("touched, db {}, main_frequency {}, amplitude {}".format(db, main_frequency, amplitude))
        param = self.get_distortion_rate(db, main_frequency, amplitude)
        print("param : ", param)
        # enqueue the precomputed variant whose level matches the measured input
        for i in range(0, len(self.sound), CHUNK):
            temp_chunk = self.augmented_sound[self.effect][int(param*EFFECT_LEVEL)][i:i+CHUNK]
            Q.put(temp_chunk)
# get file
audio_file_path = "./sounds/short_wooAk.mp3"
audio = np.frombuffer(pydub.AudioSegment.from_mp3(audio_file_path).raw_data, dtype=np.int16)

if "y" == input("Record your own sound? (y/n): "):
    audio = get_user_audio(CUSTOM_AUDIO_DURATION)

# make effect
effector = Effector(audio)

idx = 0
prev_touched = False
pa = pyaudio.PyAudio()
active_sound_buffer = np.zeros(0, dtype=np.int16)
def callback(in_data, frame_count, time_info, status):
    global idx, effector, Q, prev_touched, active_sound_buffer
    audio_data = np.frombuffer(in_data, dtype=np.int16)
    # cheap O(n) energy estimate used to detect a "touch" on the input
    raw_power = np.sum(np.abs(audio_data.astype(np.int64)))
    if raw_power > 2**20 and not prev_touched:
        # touch started: begin buffering the active sound
        prev_touched = True
        active_sound_buffer = audio_data
    elif raw_power > 2**20 and prev_touched:
        active_sound_buffer = np.concatenate((active_sound_buffer, audio_data))
    elif prev_touched and raw_power <= 2**20:
        # touch ended: analyze the buffered sound and enqueue the effect
        effector.add_to_queue(active_sound_buffer)
        active_sound_buffer = np.zeros(0, dtype=np.int16)
        prev_touched = False
    elif not prev_touched and raw_power <= 2**20:
        prev_touched = False
        active_sound_buffer = np.zeros(0, dtype=np.int16)
    if Q.qsize() == 1:
        # keep one chunk of silence queued so the output never starves
        Q.put(np.zeros(CHUNK, dtype=np.int16))
    else:
        print(Q.qsize())
    return (Q.get().tobytes(), pyaudio.paContinue)
stream = pa.open(format=pa.get_format_from_width(2),
                 channels=1,
                 rate=RATE,
                 input_device_index=INPUT_DEVICE_INDEX,
                 output_device_index=OUTPUT_DEVICE_INDEX,
                 input=True,
                 output=True,
                 frames_per_buffer=CHUNK,
                 stream_callback=callback)
stream.start_stream()
# keep the stream running; the callback does all the work
while stream.is_active():
    pass
stream.close()
pa.terminate()
# Capstone Design 2
# An instrument using Sound Augmentation
1. Try a different peak-estimation method
2. Run the FFT only while a peak is being estimated (see the sketch below)
3. Minimize the processing delay
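As a rough sketch of item 2, the FFT can be gated behind a cheap energy check so spectral analysis only runs on chunks that might contain a peak. The helper name is hypothetical, and the threshold is an assumption that mirrors the `2**20` raw-power gate in the callback above:

```python
import numpy as np
from scipy.fft import fft, fftfreq

RATE = 44100
CHUNK = 1024
ENERGY_THRESHOLD = 2**20  # assumed gate level, mirroring the raw_power check in the callback

def analyze_if_peaked(chunk: np.ndarray):
    """Run the FFT only when a cheap energy test suggests a peak (illustrative helper)."""
    # O(n) energy estimate; much cheaper than the O(n log n) FFT
    raw_power = np.sum(np.abs(chunk.astype(np.int64)))
    if raw_power <= ENERGY_THRESHOLD:
        return None  # quiet chunk: skip the FFT entirely
    spectrum = fft(chunk.astype(np.float32))
    freqs = fftfreq(len(chunk), d=1.0 / RATE)
    main_bin = np.argmax(np.abs(spectrum[1:len(chunk) // 2])) + 1  # skip the DC bin
    return freqs[main_bin], np.abs(spectrum[main_bin])
```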