Showing
8 changed files
with
213 additions
and
39 deletions
... | @@ -6,17 +6,21 @@ import time | ... | @@ -6,17 +6,21 @@ import time |
6 | import librosa | 6 | import librosa |
7 | import tkinter as tk | 7 | import tkinter as tk |
8 | 8 | ||
9 | -ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16) | 9 | +ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/s1.mp3").raw_data, dtype=np.int16) |
10 | 10 | ||
11 | # 파라미터 설정 | 11 | # 파라미터 설정 |
12 | RATE = 44100 # 샘플링 주파수 | 12 | RATE = 44100 # 샘플링 주파수 |
13 | CHUNK = 1024 # 읽을 샘플의 수 | 13 | CHUNK = 1024 # 읽을 샘플의 수 |
14 | -THRESHOLD = 256 # 피크를 검출하기 위한 threshold 값 | 14 | +THRESHOLD = 128 # 피크를 검출하기 위한 threshold 값 |
15 | WIN_SIZE = 1024 # STFT를 적용할 윈도우 사이즈 | 15 | WIN_SIZE = 1024 # STFT를 적용할 윈도우 사이즈 |
16 | HOP_SIZE = 512 # STFT에서 윈도우 사이의 거리 (오버랩 사이즈) | 16 | HOP_SIZE = 512 # STFT에서 윈도우 사이의 거리 (오버랩 사이즈) |
17 | DELAY = 0.1 # Delay time in seconds | 17 | DELAY = 0.1 # Delay time in seconds |
18 | -MAX_FREQ = 3000 # max freq for pitch shifting | 18 | +MAX_FREQ = 10000 # max freq for pitch shifting |
19 | -MAX_HEIGHT = 2000 # max height for pitch shifting | 19 | +MAX_HEIGHT = 10000 # max height for pitch shifting |
20 | +MAX_DECIBEL = 50 # max decibel for decibel shifting | ||
21 | +SOURCE_MODE = "decibel" # height, decibel or frequency | ||
22 | +MODE = "low_filter" # low_filter, echo or pitch_shift | ||
23 | +SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이 | ||
20 | 24 | ||
21 | sound_idx = 0 | 25 | sound_idx = 0 |
22 | 26 | ||
... | @@ -56,10 +60,8 @@ def get_user_audio(duration): | ... | @@ -56,10 +60,8 @@ def get_user_audio(duration): |
56 | 60 | ||
57 | return sound | 61 | return sound |
58 | 62 | ||
59 | -ORIGIN_SOUND = get_user_audio(0.5) | 63 | +if "y" == input("직접 녹음을 하시겠습니까? (y/n) : "): |
60 | -SOURCE_MODE = "decibel" # decibel or frequency | 64 | + ORIGIN_SOUND = get_user_audio(0.2) |
61 | -MODE = "high_filter" # echo or pitch_shift | ||
62 | -SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이 | ||
63 | 65 | ||
64 | sound = ORIGIN_SOUND.copy() | 66 | sound = ORIGIN_SOUND.copy() |
65 | 67 | ||
... | @@ -67,24 +69,27 @@ print(type(sound), len(sound)) | ... | @@ -67,24 +69,27 @@ print(type(sound), len(sound)) |
67 | 69 | ||
68 | p = pyaudio.PyAudio() | 70 | p = pyaudio.PyAudio() |
69 | 71 | ||
70 | -last_frame = 0 | 72 | +last_time = 0 |
71 | 73 | ||
72 | # 콜백 함수 정의 | 74 | # 콜백 함수 정의 |
73 | def process_audio(in_data, frame_count, time_info, status): | 75 | def process_audio(in_data, frame_count, time_info, status): |
74 | global buffer | 76 | global buffer |
75 | global sound | 77 | global sound |
76 | global sound_idx | 78 | global sound_idx |
77 | - global last_frame | 79 | + global last_time |
78 | 80 | ||
79 | 81 | ||
80 | - def get_distortion(height, frequency): | 82 | + def get_distortion(height, frequency, decibel): |
81 | - height = min(height, MAX_HEIGHT) / MAX_HEIGHT | 83 | + height = min(height, MAX_HEIGHT) / RATE |
82 | frequency = min(frequency, MAX_FREQ) / MAX_FREQ | 84 | frequency = min(frequency, MAX_FREQ) / MAX_FREQ |
85 | + decibel = min(decibel, MAX_DECIBEL) / MAX_DECIBEL | ||
83 | 86 | ||
84 | - if SOURCE_MODE == "decibel": | 87 | + if SOURCE_MODE == "height": |
85 | param = height | 88 | param = height |
86 | elif SOURCE_MODE == "frequency": | 89 | elif SOURCE_MODE == "frequency": |
87 | param = frequency | 90 | param = frequency |
91 | + elif SOURCE_MODE == "decibel": | ||
92 | + param = decibel | ||
88 | else: | 93 | else: |
89 | return ORIGIN_SOUND | 94 | return ORIGIN_SOUND |
90 | 95 | ||
... | @@ -110,16 +115,17 @@ def process_audio(in_data, frame_count, time_info, status): | ... | @@ -110,16 +115,17 @@ def process_audio(in_data, frame_count, time_info, status): |
110 | return echoed_samples | 115 | return echoed_samples |
111 | 116 | ||
112 | def shift_pitch(frequency): | 117 | def shift_pitch(frequency): |
113 | - pitch_shift_factor = frequency | 118 | + pitch_shift_factor = frequency * 3 |
114 | audio_array = ORIGIN_SOUND.copy() | 119 | audio_array = ORIGIN_SOUND.copy() |
115 | # Resample the audio array to change the pitch | 120 | # Resample the audio array to change the pitch |
116 | resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1) | 121 | resampled_array = librosa.effects.pitch_shift(np.array(audio_array, dtype=np.float32), sr=RATE, n_steps=pitch_shift_factor, bins_per_octave=1) |
117 | return np.array(resampled_array, dtype=np.int16) | 122 | return np.array(resampled_array, dtype=np.int16) |
118 | 123 | ||
119 | def low_filter(param): | 124 | def low_filter(param): |
120 | - audio_data = data | 125 | + audio_data = np.array(ORIGIN_SOUND.copy(), dtype=np.float32) |
121 | # Define the filter parameters | 126 | # Define the filter parameters |
122 | - cutoff_freq = param * MAX_FREQ # Frequency cutoff for the low-pass filter (in Hz) | 127 | + cutoff_freq = param * RATE # Frequency cutoff for the low-pass filter (in Hz) |
128 | + print("cut of below : ", cutoff_freq) | ||
123 | nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate) | 129 | nyquist_freq = 0.5 * RATE # Nyquist frequency (half of the sampling rate) |
124 | normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency | 130 | normalized_cutoff = cutoff_freq / nyquist_freq # Normalized cutoff frequency |
125 | 131 | ||
... | @@ -129,7 +135,7 @@ def process_audio(in_data, frame_count, time_info, status): | ... | @@ -129,7 +135,7 @@ def process_audio(in_data, frame_count, time_info, status): |
129 | # Apply the low-pass filter to the audio data | 135 | # Apply the low-pass filter to the audio data |
130 | filtered_audio = signal.lfilter(b, a, audio_data) | 136 | filtered_audio = signal.lfilter(b, a, audio_data) |
131 | 137 | ||
132 | - return filtered_audio | 138 | + return np.array(filtered_audio, dtype=np.int16) |
133 | 139 | ||
134 | # 오디오 데이터 변환 | 140 | # 오디오 데이터 변환 |
135 | data = np.frombuffer(in_data, dtype=np.int16) | 141 | data = np.frombuffer(in_data, dtype=np.int16) |
... | @@ -140,18 +146,19 @@ def process_audio(in_data, frame_count, time_info, status): | ... | @@ -140,18 +146,19 @@ def process_audio(in_data, frame_count, time_info, status): |
140 | # 피크 검출 | 146 | # 피크 검출 |
141 | peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE) | 147 | peaks, _ = signal.find_peaks(np.abs(np.mean(Zxx, axis=1)), height=THRESHOLD, distance=WIN_SIZE) |
142 | # 파라미터 추정 | 148 | # 파라미터 추정 |
143 | - if len(peaks) > 0 and last_frame+1 != frame_count: | 149 | + if len(peaks) > 0 and last_time+0.1 < time_info['current_time']: |
144 | - last_frame = frame_count | ||
145 | peak_idx = peaks[0] # 첫 번째 피크 선택 | 150 | peak_idx = peaks[0] # 첫 번째 피크 선택 |
146 | height = np.abs(Zxx[peak_idx, 0]) # 피크의 높이 추정 | 151 | height = np.abs(Zxx[peak_idx, 0]) # 피크의 높이 추정 |
147 | freq = f[peak_idx] # 피크의 주파수 추정 | 152 | freq = f[peak_idx] # 피크의 주파수 추정 |
148 | amp = np.max(np.abs(data)) # 신호의 진폭 추정 | 153 | amp = np.max(np.abs(data)) # 신호의 진폭 추정 |
149 | decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # 진폭을 데시벨로 변환 | 154 | decibel = np.mean(librosa.amplitude_to_db(np.abs(Zxx))) # 진폭을 데시벨로 변환 |
150 | 155 | ||
151 | - if(decibel > 20): | 156 | + if(decibel > 10) and height > 100: |
152 | - print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}".format(height, freq, amp, decibel)) | 157 | + last_time = time_info['current_time'] |
153 | - new_sound = get_distortion(height, freq) | 158 | + print("Height: {:.2f}, 주파수: {:.2f}, Amplitude: {:.2f}, Decibel: {:.2f}, time_info {:.2f}".format(height, freq, amp, decibel, time_info['current_time'])) |
159 | + new_sound = get_distortion(height, freq, decibel) | ||
154 | if(sound_idx > len(sound)): | 160 | if(sound_idx > len(sound)): |
161 | + sound = new_sound | ||
155 | sound_idx = 0 | 162 | sound_idx = 0 |
156 | else: | 163 | else: |
157 | mixed_end = min(len(sound), sound_idx + len(new_sound)) | 164 | mixed_end = min(len(sound), sound_idx + len(new_sound)) |
... | @@ -161,7 +168,7 @@ def process_audio(in_data, frame_count, time_info, status): | ... | @@ -161,7 +168,7 @@ def process_audio(in_data, frame_count, time_info, status): |
161 | result = np.concatenate((sound, new_sound[mixed_end-sound_idx:]),axis=0) | 168 | result = np.concatenate((sound, new_sound[mixed_end-sound_idx:]),axis=0) |
162 | sound = result | 169 | sound = result |
163 | elif len(peaks) > 0: | 170 | elif len(peaks) > 0: |
164 | - last_frame = frame_count | 171 | + last_time = time_info['current_time'] |
165 | 172 | ||
166 | sound_idx += 1024 | 173 | sound_idx += 1024 |
167 | if sound_idx > len(sound): | 174 | if sound_idx > len(sound): | ... | ... |
... | @@ -5,6 +5,7 @@ import pydub | ... | @@ -5,6 +5,7 @@ import pydub |
5 | import time | 5 | import time |
6 | import librosa | 6 | import librosa |
7 | import tkinter as tk | 7 | import tkinter as tk |
8 | +import threading | ||
8 | 9 | ||
9 | ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16) | 10 | ORIGIN_SOUND = np.frombuffer(pydub.AudioSegment.from_mp3("./sounds/ping.mp3").raw_data, dtype=np.int16) |
10 | 11 | ||
... | @@ -25,29 +26,26 @@ window.title("Sound Effect") | ... | @@ -25,29 +26,26 @@ window.title("Sound Effect") |
25 | window.geometry("640x400+100+100") | 26 | window.geometry("640x400+100+100") |
26 | window.resizable(False, False) | 27 | window.resizable(False, False) |
27 | 28 | ||
28 | -info_text = tk.StringVar() | ||
29 | -info_text.set("welcome! please press record button.") | ||
30 | 29 | ||
31 | -info_label = tk.Button(window, textvariable=info_text, foreground="black", background="white") | 30 | +info_label = tk.Button(window, width=50, height=10, bg="white", fg="black") |
32 | info_label.pack() | 31 | info_label.pack() |
33 | 32 | ||
33 | +info_text = tk.StringVar() | ||
34 | +info_text.set("ready for recording...") | ||
35 | +info_label.config(textvariable=info_text) | ||
36 | + | ||
34 | def set_source_mode(mode): | 37 | def set_source_mode(mode): |
35 | global SOURCE_MODE | 38 | global SOURCE_MODE |
36 | SOURCE_MODE = mode | 39 | SOURCE_MODE = mode |
37 | 40 | ||
38 | # 사용자의 목소리를 duration 초간 녹음. | 41 | # 사용자의 목소리를 duration 초간 녹음. |
39 | def get_user_audio(duration): | 42 | def get_user_audio(duration): |
40 | - global info_text, info_label | 43 | + global info_text, info_label, ORIGIN_SOUND |
41 | frames = [] | 44 | frames = [] |
42 | p = pyaudio.PyAudio() | 45 | p = pyaudio.PyAudio() |
43 | 46 | ||
44 | # 카운터 시작 | 47 | # 카운터 시작 |
45 | - | ||
46 | info_text.set("ready for recording...") | 48 | info_text.set("ready for recording...") |
47 | - for _ in range(3, 0, -1): | ||
48 | - info_text.set(str(_)) | ||
49 | - time.sleep(1) | ||
50 | - info_text.set("start...") | ||
51 | 49 | ||
52 | # 실제 녹음 콜백 함수 | 50 | # 실제 녹음 콜백 함수 |
53 | def add_to_frame(in_data, frame_count, time_info, status): | 51 | def add_to_frame(in_data, frame_count, time_info, status): |
... | @@ -69,17 +67,19 @@ def get_user_audio(duration): | ... | @@ -69,17 +67,19 @@ def get_user_audio(duration): |
69 | stream.close() | 67 | stream.close() |
70 | p.terminate() | 68 | p.terminate() |
71 | 69 | ||
70 | + info_text.set("start...") | ||
71 | + | ||
72 | return sound | 72 | return sound |
73 | 73 | ||
74 | def record(): | 74 | def record(): |
75 | - global ORIGIN_SOUND | 75 | + global ORIGIN_SOUND, SOURCE_MODE, MODE |
76 | - global SOURCE_MODE | 76 | + |
77 | ORIGIN_SOUND = get_user_audio(0.5) | 77 | ORIGIN_SOUND = get_user_audio(0.5) |
78 | - SOURCE_MODE = "decibel" # decibel or frequency | 78 | + SOURCE_MODE = "frequency" # decibel or frequency |
79 | + MODE = "pitch_shift" | ||
79 | 80 | ||
80 | def start(): | 81 | def start(): |
81 | - global MODE, SOUND_SIZE, sound_idx, sound | 82 | + global MODE, SOUND_SIZE, sound_idx, sound, ORIGIN_SOUND, last_frame, SOURCE_MODE, MODE |
82 | - MODE = "high_filter" # echo or pitch_shift | ||
83 | SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이 | 83 | SOUND_SIZE = len(ORIGIN_SOUND) # 음원 길이 |
84 | 84 | ||
85 | sound = ORIGIN_SOUND.copy() | 85 | sound = ORIGIN_SOUND.copy() |
... | @@ -92,8 +92,7 @@ def start(): | ... | @@ -92,8 +92,7 @@ def start(): |
92 | 92 | ||
93 | # 콜백 함수 정의 | 93 | # 콜백 함수 정의 |
94 | def process_audio(in_data, frame_count, time_info, status): | 94 | def process_audio(in_data, frame_count, time_info, status): |
95 | - | 95 | + global sound_idx, sound, last_frame, ORIGIN_SOUND, MODE, SOURCE_MODE |
96 | - | ||
97 | def get_distortion(height, frequency): | 96 | def get_distortion(height, frequency): |
98 | height = min(height, MAX_HEIGHT) / MAX_HEIGHT | 97 | height = min(height, MAX_HEIGHT) / MAX_HEIGHT |
99 | frequency = min(frequency, MAX_FREQ) / MAX_FREQ | 98 | frequency = min(frequency, MAX_FREQ) / MAX_FREQ |
... | @@ -216,6 +215,9 @@ def start(): | ... | @@ -216,6 +215,9 @@ def start(): |
216 | p.terminate() | 215 | p.terminate() |
217 | 216 | ||
218 | 217 | ||
218 | + | ||
219 | + | ||
220 | + | ||
219 | record_button = tk.Button(window, text="Record", width=10, height=2, command=lambda: record()) | 221 | record_button = tk.Button(window, text="Record", width=10, height=2, command=lambda: record()) |
220 | record_button.pack() | 222 | record_button.pack() |
221 | 223 | ||
... | @@ -225,6 +227,7 @@ decibel_button.pack() | ... | @@ -225,6 +227,7 @@ decibel_button.pack() |
225 | frequency_button = tk.Button(window, text="Frequency", width=10, height=2, command = lambda: set_source_mode("frequency")) | 227 | frequency_button = tk.Button(window, text="Frequency", width=10, height=2, command = lambda: set_source_mode("frequency")) |
226 | frequency_button.pack() | 228 | frequency_button.pack() |
227 | 229 | ||
230 | +#start_button = tk.Button(window, text="Start", width=10, height=2, command=lambda: threading.Thread(target=start).start()) | ||
228 | start_button = tk.Button(window, text="Start", width=10, height=2, command=lambda: start()) | 231 | start_button = tk.Button(window, text="Start", width=10, height=2, command=lambda: start()) |
229 | start_button.pack() | 232 | start_button.pack() |
230 | 233 | ... | ... |
No preview for this file type
No preview for this file type
No preview for this file type
spectograms/echo_added_spectogram.py
0 → 100644
1 | +import numpy as np | ||
2 | +import matplotlib.pyplot as plt | ||
3 | +import librosa | ||
4 | +import librosa.display | ||
5 | + | ||
6 | +# Load the audio file | ||
7 | +audio_file = "./sounds/s4.mp3" | ||
8 | +audio, sr = librosa.load(audio_file, sr=None) | ||
9 | + | ||
10 | +# Echo parameters | ||
11 | +delay = int(0.2 * sr) # Echo delay in samples | ||
12 | +decay1 = 0.8 # Echo decay factor 1 | ||
13 | +decay2 = 0.3 # Echo decay factor 2 | ||
14 | + | ||
15 | +# Apply echo effect with decay factor 1 | ||
16 | +echoed_audio1 = np.zeros_like(audio) | ||
17 | +echoed_audio1[delay:] = audio[:-delay] + decay1 * audio[delay:] | ||
18 | + | ||
19 | +# Apply echo effect with decay factor 2 | ||
20 | +echoed_audio2 = np.zeros_like(audio) | ||
21 | +echoed_audio2[delay:] = audio[:-delay] + decay2 * audio[delay:] | ||
22 | + | ||
23 | +# Compute the Mel spectrogram of the original audio | ||
24 | +mel_spec_orig = librosa.feature.melspectrogram(y=audio, sr=sr) | ||
25 | + | ||
26 | +# Compute the Mel spectrogram of the echoed audio with decay factor 1 | ||
27 | +mel_spec_echoed1 = librosa.feature.melspectrogram(y=echoed_audio1, sr=sr) | ||
28 | + | ||
29 | +# Compute the Mel spectrogram of the echoed audio with decay factor 2 | ||
30 | +mel_spec_echoed2 = librosa.feature.melspectrogram(y=echoed_audio2, sr=sr) | ||
31 | + | ||
32 | +# Convert to dB scale | ||
33 | +mel_spec_orig_db = librosa.power_to_db(mel_spec_orig, ref=np.max) | ||
34 | +mel_spec_echoed1_db = librosa.power_to_db(S=mel_spec_echoed1, ref=np.max) | ||
35 | +mel_spec_echoed2_db = librosa.power_to_db(S=mel_spec_echoed2, ref=np.max) | ||
36 | + | ||
37 | +# Display the Mel spectrograms | ||
38 | +plt.figure(figsize=(10, 4)) | ||
39 | + | ||
40 | +plt.subplot(1, 2, 1) | ||
41 | +librosa.display.specshow(mel_spec_orig_db, sr=sr, x_axis='time', y_axis='mel') | ||
42 | +plt.colorbar(format='%+2.0f dB') | ||
43 | +plt.title('Original Mel Spectrogram') | ||
44 | + | ||
45 | +plt.subplot(1, 2, 2) | ||
46 | +librosa.display.specshow(mel_spec_echoed1_db, sr=sr, x_axis='time', y_axis='mel') | ||
47 | +plt.colorbar(format='%+2.0f dB') | ||
48 | +plt.title('Echoed Mel Spectrogram (Decay 1)') | ||
49 | + | ||
50 | +# plt.subplot(1, 3, 3) | ||
51 | +# librosa.display.specshow(mel_spec_echoed2_db, sr=sr, x_axis='time', y_axis='mel') | ||
52 | +# plt.colorbar(format='%+2.0f dB') | ||
53 | +# plt.title('Echoed Mel Spectrogram (Decay 2)') | ||
54 | + | ||
55 | +plt.tight_layout() | ||
56 | +plt.show() |
spectograms/low_filter_spectogram.py
0 → 100644
1 | +import numpy as np | ||
2 | +import matplotlib.pyplot as plt | ||
3 | +import librosa | ||
4 | +import librosa.display | ||
5 | +from scipy.signal import butter, lfilter | ||
6 | +import sounddevice as sd | ||
7 | + | ||
8 | +def apply_low_pass_filter(data, cutoff_freq, sample_rate, order=5): | ||
9 | + print(type(data), data.shape, data.dtype) | ||
10 | + | ||
11 | + # Calculate the digital cutoff frequency | ||
12 | + nyquist_freq = 0.5 * sample_rate | ||
13 | + digital_cutoff = cutoff_freq / nyquist_freq | ||
14 | + | ||
15 | + # Create the filter coefficients using Butterworth filter | ||
16 | + b, a = butter(order, digital_cutoff, btype='low', analog=False) | ||
17 | + | ||
18 | + # Apply the filter to the data | ||
19 | + filtered_data = lfilter(b, a, data) | ||
20 | + | ||
21 | + return filtered_data | ||
22 | + | ||
23 | +# Load the audio file | ||
24 | +audio_file = "./sounds/s4.mp3" | ||
25 | +audio, sr = librosa.load(audio_file, sr=None) | ||
26 | + | ||
27 | +# Filter parameters | ||
28 | +cutoff_frequency = 5000 # Cutoff frequency in Hz | ||
29 | +sample_rate = sr | ||
30 | + | ||
31 | +# Apply the low-pass filter | ||
32 | +filtered_audio = apply_low_pass_filter(audio, cutoff_frequency, sample_rate) | ||
33 | + | ||
34 | +# Play the filtered audio | ||
35 | +sd.play(filtered_audio, sample_rate) | ||
36 | + | ||
37 | +# Compute the Mel spectrogram of the original audio | ||
38 | +mel_spec_orig = librosa.feature.melspectrogram(y=audio, sr=sr) | ||
39 | +mel_spec_orig_db = librosa.power_to_db(S=mel_spec_orig, ref=np.max) | ||
40 | + | ||
41 | +# Compute the Mel spectrogram of the filtered audio | ||
42 | +mel_spec_filtered = librosa.feature.melspectrogram(y=filtered_audio, sr=sr) | ||
43 | +mel_spec_filtered_db = librosa.power_to_db(S=mel_spec_filtered, ref=np.max) | ||
44 | + | ||
45 | +# Plotting the original and filtered signals | ||
46 | +plt.figure(figsize=(12, 8)) | ||
47 | + | ||
48 | +# Original Signal | ||
49 | +plt.subplot(2, 2, 1) | ||
50 | +plt.plot(audio) | ||
51 | +plt.title('Original Signal') | ||
52 | +plt.xlabel('Time') | ||
53 | +plt.ylabel('Amplitude') | ||
54 | + | ||
55 | +# Filtered Signal | ||
56 | +plt.subplot(2, 2, 2) | ||
57 | +plt.plot(filtered_audio) | ||
58 | +plt.title('Filtered Signal') | ||
59 | +plt.xlabel('Time') | ||
60 | +plt.ylabel('Amplitude') | ||
61 | + | ||
62 | +# Plotting the Mel spectrograms | ||
63 | +plt.subplot(2, 2, 3) | ||
64 | +librosa.display.specshow(mel_spec_orig_db, sr=sr, x_axis='time', y_axis='mel') | ||
65 | +plt.colorbar(format='%+2.0f dB') | ||
66 | +plt.title('Original Mel Spectrogram') | ||
67 | + | ||
68 | +plt.subplot(2, 2, 4) | ||
69 | +librosa.display.specshow(mel_spec_filtered_db, sr=sr, x_axis='time', y_axis='mel') | ||
70 | +plt.colorbar(format='%+2.0f dB') | ||
71 | +plt.title('Filtered Mel Spectrogram') | ||
72 | + | ||
73 | +plt.tight_layout() | ||
74 | +plt.show() |
spectograms/pitch_shifted_spectogram.py
0 → 100644
1 | +import numpy as np | ||
2 | +import librosa | ||
3 | +import librosa.display | ||
4 | +import matplotlib.pyplot as plt | ||
5 | + | ||
6 | +# Load the audio file | ||
7 | +audio_file = "./sounds/s4.mp3" | ||
8 | +audio, sr = librosa.load(audio_file, sr=None) | ||
9 | + | ||
10 | +# Compute the original Mel spectrogram | ||
11 | +mel_spec_orig = librosa.feature.melspectrogram(y=audio, sr=sr) | ||
12 | + | ||
13 | +# Pitch shift parameters | ||
14 | +pitch_shift_factor = 2.0 | ||
15 | + | ||
16 | +# Apply pitch shifting | ||
17 | +audio_pitch_shifted = librosa.effects.pitch_shift(y=audio, sr=sr, n_steps=pitch_shift_factor, bins_per_octave=1) | ||
18 | +mel_spec_pitch_shifted = librosa.feature.melspectrogram(y=audio_pitch_shifted, sr=sr) | ||
19 | + | ||
20 | +# Display the original Mel spectrogram | ||
21 | +plt.figure(figsize=(10, 4)) | ||
22 | +plt.subplot(1, 2, 1) | ||
23 | +librosa.display.specshow(librosa.power_to_db(mel_spec_orig, ref=np.max), sr=sr, x_axis='time', y_axis='mel') | ||
24 | +plt.title('Original Mel Spectrogram') | ||
25 | +#plt.colorbar(format='%+2.0f dB') | ||
26 | + | ||
27 | +# Display the pitch-shifted Mel spectrogram | ||
28 | +plt.subplot(1, 2, 2) | ||
29 | +librosa.display.specshow(librosa.power_to_db(mel_spec_pitch_shifted, ref=np.max), sr=sr, x_axis='time', y_axis='mel') | ||
30 | +plt.title('Pitch-Shifted Mel Spectrogram') | ||
31 | +#plt.colorbar(format='%+2.0f dB') | ||
32 | + | ||
33 | +plt.tight_layout() | ||
34 | +plt.show() |
-
Please register or login to post a comment