preprocessing.py
2.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
"""
Preprocess Mel-spectrogram images before they are used for training.
@FUNCTION create_training_data : function that preprocesses the images
"""
import os
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Root location of the dataset; each category name is joined onto this path.
dataset_path = "C:/Users/nokh9/Desktop/dog_sound_mel/mobile_net_"

# Emotion categories. A category's position in this list is its class label.
CATEGORIES = [
    "barking",
    "howling",
    "whimpering",
]

# Accumulates [image_array, class_index] pairs produced by preprocessing.
training_data = list()
"""
Preprocess the images.
@brief : preprocess images for training
"""
def create_training_data():
    """Load, resize and label every category image into ``training_data``.

    For each entry of ``CATEGORIES`` the folder ``dataset_path/<category>`` is
    listed and the files ``<category>_1.jpg`` .. ``<category>_N.jpg`` are read,
    where N is the number of entries in that folder. Files are therefore
    expected to be numbered consecutively from 1; any other file in the folder
    only contributes to N.

    Every image that loads is resized and appended to the module-level
    ``training_data`` list as ``[image, class_index]``, where ``class_index``
    is the category's position in ``CATEGORIES``.

    Images that cannot be read or resized are skipped, and the number of
    skipped files is printed per category instead of being silently ignored.

    Raises:
        OSError: if a category folder cannot be listed.
    """
    for class_num, category in enumerate(CATEGORIES):
        path = os.path.join(dataset_path, category)

        print('-'*50)
        print(category + " is started...")
        print('-'*50)

        listdir_num = len(os.listdir(path))
        skipped = []

        # File names are 1-based: <category>_1.jpg ... <category>_<N>.jpg
        for number in range(1, listdir_num + 1):
            image_path = os.path.join(
                path, category + '_' + str(number) + '.jpg')
            try:
                image_array = cv2.imread(image_path, cv2.IMREAD_COLOR)
                if image_array is None:
                    # imread reports a missing/unreadable file by returning
                    # None rather than raising.
                    skipped.append(image_path)
                    continue
                # dsize is (width, height): the result has 78 rows and
                # 62 columns.
                new_array = cv2.resize(image_array, (62, 78))
            except cv2.error:
                # Best effort: one bad image must not abort the whole run.
                skipped.append(image_path)
                continue

            training_data.append([new_array, class_num])
            if number % 100 == 0:
                print(str(number) + 'is finished')

        if skipped:
            print(str(len(skipped)) + " file(s) skipped in " + category)
# Script entry point
if __name__ == "__main__":
    create_training_data()
    print(len(training_data))

    # Shuffle the samples in place so the classes are interleaved.
    # No training/validation split is performed here; presumably the consumer
    # of the pickle files does that -- TODO confirm.
    random.shuffle(training_data)

    # Separate the image arrays from their class labels.
    X = [features for features, _ in training_data]
    y = [label for _, label in training_data]

    # NOTE(review): create_training_data resizes with
    # cv2.resize(img, (62, 78)), and dsize is (width, height), so each image
    # is 78 rows x 62 columns. This reshape reinterprets the buffer as
    # 62 x 78 instead of keeping the row/column layout. Left unchanged because
    # consumers of the pickles may depend on this shape -- confirm the
    # intended layout.
    X = np.array(X).reshape(-1, 62, 78, 3)

    # Save X and y as separate pickle files; `with` closes each file even if
    # dumping fails.
    with open("X_final2.pickle", "wb") as pickle_out:
        pickle.dump(X, pickle_out)

    with open("y_final2.pickle", "wb") as pickle_out:
        pickle.dump(y, pickle_out)