1 +{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"augmentation.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyM0lQ+UqU7Sa/uXs3VwvxDl"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"wYGqloqDkEoU","colab_type":"code","outputId":"0d22028c-119d-4823-df3f-35d439ca019f","executionInfo":{"status":"ok","timestamp":1591716524562,"user_tz":-540,"elapsed":32835,"user":{"displayName":"‍신아형[학생](소프트웨어융합대학 컴퓨터공학과)","photoUrl":"","userId":"02057930622140727302"}},"colab":{"base_uri":"https://localhost:8080/","height":127}},"source":["from google.colab import drive\n","drive.mount('/content/gdrive')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"cPpLXwOPGYkB","colab_type":"code","colab":{}},"source":["import imgaug; print(imgaug.__file__)\n","!cd /content/../usr/local/lib/python3.6/dist-packages/\n","!pip uninstall imgaug\n","!pip install git+https://github.com/aleju/imgaug.git\n","!pip3 install pascal_voc_writer"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3WumbxkdSAYo","colab_type":"code","colab":{}},"source":["# xml파일 파싱해서 filename, bounding-box 정보 반환\n","\n","import xml.etree.ElementTree as ET\n","\n","def read_anntation(xml_file: str):\n"," tree = ET.parse(xml_file)\n"," root = tree.getroot()\n","\n"," bounding_box_list = 
[]\n","\n"," file_name = root.find('filename').text\n"," for obj in root.iter('object'):\n","\n"," object_label = obj.find(\"name\").text\n"," for box in obj.findall(\"bndbox\"):\n"," x_min = int(box.find(\"xmin\").text)\n"," y_min = int(box.find(\"ymin\").text)\n"," x_max = int(box.find(\"xmax\").text)\n"," y_max = int(box.find(\"ymax\").text)\n","\n"," bounding_box = [object_label, x_min, y_min, x_max, y_max]\n"," bounding_box_list.append(bounding_box)\n","\n"," return bounding_box_list, file_name"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"A-982sMgSOCV","colab_type":"code","colab":{}},"source":["# 4차원 nparray로 변환된 이미지, [bounding-box 리스트, xml 파일명, 이미지 파일명] 리스트 반환\n","\n","from os import listdir\n","import cv2\n","import numpy as np\n","\n","def read_train_dataset(dir):\n"," images = []\n"," annotations = []\n","\n"," for file in listdir(dir):\n"," if 'jpg' in file.lower() or 'png' in file.lower():\n"," images.append(cv2.imread(dir + file, 1))\n"," annotation_file = file.replace(file.split('.')[-1], 'xml')\n"," bounding_box_list, file_name = read_anntation(dir + annotation_file)\n"," annotations.append((bounding_box_list, annotation_file, file_name))\n","\n"," images = np.array(images)\n","\n"," return images, annotations"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"5Kz66PqlST6Q","colab_type":"code","colab":{}},"source":["import imgaug as ia\n","from imgaug import augmenters as iaa\n","from pascal_voc_writer import Writer\n","\n","ia.seed(1)\n","\n","\n","dir = '/content/gdrive/My Drive/capstone/data/' # input 이미지 디렉토리\n","output_dir='/content/gdrive/My Drive/capstone/data/' # output 이미지 디렉토리\n","\n","images, annotations = read_train_dataset(dir)\n","\n","for idx in range(len(images)):\n"," image = images[idx]\n"," boxes = annotations[idx][0]\n","\n"," ia_bounding_boxes = []\n"," for box in boxes:\n"," ia_bounding_boxes.append(ia.BoundingBox(x1=box[1], y1=box[2], x2=box[3], y2=box[4]))\n"," bbs = 
ia.BoundingBoxesOnImage(ia_bounding_boxes, shape=image.shape)\n","\n"," ######어둡게\n"," seq = iaa.MultiplyAndAddToBrightness(mul=(0.5, 0.5), add=(-10, 10))\n","\n"," ######밝게\n"," #seq = iaa.MultiplyAndAddToBrightness(mul= (1, 2.5), add=(-15, 15))\n","\n"," seq_det = seq.to_deterministic()\n","\n"," image_aug = seq_det.augment_images([image])[0]\n"," bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]\n","\n"," new_image_file = output_dir + 'dark_' + annotations[idx][2]\n"," cv2.imwrite(new_image_file, image_aug)\n","\n"," h, w = np.shape(image_aug)[0:2]\n"," voc_writer = Writer(new_image_file, w, h)\n","\n"," for i in range(len(bbs_aug.bounding_boxes)):\n"," bb_box = bbs_aug.bounding_boxes[i]\n"," voc_writer.addObject(boxes[i][0], int(bb_box.x1), int(bb_box.y1), int(bb_box.x2), int(bb_box.y2))\n","\n"," voc_writer.save(output_dir + 'dark_' + annotations[idx][1])\n"," print(output_dir + 'dark_' + annotations[idx][1])"],"execution_count":0,"outputs":[]}]}
1 +"""
2 +Usage:
3 +
4 +# Create train data:
5 +python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/train_labels.csv --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/train.record --label_map <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt
6 +
7 +# Create test data:
8 +python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/test_labels.csv --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/test.record --label_map <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt
9 +"""
10 +
11 +from __future__ import division
12 +from __future__ import print_function
13 +from __future__ import absolute_import
14 +
15 +import os
16 +import io
17 +import pandas as pd
18 +import tensorflow as tf
19 +import sys
20 +
21 +sys.path.append("../../models/research")
22 +
23 +from PIL import Image
24 +from object_detection.utils import dataset_util
25 +from collections import namedtuple, OrderedDict
26 +
flags = tf.app.flags

# Every command-line option of this script is a string flag with an empty
# default. Each entry is (name, default, help text); they are registered in
# the order listed.
#
# If your images carry more labels, additional flags can be declared the same
# way, e.g.
#   flags.DEFINE_string('label0', '', 'Name of class[0] label')
#   flags.DEFINE_string('label1', '', 'Name of class[1] label')
# and so on.
_STRING_FLAGS = (
    ("csv_input", "", "Path to the CSV input"),
    ("output_path", "", "Path to output TFRecord"),
    (
        "label_map",
        "",
        "Path to the `label_map.pbtxt` contains the <class_name>:<class_index> pairs generated by `xml_to_csv.py` or manually.",
    ),
    ("img_path", "", "Path to images"),
)
for _flag_spec in _STRING_FLAGS:
    flags.DEFINE_string(*_flag_spec)
del _flag_spec

FLAGS = flags.FLAGS
41 +
42 +
def split(df, group):
    """Partition ``df`` by the ``group`` column(s) into per-key records.

    Args:
        df: DataFrame to partition.
        group: column name (or anything ``DataFrame.groupby`` accepts).

    Returns:
        A list of ``data`` namedtuples, one per group key, where ``filename``
        is the group key and ``object`` is the sub-frame of rows for that key.
    """
    record = namedtuple("data", ["filename", "object"])
    grouped = df.groupby(group)
    records = []
    # The group key doubles as the record's filename and as the lookup key.
    for key in grouped.groups:
        records.append(record(key, grouped.get_group(key)))
    return records
50 +
51 +
def create_tf_example(group, path, label_map):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: record with a ``filename`` attribute and an ``object``
            DataFrame whose rows provide the "xmin", "xmax", "ymin", "ymax"
            and "class" columns.
        path: directory that is joined with ``group.filename`` to locate the
            image file.
        label_map: mapping (anything with ``.get``) from class name to the
            integer class id.

    Returns:
        The populated ``tf.train.Example``.

    Raises:
        AssertionError: if a row's class name is not present in ``label_map``.
    """
    image_file = os.path.join(path, "{}".format(group.filename))
    with tf.gfile.GFile(image_file, "rb") as fid:
        encoded_jpg = fid.read()

    # The image is decoded only to learn its pixel size; the raw bytes read
    # above are what gets stored in the example.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode("utf8")
    # The format is always recorded as "jpg"; check that it matches your images.
    image_format = b"jpg"

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []

    for _, row in group.object.iterrows():
        # Box corners are stored as fractions of the image width/height.
        xmins.append(row["xmin"] / width)
        xmaxs.append(row["xmax"] / width)
        ymins.append(row["ymin"] / height)
        ymaxs.append(row["ymax"] / height)

        label = row["class"]
        classes_text.append(label.encode("utf8"))
        class_index = label_map.get(label)
        assert (
            class_index is not None
        ), "class label: `{}` not found in label_map: {}".format(label, label_map)
        classes.append(class_index)

    feature = {
        "image/height": dataset_util.int64_feature(height),
        "image/width": dataset_util.int64_feature(width),
        "image/filename": dataset_util.bytes_feature(filename),
        "image/source_id": dataset_util.bytes_feature(filename),
        "image/encoded": dataset_util.bytes_feature(encoded_jpg),
        "image/format": dataset_util.bytes_feature(image_format),
        "image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
        "image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
        "image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
        "image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
        "image/object/class/text": dataset_util.bytes_list_feature(classes_text),
        "image/object/class/label": dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
104 +
105 +
def main(_):
    """Convert the annotation CSV named by the flags into a TFRecord file.

    Reads ``FLAGS.csv_input``, builds a class-name -> id mapping from
    ``FLAGS.label_map``, and writes one serialized example per distinct
    ``filename`` in the CSV to ``FLAGS.output_path``. Images are looked up
    under ``FLAGS.img_path`` relative to the current working directory.

    Args:
        _: unused positional argument supplied by ``tf.app.run``.
    """
    # Kept as a function-local import, as in the original script.
    from object_detection.utils import label_map_util

    path = os.path.join(os.getcwd(), FLAGS.img_path)

    # Load every input before the writer is created, so that a bad CSV or
    # label-map path does not leave an empty output file behind.
    examples = pd.read_csv(FLAGS.csv_input)

    label_map_proto = label_map_util.load_labelmap(FLAGS.label_map)
    # Items whose id exceeds `max_num_classes` are dropped by the conversion
    # below; widen the historical limit of 90 when the label map needs it so
    # that larger label maps are not silently truncated.
    max_num_classes = max([90] + [item.id for item in label_map_proto.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=max_num_classes, use_display_name=True
    )
    category_index = label_map_util.create_category_index(categories)
    label_map = {
        category.get("name"): category.get("id")
        for category in category_index.values()
    }

    # The context manager closes the writer even if an example fails to build.
    with tf.python_io.TFRecordWriter(FLAGS.output_path) as writer:
        for group in split(examples, "filename"):
            tf_example = create_tf_example(group, path, label_map)
            writer.write(tf_example.SerializeToString())

    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print("Successfully created the TFRecords: {}".format(output_path))
130 +
131 +
132 +if __name__ == "__main__":
133 + tf.app.run()
1 +"""
2 +Usage:
3 +# Create train data:
4 +python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/train -o [PATH_TO_ANNOTATIONS_FOLDER]/train_labels.csv
5 +
6 +# Create test data:
7 +python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/test -o [PATH_TO_ANNOTATIONS_FOLDER]/test_labels.csv
8 +"""
9 +
10 +import os
11 +import glob
12 +import pandas as pd
13 +import argparse
14 +import xml.etree.ElementTree as ET
15 +
16 +
def _required_text(parent, tag, xml_file):
    """Return the text of the first ``tag`` child of ``parent`` or raise ValueError."""
    node = parent.find(tag) if parent is not None else None
    if node is None or node.text is None:
        raise ValueError("{}: missing <{}> element".format(xml_file, tag))
    return node.text


def xml_to_csv(path):
    """Combine all .xml annotation files in a directory into one DataFrame.

    Every ``<object>`` element of every file (Pascal VOC layout, e.g. as
    generated by labelImg) becomes one row. Elements are looked up by tag
    name, so the order of the children inside ``<object>``, ``<size>`` and
    ``<bndbox>`` does not matter. Files are processed in sorted order so the
    row order is reproducible.

    Parameters:
    ----------
    path : {str}
        The path containing the .xml files
    Returns
    -------
    (Pandas DataFrame, list of str)
        The produced dataframe with the columns filename, width, height,
        class, xmin, ymin, xmax, ymax, and the sorted list of distinct
        class names that were encountered.
    Raises
    ------
    ValueError
        If an annotation file lacks one of the required elements.
    """
    column_name = [
        "filename",
        "width",
        "height",
        "class",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]
    classes_names = []
    xml_list = []
    # Escape the directory so characters such as "[" in its name are not
    # interpreted as glob wildcards.
    pattern = os.path.join(glob.escape(path), "*.xml")
    for xml_file in sorted(glob.glob(pattern)):
        root = ET.parse(xml_file).getroot()
        filename = _required_text(root, "filename", xml_file)
        size = root.find("size")
        width = int(_required_text(size, "width", xml_file))
        height = int(_required_text(size, "height", xml_file))
        for member in root.findall("object"):
            class_name = _required_text(member, "name", xml_file)
            box = member.find("bndbox")
            classes_names.append(class_name)
            xml_list.append(
                (
                    filename,
                    width,
                    height,
                    class_name,
                    int(_required_text(box, "xmin", xml_file)),
                    int(_required_text(box, "ymin", xml_file)),
                    int(_required_text(box, "xmax", xml_file)),
                    int(_required_text(box, "ymax", xml_file)),
                )
            )
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df, sorted(set(classes_names))
61 +
62 +
def main(argv=None):
    """Command-line entry point: convert .xml annotations to a CSV file.

    Optionally also writes a ``label_map.pbtxt`` listing the encountered
    class names with ids starting at 1.

    Args:
        argv: optional list of command-line arguments. When None, argparse
            falls back to the process arguments, which is what the script
            entry point relies on.
    """
    # Initiate argument parser
    parser = argparse.ArgumentParser(
        description="Sample TensorFlow XML-to-CSV converter"
    )
    parser.add_argument(
        "-i",
        "--inputDir",
        help="Path to the folder where the input .xml files are stored",
        type=str,
    )
    parser.add_argument(
        "-o", "--outputFile", help="Name of output .csv file (including path)", type=str
    )
    parser.add_argument(
        "-l",
        "--labelMapDir",
        help="Directory path to save label_map.pbtxt file is specified.",
        type=str,
        default="",
    )

    args = parser.parse_args(argv)

    if args.inputDir is None:
        args.inputDir = os.getcwd()
    if args.outputFile is None:
        args.outputFile = os.path.join(args.inputDir, "labels.csv")

    # Report a bad input directory as a usage error; an `assert` would be
    # stripped when Python runs with -O.
    if not os.path.isdir(args.inputDir):
        parser.error("input directory does not exist: {}".format(args.inputDir))

    # A bare file name such as "labels.csv" has an empty dirname, and
    # os.makedirs("") raises, so only create the directory when there is one.
    output_dir = os.path.dirname(args.outputFile)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    xml_df, classes_names = xml_to_csv(args.inputDir)
    xml_df.to_csv(args.outputFile, index=False)
    print("Successfully converted xml to csv.")

    if args.labelMapDir:
        os.makedirs(args.labelMapDir, exist_ok=True)
        label_map_path = os.path.join(args.labelMapDir, "label_map.pbtxt")
        print("Generate `{}`".format(label_map_path))

        # Create the `label_map.pbtxt` file; ids are 1-based.
        pbtxt_content = "".join(
            "item {{\n id: {0}\n name: '{1}'\n}}\n\n".format(class_id, class_name)
            for class_id, class_name in enumerate(classes_names, start=1)
        ).strip()
        with open(label_map_path, "w") as f:
            f.write(pbtxt_content)
115 +
116 +
117 +if __name__ == "__main__":
118 + main()