신아형

add code

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"augmentation.ipynb","provenance":[],"collapsed_sections":[],"authorship_tag":"ABX9TyM0lQ+UqU7Sa/uXs3VwvxDl"},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"wYGqloqDkEoU","colab_type":"code","outputId":"0d22028c-119d-4823-df3f-35d439ca019f","executionInfo":{"status":"ok","timestamp":1591716524562,"user_tz":-540,"elapsed":32835,"user":{"displayName":"‍신아형[학생](소프트웨어융합대학 컴퓨터공학과)","photoUrl":"","userId":"02057930622140727302"}},"colab":{"base_uri":"https://localhost:8080/","height":127}},"source":["from google.colab import drive\n","drive.mount('/content/gdrive')"],"execution_count":0,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/gdrive\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"cPpLXwOPGYkB","colab_type":"code","colab":{}},"source":["import imgaug; print(imgaug.__file__)\n","!cd /content/../usr/local/lib/python3.6/dist-packages/\n","!pip uninstall imgaug\n","!pip install git+https://github.com/aleju/imgaug.git\n","!pip3 install pascal_voc_writer"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3WumbxkdSAYo","colab_type":"code","colab":{}},"source":["# xml파일 파싱해서 filename, bounding-box 정보 반환\n","\n","import xml.etree.ElementTree as ET\n","\n","def read_anntation(xml_file: str):\n"," tree = ET.parse(xml_file)\n"," root = tree.getroot()\n","\n"," bounding_box_list = []\n","\n"," file_name = root.find('filename').text\n"," for obj in root.iter('object'):\n","\n"," object_label = obj.find(\"name\").text\n"," for box in obj.findall(\"bndbox\"):\n"," x_min = int(box.find(\"xmin\").text)\n"," y_min = int(box.find(\"ymin\").text)\n"," x_max = int(box.find(\"xmax\").text)\n"," y_max = int(box.find(\"ymax\").text)\n","\n"," bounding_box = [object_label, x_min, y_min, x_max, y_max]\n"," bounding_box_list.append(bounding_box)\n","\n"," return bounding_box_list, file_name"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"A-982sMgSOCV","colab_type":"code","colab":{}},"source":["# 4차원 nparray로 변환된 이미지, [bounding-box 리스트, xml 파일명, 이미지 파일명] 리스트 반환\n","\n","from os import listdir\n","import cv2\n","import numpy as np\n","\n","def read_train_dataset(dir):\n"," images = []\n"," annotations = []\n","\n"," for file in listdir(dir):\n"," if 'jpg' in file.lower() or 'png' in file.lower():\n"," images.append(cv2.imread(dir + file, 1))\n"," annotation_file = file.replace(file.split('.')[-1], 'xml')\n"," bounding_box_list, file_name = read_anntation(dir + annotation_file)\n"," annotations.append((bounding_box_list, annotation_file, file_name))\n","\n"," images = np.array(images)\n","\n"," return images, annotations"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"5Kz66PqlST6Q","colab_type":"code","colab":{}},"source":["import imgaug as ia\n","from imgaug import augmenters as iaa\n","from pascal_voc_writer import Writer\n","\n","ia.seed(1)\n","\n","\n","dir = '/content/gdrive/My Drive/capstone/data/' # input 이미지 디렉토리\n","output_dir='/content/gdrive/My Drive/capstone/data/' # output 이미지 디렉토리\n","\n","images, annotations = read_train_dataset(dir)\n","\n","for idx in range(len(images)):\n"," image = images[idx]\n"," boxes = annotations[idx][0]\n","\n"," ia_bounding_boxes = []\n"," for box in boxes:\n"," ia_bounding_boxes.append(ia.BoundingBox(x1=box[1], y1=box[2], x2=box[3], y2=box[4]))\n"," bbs = ia.BoundingBoxesOnImage(ia_bounding_boxes, shape=image.shape)\n","\n"," ######어둡게\n"," seq = iaa.MultiplyAndAddToBrightness(mul=(0.5, 0.5), add=(-10, 10))\n","\n"," ######밝게\n"," #seq = iaa.MultiplyAndAddToBrightness(mul= (1, 2.5), add=(-15, 15))\n","\n"," seq_det = seq.to_deterministic()\n","\n"," image_aug = seq_det.augment_images([image])[0]\n"," bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]\n","\n"," new_image_file = output_dir + 'dark_' + annotations[idx][2]\n"," cv2.imwrite(new_image_file, image_aug)\n","\n"," h, w = np.shape(image_aug)[0:2]\n"," voc_writer = Writer(new_image_file, w, h)\n","\n"," for i in range(len(bbs_aug.bounding_boxes)):\n"," bb_box = bbs_aug.bounding_boxes[i]\n"," voc_writer.addObject(boxes[i][0], int(bb_box.x1), int(bb_box.y1), int(bb_box.x2), int(bb_box.y2))\n","\n"," voc_writer.save(output_dir + 'dark_' + annotations[idx][1])\n"," print(output_dir + 'dark_' + annotations[idx][1])"],"execution_count":0,"outputs":[]}]}
\ No newline at end of file
"""
Usage:
# Create train data:
python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/train_labels.csv --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/train.record <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt
# Create test data:
python generate_tfrecord.py --label=<LABEL> --csv_input=<PATH_TO_ANNOTATIONS_FOLDER>/test_labels.csv --output_path=<PATH_TO_ANNOTATIONS_FOLDER>/test.record --label_map <PATH_TO_ANNOTATIONS_FOLDER>/label_map.pbtxt
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import os
import io
import pandas as pd
import tensorflow as tf
import sys
sys.path.append("../../models/research")
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
flags = tf.app.flags
flags.DEFINE_string("csv_input", "", "Path to the CSV input")
flags.DEFINE_string("output_path", "", "Path to output TFRecord")
flags.DEFINE_string(
"label_map",
"",
"Path to the `label_map.pbtxt` contains the <class_name>:<class_index> pairs generated by `xml_to_csv.py` or manually.",
)
# if your image has more labels input them as
# flags.DEFINE_string('label0', '', 'Name of class[0] label')
# flags.DEFINE_string('label1', '', 'Name of class[1] label')
# and so on.
flags.DEFINE_string("img_path", "", "Path to images")
FLAGS = flags.FLAGS
def split(df, group):
data = namedtuple("data", ["filename", "object"])
gb = df.groupby(group)
return [
data(filename, gb.get_group(x))
for filename, x in zip(gb.groups.keys(), gb.groups)
]
def create_tf_example(group, path, label_map):
with tf.gfile.GFile(os.path.join(path, "{}".format(group.filename)), "rb") as fid:
encoded_jpg = fid.read()
encoded_jpg_io = io.BytesIO(encoded_jpg)
image = Image.open(encoded_jpg_io)
width, height = image.size
filename = group.filename.encode("utf8")
image_format = b"jpg"
# check if the image format is matching with your images.
xmins = []
xmaxs = []
ymins = []
ymaxs = []
classes_text = []
classes = []
for index, row in group.object.iterrows():
xmins.append(row["xmin"] / width)
xmaxs.append(row["xmax"] / width)
ymins.append(row["ymin"] / height)
ymaxs.append(row["ymax"] / height)
classes_text.append(row["class"].encode("utf8"))
class_index = label_map.get(row["class"])
assert (
class_index is not None
), "class label: `{}` not found in label_map: {}".format(
row["class"], label_map
)
classes.append(class_index)
tf_example = tf.train.Example(
features=tf.train.Features(
feature={
"image/height": dataset_util.int64_feature(height),
"image/width": dataset_util.int64_feature(width),
"image/filename": dataset_util.bytes_feature(filename),
"image/source_id": dataset_util.bytes_feature(filename),
"image/encoded": dataset_util.bytes_feature(encoded_jpg),
"image/format": dataset_util.bytes_feature(image_format),
"image/object/bbox/xmin": dataset_util.float_list_feature(xmins),
"image/object/bbox/xmax": dataset_util.float_list_feature(xmaxs),
"image/object/bbox/ymin": dataset_util.float_list_feature(ymins),
"image/object/bbox/ymax": dataset_util.float_list_feature(ymaxs),
"image/object/class/text": dataset_util.bytes_list_feature(
classes_text
),
"image/object/class/label": dataset_util.int64_list_feature(classes),
}
)
)
return tf_example
def main(_):
writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
path = os.path.join(os.getcwd(), FLAGS.img_path)
examples = pd.read_csv(FLAGS.csv_input)
# Load the `label_map` from pbtxt file.
from object_detection.utils import label_map_util
label_map = label_map_util.load_labelmap(FLAGS.label_map)
categories = label_map_util.convert_label_map_to_categories(
label_map, max_num_classes=90, use_display_name=True
)
category_index = label_map_util.create_category_index(categories)
label_map = {}
for k, v in category_index.items():
label_map[v.get("name")] = v.get("id")
grouped = split(examples, "filename")
for group in grouped:
tf_example = create_tf_example(group, path, label_map)
writer.write(tf_example.SerializeToString())
writer.close()
output_path = os.path.join(os.getcwd(), FLAGS.output_path)
print("Successfully created the TFRecords: {}".format(output_path))
if __name__ == "__main__":
tf.app.run()
This diff could not be displayed because it is too large.
This diff is collapsed. Click to expand it.
"""
Usage:
# Create train data:
python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/train -o [PATH_TO_ANNOTATIONS_FOLDER]/train_labels.csv
# Create test data:
python xml_to_csv.py -i [PATH_TO_IMAGES_FOLDER]/test -o [PATH_TO_ANNOTATIONS_FOLDER]/test_labels.csv
"""
import os
import glob
import pandas as pd
import argparse
import xml.etree.ElementTree as ET
def xml_to_csv(path):
"""Iterates through all .xml files (generated by labelImg) in a given directory and combines them in a single Pandas datagrame.
Parameters:
----------
path : {str}
The path containing the .xml files
Returns
-------
Pandas DataFrame
The produced dataframe
"""
classes_names = []
xml_list = []
for xml_file in glob.glob(path + "/*.xml"):
tree = ET.parse(xml_file)
root = tree.getroot()
for member in root.findall("object"):
classes_names.append(member[0].text)
value = (
root.find("filename").text,
int(root.find("size")[0].text),
int(root.find("size")[1].text),
member[0].text,
int(member[4][0].text),
int(member[4][1].text),
int(member[4][2].text),
int(member[4][3].text),
)
xml_list.append(value)
column_name = [
"filename",
"width",
"height",
"class",
"xmin",
"ymin",
"xmax",
"ymax",
]
xml_df = pd.DataFrame(xml_list, columns=column_name)
classes_names = list(set(classes_names))
classes_names.sort()
return xml_df, classes_names
def main():
# Initiate argument parser
parser = argparse.ArgumentParser(
description="Sample TensorFlow XML-to-CSV converter"
)
parser.add_argument(
"-i",
"--inputDir",
help="Path to the folder where the input .xml files are stored",
type=str,
)
parser.add_argument(
"-o", "--outputFile", help="Name of output .csv file (including path)", type=str
)
parser.add_argument(
"-l",
"--labelMapDir",
help="Directory path to save label_map.pbtxt file is specified.",
type=str,
default="",
)
args = parser.parse_args()
if args.inputDir is None:
args.inputDir = os.getcwd()
if args.outputFile is None:
args.outputFile = args.inputDir + "/labels.csv"
assert os.path.isdir(args.inputDir)
os.makedirs(os.path.dirname(args.outputFile), exist_ok=True)
xml_df, classes_names = xml_to_csv(args.inputDir)
xml_df.to_csv(args.outputFile, index=None)
print("Successfully converted xml to csv.")
if args.labelMapDir:
os.makedirs(args.labelMapDir, exist_ok=True)
label_map_path = os.path.join(args.labelMapDir, "label_map.pbtxt")
print("Generate `{}`".format(label_map_path))
# Create the `label_map.pbtxt` file
pbtxt_content = ""
for i, class_name in enumerate(classes_names):
pbtxt_content = (
pbtxt_content
+ "item {{\n id: {0}\n name: '{1}'\n}}\n\n".format(
i + 1, class_name
)
)
pbtxt_content = pbtxt_content.strip()
with open(label_map_path, "w") as f:
f.write(pbtxt_content)
if __name__ == "__main__":
main()