# see https://github.com/codecov/support/wiki/Codecov-Yaml
codecov:
  notify:
    require_ci_to_pass: yes

coverage:
  precision: 0      # 2 = xx.xx%, 0 = xx%
  round: nearest    # how coverage is rounded: down/up/nearest
  range: 40...100   # custom range of coverage colors from red -> yellow -> green
  status:
    # https://codecov.readme.io/v1.0/docs/commit-status
    project:
      default:
        against: auto
        target: 90%     # specify the target coverage for each commit status
        threshold: 20%  # allow this much decrease on project
        # https://github.com/codecov/support/wiki/Filtering-Branches
        # branches: master
        if_ci_failed: error
    # https://github.com/codecov/support/wiki/Patch-Status
    patch:
      default:
        against: auto
        target: 40%     # specify the target "X%" coverage to hit
        # threshold: 50%  # allow this much decrease on patch
    changes: false

parsers:
  gcov:
    branch_detection:
      conditional: true
      loop: true
      macro: false
      method: false
  javascript:
    enable_partials: false

comment:
  layout: header, diff
  require_changes: false
  behavior: default  # update if exists else create new
  # branches: *
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Distribution / packaging
.Python
/build/
/dist/
/eggs/
/*-eggs/
.eggs/
/sdist/
/wheels/
/*.egg-info/
.installed.cfg
*.egg
# Unit test / coverage reports
.coverage
.coverage.*
coverage.xml
*.cover
[submodule "tests/test-data"]
path = tests/test-data
url = https://github.com/fizyr/keras-retinanet-test-data.git
language: python
sudo: required
python:
- '3.6'
- '3.7'
install:
- pip install -r requirements.txt
- pip install -r tests/requirements.txt
cache: pip
script:
- python setup.py check -m -s
- python setup.py build_ext --inplace
- coverage run --source keras_retinanet -m py.test keras_retinanet tests --doctest-modules --forked --flake8
after_success:
- coverage xml
- coverage report
- codecov
# Contributors
This is a list of people who contributed patches to keras-retinanet.
If you feel you should be listed here or if you have any other questions/comments on your listing here,
please create an issue or pull request at https://github.com/fizyr/keras-retinanet/
* Hans Gaiser <h.gaiser@fizyr.com>
* Maarten de Vries <maarten@de-vri.es>
* Valerio Carpani
* Ashley Williamson
* Yann Henon
* Valeriu Lacatusu
* András Vidosits
* Cristian Gratie
* jjiunlin
* Sorin Panduru
* Rodrigo Meira de Andrade
* Enrico Liscio <e.liscio@fizyr.com>
* Mihai Morariu
* pedroconceicao
* jjiun
* Wudi Fang
* Mike Clark
* hannesedvartsen
* Max Van Sande
* Pierre Dérian
* ori
* mxvs
* mwilder
* Muhammed Kocabas
* Koen Vijverberg
* iver56
* hnsywangxin
* Guillaume Erhard
* Eduardo Ramos
* DiegoAgher
* Alexander Pacha
* Agastya Kalra
* Jiri BOROVEC
* ntsagko
* charlie / tianqi
* jsemric
* Martin Zlocha
* Raghav Bhardwaj
* bw4sz
* Morten Back Nielsen
* dshahrokhian
* Alex / adreo00
* simone.merello
* Matt Wilder
* Jinwoo Baek
* Etienne Meunier
* Denis Dowling
* cclauss
* Andrew Grigorev
* ZFTurbo
* UgoLouche
* Richard Higgins
* Rajat / rajat.goel
* philipp.marquardt
* peacherwu
* Paul / pauldesigaud
* Martin Genet
* Leo / leonardvandriel
* Laurens Hagendoorn
* Julius / juliussimonelli
* HolyGuacamole
* Fausto Morales
* borakrc
* Ben Weinstein
* Anil Karaka
* Andrea Panizza
* Bruno Santos
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# Keras RetinaNet [![Build Status](https://travis-ci.org/fizyr/keras-retinanet.svg?branch=master)](https://travis-ci.org/fizyr/keras-retinanet) [![DOI](https://zenodo.org/badge/100249425.svg)](https://zenodo.org/badge/latestdoi/100249425)
Keras implementation of RetinaNet object detection as described in [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002)
by Tsung-Yi Lin, Priya Goyal, Ross Girshick, Kaiming He and Piotr Dollár.
## :warning: Deprecated
This repository is deprecated in favor of the [torchvision](https://github.com/pytorch/vision/) module.
This project should work with Keras 2.4 and TensorFlow 2.3.0; newer versions might break support.
For more information, check [here](https://github.com/fizyr/keras-retinanet/issues/1471#issuecomment-704187205).
## Installation
1) Clone this repository.
2) In the repository, execute `pip install . --user`.
Note that due to inconsistencies in how `tensorflow` should be installed,
this package does not declare a dependency on `tensorflow`, as declaring one would try to install it automatically (which, at least on Arch Linux, results in an incorrect installation).
Please make sure `tensorflow` is installed as per your system's requirements.
3) Alternatively, you can run the code directly from the cloned repository; however, you need to run `python setup.py build_ext --inplace` to compile the Cython code first.
4) Optionally, install `pycocotools` if you want to train / test on the MS COCO dataset by running `pip install --user git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI`.
## Testing
An example of testing the network can be seen in [this Notebook](https://github.com/delftrobotics/keras-retinanet/blob/master/examples/ResNet50RetinaNet.ipynb).
In general, inference of the network works as follows:
```python
boxes, scores, labels = model.predict_on_batch(inputs)
```
Where `boxes` is shaped `(None, None, 4)` (for `(x1, y1, x2, y2)`), `scores` is shaped `(None, None)` (classification score) and `labels` is shaped `(None, None)` (label corresponding to the score). In all three outputs, the first dimension is the batch dimension and the second dimension indexes the list of detections.
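A minimal sketch of post-processing these outputs (the `0.5` score threshold is an illustrative choice, and `inputs` is assumed to be a preprocessed batch of one image):

```python
# inputs is a batch of one preprocessed image, shaped (1, H, W, 3)
boxes, scores, labels = model.predict_on_batch(inputs)

for box, score, label in zip(boxes[0], scores[0], labels[0]):
    # detections are sorted by score, so we can stop at the first low score
    if score < 0.5:
        break
    x1, y1, x2, y2 = box.astype(int)
    print("label {} with score {:.3f} at [{}, {}, {}, {}]".format(label, score, x1, y1, x2, y2))
```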
Loading models can be done in the following manner:
```python
from keras_retinanet.models import load_model
model = load_model('/path/to/model.h5', backbone_name='resnet50')
```
Execution time on an NVIDIA Pascal Titan X is roughly 75 ms for an image of shape `1000x800x3`.
### Converting a training model to inference model
The training procedure of `keras-retinanet` works with *training models*. These are stripped-down versions of the *inference model*, containing only the layers necessary for training (regression and classification outputs). If you wish to do inference with a model (perform object detection on an image), you need to convert the trained model to an inference model. This is done as follows:
```shell
# Running directly from the repository:
keras_retinanet/bin/convert_model.py /path/to/training/model.h5 /path/to/save/inference/model.h5
# Using the installed script:
retinanet-convert-model /path/to/training/model.h5 /path/to/save/inference/model.h5
```
Most scripts (like `retinanet-evaluate`) also support converting on the fly, using the `--convert-model` argument.
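The same conversion can also be done programmatically. A minimal sketch, assuming a trained model saved at an illustrative path:

```python
from keras_retinanet import models

# load the training model and convert it to an inference model
training_model = models.load_model('/path/to/training/model.h5', backbone_name='resnet50')
inference_model = models.convert_model(training_model)

# save the converted model for later use
inference_model.save('/path/to/save/inference/model.h5')
```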
## Training
`keras-retinanet` can be trained using [this](https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/bin/train.py) script.
Note that the train script uses relative imports since it is inside the `keras_retinanet` package.
If you want to adjust the script for your own use outside of this repository,
you will need to switch it to use absolute imports.
If you installed `keras-retinanet` correctly, the train script will be installed as `retinanet-train`.
However, if you make local modifications to the `keras-retinanet` repository, you should run the script directly from the repository.
That will ensure that your local changes will be used by the train script.
The default backbone is `resnet50`. You can change this using the `--backbone=xxx` argument when running the training script.
`xxx` can be one of the backbones in resnet models (`resnet50`, `resnet101`, `resnet152`), mobilenet models (`mobilenet128_1.0`, `mobilenet128_0.75`, `mobilenet160_1.0`, etc), densenet models or vgg models. The different options are defined by each model in their corresponding python scripts (`resnet.py`, `mobilenet.py`, etc).
Trained models can't be used directly for inference. To convert a trained model to an inference model, check [here](https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model).
### Usage
For training on [Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/), run:
```shell
# Running directly from the repository:
keras_retinanet/bin/train.py pascal /path/to/VOCdevkit/VOC2007
# Using the installed script:
retinanet-train pascal /path/to/VOCdevkit/VOC2007
```
For training on [MS COCO](http://cocodataset.org/#home), run:
```shell
# Running directly from the repository:
keras_retinanet/bin/train.py coco /path/to/MS/COCO
# Using the installed script:
retinanet-train coco /path/to/MS/COCO
```
For training on Open Images Dataset [OID](https://storage.googleapis.com/openimages/web/index.html)
or participating in the [OID challenges](https://storage.googleapis.com/openimages/web/challenge.html), run:
```shell
# Running directly from the repository:
keras_retinanet/bin/train.py oid /path/to/OID
# Using the installed script:
retinanet-train oid /path/to/OID
# You can also specify a list of labels if you want to train on a subset
# by adding the argument 'labels_filter':
keras_retinanet/bin/train.py oid /path/to/OID --labels-filter=Helmet,Tree
# You can also specify a parent label if you want to train on a branch
# from the semantic hierarchy (i.e. a parent and all its children; see
# https://storage.googleapis.com/openimages/challenge_2018/bbox_labels_500_hierarchy_visualizer/circle.html)
# by adding the argument 'parent-label':
keras_retinanet/bin/train.py oid /path/to/OID --parent-label=Boat
```
For training on [KITTI](http://www.cvlibs.net/datasets/kitti/eval_object.php), run:
```shell
# Running directly from the repository:
keras_retinanet/bin/train.py kitti /path/to/KITTI
# Using the installed script:
retinanet-train kitti /path/to/KITTI
# If you want to prepare the dataset you can use the following script:
# https://github.com/NVIDIA/DIGITS/blob/master/examples/object-detection/prepare_kitti_data.py
```
For training on a custom dataset, a CSV file can be used as a way to pass the data.
See below for more details on the format of these CSV files.
To train using your CSV, run:
```shell
# Running directly from the repository:
keras_retinanet/bin/train.py csv /path/to/csv/file/containing/annotations /path/to/csv/file/containing/classes
# Using the installed script:
retinanet-train csv /path/to/csv/file/containing/annotations /path/to/csv/file/containing/classes
```
In general, the steps to train on your own datasets are:
1) Create a model by calling for instance `keras_retinanet.models.backbone('resnet50').retinanet(num_classes=80)` and compile it.
Empirically, the following compile arguments have been found to work well:
```python
model.compile(
loss={
'regression' : keras_retinanet.losses.smooth_l1(),
'classification': keras_retinanet.losses.focal()
},
optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)
)
```
2) Create generators for training and testing data (an example is shown in [`keras_retinanet.preprocessing.pascal_voc.PascalVocGenerator`](https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/preprocessing/pascal_voc.py)).
3) Use `model.fit_generator` to start training.
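A minimal sketch of these steps, assuming a CSV dataset and the `tf.keras` that this project targets (the file paths, class count, batch steps and epoch count are illustrative):

```python
from tensorflow import keras

from keras_retinanet import models, losses
from keras_retinanet.preprocessing.csv_generator import CSVGenerator

# step 1: create and compile the training model
model = models.backbone('resnet50').retinanet(num_classes=3)
model.compile(
    loss={
        'regression'    : losses.smooth_l1(),
        'classification': losses.focal()
    },
    optimizer=keras.optimizers.Adam(lr=1e-5, clipnorm=0.001)
)

# step 2: create a generator from the CSV annotations and class mapping
train_generator = CSVGenerator('/path/to/annotations.csv', '/path/to/classes.csv')

# step 3: train
model.fit_generator(train_generator, steps_per_epoch=1000, epochs=50)
```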
## Pretrained models
All models can be downloaded from the [releases page](https://github.com/fizyr/keras-retinanet/releases).
### MS COCO
Results using the `cocoapi` are shown below (note: according to the paper, this configuration should achieve a mAP of 0.357).
```
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.350
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.537
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.374
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.191
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.383
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.472
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.306
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.491
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.533
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.345
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.577
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.681
```
### Open Images Dataset
There are 3 RetinaNet models based on ResNet50, ResNet101 and ResNet152 trained on all [500 classes](https://github.com/ZFTurbo/Keras-RetinaNet-for-Open-Images-Challenge-2018/blob/master/a00_utils_and_constants.py#L130) of the Open Images Dataset (thanks to @ZFTurbo).
| Backbone | Image Size (px) | Small validation mAP | LB (Public) |
| --------- | --------------- | -------------------- | ----------- |
| ResNet50 | 768 - 1024 | 0.4594 | 0.4223 |
| ResNet101 | 768 - 1024 | 0.4986 | 0.4520 |
| ResNet152 | 600 - 800 | 0.4991 | 0.4651 |
For more information, check [@ZFTurbo's](https://github.com/ZFTurbo/Keras-RetinaNet-for-Open-Images-Challenge-2018) repository.
## CSV datasets
The `CSVGenerator` provides an easy way to define your own datasets.
It uses two CSV files: one file containing annotations and one file containing a class name to ID mapping.
### Annotations format
The CSV file with annotations should contain one annotation per line.
Images with multiple bounding boxes should use one row per bounding box.
Note that indexing for pixel values starts at 0.
The expected format of each line is:
```
path/to/image.jpg,x1,y1,x2,y2,class_name
```
By default the CSV generator will look for images relative to the directory of the annotations file.
Some images may not contain any labeled objects.
To add these images to the dataset as negative examples,
add an annotation where `x1`, `y1`, `x2`, `y2` and `class_name` are all empty:
```
path/to/image.jpg,,,,,
```
A full example:
```
/data/imgs/img_001.jpg,837,346,981,456,cow
/data/imgs/img_002.jpg,215,312,279,391,cat
/data/imgs/img_002.jpg,22,5,89,84,bird
/data/imgs/img_003.jpg,,,,,
```
This defines a dataset with 3 images.
`img_001.jpg` contains a cow.
`img_002.jpg` contains a cat and a bird.
`img_003.jpg` contains no interesting objects/animals.
### Class mapping format
The class name to ID mapping file should contain one mapping per line.
Each line should use the following format:
```
class_name,id
```
Indexing for classes starts at 0.
Do not include a background class as it is implicit.
For example:
```
cow,0
cat,1
bird,2
```
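A minimal sketch of producing both files programmatically with Python's `csv` module (the paths, boxes and class names are the illustrative values from the example above):

```python
import csv

annotations = [
    ('/data/imgs/img_001.jpg', 837, 346, 981, 456, 'cow'),
    ('/data/imgs/img_002.jpg', 215, 312, 279, 391, 'cat'),
    ('/data/imgs/img_002.jpg', 22, 5, 89, 84, 'bird'),
    ('/data/imgs/img_003.jpg', '', '', '', '', ''),  # negative example: no objects
]
classes = [('cow', 0), ('cat', 1), ('bird', 2)]

# one row per bounding box: path,x1,y1,x2,y2,class_name
with open('annotations.csv', 'w', newline='') as f:
    csv.writer(f).writerows(annotations)

# one row per class: class_name,id (indexing starts at 0, no background class)
with open('classes.csv', 'w', newline='') as f:
    csv.writer(f).writerows(classes)
```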
## Anchor optimization
In some cases, the default anchor configuration is not suitable for detecting objects in your dataset, for example if your objects are smaller than 32x32 px (the size of the smallest anchors). In that case it may help to modify the anchor configuration; this can be done automatically by following the steps in the [anchor-optimization](https://github.com/martinzlocha/anchor-optimization/) repository. To use the generated configuration, check [here](https://github.com/fizyr/keras-retinanet-test-data/blob/master/config/config.ini) for an example config file and then pass it to `train.py` using the `--config` parameter.
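A minimal sketch of inspecting such a config from Python, using the same helpers the bundled scripts use (the config path is illustrative):

```python
from keras_retinanet.utils.config import read_config_file, parse_anchor_parameters
from keras_retinanet.utils.anchors import anchors_for_shape

# read the .ini file and extract the anchor parameters
config = read_config_file('/path/to/config.ini')
anchor_params = parse_anchor_parameters(config)

# generate the anchors that would be used for an 800x1333 input image
anchors = anchors_for_shape((800, 1333, 3), anchor_params=anchor_params)
print(anchors.shape)  # (number_of_anchors, 4)
```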
## Debugging
Creating your own dataset does not always work out of the box. There is a [`debug.py`](https://github.com/fizyr/keras-retinanet/blob/master/keras_retinanet/bin/debug.py) tool to help find the most common mistakes.
Particularly helpful is the `--show-annotations` flag, which displays your annotations on the images from your dataset. Annotations are colored green when anchors are available and red when no anchors are available. An annotation without available anchors will not contribute to training. It is normal for a small number of annotations to show up red, but if most or all annotations are red there is cause for concern. The most common issues are annotations that are too small or too oddly shaped (stretched out).
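For example, to visualize a CSV dataset with the debug tool (the paths are illustrative; note that these optional flags must come before the dataset type):

```shell
# Running directly from the repository:
keras_retinanet/bin/debug.py --show-annotations --anchors csv /path/to/annotations.csv /path/to/classes.csv
# Using the installed script:
retinanet-debug --show-annotations --anchors csv /path/to/annotations.csv /path/to/classes.csv
```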
## Results
### MS COCO
## Status
Example output images using `keras-retinanet` are shown below.
<p align="center">
<img src="https://github.com/delftrobotics/keras-retinanet/blob/master/images/coco1.png" alt="Example result of RetinaNet on MS COCO"/>
<img src="https://github.com/delftrobotics/keras-retinanet/blob/master/images/coco2.png" alt="Example result of RetinaNet on MS COCO"/>
<img src="https://github.com/delftrobotics/keras-retinanet/blob/master/images/coco3.png" alt="Example result of RetinaNet on MS COCO"/>
</p>
### Projects using keras-retinanet
* [Improving Apple Detection and Counting Using RetinaNet](https://github.com/nikostsagk/Apple-detection). This work aims to investigate the apple detection problem through the deployment of the Keras RetinaNet.
* [Improving RetinaNet for CT Lesion Detection with Dense Masks from Weak RECIST Labels](https://arxiv.org/abs/1906.02283). Research project for detecting lesions in CT using keras-retinanet.
* [NudeNet](https://github.com/bedapudi6788/NudeNet). Project that focuses on detecting and censoring of nudity.
* [Individual tree-crown detection in RGB imagery using self-supervised deep learning neural networks](https://www.biorxiv.org/content/10.1101/532952v1). Research project focused on improving the performance of remotely sensed tree surveys.
* [ESRI Object Detection Challenge 2019](https://github.com/kunwar31/ESRI_Object_Detection). Winning implementation of the ESRI Object Detection Challenge 2019.
* [Lunar Rockfall Detector Project](https://ieeexplore.ieee.org/document/8587120). The aim of this project is to [map lunar rockfalls on a global scale](https://www.nature.com/articles/s41467-020-16653-3) using the available > 2 million satellite images.
* [Mars Rockfall Detector Project](https://ieeexplore.ieee.org/document/9103997). The aim of this project is to map rockfalls on Mars.
* [NATO Innovation Challenge](https://medium.com/data-from-the-trenches/object-detection-with-deep-learning-on-aerial-imagery-2465078db8a9). The winning team of the NATO Innovation Challenge used keras-retinanet to detect cars in aerial images ([COWC dataset](https://gdo152.llnl.gov/cowc/)).
* [Microsoft Research for Horovod on Azure](https://blogs.technet.microsoft.com/machinelearning/2018/06/20/how-to-do-distributed-deep-learning-for-object-detection-using-horovod-on-azure/). A research project by Microsoft, using keras-retinanet to distribute training over multiple GPUs using Horovod on Azure.
* [Anno-Mage](https://virajmavani.github.io/saiat/). A tool that helps you annotate images, using input from the keras-retinanet COCO model as suggestions.
* [Telenav.AI](https://github.com/Telenav/Telenav.AI/tree/master/retinanet). For the detection of traffic signs using keras-retinanet.
* [Towards Deep Placental Histology Phenotyping](https://github.com/Nellaker-group/TowardsDeepPhenotyping). This research project uses keras-retinanet for analysing the placenta at a cellular level.
* [4k video example](https://www.youtube.com/watch?v=KYueHEMGRos). This demo shows the use of keras-retinanet on a 4k input video.
* [boring-detector](https://github.com/lexfridman/boring-detector). I suppose not all projects need to solve life's biggest questions. This project detects the "The Boring Company" hats in videos.
* [comet.ml](https://towardsdatascience.com/how-i-monitor-and-track-my-machine-learning-experiments-from-anywhere-described-in-13-tweets-ec3d0870af99). Using keras-retinanet in combination with [comet.ml](https://comet.ml) to interactively inspect and compare experiments.
* [Weights and Biases](https://app.wandb.ai/syllogismos/keras-retinanet/reports?view=carey%2FObject%20Detection%20with%20RetinaNet). Trained keras-retinanet from scratch on the COCO dataset with ResNet50 and ResNet101 backbones.
* [Google Open Images Challenge 2018 15th place solution](https://github.com/ZFTurbo/Keras-RetinaNet-for-Open-Images-Challenge-2018). Pretrained weights for keras-retinanet based on ResNet50, ResNet101 and ResNet152 trained on open images dataset.
* [poke.AI](https://github.com/Raghav-B/poke.AI). An experimental AI that attempts to master the 3rd Generation Pokemon games. Using keras-retinanet for in-game mapping and localization.
* [retinanetjs](https://github.com/faustomorales/retinanetjs). A wrapper to run RetinaNet inference in the browser / Node.js. You can also take a look at the [example app](https://faustomorales.github.io/retinanetjs-example-app/).
* [CRFNet](https://github.com/TUMFTM/CameraRadarFusionNet). This network fuses radar and camera data to perform object detection for autonomous driving applications.
* [LogoDet](https://github.com/notAI-tech/LogoDet). Project for detecting company logos in images.
If you have a project based on `keras-retinanet` and would like to have it published here, shoot me a message on Slack.
### Notes
* This repository requires TensorFlow 2.3.0 or higher.
* This repository is [tested](https://github.com/fizyr/keras-retinanet/blob/master/.travis.yml) using OpenCV 3.4.
* This repository is [tested](https://github.com/fizyr/keras-retinanet/blob/master/.travis.yml) using Python 3.6 and 3.7.
Contributions to this project are welcome.
### Discussions
Feel free to join the `#keras-retinanet` [Keras Slack](https://keras-slack-autojoin.herokuapp.com/) channel for discussions and questions.
## FAQ
* **I get the warning `UserWarning: No training configuration found in save file: the model was not compiled. Compile it manually.`, should I be worried?** This warning can safely be ignored during inference.
* **I get the error `ValueError: not enough values to unpack (expected 3, got 2)` during inference, what should I do?** This happens when you use a training model for inference. See https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model for more information.
* **How do I do transfer learning?** The easiest solution is to use the `--weights` argument when training. Keras will load models even if the number of classes doesn't match (it will simply skip loading weights when there is a mismatch). Run for example `retinanet-train --weights snapshots/some_coco_model.h5 pascal /path/to/pascal` to transfer weights from a COCO model to a PascalVOC training session. If your dataset is small, you can also use the `--freeze-backbone` argument to freeze the backbone layers.
* **How do I change the number / shape of the anchors?** The train tool allows you to pass a configuration file in which the anchor parameters can be adjusted. Check [here](https://github.com/fizyr/keras-retinanet-test-data/blob/master/config/config.ini) for an example config file.
* **I get a loss of `0`, what is going on?** This mostly happens when none of the anchors "fit" your objects, most likely because the objects are too small or too elongated. You can verify this using the [debug](https://github.com/fizyr/keras-retinanet#debugging) tool.
* **I have an older model, can I use it after an update of keras-retinanet?** This depends on what has changed. If it is a change that doesn't affect the weights, you can "update" the model by creating a new retinanet model, loading your old weights using `model.load_weights(weights_path, by_name=True)` and saving this model (see the sketch at the end of this FAQ). If the change has been too significant, you should retrain your model (you can try to load the weights from your old model when starting training; this might be a better starting position than ImageNet).
* **I get the error `ModuleNotFoundError: No module named 'keras_retinanet.utils.compute_overlap'`, how do I fix this?** Most likely you are running the code from the cloned repository. This is fine, but you need to compile some extensions for this to work (`python setup.py build_ext --inplace`).
* **How do I train on my own dataset?** The steps to train on your dataset are roughly as follows:
* 1. Prepare your dataset in the CSV format (a training and validation split is advised).
* 2. Check that your dataset is correct using `retinanet-debug`.
* 3. Train retinanet, preferably starting from the pretrained COCO weights (this gives a **far** better starting point, making training much quicker and more accurate). You can optionally perform evaluation of your validation set during training to keep track of how well it performs (advised).
* 4. Convert your training model to an inference model.
* 5. Evaluate your inference model on your test or validation set.
* 6. Profit!
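A minimal sketch of the "update an older model" answer above, assuming the backbone and number of classes are unchanged (the paths are illustrative):

```python
from keras_retinanet import models

# build a fresh training model with the current code base
model = models.backbone('resnet50').retinanet(num_classes=80)

# load the old weights by layer name
model.load_weights('/path/to/old/model.h5', by_name=True)

# save the refreshed model so it can be trained or converted with the current code
model.save('/path/to/updated/model.h5')
```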
[anchor_parameters]
# Sizes should correlate to how the network processes an image; changing these is not advised!
sizes = 64 128 256
# Strides should correlate to how the network strides over an image; changing these is not advised!
strides = 16 32 64
# The different ratios to use per anchor location.
ratios = 0.5 1 2 3
# The different scaling factors to use per anchor location.
scales = 1 1.2 1.6
[pyramid_levels]
levels = 3 4 5
[.ShellClassInfo]
IconResource=C:\WINDOWS\System32\SHELL32.dll,3
[ViewState]
Mode=
Vid=
FolderType=Generic
#!/usr/bin/env python
# coding: utf-8
# Load necessary modules
import sys
sys.path.insert(0, "../")
# import keras_retinanet
from keras_retinanet import models
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
from keras_retinanet.utils.visualization import draw_box, draw_caption
from keras_retinanet.utils.colors import label_color
from keras_retinanet.utils.gpu import setup_gpu
# import miscellaneous modules
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import time
# set tf backend to allow memory to grow, instead of claiming everything
import tensorflow as tf
# use this to change which GPU to use
gpu = 0
# set the modified tf session as backend in keras
setup_gpu(gpu)
# ## Load RetinaNet model
# adjust this to point to your downloaded/trained model
# models can be downloaded here: https://github.com/fizyr/keras-retinanet/releases
model_path = os.path.join("..", "snapshots", "resnet50_coco_best_v2.1.0.h5")
# load retinanet model
model = models.load_model(model_path, backbone_name="resnet50")
# if the model is not converted to an inference model, use the line below
# see: https://github.com/fizyr/keras-retinanet#converting-a-training-model-to-inference-model
# model = models.convert_model(model)
# print(model.summary())
# load label to names mapping for visualization purposes
labels_to_names = {
0: "person",
1: "bicycle",
2: "car",
3: "motorcycle",
4: "airplane",
5: "bus",
6: "train",
7: "truck",
8: "boat",
9: "traffic light",
10: "fire hydrant",
11: "stop sign",
12: "parking meter",
13: "bench",
14: "bird",
15: "cat",
16: "dog",
17: "horse",
18: "sheep",
19: "cow",
20: "elephant",
21: "bear",
22: "zebra",
23: "giraffe",
24: "backpack",
25: "umbrella",
26: "handbag",
27: "tie",
28: "suitcase",
29: "frisbee",
30: "skis",
31: "snowboard",
32: "sports ball",
33: "kite",
34: "baseball bat",
35: "baseball glove",
36: "skateboard",
37: "surfboard",
38: "tennis racket",
39: "bottle",
40: "wine glass",
41: "cup",
42: "fork",
43: "knife",
44: "spoon",
45: "bowl",
46: "banana",
47: "apple",
48: "sandwich",
49: "orange",
50: "broccoli",
51: "carrot",
52: "hot dog",
53: "pizza",
54: "donut",
55: "cake",
56: "chair",
57: "couch",
58: "potted plant",
59: "bed",
60: "dining table",
61: "toilet",
62: "tv",
63: "laptop",
64: "mouse",
65: "remote",
66: "keyboard",
67: "cell phone",
68: "microwave",
69: "oven",
70: "toaster",
71: "sink",
72: "refrigerator",
73: "book",
74: "clock",
75: "vase",
76: "scissors",
77: "teddy bear",
78: "hair drier",
79: "toothbrush",
}
# ## Run detection on example
# load image
image = read_image_bgr("000000008021.jpg")
# copy to draw on
draw = image.copy()
draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
# preprocess image for network
image = preprocess_image(image)
image, scale = resize_image(image)
# process image
start = time.time()
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
print("processing time: ", time.time() - start)
# correct for image scale
boxes /= scale
# visualize detections
for box, score, label in zip(boxes[0], scores[0], labels[0]):
# scores are sorted so we can break
if score < 0.5:
break
color = label_color(label)
b = box.astype(int)
draw_box(draw, b, color=color)
caption = "{} {:.3f}".format(labels_to_names[label], score)
draw_caption(draw, b, caption)
plt.figure(figsize=(15, 15))
plt.axis("off")
plt.imshow(draw)
plt.show()
from .backend import * # noqa: F401,F403
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
def bbox_transform_inv(boxes, deltas, mean=None, std=None):
""" Applies deltas (usually regression results) to boxes (usually anchors).
Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed.
The mean and std are the mean and std as applied in the generator. They are unnormalized in this function and then applied to the boxes.
Args
boxes : np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2).
deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height.
mean : The mean value used when computing deltas (defaults to [0, 0, 0, 0]).
std : The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]).
Returns
A np.array of the same shape as boxes, but with deltas applied to each box.
The mean and std are used during training to normalize the regression values (networks love normalization).
"""
if mean is None:
mean = [0, 0, 0, 0]
if std is None:
std = [0.2, 0.2, 0.2, 0.2]
width = boxes[:, :, 2] - boxes[:, :, 0]
height = boxes[:, :, 3] - boxes[:, :, 1]
x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width
y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height
x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width
y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height
pred_boxes = keras.backend.stack([x1, y1, x2, y2], axis=2)
return pred_boxes
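# Illustrative example (not part of the original file), assuming numpy array inputs
# of shape (1, 1, 4): with the default std of 0.2, a delta of 1.0 on x1 shifts the
# left edge right by 0.2 * width:
#   boxes  = np.array([[[0., 0., 10., 10.]]])
#   deltas = np.array([[[1., 0., 0., 0.]]])
#   bbox_transform_inv(boxes, deltas)  # -> tensor with value [[[2., 0., 10., 10.]]]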
def shift(shape, stride, anchors):
""" Produce shifted anchors based on shape of the map and stride size.
Args
shape : Shape to shift the anchors over.
stride : Stride to shift the anchors with over the shape.
anchors: The anchors to apply at each location.
"""
shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
shift_x, shift_y = tensorflow.meshgrid(shift_x, shift_y)
shift_x = keras.backend.reshape(shift_x, [-1])
shift_y = keras.backend.reshape(shift_y, [-1])
shifts = keras.backend.stack([
shift_x,
shift_y,
shift_x,
shift_y
], axis=0)
shifts = keras.backend.transpose(shifts)
number_of_anchors = keras.backend.shape(anchors)[0]
k = keras.backend.shape(shifts)[0] # number of base points = feat_h * feat_w
shifted_anchors = keras.backend.reshape(anchors, [1, number_of_anchors, 4]) + keras.backend.cast(keras.backend.reshape(shifts, [k, 1, 4]), keras.backend.floatx())
shifted_anchors = keras.backend.reshape(shifted_anchors, [k * number_of_anchors, 4])
return shifted_anchors
def map_fn(*args, **kwargs):
""" See https://www.tensorflow.org/api_docs/python/tf/map_fn .
"""
if "shapes" in kwargs:
shapes = kwargs.pop("shapes")
dtype = kwargs.pop("dtype")
sig = [tensorflow.TensorSpec(shapes[i], dtype=t) for i, t in
enumerate(dtype)]
# Try to use the new feature fn_output_signature in TF 2.3, use fallback if this is not available
try:
return tensorflow.map_fn(*args, **kwargs, fn_output_signature=sig)
except TypeError:
kwargs["dtype"] = dtype
return tensorflow.map_fn(*args, **kwargs)
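# Illustrative usage (not part of the original file): mapping a function that
# returns two outputs, describing their shapes and dtypes explicitly:
#   elems = tensorflow.constant([[1., 2.], [3., 4.]])
#   map_fn(lambda x: [x, 2. * x], elems,
#          shapes=[[2], [2]], dtype=[tensorflow.float32, tensorflow.float32])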
def resize_images(images, size, method='bilinear', align_corners=False):
""" See https://www.tensorflow.org/versions/r1.14/api_docs/python/tf/image/resize_images .
Args
method: The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area').
"""
methods = {
'bilinear': tensorflow.image.ResizeMethod.BILINEAR,
'nearest' : tensorflow.image.ResizeMethod.NEAREST_NEIGHBOR,
'bicubic' : tensorflow.image.ResizeMethod.BICUBIC,
'area' : tensorflow.image.ResizeMethod.AREA,
}
return tensorflow.compat.v1.image.resize_images(images, size, methods[method], align_corners)
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
import keras_retinanet.bin # noqa: F401
__package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version
def parse_args(args):
parser = argparse.ArgumentParser(description='Script for converting a training model to an inference model.')
parser.add_argument('model_in', help='The model to convert.')
parser.add_argument('model_out', help='Path to save the converted model to.')
parser.add_argument('--backbone', help='The backbone of the model to convert.', default='resnet50')
parser.add_argument('--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false')
parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', dest='class_specific_filter', action='store_false')
parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
parser.add_argument('--nms-threshold', help='Value for non maximum suppression threshold.', type=float, default=0.5)
parser.add_argument('--score-threshold', help='Threshold for prefiltering boxes.', type=float, default=0.05)
parser.add_argument('--max-detections', help='Maximum number of detections to keep.', type=int, default=300)
parser.add_argument('--parallel-iterations', help='Number of batch items to process in parallel.', type=int, default=32)
return parser.parse_args(args)
def main(args=None):
# parse arguments
if args is None:
args = sys.argv[1:]
args = parse_args(args)
# make sure tensorflow is the minimum required version
check_tf_version()
# set modified tf session to avoid using the GPUs
setup_gpu('cpu')
# optionally load config parameters
anchor_parameters = None
pyramid_levels = None
if args.config:
args.config = read_config_file(args.config)
if 'anchor_parameters' in args.config:
anchor_parameters = parse_anchor_parameters(args.config)
if 'pyramid_levels' in args.config:
pyramid_levels = parse_pyramid_levels(args.config)
# load the model
model = models.load_model(args.model_in, backbone_name=args.backbone)
# check if this is indeed a training model
models.check_training_model(model)
# convert the model
model = models.convert_model(
model,
nms=args.nms,
class_specific_filter=args.class_specific_filter,
anchor_params=anchor_parameters,
pyramid_levels=pyramid_levels,
nms_threshold=args.nms_threshold,
score_threshold=args.score_threshold,
max_detections=args.max_detections,
parallel_iterations=args.parallel_iterations
)
# save model
model.save(args.model_out)
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
import cv2
# Set keycodes for changing images
# 81, 83 are left and right arrows on linux in Ascii code (probably not needed)
# 65361, 65363 are left and right arrows in linux
# 2424832, 2555904 are left and right arrows on Windows
# 110, 109 are 'n' and 'm' on mac, windows, linux
# (unfortunately arrow keys not picked up on mac)
leftkeys = (81, 110, 65361, 2424832)
rightkeys = (83, 109, 65363, 2555904)
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
import keras_retinanet.bin # noqa: F401
__package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.kitti import KittiGenerator
from ..preprocessing.open_images import OpenImagesGenerator
from ..utils.anchors import anchors_for_shape, compute_gt_annotations
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.image import random_visual_effect_generator
from ..utils.tf_version import check_tf_version
from ..utils.transform import random_transform_generator
from ..utils.visualization import draw_annotations, draw_boxes, draw_caption
def create_generator(args):
""" Create the data generators.
Args:
args: parseargs arguments object.
"""
common_args = {
'config' : args.config,
'image_min_side' : args.image_min_side,
'image_max_side' : args.image_max_side,
'group_method' : args.group_method
}
# create random transform generator for augmenting training data
transform_generator = random_transform_generator(
min_rotation=-0.1,
max_rotation=0.1,
min_translation=(-0.1, -0.1),
max_translation=(0.1, 0.1),
min_shear=-0.1,
max_shear=0.1,
min_scaling=(0.9, 0.9),
max_scaling=(1.1, 1.1),
flip_x_chance=0.5,
flip_y_chance=0.5,
)
visual_effect_generator = random_visual_effect_generator(
contrast_range=(0.9, 1.1),
brightness_range=(-.1, .1),
hue_range=(-0.05, 0.05),
saturation_range=(0.95, 1.05)
)
if args.dataset_type == 'coco':
# import here to prevent unnecessary dependency on cocoapi
from ..preprocessing.coco import CocoGenerator
generator = CocoGenerator(
args.coco_path,
args.coco_set,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
elif args.dataset_type == 'pascal':
generator = PascalVocGenerator(
args.pascal_path,
args.pascal_set,
image_extension=args.image_extension,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
elif args.dataset_type == 'csv':
generator = CSVGenerator(
args.annotations,
args.classes,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
elif args.dataset_type == 'oid':
generator = OpenImagesGenerator(
args.main_dir,
subset=args.subset,
version=args.version,
labels_filter=args.labels_filter,
parent_label=args.parent_label,
annotation_cache_dir=args.annotation_cache_dir,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
elif args.dataset_type == 'kitti':
generator = KittiGenerator(
args.kitti_path,
subset=args.subset,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
else:
raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
return generator
def parse_args(args):
""" Parse the arguments.
"""
parser = argparse.ArgumentParser(description='Debug script for a RetinaNet network.')
subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
subparsers.required = True
coco_parser = subparsers.add_parser('coco')
coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')
coco_parser.add_argument('--coco-set', help='Name of the set to show (defaults to val2017).', default='val2017')
pascal_parser = subparsers.add_parser('pascal')
pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
pascal_parser.add_argument('--pascal-set', help='Name of the set to show (defaults to test).', default='test')
pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')
kitti_parser = subparsers.add_parser('kitti')
kitti_parser.add_argument('kitti_path', help='Path to dataset directory (ie. /tmp/kitti).')
kitti_parser.add_argument('subset', help='Argument for loading a subset from train/val.')
def csv_list(string):
return string.split(',')
oid_parser = subparsers.add_parser('oid')
oid_parser.add_argument('main_dir', help='Path to dataset directory.')
oid_parser.add_argument('subset', help='Argument for loading a subset from train/validation/test.')
oid_parser.add_argument('--version', help='The current dataset version is v4.', default='v4')
oid_parser.add_argument('--labels-filter', help='A list of labels to filter.', type=csv_list, default=None)
oid_parser.add_argument('--annotation-cache-dir', help='Path to store annotation cache.', default='.')
oid_parser.add_argument('--parent-label', help='Use the hierarchy children of this label.', default=None)
csv_parser = subparsers.add_parser('csv')
csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
parser.add_argument('--no-resize', help='Disable image resizing.', dest='resize', action='store_false')
parser.add_argument('--anchors', help='Show positive anchors on the image.', action='store_true')
parser.add_argument('--display-name', help='Display image name on the bottom left corner.', action='store_true')
parser.add_argument('--show-annotations', help='Show annotations on the image. Green annotations have anchors, red annotations don\'t and therefore don\'t contribute to training.', action='store_true')
parser.add_argument('--random-transform', help='Randomly transform image and annotations.', action='store_true')
parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
parser.add_argument('--config', help='Path to a configuration parameters .ini file.')
parser.add_argument('--no-gui', help='Do not open a GUI window. Save images to an output directory instead.', action='store_true')
parser.add_argument('--output-dir', help='The output directory to save images to if --no-gui is specified.', default='.')
parser.add_argument('--flatten-output', help='Flatten the folder structure of saved output images into a single folder.', action='store_true')
parser.add_argument('--group-method', help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])
return parser.parse_args(args)
def run(generator, args, anchor_params, pyramid_levels):
""" Main loop.
Args
generator: The generator to debug.
args: parseargs args object.
"""
# display images, one at a time
i = 0
while True:
# load the data
image = generator.load_image(i)
annotations = generator.load_annotations(i)
if len(annotations['labels']) > 0 :
# apply random transformations
if args.random_transform:
image, annotations = generator.random_transform_group_entry(image, annotations)
image, annotations = generator.random_visual_effect_group_entry(image, annotations)
# resize the image and annotations
if args.resize:
image, image_scale = generator.resize_image(image)
annotations['bboxes'] *= image_scale
anchors = anchors_for_shape(image.shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
positive_indices, _, max_indices = compute_gt_annotations(anchors, annotations['bboxes'])
# draw anchors on the image
if args.anchors:
draw_boxes(image, anchors[positive_indices], (255, 255, 0), thickness=1)
# draw annotations on the image
if args.show_annotations:
# draw annotations in red
draw_annotations(image, annotations, color=(0, 0, 255), label_to_name=generator.label_to_name)
# draw regressed anchors in green to override most red annotations
# result is that annotations without anchors are red, with anchors are green
draw_boxes(image, annotations['bboxes'][max_indices[positive_indices], :], (0, 255, 0))
# display name on the image
if args.display_name:
draw_caption(image, [0, image.shape[0]], os.path.basename(generator.image_path(i)))
# write to file and advance if no-gui selected
if args.no_gui:
output_path = make_output_path(args.output_dir, generator.image_path(i), flatten=args.flatten_output)
os.makedirs(os.path.dirname(output_path), exist_ok=True)
cv2.imwrite(output_path, image)
i += 1
if i == generator.size(): # have written all images
break
else:
continue
# if we are using the GUI, then show an image
cv2.imshow('Image', image)
key = cv2.waitKeyEx()
# press right for next image and left for previous (linux or windows, doesn't work for macOS)
# if you run macOS, press "n" or "m" (will also work on linux and windows)
if key in rightkeys:
i = (i + 1) % generator.size()
if key in leftkeys:
i -= 1
if i < 0:
i = generator.size() - 1
# press q or Esc to quit
if (key == ord('q')) or (key == 27):
return False
return True
def make_output_path(output_dir, image_path, flatten = False):
""" Compute the output path for a debug image. """
# If the output hierarchy is flattened to a single folder, throw away all leading folders.
if flatten:
path = os.path.basename(image_path)
# Otherwise, make sure absolute paths are taken relative to the filesystem root.
else:
# Make sure to drop drive letters on Windows, otherwise relpath will fail.
_, path = os.path.splitdrive(image_path)
if os.path.isabs(path):
path = os.path.relpath(path, '/')
# In all cases, append "_debug" to the filename, before the extension.
base, extension = os.path.splitext(path)
path = base + "_debug" + extension
# Finally, join the whole thing to the output directory.
return os.path.join(output_dir, path)
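# Worked example of the mapping above (paths are hypothetical): with output_dir='out' and
# image_path='/data/images/img1.jpg', flatten=False yields 'out/data/images/img1_debug.jpg'
# while flatten=True yields 'out/img1_debug.jpg'.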
def main(args=None):
# parse arguments
if args is None:
args = sys.argv[1:]
args = parse_args(args)
# make sure tensorflow is the minimum required version
check_tf_version()
# create the generator
generator = create_generator(args)
# optionally load config parameters
if args.config:
args.config = read_config_file(args.config)
# optionally load anchor parameters
anchor_params = None
if args.config and 'anchor_parameters' in args.config:
anchor_params = parse_anchor_parameters(args.config)
pyramid_levels = None
if args.config and 'pyramid_levels' in args.config:
pyramid_levels = parse_pyramid_levels(args.config)
# create the display window if necessary
if not args.no_gui:
cv2.namedWindow('Image', cv2.WINDOW_NORMAL)
run(generator, args, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..'))
import keras_retinanet.bin # noqa: F401
__package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import models
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import read_config_file, parse_anchor_parameters, parse_pyramid_levels
from ..utils.eval import evaluate
from ..utils.gpu import setup_gpu
from ..utils.tf_version import check_tf_version
def create_generator(args, preprocess_image):
""" Create generators for evaluation.
"""
common_args = {
'config' : args.config,
'image_min_side' : args.image_min_side,
'image_max_side' : args.image_max_side,
'no_resize' : args.no_resize,
'preprocess_image' : preprocess_image,
'group_method' : args.group_method
}
if args.dataset_type == 'coco':
# import here to prevent unnecessary dependency on cocoapi
from ..preprocessing.coco import CocoGenerator
validation_generator = CocoGenerator(
args.coco_path,
'val2017',
shuffle_groups=False,
**common_args
)
elif args.dataset_type == 'pascal':
validation_generator = PascalVocGenerator(
args.pascal_path,
'test',
image_extension=args.image_extension,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == 'csv':
validation_generator = CSVGenerator(
args.annotations,
args.classes,
shuffle_groups=False,
**common_args
)
else:
raise ValueError('Invalid data type received: {}'.format(args.dataset_type))
return validation_generator
def parse_args(args):
""" Parse the arguments.
"""
parser = argparse.ArgumentParser(description='Evaluation script for a RetinaNet network.')
subparsers = parser.add_subparsers(help='Arguments for specific dataset types.', dest='dataset_type')
subparsers.required = True
coco_parser = subparsers.add_parser('coco')
coco_parser.add_argument('coco_path', help='Path to dataset directory (ie. /tmp/COCO).')
pascal_parser = subparsers.add_parser('pascal')
pascal_parser.add_argument('pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).')
pascal_parser.add_argument('--image-extension', help='Declares the dataset images\' extension.', default='.jpg')
csv_parser = subparsers.add_parser('csv')
csv_parser.add_argument('annotations', help='Path to CSV file containing annotations for evaluation.')
csv_parser.add_argument('classes', help='Path to a CSV file containing class label mapping.')
parser.add_argument('model', help='Path to RetinaNet model.')
parser.add_argument('--convert-model', help='Convert the model to an inference model (ie. the input is a training model).', action='store_true')
parser.add_argument('--backbone', help='The backbone of the model.', default='resnet50')
parser.add_argument('--gpu', help='Id of the GPU to use (as reported by nvidia-smi).')
parser.add_argument('--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float)
parser.add_argument('--iou-threshold', help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float)
parser.add_argument('--max-detections', help='Max Detections per image (defaults to 100).', default=100, type=int)
parser.add_argument('--save-path', help='Path for saving images with detections (doesn\'t work for COCO).')
parser.add_argument('--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800)
parser.add_argument('--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333)
parser.add_argument('--no-resize', help='Don\'t rescale the image.', action='store_true')
parser.add_argument('--config', help='Path to a configuration parameters .ini file (only used with --convert-model).')
parser.add_argument('--group-method', help='Determines how images are grouped together', type=str, default='ratio', choices=['none', 'random', 'ratio'])
return parser.parse_args(args)
def main(args=None):
# parse arguments
if args is None:
args = sys.argv[1:]
args = parse_args(args)
# make sure tensorflow is the minimum required version
check_tf_version()
# optionally choose specific GPU
if args.gpu:
setup_gpu(args.gpu)
# make save path if it doesn't exist
if args.save_path is not None and not os.path.exists(args.save_path):
os.makedirs(args.save_path)
# optionally load config parameters
if args.config:
args.config = read_config_file(args.config)
# create the generator
backbone = models.backbone(args.backbone)
generator = create_generator(args, backbone.preprocess_image)
# optionally load anchor parameters
anchor_params = None
pyramid_levels = None
if args.config and 'anchor_parameters' in args.config:
anchor_params = parse_anchor_parameters(args.config)
if args.config and 'pyramid_levels' in args.config:
pyramid_levels = parse_pyramid_levels(args.config)
# load the model
print('Loading model, this may take a second...')
model = models.load_model(args.model, backbone_name=args.backbone)
generator.compute_shapes = make_shapes_callback(model)
# optionally convert the model
if args.convert_model:
model = models.convert_model(model, anchor_params=anchor_params, pyramid_levels=pyramid_levels)
# print model summary
# print(model.summary())
# start evaluation
if args.dataset_type == 'coco':
from ..utils.coco_eval import evaluate_coco
evaluate_coco(generator, model, args.score_threshold)
else:
average_precisions, inference_time = evaluate(
generator,
model,
iou_threshold=args.iou_threshold,
score_threshold=args.score_threshold,
max_detections=args.max_detections,
save_path=args.save_path
)
# print evaluation
total_instances = []
precisions = []
for label, (average_precision, num_annotations) in average_precisions.items():
print('{:.0f} instances of class'.format(num_annotations),
generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
total_instances.append(num_annotations)
precisions.append(average_precision)
if sum(total_instances) == 0:
print('No test instances found.')
return
print('Inference time for {:.0f} images: {:.4f}'.format(generator.size(), inference_time))
print('mAP using the weighted average of precisions among classes: {:.4f}'.format(sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)))
print('mAP: {:.4f}'.format(sum(precisions) / sum(x > 0 for x in total_instances)))
if __name__ == '__main__':
main()
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
import warnings
from tensorflow import keras
import tensorflow as tf
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
import keras_retinanet.bin # noqa: F401
__package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import layers # noqa: F401
from .. import losses
from .. import models
from ..callbacks import RedirectModel
from ..callbacks.eval import Evaluate
from ..models.retinanet import retinanet_bbox
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.kitti import KittiGenerator
from ..preprocessing.open_images import OpenImagesGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import (
read_config_file,
parse_anchor_parameters,
parse_pyramid_levels,
)
from ..utils.gpu import setup_gpu
from ..utils.image import random_visual_effect_generator
from ..utils.model import freeze as freeze_model
from ..utils.tf_version import check_tf_version
from ..utils.transform import random_transform_generator
#######################
from ..models import submodel
def makedirs(path):
# Intended behavior: try to create the directory,
# pass if the directory already exists, and fail otherwise.
# Meant for Python 2.7/3.n compatibility.
try:
os.makedirs(path)
except OSError:
if not os.path.isdir(path):
raise
def model_with_weights(model, weights, skip_mismatch):
"""Load weights for model.
Args
model : The model to load weights for.
weights : The weights to load.
skip_mismatch : If True, skips layers whose weight shapes do not match the model.
"""
if weights is not None:
model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
return model
def create_models(
backbone_retinanet,
num_classes,
weights,
multi_gpu=0,
freeze_backbone=False,
lr=1e-5,
optimizer_clipnorm=0.001,
config=None,
submodels=None,
):
"""Creates three models (model, training_model, prediction_model).
Args
backbone_retinanet : A function to call to create a retinanet model with a given backbone.
num_classes : The number of classes to train.
weights : The weights to load into the model.
multi_gpu : The number of GPUs to use for training.
freeze_backbone : If True, disables learning for the backbone.
lr : Learning rate for the Adam optimizer.
optimizer_clipnorm : Clipnorm value for the Adam optimizer.
config : Config parameters, None indicates the default configuration.
submodels : Optional list of (name, model) tuples to use instead of the default regression/classification submodels.
Returns
model : The base model. This is also the model that is saved in snapshots.
training_model : The training model. If multi_gpu=0, this is identical to model.
prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
"""
modifier = freeze_model if freeze_backbone else None
# load anchor parameters, or pass None (so that defaults will be used)
anchor_params = None
num_anchors = None
pyramid_levels = None
if config and "anchor_parameters" in config:
anchor_params = parse_anchor_parameters(config)
num_anchors = anchor_params.num_anchors()
if config and "pyramid_levels" in config:
pyramid_levels = parse_pyramid_levels(config)
# Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
# optionally wrap in a parallel model
if multi_gpu > 1:
from keras.utils import multi_gpu_model
with tf.device("/cpu:0"):
model = model_with_weights(
backbone_retinanet(
num_classes,
num_anchors=num_anchors,
modifier=modifier,
pyramid_levels=pyramid_levels,
),
weights=weights,
skip_mismatch=True,
)
training_model = multi_gpu_model(model, gpus=multi_gpu)
else:
model = model_with_weights(
backbone_retinanet(
num_classes,
num_anchors=num_anchors,
modifier=modifier,
pyramid_levels=pyramid_levels,
submodels=submodels,
),
weights=weights,
skip_mismatch=True,
)
training_model = model
# make prediction model
prediction_model = retinanet_bbox(
model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels
)
# compile model
training_model.compile(
loss={"regression": losses.smooth_l1(), "classification": losses.focal()},
optimizer=keras.optimizers.Adam(lr=lr, clipnorm=optimizer_clipnorm),
)
return model, training_model, prediction_model
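# Minimal usage sketch (hedged; it mirrors what main() does below, and num_classes=80 is
# only an illustrative COCO-sized value):
#
#   backbone = models.backbone('resnet50')
#   model, training_model, prediction_model = create_models(
#       backbone_retinanet=backbone.retinanet,
#       num_classes=80,
#       weights=backbone.download_imagenet(),
#   )
#   # training_model is then passed to fit_generator(); prediction_model is used for evaluation.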
def create_callbacks(
model, training_model, prediction_model, validation_generator, args
):
"""Creates the callbacks to use during training.
Args
model: The base model.
training_model: The model that is used for training.
prediction_model: The model that should be used for validation.
validation_generator: The generator for creating validation data.
args: parseargs args object.
Returns:
A list of callbacks used for training.
"""
callbacks = []
tensorboard_callback = None
if args.tensorboard_dir:
makedirs(args.tensorboard_dir)
update_freq = args.tensorboard_freq
if update_freq not in ["epoch", "batch"]:
update_freq = int(update_freq)
tensorboard_callback = keras.callbacks.TensorBoard(
log_dir=args.tensorboard_dir,
histogram_freq=0,
batch_size=args.batch_size,
write_graph=True,
write_grads=False,
write_images=False,
update_freq=update_freq,
embeddings_freq=0,
embeddings_layer_names=None,
embeddings_metadata=None,
)
if args.evaluation and validation_generator:
if args.dataset_type == "coco":
from ..callbacks.coco import CocoEval
# use prediction model for evaluation
evaluation = CocoEval(
validation_generator, tensorboard=tensorboard_callback
)
else:
evaluation = Evaluate(
validation_generator,
tensorboard=tensorboard_callback,
weighted_average=args.weighted_average,
)
evaluation = RedirectModel(evaluation, prediction_model)
callbacks.append(evaluation)
# save the model
if args.snapshots:
# ensure directory created first; otherwise h5py will error after epoch.
makedirs(args.snapshot_path)
checkpoint = keras.callbacks.ModelCheckpoint(
os.path.join(
args.snapshot_path,
"{backbone}_{dataset_type}_{{epoch:02d}}.h5".format(
backbone=args.backbone, dataset_type=args.dataset_type
),
),
verbose=1,
# save_best_only=True,
# monitor="mAP",
# mode='max'
)
checkpoint = RedirectModel(checkpoint, model)
callbacks.append(checkpoint)
callbacks.append(
keras.callbacks.ReduceLROnPlateau(
monitor="loss",
factor=args.reduce_lr_factor,
patience=args.reduce_lr_patience,
verbose=1,
mode="auto",
min_delta=0.0001,
cooldown=0,
min_lr=0,
)
)
if args.evaluation and validation_generator:
callbacks.append(
keras.callbacks.EarlyStopping(
monitor="mAP", patience=5, mode="max", min_delta=0.01
)
)
if args.tensorboard_dir:
callbacks.append(tensorboard_callback)
return callbacks
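# Note: with the template above, ModelCheckpoint writes snapshots such as
# './snapshots/resnet50_csv_01.h5' after the first epoch (backbone and dataset type are
# taken from the command line arguments).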
def create_generators(args, preprocess_image):
"""Create generators for training and validation.
Args
args : parseargs object containing configuration for generators.
preprocess_image : Function that preprocesses an image for the network.
"""
common_args = {
"batch_size": args.batch_size,
"config": args.config,
"image_min_side": args.image_min_side,
"image_max_side": args.image_max_side,
"no_resize": args.no_resize,
"preprocess_image": preprocess_image,
"group_method": args.group_method,
}
# create random transform generator for augmenting training data
if args.random_transform:
transform_generator = random_transform_generator(
min_rotation=-0.1,
max_rotation=0.1,
min_translation=(-0.1, -0.1),
max_translation=(0.1, 0.1),
min_shear=-0.1,
max_shear=0.1,
min_scaling=(0.9, 0.9),
max_scaling=(1.1, 1.1),
flip_x_chance=0.5,
flip_y_chance=0.5,
)
visual_effect_generator = random_visual_effect_generator(
contrast_range=(0.9, 1.1),
brightness_range=(-0.1, 0.1),
hue_range=(-0.05, 0.05),
saturation_range=(0.95, 1.05),
)
else:
transform_generator = random_transform_generator(flip_x_chance=0.5)
visual_effect_generator = None
if args.dataset_type == "coco":
# import here to prevent unnecessary dependency on cocoapi
from ..preprocessing.coco import CocoGenerator
train_generator = CocoGenerator(
args.coco_path,
"train2017",
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = CocoGenerator(
args.coco_path, "val2017", shuffle_groups=False, **common_args
)
elif args.dataset_type == "pascal":
train_generator = PascalVocGenerator(
args.pascal_path,
"train",
image_extension=args.image_extension,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = PascalVocGenerator(
args.pascal_path,
"val",
image_extension=args.image_extension,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == "csv":
train_generator = CSVGenerator(
args.annotations,
args.classes,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
if args.val_annotations:
validation_generator = CSVGenerator(
args.val_annotations, args.classes, shuffle_groups=False, **common_args
)
else:
validation_generator = None
elif args.dataset_type == "oid":
train_generator = OpenImagesGenerator(
args.main_dir,
subset="train",
version=args.version,
labels_filter=args.labels_filter,
annotation_cache_dir=args.annotation_cache_dir,
parent_label=args.parent_label,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = OpenImagesGenerator(
args.main_dir,
subset="validation",
version=args.version,
labels_filter=args.labels_filter,
annotation_cache_dir=args.annotation_cache_dir,
parent_label=args.parent_label,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == "kitti":
train_generator = KittiGenerator(
args.kitti_path,
subset="train",
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = KittiGenerator(
args.kitti_path, subset="val", shuffle_groups=False, **common_args
)
else:
raise ValueError("Invalid data type received: {}".format(args.dataset_type))
return train_generator, validation_generator
def check_args(parsed_args):
"""Function to check for inherent contradictions within parsed arguments.
For example, batch_size < num_gpus
Intended to raise errors prior to backend initialisation.
Args
parsed_args: parser.parse_args()
Returns
parsed_args
"""
if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu:
raise ValueError(
"Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(
parsed_args.batch_size, parsed_args.multi_gpu
)
)
if parsed_args.multi_gpu > 1 and parsed_args.snapshot:
raise ValueError(
"Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(
parsed_args.multi_gpu, parsed_args.snapshot
)
)
if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force:
raise ValueError(
"Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue."
)
if "resnet" not in parsed_args.backbone:
warnings.warn(
"Using experimental backbone {}. Only resnet50 has been properly tested.".format(
parsed_args.backbone
)
)
return parsed_args
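# Example of a rejected combination: --multi-gpu 2 together with --batch-size 1 raises a
# ValueError here, before any TensorFlow/backend initialisation happens.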
def parse_args(args):
"""Parse the arguments."""
parser = argparse.ArgumentParser(
description="Simple training script for training a RetinaNet network."
)
subparsers = parser.add_subparsers(
help="Arguments for specific dataset types.", dest="dataset_type"
)
subparsers.required = True
coco_parser = subparsers.add_parser("coco")
coco_parser.add_argument(
"coco_path", help="Path to dataset directory (ie. /tmp/COCO)."
)
pascal_parser = subparsers.add_parser("pascal")
pascal_parser.add_argument(
"pascal_path", help="Path to dataset directory (ie. /tmp/VOCdevkit)."
)
pascal_parser.add_argument(
"--image-extension",
help="Declares the dataset images' extension.",
default=".jpg",
)
kitti_parser = subparsers.add_parser("kitti")
kitti_parser.add_argument(
"kitti_path", help="Path to dataset directory (ie. /tmp/kitti)."
)
def csv_list(string):
return string.split(",")
oid_parser = subparsers.add_parser("oid")
oid_parser.add_argument("main_dir", help="Path to dataset directory.")
oid_parser.add_argument(
"--version", help="The current dataset version is v4.", default="v4"
)
oid_parser.add_argument(
"--labels-filter",
help="A list of labels to filter.",
type=csv_list,
default=None,
)
oid_parser.add_argument(
"--annotation-cache-dir", help="Path to store annotation cache.", default="."
)
oid_parser.add_argument(
"--parent-label", help="Use the hierarchy children of this label.", default=None
)
csv_parser = subparsers.add_parser("csv")
csv_parser.add_argument(
"annotations", help="Path to CSV file containing annotations for training."
)
csv_parser.add_argument(
"classes", help="Path to a CSV file containing class label mapping."
)
csv_parser.add_argument(
"--val-annotations",
help="Path to CSV file containing annotations for validation (optional).",
)
group = parser.add_mutually_exclusive_group()
group.add_argument("--snapshot", help="Resume training from a snapshot.")
group.add_argument(
"--imagenet-weights",
help="Initialize the model with pretrained imagenet weights. This is the default behaviour.",
action="store_const",
const=True,
default=True,
)
group.add_argument(
"--weights", help="Initialize the model with weights from a file."
)
group.add_argument(
"--no-weights",
help="Don't initialize the model with any weights.",
dest="imagenet_weights",
action="store_const",
const=False,
)
parser.add_argument(
"--backbone",
help="Backbone model used by retinanet.",
default="resnet50",
type=str,
)
parser.add_argument(
"--batch-size", help="Size of the batches.", default=1, type=int
)
parser.add_argument(
"--gpu", help="Id of the GPU to use (as reported by nvidia-smi)."
)
parser.add_argument(
"--multi-gpu",
help="Number of GPUs to use for parallel processing.",
type=int,
default=0,
)
parser.add_argument(
"--multi-gpu-force",
help="Extra flag needed to enable (experimental) multi-gpu support.",
action="store_true",
)
parser.add_argument(
"--initial-epoch",
help="Epoch from which to begin the train, useful if resuming from snapshot.",
type=int,
default=0,
)
parser.add_argument(
"--epochs", help="Number of epochs to train.", type=int, default=50
)
parser.add_argument(
"--steps", help="Number of steps per epoch.", type=int, default=10000
)
parser.add_argument("--lr", help="Learning rate.", type=float, default=1e-5)
parser.add_argument(
"--optimizer-clipnorm",
help="Clipnorm parameter for optimizer.",
type=float,
default=0.001,
)
parser.add_argument(
"--snapshot-path",
help="Path to store snapshots of models during training (defaults to './snapshots')",
default="./snapshots",
)
parser.add_argument(
"--tensorboard-dir", help="Log directory for Tensorboard output", default=""
) # default='./logs') => https://github.com/tensorflow/tensorflow/pull/34870
parser.add_argument(
"--tensorboard-freq",
help="Update frequency for Tensorboard output. Values 'epoch', 'batch' or int",
default="epoch",
)
parser.add_argument(
"--no-snapshots",
help="Disable saving snapshots.",
dest="snapshots",
action="store_false",
)
parser.add_argument(
"--no-evaluation",
help="Disable per epoch evaluation.",
dest="evaluation",
action="store_false",
)
parser.add_argument(
"--freeze-backbone",
help="Freeze training of backbone layers.",
action="store_true",
)
parser.add_argument(
"--random-transform",
help="Randomly transform image and annotations.",
action="store_true",
)
parser.add_argument(
"--image-min-side",
help="Rescale the image so the smallest side is min_side.",
type=int,
default=800,
)
parser.add_argument(
"--image-max-side",
help="Rescale the image if the largest side is larger than max_side.",
type=int,
default=1333,
)
parser.add_argument(
"--no-resize", help="Don" "t rescale the image.", action="store_true"
)
parser.add_argument(
"--config", help="Path to a configuration parameters .ini file."
)
parser.add_argument(
"--weighted-average",
help="Compute the mAP using the weighted average of precisions among classes.",
action="store_true",
)
parser.add_argument(
"--compute-val-loss",
help="Compute validation loss during training",
dest="compute_val_loss",
action="store_true",
)
parser.add_argument(
"--reduce-lr-patience",
help="Reduce learning rate after validation loss decreases over reduce_lr_patience epochs",
type=int,
default=2,
)
parser.add_argument(
"--reduce-lr-factor",
help="When learning rate is reduced due to reduce_lr_patience, multiply by reduce_lr_factor",
type=float,
default=0.1,
)
parser.add_argument(
"--group-method",
help="Determines how images are grouped together",
type=str,
default="ratio",
choices=["none", "random", "ratio"],
)
# Fit generator arguments
parser.add_argument(
"--multiprocessing",
help="Use multiprocessing in fit_generator.",
action="store_true",
)
parser.add_argument(
"--workers", help="Number of generator workers.", type=int, default=1
)
parser.add_argument(
"--max-queue-size",
help="Queue length for multiprocessing workers in fit_generator.",
type=int,
default=10,
)
return check_args(parser.parse_args(args))
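# Example invocation (hedged: paths are placeholders and the script is assumed to live at
# keras_retinanet/bin/train.py as in upstream keras-retinanet; options of the main parser
# must come before the dataset subcommand):
#   python keras_retinanet/bin/train.py --backbone resnet50 --batch-size 1 --steps 100 \
#       csv /path/to/annotations.csv /path/to/classes.csv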
def main(args=None):
# parse arguments
if args is None:
args = sys.argv[1:]
args = parse_args(args)
# create object that stores backbone information
backbone = models.backbone(args.backbone)
# make sure tensorflow is the minimum required version
check_tf_version()
# optionally choose specific GPU
if args.gpu is not None:
setup_gpu(args.gpu)
# optionally load config parameters
if args.config:
args.config = read_config_file(args.config)
# create the generators
train_generator, validation_generator = create_generators(
args, backbone.preprocess_image
)
# create the model
if args.snapshot is not None:
print("Loading model, this may take a second...")
model = models.load_model(args.snapshot, backbone_name=args.backbone)
training_model = model
anchor_params = None
pyramid_levels = None
if args.config and "anchor_parameters" in args.config:
anchor_params = parse_anchor_parameters(args.config)
if args.config and "pyramid_levels" in args.config:
pyramid_levels = parse_pyramid_levels(args.config)
prediction_model = retinanet_bbox(
model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels
)
else:
weights = args.weights
# default to imagenet if nothing else is specified
if weights is None and args.imagenet_weights:
weights = backbone.download_imagenet()
################
subclass1 = submodel.custom_classification_model(num_classes=51, num_anchors=None, name="classification_submodel1")
subregress1 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel1")
subclass2 = submodel.custom_classification_model(num_classes=10, num_anchors=None, name="classification_submodel2")
subregress2 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel2")
subclass3 = submodel.custom_classification_model(num_classes=16, num_anchors=None, name="classification_submodel3")
subregress3 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel3")
submodels = [
("regression", subregress1), ("classification", subclass1),
("regression", subregress2), ("classification", subclass2),
("regression", subregress3), ("classification", subclass3),
]
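# NOTE: the hand-built submodels list above is immediately replaced by the
# custom_default_submodels result below; only the second assignment takes effect.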
s1 = submodel.custom_default_submodels(51, None)
s2 = submodel.custom_default_submodels(10, None)
s3 = submodel.custom_default_submodels(16, None)
submodels = s1 + s2 + s3
#################
print("Creating model, this may take a second...")
model, training_model, prediction_model = create_models(
backbone_retinanet=backbone.retinanet,
num_classes=train_generator.num_classes(),
weights=weights,
multi_gpu=args.multi_gpu,
freeze_backbone=args.freeze_backbone,
lr=args.lr,
optimizer_clipnorm=args.optimizer_clipnorm,
config=args.config,
submodels=submodels,
)
# print model summary
print(model.summary())
# this lets the generator compute backbone layer shapes using the actual backbone model
if "vgg" in args.backbone or "densenet" in args.backbone:
train_generator.compute_shapes = make_shapes_callback(model)
if validation_generator:
validation_generator.compute_shapes = train_generator.compute_shapes
# create the callbacks
callbacks = create_callbacks(
model,
training_model,
prediction_model,
validation_generator,
args,
)
if not args.compute_val_loss:
validation_generator = None
# start training
return training_model.fit_generator(
generator=train_generator,
steps_per_epoch=args.steps,
epochs=args.epochs,
verbose=1,
callbacks=callbacks,
workers=args.workers,
use_multiprocessing=args.multiprocessing,
max_queue_size=args.max_queue_size,
validation_data=validation_generator,
initial_epoch=args.initial_epoch,
)
if __name__ == "__main__":
main()
#!/usr/bin/env python
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import argparse
import os
import sys
import warnings
from tensorflow import keras
import tensorflow as tf
# Allow relative imports when being executed as script.
if __name__ == "__main__" and __package__ is None:
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
import keras_retinanet.bin # noqa: F401
__package__ = "keras_retinanet.bin"
# Change these to absolute imports if you copy this script outside the keras_retinanet package.
from .. import layers # noqa: F401
from .. import losses
from .. import models
from ..callbacks import RedirectModel
from ..callbacks.eval import Evaluate
from ..models.retinanet import retinanet_bbox
from ..preprocessing.csv_generator import CSVGenerator
from ..preprocessing.kitti import KittiGenerator
from ..preprocessing.open_images import OpenImagesGenerator
from ..preprocessing.pascal_voc import PascalVocGenerator
from ..utils.anchors import make_shapes_callback
from ..utils.config import (
read_config_file,
parse_anchor_parameters,
parse_pyramid_levels,
)
from ..utils.gpu import setup_gpu
from ..utils.image import random_visual_effect_generator
from ..utils.model import freeze as freeze_model
from ..utils.tf_version import check_tf_version
from ..utils.transform import random_transform_generator
#######################
from ..models import submodel
def makedirs(path):
# Intended behavior: try to create the directory,
# pass if the directory already exists, and fail otherwise.
# Meant for Python 2.7/3.n compatibility.
try:
os.makedirs(path)
except OSError:
if not os.path.isdir(path):
raise
def model_with_weights(model, weights, skip_mismatch):
"""Load weights for model.
Args
model : The model to load weights for.
weights : The weights to load.
skip_mismatch : If True, skips layers whose weight shapes do not match the model.
"""
if weights is not None:
model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch)
return model
def create_models(
backbone_retinanet,
num_classes,
weights,
multi_gpu=0,
freeze_backbone=False,
lr=1e-5,
optimizer_clipnorm=0.001,
config=None,
submodels=None,
):
"""Creates three models (model, training_model, prediction_model).
Args
backbone_retinanet : A function to call to create a retinanet model with a given backbone.
num_classes : The number of classes to train.
weights : The weights to load into the model.
multi_gpu : The number of GPUs to use for training.
freeze_backbone : If True, disables learning for the backbone.
lr : Learning rate for the Adam optimizer.
optimizer_clipnorm : Clipnorm value for the Adam optimizer.
config : Config parameters, None indicates the default configuration.
submodels : Optional list of (name, model) tuples to use instead of the default regression/classification submodels.
Returns
model : The base model. This is also the model that is saved in snapshots.
training_model : The training model. If multi_gpu=0, this is identical to model.
prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS).
"""
modifier = freeze_model if freeze_backbone else None
# load anchor parameters, or pass None (so that defaults will be used)
anchor_params = None
num_anchors = None
pyramid_levels = None
if config and "anchor_parameters" in config:
anchor_params = parse_anchor_parameters(config)
num_anchors = anchor_params.num_anchors()
if config and "pyramid_levels" in config:
pyramid_levels = parse_pyramid_levels(config)
# Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors.
# optionally wrap in a parallel model
if multi_gpu > 1:
from keras.utils import multi_gpu_model
with tf.device("/cpu:0"):
model = model_with_weights(
backbone_retinanet(
num_classes,
num_anchors=num_anchors,
modifier=modifier,
pyramid_levels=pyramid_levels,
),
weights=weights,
skip_mismatch=True,
)
training_model = multi_gpu_model(model, gpus=multi_gpu)
else:
model = model_with_weights(
backbone_retinanet(
num_classes,
num_anchors=num_anchors,
modifier=modifier,
pyramid_levels=pyramid_levels,
submodels=submodels,
),
weights=weights,
skip_mismatch=True,
)
training_model = model
# make prediction model
prediction_model = retinanet_bbox(
model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels
)
# compile model
training_model.compile(
loss={"regression": losses.smooth_l1(), "classification": losses.focal()},
optimizer=keras.optimizers.Adam(lr=lr, clipnorm=optimizer_clipnorm),
)
return model, training_model, prediction_model
def create_callbacks(
model, training_model, prediction_model, validation_generator, args
):
"""Creates the callbacks to use during training.
Args
model: The base model.
training_model: The model that is used for training.
prediction_model: The model that should be used for validation.
validation_generator: The generator for creating validation data.
args: parseargs args object.
Returns:
A list of callbacks used for training.
"""
callbacks = []
tensorboard_callback = None
if args.tensorboard_dir:
makedirs(args.tensorboard_dir)
update_freq = args.tensorboard_freq
if update_freq not in ["epoch", "batch"]:
update_freq = int(update_freq)
tensorboard_callback = keras.callbacks.TensorBoard(
log_dir=args.tensorboard_dir,
histogram_freq=0,
batch_size=args.batch_size,
write_graph=True,
write_grads=False,
write_images=False,
update_freq=update_freq,
embeddings_freq=0,
embeddings_layer_names=None,
embeddings_metadata=None,
)
if args.evaluation and validation_generator:
if args.dataset_type == "coco":
from ..callbacks.coco import CocoEval
# use prediction model for evaluation
evaluation = CocoEval(
validation_generator, tensorboard=tensorboard_callback
)
else:
evaluation = Evaluate(
validation_generator,
tensorboard=tensorboard_callback,
weighted_average=args.weighted_average,
)
evaluation = RedirectModel(evaluation, prediction_model)
callbacks.append(evaluation)
# save the model
if args.snapshots:
# ensure directory created first; otherwise h5py will error after epoch.
makedirs(args.snapshot_path)
checkpoint = keras.callbacks.ModelCheckpoint(
os.path.join(
args.snapshot_path,
"{backbone}_{dataset_type}_{{epoch:02d}}.h5".format(
backbone=args.backbone, dataset_type=args.dataset_type
),
),
verbose=1,
# save_best_only=True,
# monitor="mAP",
# mode='max'
)
checkpoint = RedirectModel(checkpoint, model)
callbacks.append(checkpoint)
callbacks.append(
keras.callbacks.ReduceLROnPlateau(
monitor="loss",
factor=args.reduce_lr_factor,
patience=args.reduce_lr_patience,
verbose=1,
mode="auto",
min_delta=0.0001,
cooldown=0,
min_lr=0,
)
)
if args.evaluation and validation_generator:
callbacks.append(
keras.callbacks.EarlyStopping(
monitor="mAP", patience=5, mode="max", min_delta=0.01
)
)
if args.tensorboard_dir:
callbacks.append(tensorboard_callback)
return callbacks
def create_generators(args, preprocess_image):
"""Create generators for training and validation.
Args
args : parseargs object containing configuration for generators.
preprocess_image : Function that preprocesses an image for the network.
"""
common_args = {
"batch_size": args.batch_size,
"config": args.config,
"image_min_side": args.image_min_side,
"image_max_side": args.image_max_side,
"no_resize": args.no_resize,
"preprocess_image": preprocess_image,
"group_method": args.group_method,
}
# create random transform generator for augmenting training data
if args.random_transform:
transform_generator = random_transform_generator(
min_rotation=-0.1,
max_rotation=0.1,
min_translation=(-0.1, -0.1),
max_translation=(0.1, 0.1),
min_shear=-0.1,
max_shear=0.1,
min_scaling=(0.9, 0.9),
max_scaling=(1.1, 1.1),
flip_x_chance=0.5,
flip_y_chance=0.5,
)
visual_effect_generator = random_visual_effect_generator(
contrast_range=(0.9, 1.1),
brightness_range=(-0.1, 0.1),
hue_range=(-0.05, 0.05),
saturation_range=(0.95, 1.05),
)
else:
transform_generator = random_transform_generator(flip_x_chance=0.5)
visual_effect_generator = None
if args.dataset_type == "coco":
# import here to prevent unnecessary dependency on cocoapi
from ..preprocessing.coco import CocoGenerator
train_generator = CocoGenerator(
args.coco_path,
"train2017",
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = CocoGenerator(
args.coco_path, "val2017", shuffle_groups=False, **common_args
)
elif args.dataset_type == "pascal":
train_generator = PascalVocGenerator(
args.pascal_path,
"train",
image_extension=args.image_extension,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = PascalVocGenerator(
args.pascal_path,
"val",
image_extension=args.image_extension,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == "csv":
train_generator = CSVGenerator(
args.annotations,
args.classes,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
if args.val_annotations:
validation_generator = CSVGenerator(
args.val_annotations, args.classes, shuffle_groups=False, **common_args
)
else:
validation_generator = None
elif args.dataset_type == "oid":
train_generator = OpenImagesGenerator(
args.main_dir,
subset="train",
version=args.version,
labels_filter=args.labels_filter,
annotation_cache_dir=args.annotation_cache_dir,
parent_label=args.parent_label,
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = OpenImagesGenerator(
args.main_dir,
subset="validation",
version=args.version,
labels_filter=args.labels_filter,
annotation_cache_dir=args.annotation_cache_dir,
parent_label=args.parent_label,
shuffle_groups=False,
**common_args
)
elif args.dataset_type == "kitti":
train_generator = KittiGenerator(
args.kitti_path,
subset="train",
transform_generator=transform_generator,
visual_effect_generator=visual_effect_generator,
**common_args
)
validation_generator = KittiGenerator(
args.kitti_path, subset="val", shuffle_groups=False, **common_args
)
else:
raise ValueError("Invalid data type received: {}".format(args.dataset_type))
return train_generator, validation_generator
def check_args(parsed_args):
"""Function to check for inherent contradictions within parsed arguments.
For example, batch_size < num_gpus
Intended to raise errors prior to backend initialisation.
Args
parsed_args: parser.parse_args()
Returns
parsed_args
"""
if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu:
raise ValueError(
"Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(
parsed_args.batch_size, parsed_args.multi_gpu
)
)
if parsed_args.multi_gpu > 1 and parsed_args.snapshot:
raise ValueError(
"Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(
parsed_args.multi_gpu, parsed_args.snapshot
)
)
if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force:
raise ValueError(
"Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue."
)
if "resnet" not in parsed_args.backbone:
warnings.warn(
"Using experimental backbone {}. Only resnet50 has been properly tested.".format(
parsed_args.backbone
)
)
return parsed_args
def parse_args(args):
"""Parse the arguments."""
parser = argparse.ArgumentParser(
description="Simple training script for training a RetinaNet network."
)
subparsers = parser.add_subparsers(
help="Arguments for specific dataset types.", dest="dataset_type"
)
subparsers.required = True
coco_parser = subparsers.add_parser("coco")
coco_parser.add_argument(
"coco_path", help="Path to dataset directory (ie. /tmp/COCO)."
)
pascal_parser = subparsers.add_parser("pascal")
pascal_parser.add_argument(
"pascal_path", help="Path to dataset directory (ie. /tmp/VOCdevkit)."
)
pascal_parser.add_argument(
"--image-extension",
help="Declares the dataset images' extension.",
default=".jpg",
)
kitti_parser = subparsers.add_parser("kitti")
kitti_parser.add_argument(
"kitti_path", help="Path to dataset directory (ie. /tmp/kitti)."
)
def csv_list(string):
return string.split(",")
oid_parser = subparsers.add_parser("oid")
oid_parser.add_argument("main_dir", help="Path to dataset directory.")
oid_parser.add_argument(
"--version", help="The current dataset version is v4.", default="v4"
)
oid_parser.add_argument(
"--labels-filter",
help="A list of labels to filter.",
type=csv_list,
default=None,
)
oid_parser.add_argument(
"--annotation-cache-dir", help="Path to store annotation cache.", default="."
)
oid_parser.add_argument(
"--parent-label", help="Use the hierarchy children of this label.", default=None
)
csv_parser = subparsers.add_parser("csv")
csv_parser.add_argument(
"annotations", help="Path to CSV file containing annotations for training."
)
csv_parser.add_argument(
"classes", help="Path to a CSV file containing class label mapping."
)
csv_parser.add_argument(
"--val-annotations",
help="Path to CSV file containing annotations for validation (optional).",
)
group = parser.add_mutually_exclusive_group()
group.add_argument("--snapshot", help="Resume training from a snapshot.")
group.add_argument(
"--imagenet-weights",
help="Initialize the model with pretrained imagenet weights. This is the default behaviour.",
action="store_const",
const=True,
default=True,
)
group.add_argument(
"--weights", help="Initialize the model with weights from a file."
)
group.add_argument(
"--no-weights",
help="Don't initialize the model with any weights.",
dest="imagenet_weights",
action="store_const",
const=False,
)
parser.add_argument(
"--backbone",
help="Backbone model used by retinanet.",
default="resnet50",
type=str,
)
parser.add_argument(
"--batch-size", help="Size of the batches.", default=1, type=int
)
parser.add_argument(
"--gpu", help="Id of the GPU to use (as reported by nvidia-smi)."
)
parser.add_argument(
"--multi-gpu",
help="Number of GPUs to use for parallel processing.",
type=int,
default=0,
)
parser.add_argument(
"--multi-gpu-force",
help="Extra flag needed to enable (experimental) multi-gpu support.",
action="store_true",
)
parser.add_argument(
"--initial-epoch",
help="Epoch from which to begin the train, useful if resuming from snapshot.",
type=int,
default=0,
)
parser.add_argument(
"--epochs", help="Number of epochs to train.", type=int, default=50
)
parser.add_argument(
"--steps", help="Number of steps per epoch.", type=int, default=10000
)
parser.add_argument("--lr", help="Learning rate.", type=float, default=1e-5)
parser.add_argument(
"--optimizer-clipnorm",
help="Clipnorm parameter for optimizer.",
type=float,
default=0.001,
)
parser.add_argument(
"--snapshot-path",
help="Path to store snapshots of models during training (defaults to './snapshots')",
default="./snapshots",
)
parser.add_argument(
"--tensorboard-dir", help="Log directory for Tensorboard output", default=""
) # default='./logs') => https://github.com/tensorflow/tensorflow/pull/34870
parser.add_argument(
"--tensorboard-freq",
help="Update frequency for Tensorboard output. Values 'epoch', 'batch' or int",
default="epoch",
)
parser.add_argument(
"--no-snapshots",
help="Disable saving snapshots.",
dest="snapshots",
action="store_false",
)
parser.add_argument(
"--no-evaluation",
help="Disable per epoch evaluation.",
dest="evaluation",
action="store_false",
)
parser.add_argument(
"--freeze-backbone",
help="Freeze training of backbone layers.",
action="store_true",
)
parser.add_argument(
"--random-transform",
help="Randomly transform image and annotations.",
action="store_true",
)
parser.add_argument(
"--image-min-side",
help="Rescale the image so the smallest side is min_side.",
type=int,
default=800,
)
parser.add_argument(
"--image-max-side",
help="Rescale the image if the largest side is larger than max_side.",
type=int,
default=1333,
)
parser.add_argument(
"--no-resize", help="Don" "t rescale the image.", action="store_true"
)
parser.add_argument(
"--config", help="Path to a configuration parameters .ini file."
)
parser.add_argument(
"--weighted-average",
help="Compute the mAP using the weighted average of precisions among classes.",
action="store_true",
)
parser.add_argument(
"--compute-val-loss",
help="Compute validation loss during training",
dest="compute_val_loss",
action="store_true",
)
parser.add_argument(
"--reduce-lr-patience",
help="Reduce learning rate after validation loss decreases over reduce_lr_patience epochs",
type=int,
default=2,
)
parser.add_argument(
"--reduce-lr-factor",
help="When learning rate is reduced due to reduce_lr_patience, multiply by reduce_lr_factor",
type=float,
default=0.1,
)
parser.add_argument(
"--group-method",
help="Determines how images are grouped together",
type=str,
default="ratio",
choices=["none", "random", "ratio"],
)
# Fit generator arguments
parser.add_argument(
"--multiprocessing",
help="Use multiprocessing in fit_generator.",
action="store_true",
)
parser.add_argument(
"--workers", help="Number of generator workers.", type=int, default=1
)
parser.add_argument(
"--max-queue-size",
help="Queue length for multiprocessing workers in fit_generator.",
type=int,
default=10,
)
return check_args(parser.parse_args(args))
def main(args=None):
# parse arguments
if args is None:
args = sys.argv[1:]
args = parse_args(args)
# create object that stores backbone information
backbone = models.backbone(args.backbone)
# make sure tensorflow is the minimum required version
check_tf_version()
# optionally choose specific GPU
if args.gpu is not None:
setup_gpu(args.gpu)
# optionally load config parameters
if args.config:
args.config = read_config_file(args.config)
# create the generators
train_generator, validation_generator = create_generators(
args, backbone.preprocess_image
)
# create the model
if args.snapshot is not None:
print("Loading model, this may take a second...")
model = models.load_model(args.snapshot, backbone_name=args.backbone)
training_model = model
anchor_params = None
pyramid_levels = None
if args.config and "anchor_parameters" in args.config:
anchor_params = parse_anchor_parameters(args.config)
if args.config and "pyramid_levels" in args.config:
pyramid_levels = parse_pyramid_levels(args.config)
prediction_model = retinanet_bbox(
model=model, anchor_params=anchor_params, pyramid_levels=pyramid_levels
)
else:
weights = args.weights
# default to imagenet if nothing else is specified
if weights is None and args.imagenet_weights:
weights = backbone.download_imagenet()
#################
# subclass1 = submodel.custom_classification_model(num_classes=51, num_anchors=None, name="classification_submodel1")
# subregress1 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel1")
# subclass2 = submodel.custom_classification_model(num_classes=10, num_anchors=None, name="classification_submodel2")
# subregress2 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel2")
# subclass3 = submodel.custom_classification_model(num_classes=16, num_anchors=None, name="classification_submodel3")
# subregress3 = submodel.custom_regression_model(num_values=4, num_anchors=None, name="regression_submodel3")
# submodels = [
# ("regression", subregress1), ("classification", subclass1),
# ("regression", subregress2), ("classification", subclass2),
# ("regression", subregress3), ("classification", subclass3),
# ]
# s1 = submodel.custom_default_submodels(51, None)
# s2 = submodel.custom_default_submodels(10, None)
# s3 = submodel.custom_default_submodels(16, None)
# submodels = s1 + s2 + s3
#################
print("Creating model, this may take a second...")
model, training_model, prediction_model = create_models(
backbone_retinanet=backbone.retinanet,
num_classes=train_generator.num_classes(),
weights=weights,
multi_gpu=args.multi_gpu,
freeze_backbone=args.freeze_backbone,
lr=args.lr,
optimizer_clipnorm=args.optimizer_clipnorm,
config=args.config,
submodels=submodel.custom_classification_model(76,),
)
# print model summary
print(model.summary())
# this lets the generator compute backbone layer shapes using the actual backbone model
if "vgg" in args.backbone or "densenet" in args.backbone:
train_generator.compute_shapes = make_shapes_callback(model)
if validation_generator:
validation_generator.compute_shapes = train_generator.compute_shapes
# create the callbacks
callbacks = create_callbacks(
model,
training_model,
prediction_model,
validation_generator,
args,
)
if not args.compute_val_loss:
validation_generator = None
# start training
return training_model.fit_generator(
generator=train_generator,
steps_per_epoch=args.steps,
epochs=args.epochs,
verbose=1,
callbacks=callbacks,
workers=args.workers,
use_multiprocessing=args.multiprocessing,
max_queue_size=args.max_queue_size,
validation_data=validation_generator,
initial_epoch=args.initial_epoch,
)
if __name__ == "__main__":
main()
from .common import * # noqa: F401,F403
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.coco_eval import evaluate_coco
class CocoEval(keras.callbacks.Callback):
""" Performs COCO evaluation on each epoch.
"""
def __init__(self, generator, tensorboard=None, threshold=0.05):
""" CocoEval callback intializer.
Args
generator : The generator used for creating validation data.
tensorboard : If given, the results will be written to tensorboard.
threshold : The score threshold to use.
"""
self.generator = generator
self.threshold = threshold
self.tensorboard = tensorboard
super(CocoEval, self).__init__()
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
'AP @[ IoU=0.50 | area= all | maxDets=100 ]',
'AP @[ IoU=0.75 | area= all | maxDets=100 ]',
'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]',
'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]',
'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]',
'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]',
'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]',
'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]',
'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]']
coco_eval_stats = evaluate_coco(self.generator, self.model, self.threshold)
if coco_eval_stats is not None:
for index, result in enumerate(coco_eval_stats):
logs[coco_tag[index]] = result
if self.tensorboard:
import tensorflow as tf
writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
with writer.as_default():
for index, result in enumerate(coco_eval_stats):
tf.summary.scalar('{}. {}'.format(index + 1, coco_tag[index]), result, step=epoch)
writer.flush()
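# Typical usage (as in the training script above): wrap this callback in RedirectModel so
# that evaluation runs on the prediction model instead of the training model, e.g.
#   evaluation = RedirectModel(CocoEval(validation_generator), prediction_model)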
from tensorflow import keras
class RedirectModel(keras.callbacks.Callback):
"""Callback which wraps another callback, but executed on a different model.
```python
model = keras.models.load_model('model.h5')
model_checkpoint = ModelCheckpoint(filepath='snapshot.h5')
parallel_model = multi_gpu_model(model, gpus=2)
parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)])
```
Args
callback : callback to wrap.
model : model to use when executing callbacks.
"""
def __init__(self,
callback,
model):
super(RedirectModel, self).__init__()
self.callback = callback
self.redirect_model = model
def on_epoch_begin(self, epoch, logs=None):
self.callback.on_epoch_begin(epoch, logs=logs)
def on_epoch_end(self, epoch, logs=None):
self.callback.on_epoch_end(epoch, logs=logs)
def on_batch_begin(self, batch, logs=None):
self.callback.on_batch_begin(batch, logs=logs)
def on_batch_end(self, batch, logs=None):
self.callback.on_batch_end(batch, logs=logs)
def on_train_begin(self, logs=None):
# overwrite the model with our custom model
self.callback.set_model(self.redirect_model)
self.callback.on_train_begin(logs=logs)
def on_train_end(self, logs=None):
self.callback.on_train_end(logs=logs)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.eval import evaluate
class Evaluate(keras.callbacks.Callback):
""" Evaluation callback for arbitrary datasets.
"""
def __init__(
self,
generator,
iou_threshold=0.5,
score_threshold=0.05,
max_detections=100,
save_path=None,
tensorboard=None,
weighted_average=False,
verbose=1
):
""" Evaluate a given dataset using a given model at the end of every epoch during training.
# Arguments
generator : The generator that represents the dataset to evaluate.
iou_threshold : The threshold used to consider when a detection is positive or negative.
score_threshold : The score confidence threshold to use for detections.
max_detections : The maximum number of detections to use per image.
save_path : The path to save images with visualized detections to.
tensorboard : Instance of keras.callbacks.TensorBoard used to log the mAP value.
weighted_average : Compute the mAP using the weighted average of precisions among classes.
verbose : Set the verbosity level, by default this is set to 1.
"""
self.generator = generator
self.iou_threshold = iou_threshold
self.score_threshold = score_threshold
self.max_detections = max_detections
self.save_path = save_path
self.tensorboard = tensorboard
self.weighted_average = weighted_average
self.verbose = verbose
super(Evaluate, self).__init__()
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
# run evaluation
average_precisions, _ = evaluate(
self.generator,
self.model,
iou_threshold=self.iou_threshold,
score_threshold=self.score_threshold,
max_detections=self.max_detections,
save_path=self.save_path
)
# compute per class average precision
total_instances = []
precisions = []
for label, (average_precision, num_annotations) in average_precisions.items():
if self.verbose == 1:
print('{:.0f} instances of class'.format(num_annotations),
self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision))
total_instances.append(num_annotations)
precisions.append(average_precision)
if self.weighted_average:
self.mean_ap = sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances)
else:
self.mean_ap = sum(precisions) / sum(x > 0 for x in total_instances)
if self.tensorboard:
import tensorflow as tf
writer = tf.summary.create_file_writer(self.tensorboard.log_dir)
with writer.as_default():
tf.summary.scalar("mAP", self.mean_ap, step=epoch)
if self.verbose == 1:
for label, (average_precision, num_annotations) in average_precisions.items():
tf.summary.scalar("AP_" + self.generator.label_to_name(label), average_precision, step=epoch)
writer.flush()
logs['mAP'] = self.mean_ap
if self.verbose == 1:
print('mAP: {:.4f}'.format(self.mean_ap))
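# Illustrative wiring (not part of the original module): a minimal sketch of attaching
# this callback during training. `training_model`, `train_generator` and
# `validation_generator` are hypothetical stand-ins for a compiled RetinaNet training
# model and the dataset generators defined elsewhere in this repository.
#
#     evaluation = Evaluate(validation_generator, weighted_average=True, verbose=1)
#     training_model.fit(train_generator, epochs=50, callbacks=[evaluation])
#
# After each epoch the callback writes logs['mAP'], so later callbacks (for example a
# ModelCheckpoint monitoring 'mAP') can act on it.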
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import math
class PriorProbability(keras.initializers.Initializer):
""" Apply a prior probability to the weights.
"""
def __init__(self, probability=0.01):
self.probability = probability
def get_config(self):
return {
'probability': self.probability
}
def __call__(self, shape, dtype=None):
# set bias to -log((1 - p)/p) for foreground
result = keras.backend.ones(shape, dtype=dtype) * -math.log((1 - self.probability) / self.probability)
return result
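# Quick numeric check (illustrative, not part of the original module): with the prior
# set to p, the constant bias -log((1 - p)/p) makes an untrained sigmoid output roughly
# p, which is how RetinaNet keeps the focal loss stable at the start of training.
# Assumes TensorFlow 2 eager execution for keras.backend.get_value.
example_bias = PriorProbability(probability=0.01)((1,), dtype='float32')
example_prior = 1.0 / (1.0 + math.exp(-float(keras.backend.get_value(example_bias)[0])))
print(example_prior)  # approximately 0.01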
from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes # noqa: F401
from .filter_detections import FilterDetections # noqa: F401
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
from .. import backend
from ..utils import anchors as utils_anchors
import numpy as np
class Anchors(keras.layers.Layer):
""" Keras layer for generating achors for a given shape.
"""
def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
""" Initializer for an Anchors layer.
Args
size: The base size of the anchors to generate.
stride: The stride of the anchors to generate.
ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
"""
self.size = size
self.stride = stride
self.ratios = ratios
self.scales = scales
if ratios is None:
self.ratios = utils_anchors.AnchorParameters.default.ratios
elif isinstance(ratios, list):
self.ratios = np.array(ratios)
if scales is None:
self.scales = utils_anchors.AnchorParameters.default.scales
elif isinstance(scales, list):
self.scales = np.array(scales)
self.num_anchors = len(self.ratios) * len(self.scales)
self.anchors = utils_anchors.generate_anchors(
base_size=self.size,
ratios=self.ratios,
scales=self.scales,
).astype(np.float32)
super(Anchors, self).__init__(*args, **kwargs)
def call(self, inputs, **kwargs):
features = inputs
features_shape = keras.backend.shape(features)
# generate proposals from bbox deltas and shifted anchors
if keras.backend.image_data_format() == 'channels_first':
anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
else:
anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))
return anchors
def compute_output_shape(self, input_shape):
if None not in input_shape[1:]:
if keras.backend.image_data_format() == 'channels_first':
total = np.prod(input_shape[2:4]) * self.num_anchors
else:
total = np.prod(input_shape[1:3]) * self.num_anchors
return (input_shape[0], total, 4)
else:
return (input_shape[0], None, 4)
def get_config(self):
config = super(Anchors, self).get_config()
config.update({
'size' : self.size,
'stride' : self.stride,
'ratios' : self.ratios.tolist(),
'scales' : self.scales.tolist(),
})
return config
class UpsampleLike(keras.layers.Layer):
""" Keras layer for upsampling a Tensor to be the same shape as another Tensor.
"""
def call(self, inputs, **kwargs):
source, target = inputs
target_shape = keras.backend.shape(target)
if keras.backend.image_data_format() == 'channels_first':
source = tensorflow.transpose(source, (0, 2, 3, 1))
output = backend.resize_images(source, (target_shape[2], target_shape[3]), method='nearest')
output = tensorflow.transpose(output, (0, 3, 1, 2))
return output
else:
return backend.resize_images(source, (target_shape[1], target_shape[2]), method='nearest')
def compute_output_shape(self, input_shape):
if keras.backend.image_data_format() == 'channels_first':
return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4]
else:
return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],)
class RegressBoxes(keras.layers.Layer):
""" Keras layer for applying regression values to boxes.
"""
def __init__(self, mean=None, std=None, *args, **kwargs):
""" Initializer for the RegressBoxes layer.
Args
mean: The mean value of the regression values which was used for normalization.
std: The standard deviation of the regression values which was used for normalization.
"""
if mean is None:
mean = np.array([0, 0, 0, 0])
if std is None:
std = np.array([0.2, 0.2, 0.2, 0.2])
if isinstance(mean, (list, tuple)):
mean = np.array(mean)
elif not isinstance(mean, np.ndarray):
raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))
if isinstance(std, (list, tuple)):
std = np.array(std)
elif not isinstance(std, np.ndarray):
raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))
self.mean = mean
self.std = std
super(RegressBoxes, self).__init__(*args, **kwargs)
def call(self, inputs, **kwargs):
anchors, regression = inputs
return backend.bbox_transform_inv(anchors, regression, mean=self.mean, std=self.std)
def compute_output_shape(self, input_shape):
return input_shape[0]
def get_config(self):
config = super(RegressBoxes, self).get_config()
config.update({
'mean': self.mean.tolist(),
'std' : self.std.tolist(),
})
return config
class ClipBoxes(keras.layers.Layer):
""" Keras layer to clip box values to lie inside a given shape.
"""
def call(self, inputs, **kwargs):
image, boxes = inputs
shape = keras.backend.cast(keras.backend.shape(image), keras.backend.floatx())
if keras.backend.image_data_format() == 'channels_first':
_, _, height, width = tensorflow.unstack(shape, axis=0)
else:
_, height, width, _ = tensorflow.unstack(shape, axis=0)
x1, y1, x2, y2 = tensorflow.unstack(boxes, axis=-1)
x1 = tensorflow.clip_by_value(x1, 0, width - 1)
y1 = tensorflow.clip_by_value(y1, 0, height - 1)
x2 = tensorflow.clip_by_value(x2, 0, width - 1)
y2 = tensorflow.clip_by_value(y2, 0, height - 1)
return keras.backend.stack([x1, y1, x2, y2], axis=2)
def compute_output_shape(self, input_shape):
return input_shape[1]
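# Shape sanity check (illustrative, not part of the original module): generating anchors
# for an 8x8 feature map at stride 32. With the default 3 ratios x 3 scales this yields
# 8 * 8 * 9 = 576 anchors of 4 coordinates each. Assumes TensorFlow 2 eager execution.
example_features = keras.backend.constant(np.zeros((1, 8, 8, 256), dtype=np.float32))
example_anchors = Anchors(size=32, stride=32)(example_features)
print(example_anchors.shape)  # expected (1, 576, 4)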
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
from .. import backend
def filter_detections(
boxes,
classification,
other = [],
class_specific_filter = True,
nms = True,
score_threshold = 0.05,
max_detections = 300,
nms_threshold = 0.5
):
""" Filter detections using the boxes and classification values.
Args
boxes : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
classification : Tensor of shape (num_boxes, num_classes) containing the classification scores.
other : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores.
class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
nms : Flag to enable/disable non maximum suppression.
score_threshold : Threshold used to prefilter the boxes with.
max_detections : Maximum number of detections to keep.
nms_threshold : Threshold for the IoU value to determine when a box should be suppressed.
Returns
A list of [boxes, scores, labels, other[0], other[1], ...].
boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
scores is shaped (max_detections,) and contains the scores of the predicted class.
labels is shaped (max_detections,) and contains the predicted label.
other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
If there are fewer than max_detections detections, the tensors are padded with -1's.
"""
def _filter_detections(scores, labels):
# threshold based on score
indices = tensorflow.where(keras.backend.greater(scores, score_threshold))
if nms:
filtered_boxes = tensorflow.gather_nd(boxes, indices)
filtered_scores = keras.backend.gather(scores, indices)[:, 0]
# perform NMS
nms_indices = tensorflow.image.non_max_suppression(filtered_boxes, filtered_scores, max_output_size=max_detections, iou_threshold=nms_threshold)
# filter indices based on NMS
indices = keras.backend.gather(indices, nms_indices)
# add indices to list of all indices
labels = tensorflow.gather_nd(labels, indices)
indices = keras.backend.stack([indices[:, 0], labels], axis=1)
return indices
if class_specific_filter:
all_indices = []
# perform per class filtering
for c in range(int(classification.shape[1])):
scores = classification[:, c]
labels = c * tensorflow.ones((keras.backend.shape(scores)[0],), dtype='int64')
all_indices.append(_filter_detections(scores, labels))
# concatenate indices to single tensor
indices = keras.backend.concatenate(all_indices, axis=0)
else:
scores = keras.backend.max(classification, axis = 1)
labels = keras.backend.argmax(classification, axis = 1)
indices = _filter_detections(scores, labels)
# select top k
scores = tensorflow.gather_nd(classification, indices)
labels = indices[:, 1]
scores, top_indices = tensorflow.nn.top_k(scores, k=keras.backend.minimum(max_detections, keras.backend.shape(scores)[0]))
# filter input using the final set of indices
indices = keras.backend.gather(indices[:, 0], top_indices)
boxes = keras.backend.gather(boxes, indices)
labels = keras.backend.gather(labels, top_indices)
other_ = [keras.backend.gather(o, indices) for o in other]
# zero pad the outputs
pad_size = keras.backend.maximum(0, max_detections - keras.backend.shape(scores)[0])
boxes = tensorflow.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
scores = tensorflow.pad(scores, [[0, pad_size]], constant_values=-1)
labels = tensorflow.pad(labels, [[0, pad_size]], constant_values=-1)
labels = keras.backend.cast(labels, 'int32')
other_ = [tensorflow.pad(o, [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))], constant_values=-1) for o in other_]
# set shapes, since we know what they are
boxes.set_shape([max_detections, 4])
scores.set_shape([max_detections])
labels.set_shape([max_detections])
for o, s in zip(other_, [list(keras.backend.int_shape(o)) for o in other]):
o.set_shape([max_detections] + s[1:])
return [boxes, scores, labels] + other_
class FilterDetections(keras.layers.Layer):
""" Keras layer for filtering detections using score threshold and NMS.
"""
def __init__(
self,
nms = True,
class_specific_filter = True,
nms_threshold = 0.5,
score_threshold = 0.05,
max_detections = 300,
parallel_iterations = 32,
**kwargs
):
""" Filters detections using score threshold, NMS and selecting the top-k detections.
Args
nms : Flag to enable/disable NMS.
class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
nms_threshold : Threshold for the IoU value to determine when a box should be suppressed.
score_threshold : Threshold used to prefilter the boxes with.
max_detections : Maximum number of detections to keep.
parallel_iterations : Number of batch items to process in parallel.
"""
self.nms = nms
self.class_specific_filter = class_specific_filter
self.nms_threshold = nms_threshold
self.score_threshold = score_threshold
self.max_detections = max_detections
self.parallel_iterations = parallel_iterations
super(FilterDetections, self).__init__(**kwargs)
def call(self, inputs, **kwargs):
""" Constructs the NMS graph.
Args
inputs : List of [boxes, classification, other[0], other[1], ...] tensors.
"""
boxes = inputs[0]
classification = inputs[1]
other = inputs[2:]
# wrap nms with our parameters
def _filter_detections(args):
boxes = args[0]
classification = args[1]
other = args[2]
return filter_detections(
boxes,
classification,
other,
nms = self.nms,
class_specific_filter = self.class_specific_filter,
score_threshold = self.score_threshold,
max_detections = self.max_detections,
nms_threshold = self.nms_threshold,
)
# call filter_detections on each batch
dtypes = [keras.backend.floatx(), keras.backend.floatx(), 'int32'] + [o.dtype for o in other]
shapes = [(self.max_detections, 4), (self.max_detections,), (self.max_detections,)]
shapes.extend([(self.max_detections,) + o.shape[2:] for o in other])
outputs = backend.map_fn(
_filter_detections,
elems=[boxes, classification, other],
dtype=dtypes,
shapes=shapes,
parallel_iterations=self.parallel_iterations,
)
return outputs
def compute_output_shape(self, input_shape):
""" Computes the output shapes given the input shapes.
Args
input_shape : List of input shapes [boxes, classification, other[0], other[1], ...].
Returns
List of tuples representing the output shapes:
[filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, filtered_other[1].shape, ...]
"""
return [
(input_shape[0][0], self.max_detections, 4),
(input_shape[1][0], self.max_detections),
(input_shape[1][0], self.max_detections),
] + [
tuple([input_shape[i][0], self.max_detections] + list(input_shape[i][2:])) for i in range(2, len(input_shape))
]
def compute_mask(self, inputs, mask=None):
""" This is required in Keras when there is more than 1 output.
"""
return (len(inputs) + 1) * [None]
def get_config(self):
""" Gets the configuration of this layer.
Returns
Dictionary containing the parameters of this layer.
"""
config = super(FilterDetections, self).get_config()
config.update({
'nms' : self.nms,
'class_specific_filter' : self.class_specific_filter,
'nms_threshold' : self.nms_threshold,
'score_threshold' : self.score_threshold,
'max_detections' : self.max_detections,
'parallel_iterations' : self.parallel_iterations,
})
return config
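# Illustrative standalone wiring (not part of the original module): the layer is normally
# appended by retinanet_bbox, but it can also be exercised on its own. The class count of
# 80 below is an assumption (the COCO default used elsewhere in this repository).
example_boxes = keras.layers.Input(shape=(None, 4))
example_classification = keras.layers.Input(shape=(None, 80))
example_detections = FilterDetections(nms=True, max_detections=300)([example_boxes, example_classification])
example_model = keras.models.Model([example_boxes, example_classification], example_detections)
# example_model outputs [boxes, scores, labels], each padded or truncated to max_detections.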
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow
from tensorflow import keras
def focal(alpha=0.25, gamma=2.0, cutoff=0.5):
""" Create a functor for computing the focal loss.
Args
alpha: Scale the focal weight with alpha.
gamma: Take the power of the focal weight with gamma.
cutoff: Positive prediction cutoff for soft targets
Returns
A functor that computes the focal loss using the alpha and gamma.
"""
def _focal(y_true, y_pred):
""" Compute the focal loss given the target tensor and the predicted tensor.
As defined in https://arxiv.org/abs/1708.02002
Args
y_true: Tensor of target data from the generator with shape (B, N, num_classes).
y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).
Returns
The focal loss of y_pred w.r.t. y_true.
"""
labels = y_true[:, :, :-1]
anchor_state = y_true[:, :, -1] # -1 for ignore, 0 for background, 1 for object
classification = y_pred
# filter out "ignore" anchors
indices = tensorflow.where(keras.backend.not_equal(anchor_state, -1))
labels = tensorflow.gather_nd(labels, indices)
classification = tensorflow.gather_nd(classification, indices)
# compute the focal loss
alpha_factor = keras.backend.ones_like(labels) * alpha
alpha_factor = tensorflow.where(keras.backend.greater(labels, cutoff), alpha_factor, 1 - alpha_factor)
focal_weight = tensorflow.where(keras.backend.greater(labels, cutoff), 1 - classification, classification)
focal_weight = alpha_factor * focal_weight ** gamma
cls_loss = focal_weight * keras.backend.binary_crossentropy(labels, classification)
# compute the normalizer: the number of positive anchors
normalizer = tensorflow.where(keras.backend.equal(anchor_state, 1))
normalizer = keras.backend.cast(keras.backend.shape(normalizer)[0], keras.backend.floatx())
normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer)
return keras.backend.sum(cls_loss) / normalizer
return _focal
def smooth_l1(sigma=3.0):
""" Create a smooth L1 loss functor.
Args
sigma: This argument defines the point where the loss changes from L2 to L1.
Returns
A functor for computing the smooth L1 loss given target data and predicted data.
"""
sigma_squared = sigma ** 2
def _smooth_l1(y_true, y_pred):
""" Compute the smooth L1 loss of y_pred w.r.t. y_true.
Args
y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
y_pred: Tensor from the network of shape (B, N, 4).
Returns
The smooth L1 loss of y_pred w.r.t. y_true.
"""
# separate target and state
regression = y_pred
regression_target = y_true[:, :, :-1]
anchor_state = y_true[:, :, -1]
# filter out "ignore" anchors
indices = tensorflow.where(keras.backend.equal(anchor_state, 1))
regression = tensorflow.gather_nd(regression, indices)
regression_target = tensorflow.gather_nd(regression_target, indices)
# compute smooth L1 loss
# f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma
# |x| - 0.5 / sigma / sigma otherwise
regression_diff = regression - regression_target
regression_diff = keras.backend.abs(regression_diff)
regression_loss = tensorflow.where(
keras.backend.less(regression_diff, 1.0 / sigma_squared),
0.5 * sigma_squared * keras.backend.pow(regression_diff, 2),
regression_diff - 0.5 / sigma_squared
)
# compute the normalizer: the number of positive anchors
normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0])
normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx())
return keras.backend.sum(regression_loss) / normalizer
return _smooth_l1
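# Tiny numeric check (illustrative, not part of the original module): one positive anchor
# and one ignored anchor. The ignored row (anchor state -1) must not contribute, and the
# result is normalised by the number of positive anchors (here 1). In training these
# functors are typically passed to model.compile as the losses for the 'regression' and
# 'classification' outputs. Assumes TensorFlow 2 eager execution.
import numpy as np
example_y_true = np.array([[[1.0, 0.0, 1.0], [0.0, 0.0, -1.0]]], dtype=np.float32)  # (B=1, N=2, num_classes + anchor state)
example_y_pred = np.array([[[0.9, 0.1], [0.5, 0.5]]], dtype=np.float32)             # (B=1, N=2, num_classes)
print(float(focal()(keras.backend.constant(example_y_true), keras.backend.constant(example_y_pred))))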
from __future__ import print_function
import sys
class Backbone(object):
""" This class stores additional information on backbones.
"""
def __init__(self, backbone):
# a dictionary mapping custom layer names to the correct classes
from .. import layers
from .. import losses
from .. import initializers
self.custom_objects = {
'UpsampleLike' : layers.UpsampleLike,
'PriorProbability' : initializers.PriorProbability,
'RegressBoxes' : layers.RegressBoxes,
'FilterDetections' : layers.FilterDetections,
'Anchors' : layers.Anchors,
'ClipBoxes' : layers.ClipBoxes,
'_smooth_l1' : losses.smooth_l1(),
'_focal' : losses.focal(),
}
self.backbone = backbone
self.validate()
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
raise NotImplementedError('retinanet method not implemented.')
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
raise NotImplementedError('download_imagenet method not implemented.')
def validate(self):
""" Checks whether the backbone string is correct.
"""
raise NotImplementedError('validate method not implemented.')
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
Having this function in Backbone allows other backbones to define a specific preprocessing step.
"""
raise NotImplementedError('preprocess_image method not implemented.')
def backbone(backbone_name):
""" Returns a backbone object for the given backbone.
"""
if 'densenet' in backbone_name:
from .densenet import DenseNetBackbone as b
elif 'seresnext' in backbone_name or 'seresnet' in backbone_name or 'senet' in backbone_name:
from .senet import SeBackbone as b
elif 'resnet' in backbone_name:
from .resnet import ResNetBackbone as b
elif 'mobilenet' in backbone_name:
from .mobilenet import MobileNetBackbone as b
elif 'vgg' in backbone_name:
from .vgg import VGGBackbone as b
elif 'EfficientNet' in backbone_name:
from .effnet import EfficientNetBackbone as b
else:
raise NotImplementedError('Backbone class for \'{}\' not implemented.'.format(backbone_name))
return b(backbone_name)
def load_model(filepath, backbone_name='resnet50'):
""" Loads a retinanet model using the correct custom objects.
Args
filepath: one of the following:
- string, path to the saved model, or
- h5py.File object from which to load the model
backbone_name : Backbone with which the model was trained.
Returns
A keras.models.Model object.
Raises
ImportError: if h5py is not available.
ValueError: In case of an invalid savefile.
"""
from tensorflow import keras
return keras.models.load_model(filepath, custom_objects=backbone(backbone_name).custom_objects)
def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None, **kwargs):
""" Converts a training model to an inference model.
Args
model : A retinanet training model.
nms : Boolean, whether to add NMS filtering to the converted model.
class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
anchor_params : Anchor parameters object. If omitted, default values are used.
**kwargs : Inference and minimal retinanet model settings.
Returns
A keras.models.Model object.
Raises
ImportError: if h5py is not available.
ValueError: In case of an invalid savefile.
"""
from .retinanet import retinanet_bbox
return retinanet_bbox(model=model, nms=nms, class_specific_filter=class_specific_filter, anchor_params=anchor_params, **kwargs)
def assert_training_model(model):
""" Assert that the model is a training model.
"""
assert(all(output in model.output_names for output in ['regression', 'classification'])), \
"Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format(model.output_names)
def check_training_model(model):
""" Check that model is a training model and exit otherwise.
"""
try:
assert_training_model(model)
except AssertionError as e:
print(e, file=sys.stderr)
sys.exit(1)
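# Illustrative usage (not part of the original module): loading a training snapshot and
# converting it for inference. The path below is a hypothetical placeholder.
#
#     training_model = load_model('/path/to/snapshot.h5', backbone_name='resnet50')
#     check_training_model(training_model)
#     inference_model = convert_model(training_model)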
"""
Copyright 2018 vidosits (https://github.com/vidosits/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
allowed_backbones = {
'densenet121': ([6, 12, 24, 16], keras.applications.densenet.DenseNet121),
'densenet169': ([6, 12, 32, 32], keras.applications.densenet.DenseNet169),
'densenet201': ([6, 12, 48, 32], keras.applications.densenet.DenseNet201),
}
class DenseNetBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return densenet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Download pre-trained weights for the specified backbone name.
This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop
where backbone is the densenet + number of layers (e.g. densenet121).
For more info check the explanation from the keras densenet script itself:
https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py
"""
origin = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/'
file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'
# load weights
if keras.backend.image_data_format() == 'channels_first':
raise ValueError('Weights for "channels_first" format are not available.')
weights_url = origin + file_name.format(self.backbone)
return keras.utils.get_file(file_name.format(self.backbone), weights_url, cache_subdir='models')
def validate(self):
""" Checks whether the backbone string is correct.
"""
backbone = self.backbone.split('_')[0]
if backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones.keys()))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='tf')
def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a densenet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a DenseNet backbone.
"""
# choose default input
if inputs is None:
inputs = keras.layers.Input((None, None, 3))
blocks, creator = allowed_backbones[backbone]
model = creator(input_tensor=inputs, include_top=False, pooling=None, weights=None)
# get last conv layer from the end of each dense block
layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format(idx + 2, block_num)).output for idx, block_num in enumerate(blocks)]
# create the densenet backbone
# layer_outputs contains 4 layers
model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
# invoke modifier if given
if modifier:
model = modifier(model)
# create the full model
backbone_layers = {
'C2': model.outputs[0],
'C3': model.outputs[1],
'C4': model.outputs[2],
'C5': model.outputs[3]
}
model = retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
return model
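# Illustrative construction (not part of the original module): a training graph with a
# randomly initialised DenseNet-121 backbone. The class count of 80 is an assumption
# (the COCO default); building the graph is left commented out because it instantiates
# the full backbone.
#
#     model = densenet_retinanet(num_classes=80, backbone='densenet121')
#     model.summary()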
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
import efficientnet.keras as efn
class EfficientNetBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def __init__(self, backbone):
super(EfficientNetBackbone, self).__init__(backbone)
self.preprocess_image_func = None
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return effnet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
from efficientnet.weights import IMAGENET_WEIGHTS_PATH
from efficientnet.weights import IMAGENET_WEIGHTS_HASHES
model_name = 'efficientnet-b' + self.backbone[-1]
file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5'
file_hash = IMAGENET_WEIGHTS_HASHES[model_name][1]
weights_path = keras.utils.get_file(file_name, IMAGENET_WEIGHTS_PATH + file_name, cache_subdir='models', file_hash=file_hash)
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['EfficientNetB0', 'EfficientNetB1', 'EfficientNetB2', 'EfficientNetB3', 'EfficientNetB4',
'EfficientNetB5', 'EfficientNetB6', 'EfficientNetB7']
backbone = self.backbone.split('_')[0]
if backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return efn.preprocess_input(inputs)
def effnet_retinanet(num_classes, backbone='EfficientNetB0', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of 'EfficientNetB0' through 'EfficientNetB7').
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with an EfficientNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == 'channels_first':
inputs = keras.layers.Input(shape=(3, None, None))
else:
# inputs = keras.layers.Input(shape=(224, 224, 3))
inputs = keras.layers.Input(shape=(None, None, 3))
# get last conv layer from the end of each block [28x28, 14x14, 7x7]
if backbone == 'EfficientNetB0':
model = efn.EfficientNetB0(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB1':
model = efn.EfficientNetB1(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB2':
model = efn.EfficientNetB2(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB3':
model = efn.EfficientNetB3(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB4':
model = efn.EfficientNetB4(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB5':
model = efn.EfficientNetB5(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB6':
model = efn.EfficientNetB6(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'EfficientNetB7':
model = efn.EfficientNetB7(input_tensor=inputs, include_top=False, weights=None)
else:
raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
layer_outputs = ['block4a_expand_activation', 'block6a_expand_activation', 'top_activation']
layer_outputs = [
model.get_layer(name=layer_outputs[0]).output, # 28x28
model.get_layer(name=layer_outputs[1]).output, # 14x14
model.get_layer(name=layer_outputs[2]).output, # 7x7
]
# create the EfficientNet backbone
model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
# invoke modifier if given
if modifier:
model = modifier(model)
# C2 not provided
backbone_layers = {
'C3': model.outputs[0],
'C4': model.outputs[1],
'C5': model.outputs[2]
}
# create the full model
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
def EfficientNetB0_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB0', inputs=inputs, **kwargs)
def EfficientNetB1_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB1', inputs=inputs, **kwargs)
def EfficientNetB2_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB2', inputs=inputs, **kwargs)
def EfficientNetB3_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB3', inputs=inputs, **kwargs)
def EfficientNetB4_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB4', inputs=inputs, **kwargs)
def EfficientNetB5_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB5', inputs=inputs, **kwargs)
def EfficientNetB6_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB6', inputs=inputs, **kwargs)
def EfficientNetB7_retinanet(num_classes, inputs=None, **kwargs):
return effnet_retinanet(num_classes=num_classes, backbone='EfficientNetB7', inputs=inputs, **kwargs)
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from ..utils.image import preprocess_image
from . import retinanet
from . import Backbone
class MobileNetBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
allowed_backbones = ['mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224']
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Download pre-trained weights for the specified backbone name.
This name is in the format mobilenet{rows}_{alpha} where rows is the
imagenet shape dimension and 'alpha' controls the width of the network.
For more info check the explanation from the keras mobilenet script itself.
"""
alpha = float(self.backbone.split('_')[1])
rows = int(self.backbone.split('_')[0].replace('mobilenet', ''))
# load weights
if keras.backend.image_data_format() == 'channels_first':
raise ValueError('Weights for "channels_last" format '
'are not available.')
if alpha == 1.0:
alpha_text = '1_0'
elif alpha == 0.75:
alpha_text = '7_5'
elif alpha == 0.50:
alpha_text = '5_0'
else:
alpha_text = '2_5'
model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows)
weights_url = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/' + model_name
weights_path = keras.utils.get_file(model_name, weights_url, cache_subdir='models')
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
backbone = self.backbone.split('_')[0]
if backbone not in MobileNetBackbone.allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, MobileNetBackbone.allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='tf')
def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a mobilenet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a MobileNet backbone.
"""
alpha = float(backbone.split('_')[1])
# choose default input
if inputs is None:
inputs = keras.layers.Input((None, None, 3))
backbone = keras.applications.mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None)
# create the full model
layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu']
layer_outputs = [backbone.get_layer(name).output for name in layer_names]
backbone = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=backbone.name)
# invoke modifier if given
if modifier:
backbone = modifier(backbone)
# C2 not provided
backbone_layers = {
'C3': backbone.outputs[0],
'C4': backbone.outputs[1],
'C5': backbone.outputs[2]
}
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_resnet
import keras_resnet.models
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
class ResNetBackbone(Backbone):
"""Describes backbone information and provides utility functions."""
def __init__(self, backbone):
super(ResNetBackbone, self).__init__(backbone)
self.custom_objects.update(keras_resnet.custom_objects)
def retinanet(self, *args, **kwargs):
"""Returns a retinanet model using the correct backbone."""
return resnet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
"""Downloads ImageNet weights and returns path to weights file."""
resnet_filename = "ResNet-{}-model.keras.h5"
resnet_resource = (
"https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}".format(
resnet_filename
)
)
depth = int(self.backbone.replace("resnet", ""))
filename = resnet_filename.format(depth)
resource = resnet_resource.format(depth)
if depth == 50:
checksum = "3e9f4e4f77bbe2c9bec13b53ee1c2319"
elif depth == 101:
checksum = "05dc86924389e5b401a9ea0348a3213c"
elif depth == 152:
checksum = "6ee11ef2b135592f8031058820bb9e71"
return keras.utils.get_file(
filename, resource, cache_subdir="models", md5_hash=checksum
)
def validate(self):
"""Checks whether the backbone string is correct."""
allowed_backbones = ["resnet50", "resnet101", "resnet152"]
backbone = self.backbone.split("_")[0]
if backbone not in allowed_backbones:
raise ValueError(
"Backbone ('{}') not in allowed backbones ({}).".format(
backbone, allowed_backbones
)
)
def preprocess_image(self, inputs):
"""Takes as input an image and prepares it for being passed through the network."""
return preprocess_image(inputs, mode="caffe")
def resnet_retinanet(
num_classes, backbone="resnet50", inputs=None, modifier=None, **kwargs
):
"""Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a ResNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(3, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, 3))
# create the resnet backbone
if backbone == "resnet50":
resnet = keras_resnet.models.ResNet50(inputs, include_top=False, freeze_bn=True)
elif backbone == "resnet101":
resnet = keras_resnet.models.ResNet101(
inputs, include_top=False, freeze_bn=True
)
elif backbone == "resnet152":
resnet = keras_resnet.models.ResNet152(
inputs, include_top=False, freeze_bn=True
)
else:
raise ValueError("Backbone ('{}') is invalid.".format(backbone))
# invoke modifier if given
if modifier:
resnet = modifier(resnet)
# create the full model
# resnet.outputs contains 4 layers
backbone_layers = {
"C2": resnet.outputs[0],
"C3": resnet.outputs[1],
"C4": resnet.outputs[2],
"C5": resnet.outputs[3],
}
return retinanet.retinanet(
inputs=inputs,
num_classes=num_classes,
backbone_layers=backbone_layers,
**kwargs
)
def resnet50_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet50", inputs=inputs, **kwargs
)
def resnet101_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet101", inputs=inputs, **kwargs
)
def resnet152_retinanet(num_classes, inputs=None, **kwargs):
return resnet_retinanet(
num_classes=num_classes, backbone="resnet152", inputs=inputs, **kwargs
)
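# Illustrative use of `modifier` (not part of the original module): freezing every
# backbone layer before the FPN and detection submodels are attached, as the docstring
# above suggests. The class count of 80 is an assumption; building the graph is left
# commented out because it instantiates a full ResNet-50.
#
#     def freeze(model):
#         for layer in model.layers:
#             layer.trainable = False
#         return model
#
#     model = resnet50_retinanet(num_classes=80, modifier=freeze)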
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from .. import initializers
from .. import layers
from ..utils.anchors import AnchorParameters
from . import assert_training_model
def default_classification_model(
num_classes,
num_anchors,
pyramid_feature_size=256,
prior_probability=0.01,
classification_feature_size=256,
name="classification_submodel",
):
"""Creates the default classification submodel.
Args
num_classes : Number of classes to predict a score for at each feature level.
num_anchors : Number of anchors to predict classification scores for at each feature level.
prior_probability : Prior probability used to initialise the bias of the final classification layer (see initializers.PriorProbability).
pyramid_feature_size : The number of filters to expect from the feature pyramid levels.
classification_feature_size : The number of filters to use in the layers in the classification submodel.
name : The name of the submodel.
Returns
A keras.models.Model that predicts classes for each anchor.
"""
options = {
"kernel_size": 3,
"strides": 1,
"padding": "same",
}
# set input
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
# four 3x3 conv layers
for i in range(4):
# output of each conv layer in the stack
outputs = keras.layers.Conv2D(
filters=classification_feature_size,
activation="relu",
name="pyramid_classification_{}".format(i),
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
), # initializer that samples the kernel weights from a normal distribution
bias_initializer="zeros",
**options
)(outputs)
# the final layer is a separate conv layer with num_classes * num_anchors filters
outputs = keras.layers.Conv2D(
filters=num_classes * num_anchors,
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
),
bias_initializer=initializers.PriorProbability(probability=prior_probability),
name="pyramid_classification",
**options
)(outputs)
# reshape output and apply sigmoid
if keras.backend.image_data_format() == "channels_first":
outputs = keras.layers.Permute(
(2, 3, 1), name="pyramid_classification_permute"
)(outputs)
# reshape: collapse the spatial grid into a single anchor dimension
outputs = keras.layers.Reshape(
(-1, num_classes), name="pyramid_classification_reshape"
)(outputs)
# output layer activation : sigmoid
outputs = keras.layers.Activation("sigmoid", name="pyramid_classification_sigmoid")(
outputs
)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
def default_regression_model(
num_values,
num_anchors,
pyramid_feature_size=256,
regression_feature_size=256,
name="regression_submodel",
):
"""Creates the default regression submodel.
Args
num_values : Number of values to regress.
num_anchors : Number of anchors to regress for each feature level.
pyramid_feature_size : The number of filters to expect from the feature pyramid levels.
regression_feature_size : The number of filters to use in the layers in the regression submodel.
name : The name of the submodel.
Returns
A keras.models.Model that predicts regression values for each anchor.
"""
# All new conv layers except the final one in the
# RetinaNet (classification) subnets are initialized
# with bias b = 0 and a Gaussian weight fill with stddev = 0.01.
options = {
"kernel_size": 3,
"strides": 1,
"padding": "same",
"kernel_initializer": keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
),
"bias_initializer": "zeros",
}
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
for i in range(4):
outputs = keras.layers.Conv2D(
filters=regression_feature_size,
activation="relu",
name="pyramid_regression_{}".format(i),
**options
)(outputs)
outputs = keras.layers.Conv2D(
num_anchors * num_values, name="pyramid_regression", **options
)(outputs)
if keras.backend.image_data_format() == "channels_first":
outputs = keras.layers.Permute((2, 3, 1), name="pyramid_regression_permute")(
outputs
)
outputs = keras.layers.Reshape((-1, num_values), name="pyramid_regression_reshape")(
outputs
)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
def __create_pyramid_features(backbone_layers, pyramid_levels, feature_size=256):
"""Creates the FPN layers on top of the backbone features.
Args
backbone_layers: a dictionary containing feature stages C3, C4, C5 from the backbone. Also contains C2 if provided.
pyramid_levels: Pyramid levels in use.
feature_size : The feature size to use for the resulting feature levels.
Returns
output_layers : A dict of feature levels. P3, P4, P5 are always included. P2, P6, P7 are included if in use.
"""
output_layers = {}
# upsample C5 to get P5 from the FPN paper
P5 = keras.layers.Conv2D(
feature_size, kernel_size=1, strides=1, padding="same", name="C5_reduced"
)(backbone_layers["C5"])
P5_upsampled = layers.UpsampleLike(name="P5_upsampled")([P5, backbone_layers["C4"]])
P5 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=1, padding="same", name="P5"
)(P5)
output_layers["P5"] = P5
# add P5 elementwise to C4
P4 = keras.layers.Conv2D(
feature_size, kernel_size=1, strides=1, padding="same", name="C4_reduced"
)(backbone_layers["C4"])
P4 = keras.layers.Add(name="P4_merged")([P5_upsampled, P4])
P4_upsampled = layers.UpsampleLike(name="P4_upsampled")([P4, backbone_layers["C3"]])
P4 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=1, padding="same", name="P4"
)(P4)
output_layers["P4"] = P4
# add P4 elementwise to C3
P3 = keras.layers.Conv2D(
feature_size, kernel_size=1, strides=1, padding="same", name="C3_reduced"
)(backbone_layers["C3"])
P3 = keras.layers.Add(name="P3_merged")([P4_upsampled, P3])
if "C2" in backbone_layers and 2 in pyramid_levels:
P3_upsampled = layers.UpsampleLike(name="P3_upsampled")(
[P3, backbone_layers["C2"]]
)
P3 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=1, padding="same", name="P3"
)(P3)
output_layers["P3"] = P3
if "C2" in backbone_layers and 2 in pyramid_levels:
P2 = keras.layers.Conv2D(
feature_size, kernel_size=1, strides=1, padding="same", name="C2_reduced"
)(backbone_layers["C2"])
P2 = keras.layers.Add(name="P2_merged")([P3_upsampled, P2])
P2 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=1, padding="same", name="P2"
)(P2)
output_layers["P2"] = P2
# "P6 is obtained via a 3x3 stride-2 conv on C5"
if 6 in pyramid_levels:
P6 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=2, padding="same", name="P6"
)(backbone_layers["C5"])
output_layers["P6"] = P6
# "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
if 7 in pyramid_levels:
if 6 not in pyramid_levels:
raise ValueError("P6 is required to use P7")
P7 = keras.layers.Activation("relu", name="C6_relu")(P6)
P7 = keras.layers.Conv2D(
feature_size, kernel_size=3, strides=2, padding="same", name="P7"
)(P7)
output_layers["P7"] = P7
return output_layers
def default_submodels(num_classes, num_anchors):
"""Create a list of default submodels used for object detection.
The default submodels contains a regression submodel and a classification submodel.
Args
num_classes : Number of classes to use.
num_anchors : Number of base anchors.
Returns
A list of tuple, where the first element is the name of the submodel and the second element is the submodel itself.
"""
return [
("regression", default_regression_model(4, num_anchors)),
("classification", default_classification_model(num_classes, num_anchors)),
]
def __build_model_pyramid(name, model, features):
"""Applies a single submodel to each FPN level.
Args
name : Name of the submodel.
model : The submodel to evaluate.
features : The FPN features.
Returns
A tensor containing the response from the submodel on the FPN features.
"""
return keras.layers.Concatenate(axis=1, name=name)([model(f) for f in features])
def __build_pyramid(models, features):
"""Applies all submodels to each FPN level.
Args
models : List of submodels to run on each pyramid level (by default only regression, classification).
features : The FPN features.
Returns
A list of tensors, one for each submodel.
"""
return [__build_model_pyramid(n, m, features) for n, m in models]
def __build_anchors(anchor_parameters, features):
"""Builds anchors for the shape of the features from FPN.
Args
anchor_parameters : Parameters that determine how anchors are generated.
features : The FPN features.
Returns
A tensor containing the anchors for the FPN features.
The shape is:
```
(batch_size, num_anchors, 4)
```
"""
anchors = [
layers.Anchors(
size=anchor_parameters.sizes[i],
stride=anchor_parameters.strides[i],
ratios=anchor_parameters.ratios,
scales=anchor_parameters.scales,
name="anchors_{}".format(i),
)(f)
for i, f in enumerate(features)
]
return keras.layers.Concatenate(axis=1, name="anchors")(anchors)
def retinanet(
inputs,
backbone_layers,
num_classes,
num_anchors=None,
create_pyramid_features=__create_pyramid_features,
pyramid_levels=None,
submodels=None,
name="retinanet",
):
"""Construct a RetinaNet model on top of a backbone.
This model is the minimum model necessary for training (with the unfortunate exception of anchors as output).
Args
inputs : keras.layers.Input (or list of) for the input to the model.
num_classes : Number of classes to classify.
num_anchors : Number of base anchors.
create_pyramid_features : Functor for creating pyramid features given the features C3, C4, C5, and possibly C2 from the backbone.
pyramid_levels : pyramid levels to use.
submodels : Submodels to run on each feature map (default is regression and classification submodels).
name : Name of the model.
Returns
A keras.models.Model which takes an image as input and outputs generated anchors and the result from each submodel on every pyramid level.
The order of the outputs is as defined in submodels:
```
[
regression, classification, other[0], other[1], ...
]
```
"""
if num_anchors is None:
num_anchors = AnchorParameters.default.num_anchors()
if submodels is None:
submodels = default_submodels(num_classes, num_anchors)
if pyramid_levels is None:
pyramid_levels = [3, 4, 5, 6, 7]
if 2 in pyramid_levels and "C2" not in backbone_layers:
raise ValueError("C2 not provided by backbone model. Cannot create P2 layers.")
if 3 not in pyramid_levels or 4 not in pyramid_levels or 5 not in pyramid_levels:
raise ValueError("pyramid levels 3, 4, and 5 required for functionality")
# compute pyramid features as per https://arxiv.org/abs/1708.02002
features = create_pyramid_features(backbone_layers, pyramid_levels)
feature_list = [features["P{}".format(p)] for p in pyramid_levels]
# for all pyramid levels, run available submodels
pyramids = __build_pyramid(submodels, feature_list)
return keras.models.Model(inputs=inputs, outputs=pyramids, name=name)
def retinanet_bbox(
model=None,
nms=True,
class_specific_filter=True,
name="retinanet-bbox",
anchor_params=None,
pyramid_levels=None,
nms_threshold=0.5,
score_threshold=0.05,
max_detections=300,
parallel_iterations=32,
**kwargs
):
"""Construct a RetinaNet model on top of a backbone and adds convenience functions to output boxes directly.
This model uses the minimum retinanet model and appends a few layers to compute boxes within the graph.
These layers include applying the regression values to the anchors and performing NMS.
Args
model : RetinaNet model to append bbox layers to. If None, it will create a RetinaNet model using **kwargs.
nms : Whether to use non-maximum suppression for the filtering step.
class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only.
name : Name of the model.
anchor_params : Struct containing anchor parameters. If None, default values are used.
pyramid_levels : pyramid levels to use.
nms_threshold : Threshold for the IoU value to determine when a box should be suppressed.
score_threshold : Threshold used to prefilter the boxes with.
max_detections : Maximum number of detections to keep.
parallel_iterations : Number of batch items to process in parallel.
**kwargs : Additional kwargs to pass to the minimal retinanet model.
Returns
A keras.models.Model which takes an image as input and outputs the detections on the image.
The order is defined as follows:
```
[
boxes, scores, labels, other[0], other[1], ...
]
```
"""
# if no anchor parameters are passed, use default values
if anchor_params is None:
anchor_params = AnchorParameters.default
# create RetinaNet model
if model is None:
model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs)
else:
assert_training_model(model)
if pyramid_levels is None:
pyramid_levels = [3, 4, 5, 6, 7]
assert len(pyramid_levels) == len(
anchor_params.sizes
), "number of pyramid levels {} should match number of anchor parameter sizes {}".format(
len(pyramid_levels), len(anchor_params.sizes)
)
pyramid_layer_names = ["P{}".format(p) for p in pyramid_levels]
# compute the anchors
features = [model.get_layer(p_name).output for p_name in pyramid_layer_names]
anchors = __build_anchors(anchor_params, features)
# we expect the anchors, regression and classification values as first output
regression = model.outputs[0]
classification = model.outputs[1]
# "other" can be any additional output from custom submodels, by default this will be []
other = model.outputs[2:]
# apply predicted regression to anchors
boxes = layers.RegressBoxes(name="boxes")([anchors, regression])
boxes = layers.ClipBoxes(name="clipped_boxes")([model.inputs[0], boxes])
# filter detections (apply NMS / score threshold / select top-k)
detections = layers.FilterDetections(
nms=nms,
class_specific_filter=class_specific_filter,
name="filtered_detections",
nms_threshold=nms_threshold,
score_threshold=score_threshold,
max_detections=max_detections,
parallel_iterations=parallel_iterations,
)([boxes, classification] + other)
# construct the model
return keras.models.Model(inputs=model.inputs, outputs=detections, name=name)
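# Illustrative end-to-end flow (not part of the original module): a backbone-specific
# constructor (for example resnet50_retinanet in models/resnet.py) builds the training
# graph, and retinanet_bbox wraps it for inference. Names and the class count of 80 are
# assumptions.
#
#     training_model = resnet50_retinanet(num_classes=80)
#     inference_model = retinanet_bbox(model=training_model)
#     boxes, scores, labels = inference_model.predict(batch_of_preprocessed_images)[:3]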
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from classification_models.keras import Classifiers
class SeBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def __init__(self, backbone):
super(SeBackbone, self).__init__(backbone)
_, self.preprocess_image_func = Classifiers.get(self.backbone)
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return senet_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
"""
from classification_models.weights import WEIGHTS_COLLECTION
weights_path = None
for el in WEIGHTS_COLLECTION:
if el['model'] == self.backbone and not el['include_top']:
weights_path = keras.utils.get_file(el['name'], el['url'], cache_subdir='models', file_hash=el['md5'])
if weights_path is None:
raise ValueError('Unable to find imagenet weights for backbone {}!'.format(self.backbone))
return weights_path
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['seresnet18', 'seresnet34', 'seresnet50', 'seresnet101', 'seresnet152',
'seresnext50', 'seresnext101', 'senet154']
backbone = self.backbone.split('_')[0]
if backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return self.preprocess_image_func(inputs)
def senet_retinanet(num_classes, backbone='seresnext50', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a resnet backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a ResNet backbone.
"""
# choose default input
if inputs is None:
if keras.backend.image_data_format() == 'channels_first':
inputs = keras.layers.Input(shape=(3, None, None))
else:
# inputs = keras.layers.Input(shape=(224, 224, 3))
inputs = keras.layers.Input(shape=(None, None, 3))
classifier, _ = Classifiers.get(backbone)
model = classifier(input_tensor=inputs, include_top=False, weights=None)
# get last conv layer from the end of each block [28x28, 14x14, 7x7]
if backbone == 'seresnet18' or backbone == 'seresnet34':
layer_outputs = ['stage3_unit1_relu1', 'stage4_unit1_relu1', 'relu1']
elif backbone == 'seresnet50':
layer_outputs = ['activation_36', 'activation_66', 'activation_81']
elif backbone == 'seresnet101':
layer_outputs = ['activation_36', 'activation_151', 'activation_166']
elif backbone == 'seresnet152':
layer_outputs = ['activation_56', 'activation_236', 'activation_251']
elif backbone == 'seresnext50':
layer_outputs = ['activation_37', 'activation_67', 'activation_81']
elif backbone == 'seresnext101':
layer_outputs = ['activation_37', 'activation_152', 'activation_166']
elif backbone == 'senet154':
layer_outputs = ['activation_59', 'activation_239', 'activation_253']
else:
raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
layer_outputs = [
model.get_layer(name=layer_outputs[0]).output, # 28x28
model.get_layer(name=layer_outputs[1]).output, # 14x14
model.get_layer(name=layer_outputs[2]).output, # 7x7
]
# create the senet backbone model
model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=model.name)
# invoke modifier if given
if modifier:
model = modifier(model)
# C2 not provided
backbone_layers = {
'C3': model.outputs[0],
'C4': model.outputs[1],
'C5': model.outputs[2]
}
# create the full model
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
def seresnet18_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet18', inputs=inputs, **kwargs)
def seresnet34_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet34', inputs=inputs, **kwargs)
def seresnet50_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet50', inputs=inputs, **kwargs)
def seresnet101_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet101', inputs=inputs, **kwargs)
def seresnet152_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnet152', inputs=inputs, **kwargs)
def seresnext50_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnext50', inputs=inputs, **kwargs)
def seresnext101_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='seresnext101', inputs=inputs, **kwargs)
def senet154_retinanet(num_classes, inputs=None, **kwargs):
return senet_retinanet(num_classes=num_classes, backbone='senet154', inputs=inputs, **kwargs)
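# --- Illustrative usage sketch (the num_classes value is an assumption) ---
def _example_senet_retinanet(num_classes=80):
    """ Build a training model through the SeBackbone wrapper defined above. """
    backbone = SeBackbone('seresnext50')
    return backbone.retinanet(num_classes)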
from tensorflow import keras
from .. import initializers
from .. import layers
from ..utils.anchors import AnchorParameters
from . import assert_training_model
from . import retinanet
def custom_classification_model(
num_classes,
num_anchors,
pyramid_feature_size=256,
prior_probability=0.01,
classification_feature_size=256,
name='classification_submodel'
):
# conv options shared by the layers below; these mirror the defaults used by the
# standard classification submodel (3x3 kernels, stride 1, 'same' padding)
options = {
'kernel_size': 3,
'strides': 1,
'padding': 'same',
}
# set input
if keras.backend.image_data_format() == "channels_first":
inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
else:
inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
outputs = inputs
# 3 convolutional layers
for i in range(3):
# output of each layer
outputs = keras.layers.Conv2D(
filters=classification_feature_size,
activation="relu",
name="pyramid_classification_{}".format(i),
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
), # initializer that draws weights from a normal distribution
bias_initializer="zeros",
**options
)(outputs)
# the last layer is a separate conv layer with num_classes * num_anchors filters
outputs = keras.layers.Conv2D(
filters=num_classes * num_anchors,
kernel_initializer=keras.initializers.RandomNormal(
mean=0.0, stddev=0.01, seed=None
),
bias_initializer=initializers.PriorProbability(probability=prior_probability),
name="pyramid_classification",
**options
)(outputs)
# reshape output and apply sigmoid
if keras.backend.image_data_format() == "channels_first":
outputs = keras.layers.Permute(
(2, 3, 1), name="pyramid_classification_permute"
)(outputs)
# reshape: flatten the spatial dimensions to (-1, num_classes)
outputs = keras.layers.Reshape(
(-1, num_classes), name="pyramid_classification_reshape"
)(outputs)
# output layer activation : sigmoid
outputs = keras.layers.Activation("sigmoid", name="pyramid_classification_sigmoid")(
outputs
)
return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
def custom_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
if num_anchors is None:
num_anchors = AnchorParameters.default.num_anchors()
model = retinanet.default_regression_model(num_values, num_anchors, pyramid_feature_size, regression_feature_size, name)
return model
def custom_submodels(num_classes, num_anchors):
if num_anchors is None:
num_anchors = AnchorParameters.default.num_anchors()
return [
("regression", custom_regression_model(4, num_anchors)),
("classification", custom_classification_model(num_classes, num_anchors)),
]
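# --- Illustrative sketch: plugging the custom heads above into a RetinaNet.
# Assumes retinanet.retinanet accepts a `submodels` argument, as the default
# submodels in this code base do. ---
def _example_custom_heads(inputs, backbone_layers, num_classes):
    submodels = custom_submodels(num_classes, num_anchors=None)
    return retinanet.retinanet(
        inputs=inputs,
        backbone_layers=backbone_layers,
        num_classes=num_classes,
        submodels=submodels,
    )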
"""
Copyright 2017-2018 cgratie (https://github.com/cgratie/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
from . import retinanet
from . import Backbone
from ..utils.image import preprocess_image
class VGGBackbone(Backbone):
""" Describes backbone information and provides utility functions.
"""
def retinanet(self, *args, **kwargs):
""" Returns a retinanet model using the correct backbone.
"""
return vgg_retinanet(*args, backbone=self.backbone, **kwargs)
def download_imagenet(self):
""" Downloads ImageNet weights and returns path to weights file.
Weights can be downloaded at https://github.com/fizyr/keras-models/releases .
"""
if self.backbone == 'vgg16':
resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP
checksum = '6d6bbae143d832006294945121d1f1fc'
elif self.backbone == 'vgg19':
resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP
checksum = '253f8cb515780f3b799900260a226db6'
else:
raise ValueError("Backbone '{}' not recognized.".format(self.backbone))
return keras.utils.get_file(
'{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format(self.backbone),
resource,
cache_subdir='models',
file_hash=checksum
)
def validate(self):
""" Checks whether the backbone string is correct.
"""
allowed_backbones = ['vgg16', 'vgg19']
if self.backbone not in allowed_backbones:
raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format(self.backbone, allowed_backbones))
def preprocess_image(self, inputs):
""" Takes as input an image and prepares it for being passed through the network.
"""
return preprocess_image(inputs, mode='caffe')
def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs):
""" Constructs a retinanet model using a vgg backbone.
Args
num_classes: Number of classes to predict.
backbone: Which backbone to use (one of ('vgg16', 'vgg19')).
inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example).
Returns
RetinaNet model with a VGG backbone.
"""
# choose default input
if inputs is None:
inputs = keras.layers.Input(shape=(None, None, 3))
# create the vgg backbone
if backbone == 'vgg16':
vgg = keras.applications.VGG16(input_tensor=inputs, include_top=False, weights=None)
elif backbone == 'vgg19':
vgg = keras.applications.VGG19(input_tensor=inputs, include_top=False, weights=None)
else:
raise ValueError("Backbone '{}' not recognized.".format(backbone))
if modifier:
vgg = modifier(vgg)
# create the full model
layer_names = ["block3_pool", "block4_pool", "block5_pool"]
layer_outputs = [vgg.get_layer(name).output for name in layer_names]
# C2 not provided
backbone_layers = {
'C3': layer_outputs[0],
'C4': layer_outputs[1],
'C5': layer_outputs[2]
}
return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr
import os
import numpy as np
from pycocotools.coco import COCO
class CocoGenerator(Generator):
""" Generate data from the COCO dataset.
See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information.
"""
def __init__(self, data_dir, set_name, **kwargs):
""" Initialize a COCO data generator.
Args
data_dir: Path to where the COCO dataset is stored.
set_name: Name of the set to parse.
"""
self.data_dir = data_dir
self.set_name = set_name
self.coco = COCO(os.path.join(data_dir, 'annotations', 'instances_' + set_name + '.json'))
self.image_ids = self.coco.getImgIds()
self.load_classes()
super(CocoGenerator, self).__init__(**kwargs)
def load_classes(self):
""" Loads the class to label mapping (and inverse) for COCO.
"""
# load class names (name -> label)
categories = self.coco.loadCats(self.coco.getCatIds())
categories.sort(key=lambda x: x['id'])
self.classes = {}
self.coco_labels = {}
self.coco_labels_inverse = {}
for c in categories:
self.coco_labels[len(self.classes)] = c['id']
self.coco_labels_inverse[c['id']] = len(self.classes)
self.classes[c['name']] = len(self.classes)
# also load the reverse (label -> name)
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
def size(self):
""" Size of the COCO dataset.
"""
return len(self.image_ids)
def num_classes(self):
""" Number of classes in the dataset. For COCO this is 80.
"""
return len(self.classes)
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def coco_label_to_label(self, coco_label):
""" Map COCO label to the label as used in the network.
COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes.
"""
return self.coco_labels_inverse[coco_label]
def coco_label_to_name(self, coco_label):
""" Map COCO label to name.
"""
return self.label_to_name(self.coco_label_to_label(coco_label))
def label_to_coco_label(self, label):
""" Map label as used by the network to labels as used by COCO.
"""
return self.coco_labels[label]
def image_path(self, image_index):
""" Returns the image path for image_index.
"""
image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
path = os.path.join(self.data_dir, 'images', self.set_name, image_info['file_name'])
return path
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
image = self.coco.loadImgs(self.image_ids[image_index])[0]
return float(image['width']) / float(image['height'])
def load_image(self, image_index):
""" Load an image at the image_index.
"""
path = self.image_path(image_index)
return read_image_bgr(path)
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
# get ground truth annotations
annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
# some images appear to miss annotations (like image with id 257034)
if len(annotations_ids) == 0:
return annotations
# parse annotations
coco_annotations = self.coco.loadAnns(annotations_ids)
for idx, a in enumerate(coco_annotations):
# some annotations have basically no width / height, skip them
if a['bbox'][2] < 1 or a['bbox'][3] < 1:
continue
annotations['labels'] = np.concatenate([annotations['labels'], [self.coco_label_to_label(a['category_id'])]], axis=0)
annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[
a['bbox'][0],
a['bbox'][1],
a['bbox'][0] + a['bbox'][2],
a['bbox'][1] + a['bbox'][3],
]]], axis=0)
return annotations
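# --- Expected COCO layout (inferred from the path handling above) and an
# illustrative generator construction; data_dir and set_name are assumptions ---
#   <data_dir>/annotations/instances_<set_name>.json
#   <data_dir>/images/<set_name>/<file_name>
def _example_coco_generator(data_dir='/path/to/coco'):
    return CocoGenerator(data_dir, 'train2017', batch_size=1)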
"""
Copyright 2017-2018 yhenon (https://github.com/yhenon/)
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from .generator import Generator
from ..utils.image import read_image_bgr
import numpy as np
from PIL import Image
from six import raise_from
import csv
import sys
import os.path
from collections import OrderedDict
def _parse(value, function, fmt):
"""
Parse a string into a value, and format a nice ValueError if it fails.
Returns `function(value)`.
Any `ValueError` raised is caught and a new `ValueError` is raised
with message `fmt.format(e)`, where `e` is the caught `ValueError`.
"""
try:
return function(value)
except ValueError as e:
raise_from(ValueError(fmt.format(e)), None)
def _read_classes(csv_reader):
""" Parse the classes file given by csv_reader.
"""
result = OrderedDict()
for line, row in enumerate(csv_reader):
line += 1
try:
class_name, class_id = row
except ValueError:
raise_from(ValueError('line {}: format should be \'class_name,class_id\''.format(line)), None)
class_id = _parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))
if class_name in result:
raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
result[class_name] = class_id
return result
def _read_annotations(csv_reader, classes):
""" Read annotations from the csv_reader.
"""
result = OrderedDict()
for line, row in enumerate(csv_reader):
line += 1
try:
img_file, x1, y1, x2, y2, class_name = row[:6]
except ValueError:
raise_from(ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None)
if img_file not in result:
result[img_file] = []
# If a row contains only an image path, it's an image without annotations.
if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
continue
x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))
# Check that the bounding box is valid.
if x2 <= x1:
raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
if y2 <= y1:
raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
# check if the current class name is correctly present
if class_name not in classes:
raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))
result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
return result
def _open_for_csv(path):
""" Open a file with flags suitable for csv.reader.
For Python 2 this means opening with mode 'rb';
for Python 3 this means mode 'r' with "universal newlines".
"""
if sys.version_info[0] < 3:
return open(path, 'rb')
else:
return open(path, 'r', newline='')
class CSVGenerator(Generator):
""" Generate data for a custom CSV dataset.
See https://github.com/fizyr/keras-retinanet#csv-datasets for more information.
"""
def __init__(
self,
csv_data_file,
csv_class_file,
base_dir=None,
**kwargs
):
""" Initialize a CSV data generator.
Args
csv_data_file: Path to the CSV annotations file.
csv_class_file: Path to the CSV classes file.
base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file).
"""
self.image_names = []
self.image_data = {}
self.base_dir = base_dir
# Take base_dir from annotations file if not explicitly specified.
if self.base_dir is None:
self.base_dir = os.path.dirname(csv_data_file)
# parse the provided class file
try:
with _open_for_csv(csv_class_file) as file:
self.classes = _read_classes(csv.reader(file, delimiter=','))
except ValueError as e:
raise_from(ValueError('invalid CSV class file: {}: {}'.format(csv_class_file, e)), None)
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
# csv with img_path, x1, y1, x2, y2, class_name
try:
with _open_for_csv(csv_data_file) as file:
self.image_data = _read_annotations(csv.reader(file, delimiter=','), self.classes)
except ValueError as e:
raise_from(ValueError('invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None)
self.image_names = list(self.image_data.keys())
super(CSVGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.image_names)
def num_classes(self):
""" Number of classes in the dataset.
"""
return max(self.classes.values()) + 1
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_path(self, image_index):
""" Returns the image path for image_index.
"""
return os.path.join(self.base_dir, self.image_names[image_index])
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
# PIL is fast for metadata
image = Image.open(self.image_path(image_index))
return float(image.width) / float(image.height)
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
path = self.image_names[image_index]
annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))}
for idx, annot in enumerate(self.image_data[path]):
annotations['labels'] = np.concatenate((annotations['labels'], [self.name_to_label(annot['class'])]))
annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[
float(annot['x1']),
float(annot['y1']),
float(annot['x2']),
float(annot['y2']),
]]))
return annotations
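# --- Illustrative CSV layout (file names are assumptions). A row containing only
# an image path marks an image without annotations, as handled in _read_annotations. ---
#   annotations.csv:
#     images/img_001.jpg,837,346,981,456,cow
#     images/img_002.jpg,,,,,
#   classes.csv:
#     cow,0
#     sheep,1
def _example_csv_generator():
    return CSVGenerator('annotations.csv', 'classes.csv', batch_size=1)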
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
import random
import warnings
from tensorflow import keras
from ..utils.anchors import (
anchor_targets_bbox,
anchors_for_shape,
guess_shapes
)
from ..utils.config import parse_anchor_parameters, parse_pyramid_levels
from ..utils.image import (
TransformParameters,
adjust_transform_for_image,
apply_transform,
preprocess_image,
resize_image,
)
from ..utils.transform import transform_aabb
class Generator(keras.utils.Sequence):
""" Abstract generator class.
"""
def __init__(
self,
transform_generator = None,
visual_effect_generator=None,
batch_size=1,
group_method='ratio', # one of 'none', 'random', 'ratio'
shuffle_groups=True,
image_min_side=800,
image_max_side=1333,
no_resize=False,
transform_parameters=None,
compute_anchor_targets=anchor_targets_bbox,
compute_shapes=guess_shapes,
preprocess_image=preprocess_image,
config=None
):
""" Initialize Generator object.
Args
transform_generator : A generator used to randomly transform images and annotations.
visual_effect_generator : A generator used to randomly apply visual effects to images.
batch_size : The size of the batches to generate.
group_method : Determines how images are grouped together (defaults to 'ratio', one of ('none', 'random', 'ratio')).
shuffle_groups : If True, shuffles the groups each epoch.
image_min_side : After resizing the minimum side of an image is equal to image_min_side.
image_max_side : If after resizing the maximum side is larger than image_max_side, scales down further so that the max side is equal to image_max_side.
no_resize : If True, no image/annotation resizing is performed.
transform_parameters : The transform parameters used for data augmentation.
compute_anchor_targets : Function handler for computing the targets of anchors for an image and its annotations.
compute_shapes : Function handler for computing the shapes of the pyramid for a given input.
preprocess_image : Function handler for preprocessing an image (scaling / normalizing) for passing through a network.
config : Optional configuration (e.g. a parsed config file) used to derive custom anchor parameters and pyramid levels.
"""
self.transform_generator = transform_generator
self.visual_effect_generator = visual_effect_generator
self.batch_size = int(batch_size)
self.group_method = group_method
self.shuffle_groups = shuffle_groups
self.image_min_side = image_min_side
self.image_max_side = image_max_side
self.no_resize = no_resize
self.transform_parameters = transform_parameters or TransformParameters()
self.compute_anchor_targets = compute_anchor_targets
self.compute_shapes = compute_shapes
self.preprocess_image = preprocess_image
self.config = config
# Define groups
self.group_images()
# Shuffle when initializing
if self.shuffle_groups:
self.on_epoch_end()
def on_epoch_end(self):
if self.shuffle_groups:
random.shuffle(self.groups)
def size(self):
""" Size of the dataset.
"""
raise NotImplementedError('size method not implemented')
def num_classes(self):
""" Number of classes in the dataset.
"""
raise NotImplementedError('num_classes method not implemented')
def has_label(self, label):
""" Returns True if label is a known label.
"""
raise NotImplementedError('has_label method not implemented')
def has_name(self, name):
""" Returns True if name is a known class.
"""
raise NotImplementedError('has_name method not implemented')
def name_to_label(self, name):
""" Map name to label.
"""
raise NotImplementedError('name_to_label method not implemented')
def label_to_name(self, label):
""" Map label to name.
"""
raise NotImplementedError('label_to_name method not implemented')
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
raise NotImplementedError('image_aspect_ratio method not implemented')
def image_path(self, image_index):
""" Get the path to an image.
"""
raise NotImplementedError('image_path method not implemented')
def load_image(self, image_index):
""" Load an image at the image_index.
"""
raise NotImplementedError('load_image method not implemented')
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
raise NotImplementedError('load_annotations method not implemented')
def load_annotations_group(self, group):
""" Load annotations for all images in group.
"""
annotations_group = [self.load_annotations(image_index) for image_index in group]
for annotations in annotations_group:
assert(isinstance(annotations, dict)), '\'load_annotations\' should return a list of dictionaries, received: {}'.format(type(annotations))
assert('labels' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.'
assert('bboxes' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.'
return annotations_group
def filter_annotations(self, image_group, annotations_group, group):
""" Filter annotations by removing those that are outside of the image bounds or whose width/height < 0.
"""
# test all annotations
for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
# test x2 < x1 | y2 < y1 | x1 < 0 | y1 < 0 | x2 <= 0 | y2 <= 0 | x2 >= image.shape[1] | y2 >= image.shape[0]
invalid_indices = np.where(
(annotations['bboxes'][:, 2] <= annotations['bboxes'][:, 0]) |
(annotations['bboxes'][:, 3] <= annotations['bboxes'][:, 1]) |
(annotations['bboxes'][:, 0] < 0) |
(annotations['bboxes'][:, 1] < 0) |
(annotations['bboxes'][:, 2] > image.shape[1]) |
(annotations['bboxes'][:, 3] > image.shape[0])
)[0]
# delete invalid indices
if len(invalid_indices):
warnings.warn('Image {} with id {} (shape {}) contains the following invalid boxes: {}.'.format(
self.image_path(group[index]),
group[index],
image.shape,
annotations['bboxes'][invalid_indices, :]
))
for k in annotations_group[index].keys():
annotations_group[index][k] = np.delete(annotations[k], invalid_indices, axis=0)
return image_group, annotations_group
def load_image_group(self, group):
""" Load images for all images in a group.
"""
return [self.load_image(image_index) for image_index in group]
def random_visual_effect_group_entry(self, image, annotations):
""" Randomly transforms image and annotation.
"""
visual_effect = next(self.visual_effect_generator)
# apply visual effect
image = visual_effect(image)
return image, annotations
def random_visual_effect_group(self, image_group, annotations_group):
""" Randomly apply visual effect on each image.
"""
assert(len(image_group) == len(annotations_group))
if self.visual_effect_generator is None:
# do nothing
return image_group, annotations_group
for index in range(len(image_group)):
# apply effect on a single group entry
image_group[index], annotations_group[index] = self.random_visual_effect_group_entry(
image_group[index], annotations_group[index]
)
return image_group, annotations_group
def random_transform_group_entry(self, image, annotations, transform=None):
""" Randomly transforms image and annotation.
"""
# randomly transform both image and annotations
if transform is not None or self.transform_generator:
if transform is None:
transform = adjust_transform_for_image(next(self.transform_generator), image, self.transform_parameters.relative_translation)
# apply transformation to image
image = apply_transform(transform, image, self.transform_parameters)
# Transform the bounding boxes in the annotations.
annotations['bboxes'] = annotations['bboxes'].copy()
for index in range(annotations['bboxes'].shape[0]):
annotations['bboxes'][index, :] = transform_aabb(transform, annotations['bboxes'][index, :])
return image, annotations
def random_transform_group(self, image_group, annotations_group):
""" Randomly transforms each image and its annotations.
"""
assert(len(image_group) == len(annotations_group))
for index in range(len(image_group)):
# transform a single group entry
image_group[index], annotations_group[index] = self.random_transform_group_entry(image_group[index], annotations_group[index])
return image_group, annotations_group
def resize_image(self, image):
""" Resize an image using image_min_side and image_max_side.
"""
if self.no_resize:
return image, 1
else:
return resize_image(image, min_side=self.image_min_side, max_side=self.image_max_side)
def preprocess_group_entry(self, image, annotations):
""" Preprocess image and its annotations.
"""
# resize image
image, image_scale = self.resize_image(image)
# preprocess the image
image = self.preprocess_image(image)
# apply resizing to annotations too
annotations['bboxes'] *= image_scale
# convert to the wanted keras floatx
image = keras.backend.cast_to_floatx(image)
return image, annotations
def preprocess_group(self, image_group, annotations_group):
""" Preprocess each image and its annotations in its group.
"""
assert(len(image_group) == len(annotations_group))
for index in range(len(image_group)):
# preprocess a single group entry
image_group[index], annotations_group[index] = self.preprocess_group_entry(image_group[index], annotations_group[index])
return image_group, annotations_group
def group_images(self):
""" Order the images according to self.order and makes groups of self.batch_size.
"""
# determine the order of the images
order = list(range(self.size()))
if self.group_method == 'random':
random.shuffle(order)
elif self.group_method == 'ratio':
order.sort(key=lambda x: self.image_aspect_ratio(x))
# divide into groups, one group = one batch
self.groups = [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)]
def compute_inputs(self, image_group):
""" Compute inputs for the network using an image_group.
"""
# get the max image shape
max_shape = tuple(max(image.shape[x] for image in image_group) for x in range(3))
# construct an image batch object
image_batch = np.zeros((self.batch_size,) + max_shape, dtype=keras.backend.floatx())
# copy all images to the upper left part of the image batch object
for image_index, image in enumerate(image_group):
image_batch[image_index, :image.shape[0], :image.shape[1], :image.shape[2]] = image
if keras.backend.image_data_format() == 'channels_first':
image_batch = image_batch.transpose((0, 3, 1, 2))
return image_batch
def generate_anchors(self, image_shape):
anchor_params = None
pyramid_levels = None
if self.config and 'anchor_parameters' in self.config:
anchor_params = parse_anchor_parameters(self.config)
if self.config and 'pyramid_levels' in self.config:
pyramid_levels = parse_pyramid_levels(self.config)
return anchors_for_shape(image_shape, anchor_params=anchor_params, pyramid_levels=pyramid_levels, shapes_callback=self.compute_shapes)
def compute_targets(self, image_group, annotations_group):
""" Compute target outputs for the network using images and their annotations.
"""
# get the max image shape
max_shape = tuple(max(image.shape[x] for image in image_group) for x in range(3))
anchors = self.generate_anchors(max_shape)
batches = self.compute_anchor_targets(
anchors,
image_group,
annotations_group,
self.num_classes()
)
return list(batches)
def compute_input_output(self, group):
""" Compute inputs and target outputs for the network.
"""
# load images and annotations
image_group = self.load_image_group(group)
annotations_group = self.load_annotations_group(group)
# check validity of annotations
image_group, annotations_group = self.filter_annotations(image_group, annotations_group, group)
# randomly apply visual effect
image_group, annotations_group = self.random_visual_effect_group(image_group, annotations_group)
# randomly transform data
image_group, annotations_group = self.random_transform_group(image_group, annotations_group)
# perform preprocessing steps
image_group, annotations_group = self.preprocess_group(image_group, annotations_group)
# compute network inputs
inputs = self.compute_inputs(image_group)
# compute network targets
targets = self.compute_targets(image_group, annotations_group)
return inputs, targets
def __len__(self):
"""
Number of batches for generator.
"""
return len(self.groups)
def __getitem__(self, index):
"""
Keras sequence method for generating batches.
"""
group = self.groups[index]
inputs, targets = self.compute_input_output(group)
return inputs, targets
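# --- Illustrative training sketch (model construction is assumed elsewhere).
# Because Generator subclasses keras.utils.Sequence, it can be passed directly
# to keras fit(), which drives the __len__ and __getitem__ methods defined above. ---
def _example_fit(model, train_generator, epochs=1):
    return model.fit(train_generator, epochs=epochs)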
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import csv
import os.path
import numpy as np
from PIL import Image
from .generator import Generator
from ..utils.image import read_image_bgr
kitti_classes = {
'Car': 0,
'Van': 1,
'Truck': 2,
'Pedestrian': 3,
'Person_sitting': 4,
'Cyclist': 5,
'Tram': 6,
'Misc': 7,
'DontCare': 7
}
class KittiGenerator(Generator):
""" Generate data for a KITTI dataset.
See http://www.cvlibs.net/datasets/kitti/ for more information.
"""
def __init__(
self,
base_dir,
subset='train',
**kwargs
):
""" Initialize a KITTI data generator.
Args
base_dir: Path to the KITTI dataset directory, containing a folder per subset with 'images' and 'labels' subfolders.
subset: The subset to generate data for (defaults to 'train').
"""
self.base_dir = base_dir
label_dir = os.path.join(self.base_dir, subset, 'labels')
image_dir = os.path.join(self.base_dir, subset, 'images')
"""
1 type Describes the type of object: 'Car', 'Van', 'Truck',
'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram',
'Misc' or 'DontCare'
1 truncated Float from 0 (non-truncated) to 1 (truncated), where
truncated refers to the object leaving image boundaries
1 occluded Integer (0,1,2,3) indicating occlusion state:
0 = fully visible, 1 = partly occluded
2 = largely occluded, 3 = unknown
1 alpha Observation angle of object, ranging [-pi..pi]
4 bbox 2D bounding box of object in the image (0-based index):
contains left, top, right, bottom pixel coordinates
3 dimensions 3D object dimensions: height, width, length (in meters)
3 location 3D object location x,y,z in camera coordinates (in meters)
1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
self.labels = {}
self.classes = kitti_classes
for name, label in self.classes.items():
self.labels[label] = name
self.image_data = dict()
self.images = []
for i, fn in enumerate(os.listdir(label_dir)):
label_fp = os.path.join(label_dir, fn)
image_fp = os.path.join(image_dir, fn.replace('.txt', '.png'))
self.images.append(image_fp)
fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right', 'bottom', 'dh', 'dw', 'dl',
'lx', 'ly', 'lz', 'ry']
with open(label_fp, 'r') as csv_file:
reader = csv.DictReader(csv_file, delimiter=' ', fieldnames=fieldnames)
boxes = []
for line, row in enumerate(reader):
label = row['type']
cls_id = kitti_classes[label]
annotation = {'cls_id': cls_id, 'x1': row['left'], 'x2': row['right'], 'y2': row['bottom'], 'y1': row['top']}
boxes.append(annotation)
self.image_data[i] = boxes
super(KittiGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.images)
def num_classes(self):
""" Number of classes in the dataset.
"""
return max(self.classes.values()) + 1
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
raise NotImplementedError()
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
# PIL is fast for metadata
image = Image.open(self.images[image_index])
return float(image.width) / float(image.height)
def image_path(self, image_index):
""" Get the path to an image.
"""
return self.images[image_index]
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
image_data = self.image_data[image_index]
annotations = {'labels': np.empty((len(image_data),)), 'bboxes': np.empty((len(image_data), 4))}
for idx, ann in enumerate(image_data):
annotations['bboxes'][idx, 0] = float(ann['x1'])
annotations['bboxes'][idx, 1] = float(ann['y1'])
annotations['bboxes'][idx, 2] = float(ann['x2'])
annotations['bboxes'][idx, 3] = float(ann['y2'])
annotations['labels'][idx] = int(ann['cls_id'])
return annotations
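# --- Expected KITTI layout (inferred from the label/image path handling above);
# the base_dir value is an assumption ---
#   <base_dir>/<subset>/images/000000.png
#   <base_dir>/<subset>/labels/000000.txt
def _example_kitti_generator(base_dir='/path/to/kitti'):
    return KittiGenerator(base_dir, subset='train', batch_size=1)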
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import csv
import json
import os
import warnings
import numpy as np
from PIL import Image
from .generator import Generator
from ..utils.image import read_image_bgr
def load_hierarchy(metadata_dir, version='v4'):
hierarchy = None
if version == 'challenge2018':
hierarchy = 'bbox_labels_500_hierarchy.json'
elif version == 'v4':
hierarchy = 'bbox_labels_600_hierarchy.json'
elif version == 'v3':
hierarchy = 'bbox_labels_600_hierarchy.json'
hierarchy_json = os.path.join(metadata_dir, hierarchy)
with open(hierarchy_json) as f:
hierarchy_data = json.loads(f.read())
return hierarchy_data
def load_hierarchy_children(hierarchy):
res = [hierarchy['LabelName']]
if 'Subcategory' in hierarchy:
for subcategory in hierarchy['Subcategory']:
children = load_hierarchy_children(subcategory)
for c in children:
res.append(c)
return res
def find_hierarchy_parent(hierarchy, parent_cls):
if hierarchy['LabelName'] == parent_cls:
return hierarchy
elif 'Subcategory' in hierarchy:
for child in hierarchy['Subcategory']:
res = find_hierarchy_parent(child, parent_cls)
if res is not None:
return res
return None
def get_labels(metadata_dir, version='v4'):
if version == 'v4' or version == 'challenge2018':
csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv'
boxable_classes_descriptions = os.path.join(metadata_dir, csv_file)
id_to_labels = {}
cls_index = {}
i = 0
with open(boxable_classes_descriptions) as f:
for row in csv.reader(f):
# make sure the csv row is not empty (usually the last one)
if len(row):
label = row[0]
description = row[1].replace("\"", "").replace("'", "").replace('`', '')
id_to_labels[i] = description
cls_index[label] = i
i += 1
else:
trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt')
description_path = os.path.join(metadata_dir, 'class-descriptions.csv')
description_table = {}
with open(description_path) as f:
for row in csv.reader(f):
# make sure the csv row is not empty (usually the last one)
if len(row):
description_table[row[0]] = row[1].replace("\"", "").replace("'", "").replace('`', '')
with open(trainable_classes_path, 'rb') as f:
trainable_classes = f.read().split('\n')
id_to_labels = dict([(i, description_table[c]) for i, c in enumerate(trainable_classes)])
cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)])
return id_to_labels, cls_index
def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
validation_image_ids = {}
if version == 'v4':
annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
elif version == 'challenge2018':
validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')
with open(validation_image_ids_path, 'r') as csv_file:
reader = csv.DictReader(csv_file, fieldnames=['ImageID'])
next(reader)
for line, row in enumerate(reader):
image_id = row['ImageID']
validation_image_ids[image_id] = True
annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
else:
annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')
fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
'XMin', 'XMax', 'YMin', 'YMax',
'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']
id_annotations = dict()
with open(annotations_path, 'r') as csv_file:
reader = csv.DictReader(csv_file, fieldnames=fieldnames)
next(reader)
images_sizes = {}
for line, row in enumerate(reader):
frame = row['ImageID']
if version == 'challenge2018':
if subset == 'train':
if frame in validation_image_ids:
continue
elif subset == 'validation':
if frame not in validation_image_ids:
continue
else:
raise NotImplementedError('This generator handles only the train and validation subsets')
class_name = row['LabelName']
if class_name not in cls_index:
continue
cls_id = cls_index[class_name]
if version == 'challenge2018':
# We recommend participants to use the provided subset of the training set as a validation set.
# This is preferable over using the V4 val/test sets, as the training set is more densely annotated.
img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg')
else:
img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg')
if frame in images_sizes:
width, height = images_sizes[frame]
else:
try:
with Image.open(img_path) as img:
width, height = img.width, img.height
images_sizes[frame] = (width, height)
except Exception as ex:
if version == 'challenge2018':
raise ex
continue
x1 = float(row['XMin'])
x2 = float(row['XMax'])
y1 = float(row['YMin'])
y2 = float(row['YMax'])
x1_int = int(round(x1 * width))
x2_int = int(round(x2 * width))
y1_int = int(round(y1 * height))
y2_int = int(round(y2 * height))
# Check that the bounding box is valid.
if x2 <= x1:
raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
if y2 <= y1:
raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))
if y2_int == y1_int:
warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1))
continue
if x2_int == x1_int:
warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1))
continue
img_id = row['ImageID']
annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2}
if img_id in id_annotations:
annotations = id_annotations[img_id]
annotations['boxes'].append(annotation)
else:
id_annotations[img_id] = {'w': width, 'h': height, 'boxes': [annotation]}
return id_annotations
class OpenImagesGenerator(Generator):
def __init__(
self, main_dir, subset, version='v4',
labels_filter=None, annotation_cache_dir='.',
parent_label=None,
**kwargs
):
if version == 'challenge2018':
metadata = 'challenge2018'
elif version == 'v4':
metadata = '2018_04'
elif version == 'v3':
metadata = '2017_11'
else:
raise NotImplementedError('There is currently no implementation for versions older than v3')
if version == 'challenge2018':
self.base_dir = os.path.join(main_dir, 'images', 'train')
else:
self.base_dir = os.path.join(main_dir, 'images', subset)
metadata_dir = os.path.join(main_dir, metadata)
annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json')
self.hierarchy = load_hierarchy(metadata_dir, version=version)
id_to_labels, cls_index = get_labels(metadata_dir, version=version)
if os.path.exists(annotation_cache_json):
with open(annotation_cache_json, 'r') as f:
self.annotations = json.loads(f.read())
else:
self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version=version)
json.dump(self.annotations, open(annotation_cache_json, "w"))
if labels_filter is not None or parent_label is not None:
self.id_to_labels, self.annotations = self.__filter_data(id_to_labels, cls_index, labels_filter, parent_label)
else:
self.id_to_labels = id_to_labels
self.id_to_image_id = dict([(i, k) for i, k in enumerate(self.annotations)])
super(OpenImagesGenerator, self).__init__(**kwargs)
def __filter_data(self, id_to_labels, cls_index, labels_filter=None, parent_label=None):
"""
If you want to work with a subset of the labels, pass a list of trainable labels.
:param labels_filter: e.g. labels_filter = ['Helmet', 'Hat', 'Analog television']
:param parent_label: If parent_label is set, this keeps the parent label
but also its children in the semantic hierarchy as defined in OID (e.g. 'Animal'
in the hierarchical tree).
:return: the filtered (label id -> name) mapping and the filtered annotations.
"""
children_id_to_labels = {}
if parent_label is None:
# no sublabels other than the labels themselves
for label in labels_filter:
for i, lb in id_to_labels.items():
if lb == label:
children_id_to_labels[i] = label
break
else:
parent_cls = None
for i, lb in id_to_labels.items():
if lb == parent_label:
parent_id = i
for c, index in cls_index.items():
if index == parent_id:
parent_cls = c
break
if parent_cls is None:
raise Exception('Couldn\'t find label {}'.format(parent_label))
parent_tree = find_hierarchy_parent(self.hierarchy, parent_cls)
if parent_tree is None:
raise Exception('Couldn\'t find parent {} in the semantic hierarchical tree'.format(parent_label))
children = load_hierarchy_children(parent_tree)
for cls in children:
index = cls_index[cls]
label = id_to_labels[index]
children_id_to_labels[index] = label
id_map = dict([(ind, i) for i, ind in enumerate(children_id_to_labels.keys())])
filtered_annotations = {}
for k in self.annotations:
img_ann = self.annotations[k]
filtered_boxes = []
for ann in img_ann['boxes']:
cls_id = ann['cls_id']
if cls_id in children_id_to_labels:
ann['cls_id'] = id_map[cls_id]
filtered_boxes.append(ann)
if len(filtered_boxes) > 0:
filtered_annotations[k] = {'w': img_ann['w'], 'h': img_ann['h'], 'boxes': filtered_boxes}
children_id_to_labels = dict([(id_map[i], l) for (i, l) in children_id_to_labels.items()])
return children_id_to_labels, filtered_annotations
def size(self):
return len(self.annotations)
def num_classes(self):
return len(self.id_to_labels)
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.id_to_labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
raise NotImplementedError()
def name_to_label(self, name):
raise NotImplementedError()
def label_to_name(self, label):
return self.id_to_labels[label]
def image_aspect_ratio(self, image_index):
img_annotations = self.annotations[self.id_to_image_id[image_index]]
height, width = img_annotations['h'], img_annotations['w']
return float(width) / float(height)
def image_path(self, image_index):
path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg')
return path
def load_image(self, image_index):
return read_image_bgr(self.image_path(image_index))
def load_annotations(self, image_index):
image_annotations = self.annotations[self.id_to_image_id[image_index]]
labels = image_annotations['boxes']
height, width = image_annotations['h'], image_annotations['w']
annotations = {'labels': np.empty((len(labels),)), 'bboxes': np.empty((len(labels), 4))}
for idx, ann in enumerate(labels):
cls_id = ann['cls_id']
x1 = ann['x1'] * width
x2 = ann['x2'] * width
y1 = ann['y1'] * height
y2 = ann['y2'] * height
annotations['bboxes'][idx, 0] = x1
annotations['bboxes'][idx, 1] = y1
annotations['bboxes'][idx, 2] = x2
annotations['bboxes'][idx, 3] = y2
annotations['labels'][idx] = cls_id
return annotations
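# --- Illustrative usage sketch (main_dir is an assumption; the filter values come
# from the __filter_data docstring above) ---
def _example_open_images_generator(main_dir='/path/to/open-images'):
    return OpenImagesGenerator(main_dir, subset='train', version='v4',
                               labels_filter=['Helmet', 'Hat'], batch_size=1)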
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from ..preprocessing.generator import Generator
from ..utils.image import read_image_bgr
import os
import numpy as np
from six import raise_from
from PIL import Image
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
voc_classes = {
'aeroplane' : 0,
'bicycle' : 1,
'bird' : 2,
'boat' : 3,
'bottle' : 4,
'bus' : 5,
'car' : 6,
'cat' : 7,
'chair' : 8,
'cow' : 9,
'diningtable' : 10,
'dog' : 11,
'horse' : 12,
'motorbike' : 13,
'person' : 14,
'pottedplant' : 15,
'sheep' : 16,
'sofa' : 17,
'train' : 18,
'tvmonitor' : 19
}
def _findNode(parent, name, debug_name=None, parse=None):
if debug_name is None:
debug_name = name
result = parent.find(name)
if result is None:
raise ValueError('missing element \'{}\''.format(debug_name))
if parse is not None:
try:
return parse(result.text)
except ValueError as e:
raise_from(ValueError('illegal value for \'{}\': {}'.format(debug_name, e)), None)
return result
class PascalVocGenerator(Generator):
""" Generate data for a Pascal VOC dataset.
See http://host.robots.ox.ac.uk/pascal/VOC/ for more information.
"""
def __init__(
self,
data_dir,
set_name,
classes=voc_classes,
image_extension='.jpg',
skip_truncated=False,
skip_difficult=False,
**kwargs
):
""" Initialize a Pascal VOC data generator.
Args
data_dir: Path to the Pascal VOC dataset directory (containing 'JPEGImages', 'Annotations' and 'ImageSets').
set_name: Name of the image set to parse (one of the .txt files under ImageSets/Main, e.g. 'trainval').
"""
self.data_dir = data_dir
self.set_name = set_name
self.classes = classes
self.image_names = [line.strip().split(None, 1)[0] for line in open(os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()]
self.image_extension = image_extension
self.skip_truncated = skip_truncated
self.skip_difficult = skip_difficult
self.labels = {}
for key, value in self.classes.items():
self.labels[value] = key
super(PascalVocGenerator, self).__init__(**kwargs)
def size(self):
""" Size of the dataset.
"""
return len(self.image_names)
def num_classes(self):
""" Number of classes in the dataset.
"""
return len(self.classes)
def has_label(self, label):
""" Return True if label is a known label.
"""
return label in self.labels
def has_name(self, name):
""" Returns True if name is a known class.
"""
return name in self.classes
def name_to_label(self, name):
""" Map name to label.
"""
return self.classes[name]
def label_to_name(self, label):
""" Map label to name.
"""
return self.labels[label]
def image_aspect_ratio(self, image_index):
""" Compute the aspect ratio for an image with image_index.
"""
path = os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
image = Image.open(path)
return float(image.width) / float(image.height)
def image_path(self, image_index):
""" Get the path to an image.
"""
return os.path.join(self.data_dir, 'JPEGImages', self.image_names[image_index] + self.image_extension)
def load_image(self, image_index):
""" Load an image at the image_index.
"""
return read_image_bgr(self.image_path(image_index))
def __parse_annotation(self, element):
""" Parse an annotation given an XML element.
"""
truncated = _findNode(element, 'truncated', parse=int)
difficult = _findNode(element, 'difficult', parse=int)
class_name = _findNode(element, 'name').text
if class_name not in self.classes:
raise ValueError('class name \'{}\' not found in classes: {}'.format(class_name, list(self.classes.keys())))
box = np.zeros((4,))
label = self.name_to_label(class_name)
bndbox = _findNode(element, 'bndbox')
box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1
box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1
box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1
box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1
return truncated, difficult, box, label
def __parse_annotations(self, xml_root):
""" Parse all annotations under the xml_root.
"""
annotations = {'labels': np.empty((len(xml_root.findall('object')),)), 'bboxes': np.empty((len(xml_root.findall('object')), 4))}
for i, element in enumerate(xml_root.iter('object')):
try:
truncated, difficult, box, label = self.__parse_annotation(element)
except ValueError as e:
raise_from(ValueError('could not parse object #{}: {}'.format(i, e)), None)
if truncated and self.skip_truncated:
continue
if difficult and self.skip_difficult:
continue
annotations['bboxes'][i, :] = box
annotations['labels'][i] = label
return annotations
def load_annotations(self, image_index):
""" Load annotations for an image_index.
"""
filename = self.image_names[image_index] + '.xml'
try:
tree = ET.parse(os.path.join(self.data_dir, 'Annotations', filename))
return self.__parse_annotations(tree.getroot())
except ET.ParseError as e:
raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None)
except ValueError as e:
raise_from(ValueError('invalid annotations file: {}: {}'.format(filename, e)), None)
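# --- Expected Pascal VOC layout (inferred from the path handling above); data_dir
# and set_name are assumptions ---
#   <data_dir>/ImageSets/Main/<set_name>.txt   (one image name per line)
#   <data_dir>/JPEGImages/<image_name>.jpg
#   <data_dir>/Annotations/<image_name>.xml
def _example_voc_generator(data_dir='/path/to/VOC2012'):
    return PascalVocGenerator(data_dir, 'trainval', batch_size=1)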
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from tensorflow import keras
from ..utils.compute_overlap import compute_overlap
class AnchorParameters:
""" The parameteres that define how anchors are generated.
Args
sizes : List of sizes to use. Each size corresponds to one feature level.
strides : List of strides to use. Each stride correspond to one feature level.
ratios : List of ratios to use per location in a feature map.
scales : List of scales to use per location in a feature map.
"""
def __init__(self, sizes, strides, ratios, scales):
self.sizes = sizes
self.strides = strides
self.ratios = ratios
self.scales = scales
def num_anchors(self):
return len(self.ratios) * len(self.scales)
"""
The default anchor parameters.
"""
AnchorParameters.default = AnchorParameters(
sizes = [32, 64, 128, 256, 512],
strides = [8, 16, 32, 64, 128],
ratios = np.array([0.5, 1, 2], keras.backend.floatx()),
scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)], keras.backend.floatx()),
)
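# Illustrative example (added for clarity; not part of the upstream file). It shows how a
# custom AnchorParameters object can be built; the sizes, strides and scales below are
# assumed demonstration values, not tuned settings.
def _example_custom_anchor_parameters():
    custom = AnchorParameters(
        sizes   = [16, 32, 64, 128, 256],
        strides = [4, 8, 16, 32, 64],
        ratios  = np.array([0.5, 1, 2], keras.backend.floatx()),
        scales  = np.array([1.0, 1.25, 1.6], keras.backend.floatx()),
    )
    # One anchor per (ratio, scale) combination at every feature-map location.
    assert custom.num_anchors() == 9
    return custom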
def anchor_targets_bbox(
anchors,
image_group,
annotations_group,
num_classes,
negative_overlap=0.4,
positive_overlap=0.5
):
""" Generate anchor targets for bbox detection.
Args
anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
image_group: List of BGR images.
annotations_group: List of annotation dictionaries with each annotation containing 'labels' and 'bboxes' of an image.
num_classes: Number of classes to predict.
negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
positive_overlap: IoU overlap for positive anchors (all anchors with overlap > positive_overlap are positive).
Returns
labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1),
where N is the number of anchors for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg).
regression_batch: batch that contains bounding-box regression targets for an image & anchor states (np.array of shape (batch_size, N, 4 + 1),
where N is the number of anchors for an image, the first 4 columns define regression targets for (x1, y1, x2, y2) and the
last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg).
"""
assert(len(image_group) == len(annotations_group)), "The length of the images and annotations need to be equal."
assert(len(annotations_group) > 0), "No data received to compute anchor targets for."
for annotations in annotations_group:
assert('bboxes' in annotations), "Annotations should contain bboxes."
assert('labels' in annotations), "Annotations should contain labels."
batch_size = len(image_group)
regression_batch = np.zeros((batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx())
labels_batch = np.zeros((batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx())
# compute labels and regression targets
for index, (image, annotations) in enumerate(zip(image_group, annotations_group)):
if annotations['bboxes'].shape[0]:
# obtain indices of gt annotations with the greatest overlap
positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations(anchors, annotations['bboxes'], negative_overlap, positive_overlap)
labels_batch[index, ignore_indices, -1] = -1
labels_batch[index, positive_indices, -1] = 1
regression_batch[index, ignore_indices, -1] = -1
regression_batch[index, positive_indices, -1] = 1
# compute target class labels
labels_batch[index, positive_indices, annotations['labels'][argmax_overlaps_inds[positive_indices]].astype(int)] = 1
regression_batch[index, :, :-1] = bbox_transform(anchors, annotations['bboxes'][argmax_overlaps_inds, :])
# ignore annotations outside of image
if image.shape:
anchors_centers = np.vstack([(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T
indices = np.logical_or(anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0])
labels_batch[index, indices, -1] = -1
regression_batch[index, indices, -1] = -1
return regression_batch, labels_batch
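# Usage sketch (illustrative; `anchors`, `images` and `annotations` are assumed to come from
# anchors_for_shape and a data generator, they are not created here).
def _example_anchor_targets(anchors, images, annotations, num_classes=80):
    regression_batch, labels_batch = anchor_targets_bbox(
        anchors,
        image_group=images,              # list of BGR images
        annotations_group=annotations,   # list of {'bboxes': (M, 4), 'labels': (M,)} dicts
        num_classes=num_classes,
    )
    # The last column holds the anchor state: -1 = ignore, 0 = background, 1 = foreground.
    anchor_states = labels_batch[..., -1]
    return regression_batch, labels_batch, anchor_states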
def compute_gt_annotations(
anchors,
annotations,
negative_overlap=0.4,
positive_overlap=0.5
):
""" Obtain indices of gt annotations with the greatest overlap.
Args
anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2).
annotations: np.array of shape (N, 5) for (x1, y1, x2, y2, label).
negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative).
positive_overlap: IoU overlap for positive anchors (all anchors with overlap > positive_overlap are positive).
Returns
positive_indices: indices of positive anchors
ignore_indices: indices of ignored anchors
argmax_overlaps_inds: for each anchor, the index of the ground-truth annotation with the highest overlap
"""
overlaps = compute_overlap(anchors.astype(np.float64), annotations.astype(np.float64))
argmax_overlaps_inds = np.argmax(overlaps, axis=1)
max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]
# assign "dont care" labels
positive_indices = max_overlaps >= positive_overlap
ignore_indices = (max_overlaps > negative_overlap) & ~positive_indices
return positive_indices, ignore_indices, argmax_overlaps_inds
def layer_shapes(image_shape, model):
"""Compute layer shapes given input image shape and the model.
Args
image_shape: The shape of the image.
model: The model to use for computing how the image shape is transformed in the pyramid.
Returns
A dictionary mapping layer names to image shapes.
"""
shape = {
model.layers[0].name: (None,) + image_shape,
}
for layer in model.layers[1:]:
nodes = layer._inbound_nodes
for node in nodes:
if isinstance(node.inbound_layers, keras.layers.Layer):
inputs = [shape[node.inbound_layers.name]]
else:
inputs = [shape[lr.name] for lr in node.inbound_layers]
if not inputs:
continue
shape[layer.name] = layer.compute_output_shape(inputs[0] if len(inputs) == 1 else inputs)
return shape
def make_shapes_callback(model):
""" Make a function for getting the shape of the pyramid levels.
"""
def get_shapes(image_shape, pyramid_levels):
shape = layer_shapes(image_shape, model)
image_shapes = [shape["P{}".format(level)][1:3] for level in pyramid_levels]
return image_shapes
return get_shapes
def guess_shapes(image_shape, pyramid_levels):
"""Guess shapes based on pyramid levels.
Args
image_shape: The shape of the image.
pyramid_levels: A list of what pyramid levels are used.
Returns
A list of image shapes at each pyramid level.
"""
image_shape = np.array(image_shape[:2])
image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in pyramid_levels]
return image_shapes
def anchors_for_shape(
image_shape,
pyramid_levels=None,
anchor_params=None,
shapes_callback=None,
):
""" Generators anchors for a given shape.
Args
image_shape: The shape of the image.
pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]).
anchor_params: Struct containing anchor parameters. If None, default values are used.
shapes_callback: Function to call for getting the shape of the image at different pyramid levels.
Returns
np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors.
"""
if pyramid_levels is None:
pyramid_levels = [3, 4, 5, 6, 7]
if anchor_params is None:
anchor_params = AnchorParameters.default
if shapes_callback is None:
shapes_callback = guess_shapes
image_shapes = shapes_callback(image_shape, pyramid_levels)
# compute anchors over all pyramid levels
all_anchors = np.zeros((0, 4))
for idx, p in enumerate(pyramid_levels):
anchors = generate_anchors(
base_size=anchor_params.sizes[idx],
ratios=anchor_params.ratios,
scales=anchor_params.scales
)
shifted_anchors = shift(image_shapes[idx], anchor_params.strides[idx], anchors)
all_anchors = np.append(all_anchors, shifted_anchors, axis=0)
return all_anchors
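# Illustrative example (not part of the upstream file): generating anchors for an 800x800
# image with the default pyramid levels and anchor parameters. The image size is an
# assumption chosen only for the arithmetic below.
def _example_anchors_for_shape():
    anchors = anchors_for_shape((800, 800, 3))
    # P3..P7 have strides 8..128, so the per-level grids are 100x100, 50x50, 25x25, 13x13
    # and 7x7; with 9 anchors per location this gives
    # 9 * (100*100 + 50*50 + 25*25 + 13*13 + 7*7) = 120087 anchors in total.
    assert anchors.shape == (120087, 4)
    return anchors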
def shift(shape, stride, anchors):
""" Produce shifted anchors based on shape of the map and stride size.
Args
shape : Shape to shift the anchors over.
stride : Stride to shift the anchors with over the shape.
anchors: The anchors to apply at each location.
"""
# create a grid starting from half stride from the top left corner
shift_x = (np.arange(0, shape[1]) + 0.5) * stride
shift_y = (np.arange(0, shape[0]) + 0.5) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((
shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel()
)).transpose()
# add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# reshape to (K*A, 4) shifted anchors
A = anchors.shape[0]
K = shifts.shape[0]
all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
return all_anchors
def generate_anchors(base_size=16, ratios=None, scales=None):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales w.r.t. a reference window.
"""
if ratios is None:
ratios = AnchorParameters.default.ratios
if scales is None:
scales = AnchorParameters.default.scales
num_anchors = len(ratios) * len(scales)
# initialize output anchors
anchors = np.zeros((num_anchors, 4))
# scale base_size
anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T
# compute areas of anchors
areas = anchors[:, 2] * anchors[:, 3]
# correct for ratios
anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales)))
anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales))
# transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2)
anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T
anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T
return anchors
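# Illustrative example (not part of the upstream file): the nine reference anchors produced
# for base_size=32 with the default ratios and scales. All boxes are centred on the origin
# in (x1, y1, x2, y2) form.
def _example_generate_anchors():
    anchors = generate_anchors(base_size=32)
    assert anchors.shape == (9, 4)
    widths  = anchors[:, 2] - anchors[:, 0]
    heights = anchors[:, 3] - anchors[:, 1]
    # Each anchor keeps (approximately) the area of the scaled base size, while the
    # height/width ratio follows the requested aspect ratios 0.5, 1 and 2.
    np.testing.assert_allclose(heights / widths, np.repeat([0.5, 1, 2], 3), rtol=1e-6)
    return anchors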
def bbox_transform(anchors, gt_boxes, mean=None, std=None):
"""Compute bounding-box regression targets for an image."""
# The mean and std are calculated from the COCO dataset.
# Bounding box normalization was first introduced in the Fast R-CNN paper.
# See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825 for more details
if mean is None:
mean = np.array([0, 0, 0, 0])
if std is None:
std = np.array([0.2, 0.2, 0.2, 0.2])
if isinstance(mean, (list, tuple)):
mean = np.array(mean)
elif not isinstance(mean, np.ndarray):
raise ValueError('Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean)))
if isinstance(std, (list, tuple)):
std = np.array(std)
elif not isinstance(std, np.ndarray):
raise ValueError('Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std)))
anchor_widths = anchors[:, 2] - anchors[:, 0]
anchor_heights = anchors[:, 3] - anchors[:, 1]
# According to the information provided by a keras-retinanet author, they got marginally better results using
# the following way of bounding box parametrization.
# See https://github.com/fizyr/keras-retinanet/issues/1273#issuecomment-585828825 for more details
targets_dx1 = (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths
targets_dy1 = (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights
targets_dx2 = (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths
targets_dy2 = (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights
targets = np.stack((targets_dx1, targets_dy1, targets_dx2, targets_dy2))
targets = targets.T
targets = (targets - mean) / std
return targets
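# Worked example (illustrative; the boxes below are assumed values). The regression targets
# are the corner differences divided by the anchor size, then normalised with (mean, std).
def _example_bbox_transform():
    anchors  = np.array([[ 0.0,  0.0, 100.0, 100.0]])
    gt_boxes = np.array([[10.0, 20.0, 110.0,  90.0]])
    targets = bbox_transform(anchors, gt_boxes)
    # Raw offsets are (10/100, 20/100, 10/100, -10/100); dividing by the default std of 0.2
    # gives (0.5, 1.0, 0.5, -0.5).
    np.testing.assert_allclose(targets, [[0.5, 1.0, 0.5, -0.5]])
    return targets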
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from pycocotools.cocoeval import COCOeval
from tensorflow import keras
import numpy as np
import json
import progressbar
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def evaluate_coco(generator, model, threshold=0.05):
""" Use the pycocotools to evaluate a COCO model on a dataset.
Args
generator : The generator for generating the evaluation data.
model : The model to evaluate.
threshold : The score threshold to use.
"""
# start collecting results
results = []
image_ids = []
for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '):
image = generator.load_image(index)
image = generator.preprocess_image(image)
image, scale = generator.resize_image(image)
if keras.backend.image_data_format() == 'channels_first':
image = image.transpose((2, 0, 1))
# run network
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))
# correct boxes for image scale
boxes /= scale
# change to (x, y, w, h) (MS COCO standard)
boxes[:, :, 2] -= boxes[:, :, 0]
boxes[:, :, 3] -= boxes[:, :, 1]
# compute predicted labels and scores
for box, score, label in zip(boxes[0], scores[0], labels[0]):
# scores are sorted, so we can break
if score < threshold:
break
# append detection for each positively labeled class
image_result = {
'image_id' : generator.image_ids[index],
'category_id' : generator.label_to_coco_label(label),
'score' : float(score),
'bbox' : box.tolist(),
}
# append detection to results
results.append(image_result)
# append image to list of processed images
image_ids.append(generator.image_ids[index])
if not len(results):
return
# write output
json.dump(results, open('{}_bbox_results.json'.format(generator.set_name), 'w'), indent=4)
json.dump(image_ids, open('{}_processed_image_ids.json'.format(generator.set_name), 'w'), indent=4)
# load results in COCO evaluation tool
coco_true = generator.coco
coco_pred = coco_true.loadRes('{}_bbox_results.json'.format(generator.set_name))
# run COCO evaluation
coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
return coco_eval.stats
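# Usage sketch (illustrative; `generator` and `model` are assumed to be a COCO data
# generator and a converted inference model, neither is created here).
def _example_evaluate_coco(generator, model):
    stats = evaluate_coco(generator, model, threshold=0.05)
    if stats is not None:
        # stats follows the pycocotools summary order; stats[0] is AP at IoU=0.50:0.95.
        print('mAP (IoU=0.50:0.95): {:.3f}'.format(stats[0]))
    return stats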
import warnings
def label_color(label):
""" Return a color from a set of predefined colors. Contains 80 colors in total.
Args
label: The label to get the color for.
Returns
A list of three values representing an RGB color.
If no color is defined for the given label, the color green is returned and a warning is raised.
"""
if label < len(colors):
return colors[label]
else:
warnings.warn('Label {} has no color, returning default.'.format(label))
return (0, 255, 0)
"""
Generated using:
```
colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)]
shuffle(colors)
pprint(colors)
```
"""
colors = [
[31 , 0 , 255] ,
[0 , 159 , 255] ,
[255 , 95 , 0] ,
[255 , 19 , 0] ,
[255 , 0 , 0] ,
[255 , 38 , 0] ,
[0 , 255 , 25] ,
[255 , 0 , 133] ,
[255 , 172 , 0] ,
[108 , 0 , 255] ,
[0 , 82 , 255] ,
[0 , 255 , 6] ,
[255 , 0 , 152] ,
[223 , 0 , 255] ,
[12 , 0 , 255] ,
[0 , 255 , 178] ,
[108 , 255 , 0] ,
[184 , 0 , 255] ,
[255 , 0 , 76] ,
[146 , 255 , 0] ,
[51 , 0 , 255] ,
[0 , 197 , 255] ,
[255 , 248 , 0] ,
[255 , 0 , 19] ,
[255 , 0 , 38] ,
[89 , 255 , 0] ,
[127 , 255 , 0] ,
[255 , 153 , 0] ,
[0 , 255 , 255] ,
[0 , 255 , 216] ,
[0 , 255 , 121] ,
[255 , 0 , 248] ,
[70 , 0 , 255] ,
[0 , 255 , 159] ,
[0 , 216 , 255] ,
[0 , 6 , 255] ,
[0 , 63 , 255] ,
[31 , 255 , 0] ,
[255 , 57 , 0] ,
[255 , 0 , 210] ,
[0 , 255 , 102] ,
[242 , 255 , 0] ,
[255 , 191 , 0] ,
[0 , 255 , 63] ,
[255 , 0 , 95] ,
[146 , 0 , 255] ,
[184 , 255 , 0] ,
[255 , 114 , 0] ,
[0 , 255 , 235] ,
[255 , 229 , 0] ,
[0 , 178 , 255] ,
[255 , 0 , 114] ,
[255 , 0 , 57] ,
[0 , 140 , 255] ,
[0 , 121 , 255] ,
[12 , 255 , 0] ,
[255 , 210 , 0] ,
[0 , 255 , 44] ,
[165 , 255 , 0] ,
[0 , 25 , 255] ,
[0 , 255 , 140] ,
[0 , 101 , 255] ,
[0 , 255 , 82] ,
[223 , 255 , 0] ,
[242 , 0 , 255] ,
[89 , 0 , 255] ,
[165 , 0 , 255] ,
[70 , 255 , 0] ,
[255 , 0 , 172] ,
[255 , 76 , 0] ,
[203 , 255 , 0] ,
[204 , 0 , 255] ,
[255 , 0 , 229] ,
[255 , 133 , 0] ,
[127 , 0 , 255] ,
[0 , 235 , 255] ,
[0 , 255 , 197] ,
[255 , 0 , 191] ,
[0 , 44 , 255] ,
[50 , 255 , 0]
]
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Sergey Karayev
# --------------------------------------------------------
cimport cython
import numpy as np
cimport numpy as np
def compute_overlap(
np.ndarray[double, ndim=2] boxes,
np.ndarray[double, ndim=2] query_boxes
):
"""
Args
boxes: (N, 4) ndarray of float
query_boxes: (K, 4) ndarray of float
Returns
overlaps: (N, K) ndarray of overlap between boxes and query_boxes
"""
cdef unsigned int N = boxes.shape[0]
cdef unsigned int K = query_boxes.shape[0]
cdef np.ndarray[double, ndim=2] overlaps = np.zeros((N, K), dtype=np.float64)
cdef double iw, ih, box_area
cdef double ua
cdef unsigned int k, n
for k in range(K):
box_area = (
(query_boxes[k, 2] - query_boxes[k, 0]) *
(query_boxes[k, 3] - query_boxes[k, 1])
)
for n in range(N):
iw = (
min(boxes[n, 2], query_boxes[k, 2]) -
max(boxes[n, 0], query_boxes[k, 0])
)
if iw > 0:
ih = (
min(boxes[n, 3], query_boxes[k, 3]) -
max(boxes[n, 1], query_boxes[k, 1])
)
if ih > 0:
ua = np.float64(
(boxes[n, 2] - boxes[n, 0]) *
(boxes[n, 3] - boxes[n, 1]) +
box_area - iw * ih
)
overlaps[n, k] = iw * ih / ua
return overlaps
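# Reference sketch (illustrative, not used by the library): the same IoU computation written
# with vectorised NumPy. It is slower than the typed Cython loop above for large inputs but
# makes the arithmetic easier to follow.
def compute_overlap_reference(boxes, query_boxes):
    # Pairwise intersection width/height between every box and every query box.
    iw = np.minimum(boxes[:, None, 2], query_boxes[None, :, 2]) - np.maximum(boxes[:, None, 0], query_boxes[None, :, 0])
    ih = np.minimum(boxes[:, None, 3], query_boxes[None, :, 3]) - np.maximum(boxes[:, None, 1], query_boxes[None, :, 1])
    intersection = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    areas_a = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    areas_b = (query_boxes[:, 2] - query_boxes[:, 0]) * (query_boxes[:, 3] - query_boxes[:, 1])
    union = areas_a[:, None] + areas_b[None, :] - intersection
    return intersection / np.maximum(union, np.finfo(np.float64).eps)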
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import configparser
import numpy as np
from tensorflow import keras
from ..utils.anchors import AnchorParameters
def read_config_file(config_path):
config = configparser.ConfigParser()
with open(config_path, 'r') as file:
config.read_file(file)
assert 'anchor_parameters' in config, \
"Malformed config file. Verify that it contains the anchor_parameters section."
config_keys = set(config['anchor_parameters'])
default_keys = set(AnchorParameters.default.__dict__.keys())
assert config_keys <= default_keys, \
"Malformed config file. These keys are not valid: {}".format(config_keys - default_keys)
if 'pyramid_levels' in config:
assert('levels' in config['pyramid_levels']), "pyramid_levels section must contain a 'levels' key"
return config
def parse_anchor_parameters(config):
ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split(' '))), keras.backend.floatx())
scales = np.array(list(map(float, config['anchor_parameters']['scales'].split(' '))), keras.backend.floatx())
sizes = list(map(int, config['anchor_parameters']['sizes'].split(' ')))
strides = list(map(int, config['anchor_parameters']['strides'].split(' ')))
assert (len(sizes) == len(strides)), "sizes and strides should have an equal number of values"
return AnchorParameters(sizes, strides, ratios, scales)
def parse_pyramid_levels(config):
levels = list(map(int, config['pyramid_levels']['levels'].split(' ')))
return levels
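# Illustrative example (not part of the upstream file): a minimal config file accepted by
# read_config_file and how it is turned into AnchorParameters. The file content and the
# values shown are assumed examples.
def _example_parse_config(config_path='config.ini'):
    # Expected content of config_path, in configparser (INI) syntax:
    #
    #   [anchor_parameters]
    #   sizes   = 32 64 128 256 512
    #   strides = 8 16 32 64 128
    #   ratios  = 0.5 1 2
    #   scales  = 1 1.2 1.6
    #
    config = read_config_file(config_path)
    anchor_params = parse_anchor_parameters(config)
    return anchor_params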
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from .anchors import compute_overlap
from .visualization import draw_detections, draw_annotations
from tensorflow import keras
import numpy as np
import os
import time
import cv2
import progressbar
assert(callable(progressbar.progressbar)), "Using wrong progressbar module, install 'progressbar2' instead."
def _compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves.
Code originally from https://github.com/rbgirshick/py-faster-rcnn.
# Arguments
recall: The recall curve (list).
precision: The precision curve (list).
# Returns
The average precision as computed in py-faster-rcnn.
"""
# correct AP calculation
# first append sentinel values at the end
mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))
# compute the precision envelope
for i in range(mpre.size - 1, 0, -1):
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
# to calculate area under PR curve, look for points
# where X axis (recall) changes value
i = np.where(mrec[1:] != mrec[:-1])[0]
# and sum (\Delta recall) * prec
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
return ap
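# Worked example (illustrative; the recall/precision values are assumed). After the sentinel
# values are appended and the precision envelope is applied, the AP is the area under the
# resulting step function.
def _example_compute_ap():
    recall    = [0.2, 0.4, 0.4, 0.8]
    precision = [1.0, 0.5, 0.4, 0.5]
    # The envelope makes precision non-increasing; the recall increments of 0.2, 0.2 and 0.4
    # contribute 0.2*1.0 + 0.2*0.5 + 0.4*0.5 = 0.5 (the final jump to the recall sentinel at
    # 1.0 carries zero precision).
    ap = _compute_ap(recall, precision)
    np.testing.assert_allclose(ap, 0.5)
    return ap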
def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None):
""" Get the detections from the model using the generator.
The result is a list of lists such that the size is:
all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
# Arguments
generator : The generator used to run images through the model.
model : The model to run on the images.
score_threshold : The score confidence threshold to use.
max_detections : The maximum number of detections to use per image.
save_path : The path to save the images with visualized detections to.
# Returns
A list of lists containing the detections for each image in the generator.
"""
all_detections = [[None for i in range(generator.num_classes()) if generator.has_label(i)] for j in range(generator.size())]
all_inferences = [None for i in range(generator.size())]
for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '):
raw_image = generator.load_image(i)
image, scale = generator.resize_image(raw_image.copy())
image = generator.preprocess_image(image)
if keras.backend.image_data_format() == 'channels_first':
image = image.transpose((2, 0, 1))
# run network
start = time.time()
boxes, scores, labels = model.predict_on_batch(np.expand_dims(image, axis=0))[:3]
inference_time = time.time() - start
# correct boxes for image scale
boxes /= scale
# select indices which have a score above the threshold
indices = np.where(scores[0, :] > score_threshold)[0]
# select those scores
scores = scores[0][indices]
# find the order with which to sort the scores
scores_sort = np.argsort(-scores)[:max_detections]
# select detections
image_boxes = boxes[0, indices[scores_sort], :]
image_scores = scores[scores_sort]
image_labels = labels[0, indices[scores_sort]]
image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
if save_path is not None:
draw_annotations(raw_image, generator.load_annotations(i), label_to_name=generator.label_to_name)
draw_detections(raw_image, image_boxes, image_scores, image_labels, label_to_name=generator.label_to_name, score_threshold=score_threshold)
cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image)
# copy detections to all_detections
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1]
all_inferences[i] = inference_time
return all_detections, all_inferences
def _get_annotations(generator):
""" Get the ground truth annotations from the generator.
The result is a list of lists such that the size is:
all_detections[num_images][num_classes] = annotations[num_detections, 5]
# Arguments
generator : The generator used to retrieve ground truth annotations.
# Returns
A list of lists containing the annotations for each image in the generator.
"""
all_annotations = [[None for i in range(generator.num_classes())] for j in range(generator.size())]
for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '):
# load the annotations
annotations = generator.load_annotations(i)
# copy detections to all_annotations
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
all_annotations[i][label] = annotations['bboxes'][annotations['labels'] == label, :].copy()
return all_annotations
def evaluate(
generator,
model,
iou_threshold=0.5,
score_threshold=0.05,
max_detections=100,
save_path=None
):
""" Evaluate a given dataset using a given model.
# Arguments
generator : The generator that represents the dataset to evaluate.
model : The model to evaluate.
iou_threshold : The threshold used to consider when a detection is positive or negative.
score_threshold : The score confidence threshold to use for detections.
max_detections : The maximum number of detections to use per image.
save_path : The path to save images with visualized detections to.
# Returns
A dict mapping class names to mAP scores.
"""
# gather all detections and annotations
all_detections, all_inferences = _get_detections(generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
all_annotations = _get_annotations(generator)
average_precisions = {}
# all_detections = pickle.load(open('all_detections.pkl', 'rb'))
# all_annotations = pickle.load(open('all_annotations.pkl', 'rb'))
# pickle.dump(all_detections, open('all_detections.pkl', 'wb'))
# pickle.dump(all_annotations, open('all_annotations.pkl', 'wb'))
# process detections and annotations
for label in range(generator.num_classes()):
if not generator.has_label(label):
continue
false_positives = np.zeros((0,))
true_positives = np.zeros((0,))
scores = np.zeros((0,))
num_annotations = 0.0
for i in range(generator.size()):
detections = all_detections[i][label]
annotations = all_annotations[i][label]
num_annotations += annotations.shape[0]
detected_annotations = []
for d in detections:
scores = np.append(scores, d[4])
if annotations.shape[0] == 0:
false_positives = np.append(false_positives, 1)
true_positives = np.append(true_positives, 0)
continue
overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
assigned_annotation = np.argmax(overlaps, axis=1)
max_overlap = overlaps[0, assigned_annotation]
if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
false_positives = np.append(false_positives, 0)
true_positives = np.append(true_positives, 1)
detected_annotations.append(assigned_annotation)
else:
false_positives = np.append(false_positives, 1)
true_positives = np.append(true_positives, 0)
# no annotations -> AP for this class is 0 (is this correct?)
if num_annotations == 0:
average_precisions[label] = 0, 0
continue
# sort by score
indices = np.argsort(-scores)
false_positives = false_positives[indices]
true_positives = true_positives[indices]
# compute false positives and true positives
false_positives = np.cumsum(false_positives)
true_positives = np.cumsum(true_positives)
# compute recall and precision
recall = true_positives / num_annotations
precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
# compute average precision
average_precision = _compute_ap(recall, precision)
average_precisions[label] = average_precision, num_annotations
# inference time
inference_time = np.sum(all_inferences) / generator.size()
return average_precisions, inference_time
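# Usage sketch (illustrative; `generator` and `model` are assumed to exist, e.g. a Pascal VOC
# generator and a converted inference model). It shows one common way to reduce the per-class
# results returned by evaluate to a single mAP number, weighted by instance count.
def _example_evaluate(generator, model):
    average_precisions, inference_time = evaluate(generator, model, iou_threshold=0.5)
    total_instances = sum(num_annotations for _, num_annotations in average_precisions.values())
    mean_ap = sum(ap * num_annotations for ap, num_annotations in average_precisions.values()) / max(total_instances, 1)
    print('weighted mAP: {:.4f} ({:.1f} ms/image)'.format(mean_ap, inference_time * 1000))
    return mean_ap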
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import tensorflow as tf
def setup_gpu(gpu_id):
try:
visible_gpu_indices = [int(id) for id in gpu_id.split(',')]
available_gpus = tf.config.list_physical_devices('GPU')
visible_gpus = [gpu for idx, gpu in enumerate(available_gpus) if idx in visible_gpu_indices]
if visible_gpus:
try:
# Currently, memory growth needs to be the same across GPUs.
for gpu in available_gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# Use only the selected GPUs.
tf.config.set_visible_devices(visible_gpus, 'GPU')
except RuntimeError as e:
# Visible devices must be set before GPUs have been initialized.
print(e)
logical_gpus = tf.config.list_logical_devices('GPU')
print(len(available_gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
else:
tf.config.set_visible_devices([], 'GPU')
except ValueError:
tf.config.set_visible_devices([], 'GPU')
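# Usage sketch (illustrative): restrict TensorFlow to the first GPU, or fall back to CPU.
# The ids are passed as a comma-separated string, which is how a --gpu style argument would
# typically be forwarded.
def _example_setup_gpu():
    setup_gpu('0')      # use only GPU 0, with memory growth enabled
    # setup_gpu('0,1')  # use GPUs 0 and 1
    # setup_gpu('cpu')  # any non-numeric value disables GPU usage entirely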
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import division
import numpy as np
import cv2
from PIL import Image
from .transform import change_transform_origin
def read_image_bgr(path):
""" Read an image in BGR format.
Args
path: Path to the image.
"""
# We deliberately don't use cv2.imread here, since it gives no feedback on errors while reading the image.
image = np.ascontiguousarray(Image.open(path).convert('RGB'))
return image[:, :, ::-1]
def preprocess_image(x, mode='caffe'):
""" Preprocess an image by subtracting the ImageNet mean.
Args
x: np.array of shape (None, None, 3) or (3, None, None).
mode: One of "caffe" or "tf".
- caffe: will zero-center each color channel with
respect to the ImageNet dataset, without scaling.
- tf: will scale pixels between -1 and 1, sample-wise.
Returns
The input with the ImageNet mean subtracted.
"""
# mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py"
# except for converting RGB -> BGR since we assume BGR already
# always convert to float32 to keep compatibility with OpenCV
x = x.astype(np.float32)
if mode == 'tf':
x /= 127.5
x -= 1.
elif mode == 'caffe':
x -= [103.939, 116.779, 123.68]
return x
def adjust_transform_for_image(transform, image, relative_translation):
""" Adjust a transformation for a specific image.
The translation of the matrix will be scaled with the size of the image.
The linear part of the transformation will be adjusted so that the origin of the transformation is at the center of the image.
"""
height, width, channels = image.shape
result = transform
# Scale the translation with the image size if specified.
if relative_translation:
result[0:2, 2] *= [width, height]
# Move the origin of transformation.
result = change_transform_origin(transform, (0.5 * width, 0.5 * height))
return result
class TransformParameters:
""" Struct holding parameters determining how to apply a transformation to an image.
Args
fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap'
interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4'
cval: Fill value to use with fill_mode='constant'
relative_translation: If true (the default), interpret translation as a factor of the image size.
If false, interpret it as absolute pixels.
"""
def __init__(
self,
fill_mode = 'nearest',
interpolation = 'linear',
cval = 0,
relative_translation = True,
):
self.fill_mode = fill_mode
self.cval = cval
self.interpolation = interpolation
self.relative_translation = relative_translation
def cvBorderMode(self):
if self.fill_mode == 'constant':
return cv2.BORDER_CONSTANT
if self.fill_mode == 'nearest':
return cv2.BORDER_REPLICATE
if self.fill_mode == 'reflect':
return cv2.BORDER_REFLECT_101
if self.fill_mode == 'wrap':
return cv2.BORDER_WRAP
def cvInterpolation(self):
if self.interpolation == 'nearest':
return cv2.INTER_NEAREST
if self.interpolation == 'linear':
return cv2.INTER_LINEAR
if self.interpolation == 'cubic':
return cv2.INTER_CUBIC
if self.interpolation == 'area':
return cv2.INTER_AREA
if self.interpolation == 'lanczos4':
return cv2.INTER_LANCZOS4
def apply_transform(matrix, image, params):
"""
Apply a transformation to an image.
The origin of transformation is at the top left corner of the image.
The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image.
Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space.
Args
matrix: A homogeneous 3 by 3 matrix representing the transformation to apply.
image: The image to transform.
params: The transform parameters (see TransformParameters)
"""
output = cv2.warpAffine(
image,
matrix[:2, :],
dsize = (image.shape[1], image.shape[0]),
flags = params.cvInterpolation(),
borderMode = params.cvBorderMode(),
borderValue = params.cval,
)
return output
def compute_resize_scale(image_shape, min_side=800, max_side=1333):
""" Compute an image scale such that the image size is constrained to min_side and max_side.
Args
min_side: The image's min side will be equal to min_side after resizing.
max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
Returns
A resizing scale.
"""
(rows, cols, _) = image_shape
smallest_side = min(rows, cols)
# rescale the image so the smallest side is min_side
scale = min_side / smallest_side
# check if the largest side is now greater than max_side, which can happen
# when images have a large aspect ratio
largest_side = max(rows, cols)
if largest_side * scale > max_side:
scale = max_side / largest_side
return scale
def resize_image(img, min_side=800, max_side=1333):
""" Resize an image such that the size is constrained to min_side and max_side.
Args
min_side: The image's min side will be equal to min_side after resizing.
max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side.
Returns
A resized image.
"""
# compute scale to resize the image
scale = compute_resize_scale(img.shape, min_side=min_side, max_side=max_side)
# resize the image with the computed scale
img = cv2.resize(img, None, fx=scale, fy=scale)
return img, scale
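# Illustrative pipeline sketch (not part of the upstream file): the usual inference-time
# preparation of an image, combining read_image_bgr, preprocess_image and resize_image.
# The path below is an assumed placeholder.
def _example_prepare_image(path='/path/to/image.jpg'):
    image = read_image_bgr(path)
    image = preprocess_image(image)      # caffe-style mean subtraction, float32
    image, scale = resize_image(image)   # smallest side 800, largest side capped at 1333
    # Predicted boxes must later be divided by `scale` to map back to the original image.
    return image, scale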
def _uniform(val_range):
""" Uniformly sample from the given range.
Args
val_range: A pair of lower and upper bound.
"""
return np.random.uniform(val_range[0], val_range[1])
def _check_range(val_range, min_val=None, max_val=None):
""" Check whether the range is a valid range.
Args
val_range: A pair of lower and upper bound.
min_val: Minimal value for the lower bound.
max_val: Maximal value for the upper bound.
"""
if val_range[0] > val_range[1]:
raise ValueError('interval lower bound > upper bound')
if min_val is not None and val_range[0] < min_val:
raise ValueError('invalid interval lower bound')
if max_val is not None and val_range[1] > max_val:
raise ValueError('invalid interval upper bound')
def _clip(image):
"""
Clip and convert an image to np.uint8.
Args
image: Image to clip.
"""
return np.clip(image, 0, 255).astype(np.uint8)
class VisualEffect:
""" Struct holding parameters and applying image color transformation.
Args
contrast_factor: A factor for adjusting contrast. Should be between 0 and 3.
brightness_delta: Brightness offset between -1 and 1 added to the pixel values.
hue_delta: Hue offset between -1 and 1 added to the hue channel.
saturation_factor: A factor multiplying the saturation values of each pixel.
"""
def __init__(
self,
contrast_factor,
brightness_delta,
hue_delta,
saturation_factor,
):
self.contrast_factor = contrast_factor
self.brightness_delta = brightness_delta
self.hue_delta = hue_delta
self.saturation_factor = saturation_factor
def __call__(self, image):
""" Apply a visual effect on the image.
Args
image: Image to adjust
"""
if self.contrast_factor:
image = adjust_contrast(image, self.contrast_factor)
if self.brightness_delta:
image = adjust_brightness(image, self.brightness_delta)
if self.hue_delta or self.saturation_factor:
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
if self.hue_delta:
image = adjust_hue(image, self.hue_delta)
if self.saturation_factor:
image = adjust_saturation(image, self.saturation_factor)
image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
return image
def random_visual_effect_generator(
contrast_range=(0.9, 1.1),
brightness_range=(-.1, .1),
hue_range=(-0.05, 0.05),
saturation_range=(0.95, 1.05)
):
""" Generate visual effect parameters uniformly sampled from the given intervals.
Args
contrast_range: A factor interval for adjusting contrast. Should be between 0 and 3.
brightness_range: An interval between -1 and 1 for the amount added to the pixel values.
hue_range: An interval between -1 and 1 for the amount added to the hue channel.
The values are rotated if they exceed 180.
saturation_range: An interval for the factor multiplying the saturation values of each
pixel.
"""
_check_range(contrast_range, 0)
_check_range(brightness_range, -1, 1)
_check_range(hue_range, -1, 1)
_check_range(saturation_range, 0)
def _generate():
while True:
yield VisualEffect(
contrast_factor=_uniform(contrast_range),
brightness_delta=_uniform(brightness_range),
hue_delta=_uniform(hue_range),
saturation_factor=_uniform(saturation_range),
)
return _generate()
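# Usage sketch (illustrative): drawing one randomly sampled effect and applying it to a BGR
# image. The ranges below are the defaults; `image` is assumed to be a uint8 BGR array.
def _example_visual_effect(image):
    effect_generator = random_visual_effect_generator(
        contrast_range=(0.9, 1.1),
        brightness_range=(-0.1, 0.1),
        hue_range=(-0.05, 0.05),
        saturation_range=(0.95, 1.05),
    )
    effect = next(effect_generator)
    return effect(image)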
def adjust_contrast(image, factor):
""" Adjust contrast of an image.
Args
image: Image to adjust.
factor: A factor for adjusting contrast.
"""
mean = image.mean(axis=0).mean(axis=0)
return _clip((image - mean) * factor + mean)
def adjust_brightness(image, delta):
""" Adjust brightness of an image
Args
image: Image to adjust.
delta: Brightness offset between -1 and 1 added to the pixel values.
"""
return _clip(image + delta * 255)
def adjust_hue(image, delta):
""" Adjust hue of an image.
Args
image: Image to adjust.
delta: Hue offset between -1 and 1 added to the hue channel.
The values are rotated if they exceed 180.
"""
image[..., 0] = np.mod(image[..., 0] + delta * 180, 180)
return image
def adjust_saturation(image, factor):
""" Adjust saturation of an image.
Args
image: Image to adjust.
factor: A factor multiplying the saturation values of each pixel.
"""
image[..., 1] = np.clip(image[..., 1] * factor, 0 , 255)
return image
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
def freeze(model):
""" Set all layers in a model to non-trainable.
The weights for these layers will not be updated during training.
This function modifies the given model in-place,
but it also returns the modified model to allow easy chaining with other functions.
"""
for layer in model.layers:
layer.trainable = False
return model
"""
Copyright 2017-2019 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import print_function
import tensorflow as tf
import sys
MINIMUM_TF_VERSION = 2, 3, 0
BLACKLISTED_TF_VERSIONS = []
def tf_version():
""" Get the Tensorflow version.
Returns
tuple of (major, minor, patch).
"""
return tuple(map(int, tf.version.VERSION.split('-')[0].split('.')))
def tf_version_ok(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
""" Check if the current Tensorflow version is higher than the minimum version.
"""
return tf_version() >= minimum_tf_version and tf_version() not in blacklisted
def assert_tf_version(minimum_tf_version=MINIMUM_TF_VERSION, blacklisted=BLACKLISTED_TF_VERSIONS):
""" Assert that the Tensorflow version is up to date.
"""
detected = tf.version.VERSION
required = '.'.join(map(str, minimum_tf_version))
assert(tf_version_ok(minimum_tf_version, blacklisted)), 'You are using tensorflow version {}. The minimum required version is {} (blacklisted: {}).'.format(detected, required, blacklisted)
def check_tf_version():
""" Check that the Tensorflow version is up to date. If it isn't, print an error message and exit the script.
"""
try:
assert_tf_version()
except AssertionError as e:
print(e, file=sys.stderr)
sys.exit(1)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
DEFAULT_PRNG = np.random
def colvec(*args):
""" Create a numpy array representing a column vector. """
return np.array([args]).T
def transform_aabb(transform, aabb):
""" Apply a transformation to an axis aligned bounding box.
The result is a new AABB in the same coordinate system as the original AABB.
The new AABB contains all corner points of the original AABB after applying the given transformation.
Args
transform: The transformation to apply.
aabb: The axis aligned bounding box as a tuple (x1, y1, x2, y2), with (x1, y1) the minimum corner and (x2, y2) the maximum corner.
Returns
The new AABB as tuple (x1, y1, x2, y2)
"""
x1, y1, x2, y2 = aabb
# Transform all 4 corners of the AABB.
points = transform.dot([
[x1, x2, x1, x2],
[y1, y2, y2, y1],
[1, 1, 1, 1 ],
])
# Extract the min and max corners again.
min_corner = points.min(axis=1)
max_corner = points.max(axis=1)
return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]]
def _random_vector(min, max, prng=DEFAULT_PRNG):
""" Construct a random vector between min and max.
Args
min: the minimum value for each component
max: the maximum value for each component
"""
min = np.array(min)
max = np.array(max)
assert min.shape == max.shape
assert len(min.shape) == 1
return prng.uniform(min, max)
def rotation(angle):
""" Construct a homogeneous 2D rotation matrix.
Args
angle: the angle in radians
Returns
the rotation matrix as 3 by 3 numpy array
"""
return np.array([
[np.cos(angle), -np.sin(angle), 0],
[np.sin(angle), np.cos(angle), 0],
[0, 0, 1]
])
def random_rotation(min, max, prng=DEFAULT_PRNG):
""" Construct a random rotation between -max and max.
Args
min: a scalar for the minimum absolute angle in radians
max: a scalar for the maximum absolute angle in radians
prng: the pseudo-random number generator to use.
Returns
a homogeneous 3 by 3 rotation matrix
"""
return rotation(prng.uniform(min, max))
def translation(translation):
""" Construct a homogeneous 2D translation matrix.
Args
translation: the translation 2D vector
Returns
the translation matrix as 3 by 3 numpy array
"""
return np.array([
[1, 0, translation[0]],
[0, 1, translation[1]],
[0, 0, 1]
])
def random_translation(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D translation between min and max.
Args
min: a 2D vector with the minimum translation for each dimension
max: a 2D vector with the maximum translation for each dimension
prng: the pseudo-random number generator to use.
Returns
a homogeneous 3 by 3 translation matrix
"""
return translation(_random_vector(min, max, prng))
def shear(angle):
""" Construct a homogeneous 2D shear matrix.
Args
angle: the shear angle in radians
Returns
the shear matrix as 3 by 3 numpy array
"""
return np.array([
[1, -np.sin(angle), 0],
[0, np.cos(angle), 0],
[0, 0, 1]
])
def random_shear(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D shear matrix with shear angle between -max and max.
Args
min: the minimum shear angle in radians.
max: the maximum shear angle in radians.
prng: the pseudo-random number generator to use.
Returns
a homogeneous 3 by 3 shear matrix
"""
return shear(prng.uniform(min, max))
def scaling(factor):
""" Construct a homogeneous 2D scaling matrix.
Args
factor: a 2D vector for X and Y scaling
Returns
the zoom matrix as 3 by 3 numpy array
"""
return np.array([
[factor[0], 0, 0],
[0, factor[1], 0],
[0, 0, 1]
])
def random_scaling(min, max, prng=DEFAULT_PRNG):
""" Construct a random 2D scale matrix between -max and max.
Args
min: a 2D vector containing the minimum scaling factor for X and Y.
min: a 2D vector containing The maximum scaling factor for X and Y.
prng: the pseudo-random number generator to use.
Returns
a homogeneous 3 by 3 scaling matrix
"""
return scaling(_random_vector(min, max, prng))
def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG):
""" Construct a transformation randomly containing X/Y flips (or not).
Args
flip_x_chance: The chance that the result will contain a flip along the X axis.
flip_y_chance: The chance that the result will contain a flip along the Y axis.
prng: The pseudo-random number generator to use.
Returns
a homogeneous 3 by 3 transformation matrix
"""
flip_x = prng.uniform(0, 1) < flip_x_chance
flip_y = prng.uniform(0, 1) < flip_y_chance
# 1 - 2 * bool gives 1 for False and -1 for True.
return scaling((1 - 2 * flip_x, 1 - 2 * flip_y))
def change_transform_origin(transform, center):
""" Create a new transform representing the same transformation,
only with the origin of the linear part changed.
Args
transform: the transformation matrix
center: the new origin of the transformation
Returns
translate(center) * transform * translate(-center)
"""
center = np.array(center)
return np.linalg.multi_dot([translation(center), transform, translation(-center)])
def random_transform(
min_rotation=0,
max_rotation=0,
min_translation=(0, 0),
max_translation=(0, 0),
min_shear=0,
max_shear=0,
min_scaling=(1, 1),
max_scaling=(1, 1),
flip_x_chance=0,
flip_y_chance=0,
prng=DEFAULT_PRNG
):
""" Create a random transformation.
The transformation consists of the following operations in this order (from left to right):
* rotation
* translation
* shear
* scaling
* flip x (if applied)
* flip y (if applied)
Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation
as a factor of the image size. So an X translation of 0.1 would translate the image by 10% of its width.
Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
the translation directly as pixel distances instead.
Args
min_rotation: The minimum rotation in radians for the transform as scalar.
max_rotation: The maximum rotation in radians for the transform as scalar.
min_translation: The minimum translation for the transform as 2D column vector.
max_translation: The maximum translation for the transform as 2D column vector.
min_shear: The minimum shear angle for the transform in radians.
max_shear: The maximum shear angle for the transform in radians.
min_scaling: The minimum scaling for the transform as 2D column vector.
max_scaling: The maximum scaling for the transform as 2D column vector.
flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction.
flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction.
prng: The pseudo-random number generator to use.
"""
return np.linalg.multi_dot([
random_rotation(min_rotation, max_rotation, prng),
random_translation(min_translation, max_translation, prng),
random_shear(min_shear, max_shear, prng),
random_scaling(min_scaling, max_scaling, prng),
random_flip(flip_x_chance, flip_y_chance, prng)
])
def random_transform_generator(prng=None, **kwargs):
""" Create a random transform generator.
Uses a dedicated, newly created, properly seeded PRNG by default instead of the global DEFAULT_PRNG.
The transformation consists of the following operations in this order (from left to right):
* rotation
* translation
* shear
* scaling
* flip x (if applied)
* flip y (if applied)
Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation
as a factor of the image size. So an X translation of 0.1 would translate the image by 10% of its width.
Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret
the translation directly as pixel distances instead.
Args
min_rotation: The minimum rotation in radians for the transform as scalar.
max_rotation: The maximum rotation in radians for the transform as scalar.
min_translation: The minimum translation for the transform as 2D column vector.
max_translation: The maximum translation for the transform as 2D column vector.
min_shear: The minimum shear angle for the transform in radians.
max_shear: The maximum shear angle for the transform in radians.
min_scaling: The minimum scaling for the transform as 2D column vector.
max_scaling: The maximum scaling for the transform as 2D column vector.
flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction.
flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction.
prng: The pseudo-random number generator to use.
"""
if prng is None:
# RandomState automatically seeds using the best available method.
prng = np.random.RandomState()
while True:
yield random_transform(prng=prng, **kwargs)
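# Usage sketch (illustrative): sampling one random transform and applying it to an axis
# aligned bounding box. The augmentation ranges and the box are assumed example values; in
# the data generators the same matrix is also passed to the image warping utilities.
def _example_random_transform_on_aabb():
    transform_generator = random_transform_generator(
        min_rotation=-0.1, max_rotation=0.1,
        min_scaling=(0.9, 0.9), max_scaling=(1.1, 1.1),
        flip_x_chance=0.5,
    )
    transform = next(transform_generator)
    # The resulting box still encloses all four transformed corners of the original box.
    return transform_aabb(transform, (10, 20, 110, 220))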
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import cv2
import numpy as np
from .colors import label_color
def draw_box(image, box, color, thickness=2):
""" Draws a box on an image with a given color.
# Arguments
image : The image to draw on.
box : A list of 4 elements (x1, y1, x2, y2).
color : The color of the box.
thickness : The thickness of the lines to draw a box with.
"""
b = np.array(box).astype(int)
cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), color, thickness, cv2.LINE_AA)
def draw_caption(image, box, caption):
""" Draws a caption above the box in an image.
# Arguments
image : The image to draw on.
box : A list of 4 elements (x1, y1, x2, y2).
caption : String containing the text to draw.
"""
b = np.array(box).astype(int)
cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
def draw_boxes(image, boxes, color, thickness=2):
""" Draws boxes on an image with a given color.
# Arguments
image : The image to draw on.
boxes : A [N, 4] matrix (x1, y1, x2, y2).
color : The color of the boxes.
thickness : The thickness of the lines to draw boxes with.
"""
for b in boxes:
draw_box(image, b, color, thickness=thickness)
def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5):
""" Draws detections in an image.
# Arguments
image : The image to draw on.
boxes : A [N, 4] matrix (x1, y1, x2, y2).
scores : A list of N classification scores.
labels : A list of N labels.
color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
label_to_name : (optional) Functor for mapping a label to a name.
score_threshold : Threshold used for determining what detections to draw.
"""
selection = np.where(scores > score_threshold)[0]
for i in selection:
c = color if color is not None else label_color(labels[i])
draw_box(image, boxes[i, :], color=c)
# draw labels
caption = (label_to_name(labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i])
draw_caption(image, boxes[i, :], caption)
def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None):
""" Draws annotations in an image.
# Arguments
image : The image to draw on.
annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]).
color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used.
label_to_name : (optional) Functor for mapping a label to a name.
"""
if isinstance(annotations, np.ndarray):
annotations = {'bboxes': annotations[:, :4], 'labels': annotations[:, 4]}
assert('bboxes' in annotations)
assert('labels' in annotations)
assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0])
for i in range(annotations['bboxes'].shape[0]):
label = annotations['labels'][i]
c = color if color is not None else label_color(label)
caption = '{}'.format(label_to_name(label) if label_to_name else label)
draw_caption(image, annotations['bboxes'][i], caption)
draw_box(image, annotations['bboxes'][i], color=c)
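# Usage sketch (illustrative; `image`, `boxes`, `scores` and `labels` are assumed to be the
# outputs of an inference model for a single image, with boxes already rescaled to the
# original image size). Detections below the score threshold are skipped by draw_detections.
def _example_draw(image, boxes, scores, labels, label_to_name=None):
    draw_detections(image, boxes, scores, labels, label_to_name=label_to_name, score_threshold=0.5)
    cv2.imwrite('detections.png', image)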
cython
keras-resnet==0.2.0
git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
h5py
keras
matplotlib
numpy>=1.14
opencv-python>=3.3.0
pillow
progressbar2
tensorflow>=2.3.0
# ignore:
# E201 whitespace after '['
# E202 whitespace before ']'
# E203 whitespace before ':'
# E221 multiple spaces before operator
# E241 multiple spaces after ','
# E251 unexpected spaces around keyword / parameter equals
# E501 line too long (85 > 79 characters)
# W504 line break after binary operator
[tool:pytest]
flake8-max-line-length = 100
flake8-ignore = E201 E202 E203 E221 E241 E251 E402 E501 W504
import setuptools
from setuptools.extension import Extension
from distutils.command.build_ext import build_ext as DistUtilsBuildExt
class BuildExtension(setuptools.Command):
description = DistUtilsBuildExt.description
user_options = DistUtilsBuildExt.user_options
boolean_options = DistUtilsBuildExt.boolean_options
help_options = DistUtilsBuildExt.help_options
def __init__(self, *args, **kwargs):
from setuptools.command.build_ext import build_ext as SetupToolsBuildExt
# Bypass __setattr__ to avoid infinite recursion.
self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs)
def __getattr__(self, name):
return getattr(self._command, name)
def __setattr__(self, name, value):
setattr(self._command, name, value)
def initialize_options(self, *args, **kwargs):
return self._command.initialize_options(*args, **kwargs)
def finalize_options(self, *args, **kwargs):
ret = self._command.finalize_options(*args, **kwargs)
import numpy
self.include_dirs.append(numpy.get_include())
return ret
def run(self, *args, **kwargs):
return self._command.run(*args, **kwargs)
extensions = [
Extension(
'keras_retinanet.utils.compute_overlap',
['keras_retinanet/utils/compute_overlap.pyx']
),
]
setuptools.setup(
name = 'keras-retinanet',
version = '1.0.0',
description = 'Keras implementation of RetinaNet object detection.',
url = 'https://github.com/fizyr/keras-retinanet',
author = 'Hans Gaiser',
author_email = 'h.gaiser@fizyr.com',
maintainer = 'Hans Gaiser',
maintainer_email = 'h.gaiser@fizyr.com',
cmdclass = {'build_ext': BuildExtension},
packages = setuptools.find_packages(),
install_requires = ['keras-resnet==0.2.0', 'six', 'numpy', 'cython', 'Pillow', 'opencv-python', 'progressbar2'],
entry_points = {
'console_scripts': [
'retinanet-train=keras_retinanet.bin.train:main',
'retinanet-evaluate=keras_retinanet.bin.evaluate:main',
'retinanet-debug=keras_retinanet.bin.debug:main',
'retinanet-convert-model=keras_retinanet.bin.convert_model:main',
],
},
ext_modules = extensions,
setup_requires = ["cython>=0.28", "numpy>=1.14.0"]
)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from tensorflow import keras
import keras_retinanet.backend
def test_bbox_transform_inv():
boxes = np.array([[
[100, 100, 200, 200],
[100, 100, 300, 300],
[100, 100, 200, 300],
[100, 100, 300, 200],
[80, 120, 200, 200],
[80, 120, 300, 300],
[80, 120, 200, 300],
[80, 120, 300, 200],
]])
boxes = keras.backend.variable(boxes)
deltas = np.array([[
[0 , 0 , 0 , 0 ],
[0 , 0.1, 0 , 0 ],
[-0.3, 0 , 0 , 0 ],
[0.2 , 0.2, 0 , 0 ],
[0 , 0 , 0.1 , 0 ],
[0 , 0 , 0 , -0.3],
[0 , 0 , 0.2 , 0.2 ],
[0.1 , 0.2, -0.3, 0.4 ],
]])
deltas = keras.backend.variable(deltas)
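# The expected boxes below assume bbox_transform_inv applies each delta scaled by
# the default std of 0.2 and the anchor's width/height (the defaults are an
# assumption; the numbers themselves follow from the inputs above), e.g. for the
# box [100, 100, 300, 300] (width = height = 200) and delta [0, 0.1, 0, 0]:
#   y1' = 100 + 0.1 * 0.2 * 200 = 104  ->  [100, 104, 300, 300]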
expected = np.array([[
[100 , 100 , 200 , 200 ],
[100 , 104 , 300 , 300 ],
[ 94 , 100 , 200 , 300 ],
[108 , 104 , 300 , 200 ],
[ 80 , 120 , 202.4 , 200 ],
[ 80 , 120 , 300 , 289.2],
[ 80 , 120 , 204.8 , 307.2],
[ 84.4, 123.2, 286.8 , 206.4]
]])
result = keras_retinanet.backend.bbox_transform_inv(boxes, deltas)
result = keras.backend.eval(result)
np.testing.assert_array_almost_equal(result, expected, decimal=2)
def test_shift():
shape = (2, 3)
stride = 8
anchors = np.array([
[-8, -8, 8, 8],
[-16, -16, 16, 16],
[-12, -12, 12, 12],
[-12, -16, 12, 16],
[-16, -12, 16, 12]
], dtype=keras.backend.floatx())
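# shift() is expected to place the base anchors at every feature-map cell centre:
# with stride 8 the centres sit at stride / 2 + stride * i, i.e. x in (4, 12, 20)
# for the 3 columns and y in (4, 12) for the 2 rows, and each of the 5 base
# anchors is translated to every centre in row-major order.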
expected = [
# anchors for (0, 0)
[4 - 8, 4 - 8, 4 + 8, 4 + 8],
[4 - 16, 4 - 16, 4 + 16, 4 + 16],
[4 - 12, 4 - 12, 4 + 12, 4 + 12],
[4 - 12, 4 - 16, 4 + 12, 4 + 16],
[4 - 16, 4 - 12, 4 + 16, 4 + 12],
# anchors for (0, 1)
[12 - 8, 4 - 8, 12 + 8, 4 + 8],
[12 - 16, 4 - 16, 12 + 16, 4 + 16],
[12 - 12, 4 - 12, 12 + 12, 4 + 12],
[12 - 12, 4 - 16, 12 + 12, 4 + 16],
[12 - 16, 4 - 12, 12 + 16, 4 + 12],
# anchors for (0, 2)
[20 - 8, 4 - 8, 20 + 8, 4 + 8],
[20 - 16, 4 - 16, 20 + 16, 4 + 16],
[20 - 12, 4 - 12, 20 + 12, 4 + 12],
[20 - 12, 4 - 16, 20 + 12, 4 + 16],
[20 - 16, 4 - 12, 20 + 16, 4 + 12],
# anchors for (1, 0)
[4 - 8, 12 - 8, 4 + 8, 12 + 8],
[4 - 16, 12 - 16, 4 + 16, 12 + 16],
[4 - 12, 12 - 12, 4 + 12, 12 + 12],
[4 - 12, 12 - 16, 4 + 12, 12 + 16],
[4 - 16, 12 - 12, 4 + 16, 12 + 12],
# anchors for (1, 1)
[12 - 8, 12 - 8, 12 + 8, 12 + 8],
[12 - 16, 12 - 16, 12 + 16, 12 + 16],
[12 - 12, 12 - 12, 12 + 12, 12 + 12],
[12 - 12, 12 - 16, 12 + 12, 12 + 16],
[12 - 16, 12 - 12, 12 + 16, 12 + 12],
# anchors for (1, 2)
[20 - 8, 12 - 8, 20 + 8, 12 + 8],
[20 - 16, 12 - 16, 20 + 16, 12 + 16],
[20 - 12, 12 - 12, 20 + 12, 12 + 12],
[20 - 12, 12 - 16, 20 + 12, 12 + 16],
[20 - 16, 12 - 12, 20 + 16, 12 + 12],
]
result = keras_retinanet.backend.shift(shape, stride, anchors)
result = keras.backend.eval(result)
np.testing.assert_array_equal(result, expected)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import keras_retinanet.backend
import keras_retinanet.bin.train
from tensorflow import keras
import warnings
import pytest
@pytest.fixture(autouse=True)
def clear_session():
# run before test (do nothing)
yield
# run after test, clear keras session
keras.backend.clear_session()
def test_coco():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'coco',
'tests/test-data/coco',
])
def test_pascal():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'pascal',
'tests/test-data/pascal',
])
def test_csv():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'csv',
'tests/test-data/csv/annotations.csv',
'tests/test-data/csv/classes.csv',
])
def test_vgg():
# ignore warnings in this test
warnings.simplefilter('ignore')
# run training / evaluation
keras_retinanet.bin.train.main([
'--backbone=vgg16',
'--epochs=1',
'--steps=1',
'--no-weights',
'--no-snapshots',
'--freeze-backbone',
'coco',
'tests/test-data/coco',
])
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_retinanet.backend
import keras_retinanet.layers
import numpy as np
class TestFilterDetections(object):
def test_simple(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create simple input
boxes = np.array([[
[0, 0, 10, 10],
[0, 0, 10, 10], # this will be suppressed
]], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([[
[0, 0.9], # this will be suppressed
[0, 1],
]], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
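# FilterDetections performs (class-specific) non-maximum suppression on the
# scores and pads its outputs to a fixed number of detections, 300 by default,
# using -1 as the padding value. The two boxes here overlap completely, so only
# the higher-scoring one (score 1, class 1) should survive; the expected arrays
# below encode exactly that padding behaviour.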
# compute output
actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([boxes, classification])
actual_boxes = keras.backend.eval(actual_boxes)
actual_scores = keras.backend.eval(actual_scores)
actual_labels = keras.backend.eval(actual_labels)
# define expected output
expected_boxes = -1 * np.ones((1, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
def test_simple_with_other(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create simple input
boxes = np.array([[
[0, 0, 10, 10],
[0, 0, 10, 10], # this will be suppressed
]], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([[
[0, 0.9], # this will be suppressed
[0, 1],
]], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
other = []
other.append(np.array([[
[0, 1234], # this will be suppressed
[0, 5678],
]], dtype=keras.backend.floatx()))
other.append(np.array([[
5678, # this will be suppressed
1234,
]], dtype=keras.backend.floatx()))
other = [keras.backend.constant(o) for o in other]
# compute output
actual = filter_detections_layer.call([boxes, classification] + other)
actual_boxes = keras.backend.eval(actual[0])
actual_scores = keras.backend.eval(actual[1])
actual_labels = keras.backend.eval(actual[2])
actual_other = [keras.backend.eval(a) for a in actual[3:]]
# define expected output
expected_boxes = -1 * np.ones((1, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
expected_other = []
expected_other.append(-1 * np.ones((1, 300, 2), dtype=keras.backend.floatx()))
expected_other[-1][0, 0, :] = [0, 5678]
expected_other.append(-1 * np.ones((1, 300), dtype=keras.backend.floatx()))
expected_other[-1][0, 0] = 1234
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
for a, e in zip(actual_other, expected_other):
np.testing.assert_array_equal(a, e)
def test_mini_batch(self):
# create simple FilterDetections layer
filter_detections_layer = keras_retinanet.layers.FilterDetections()
# create input with batch_size=2
boxes = np.array([
[
[0, 0, 10, 10], # this will be suppressed
[0, 0, 10, 10],
],
[
[100, 100, 150, 150],
[100, 100, 150, 150], # this will be suppressed
],
], dtype=keras.backend.floatx())
boxes = keras.backend.constant(boxes)
classification = np.array([
[
[0, 0.9], # this will be suppressed
[0, 1],
],
[
[1, 0],
[0.9, 0], # this will be suppressed
],
], dtype=keras.backend.floatx())
classification = keras.backend.constant(classification)
# compute output
actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([boxes, classification])
actual_boxes = keras.backend.eval(actual_boxes)
actual_scores = keras.backend.eval(actual_scores)
actual_labels = keras.backend.eval(actual_labels)
# define expected output
expected_boxes = -1 * np.ones((2, 300, 4), dtype=keras.backend.floatx())
expected_boxes[0, 0, :] = [0, 0, 10, 10]
expected_boxes[1, 0, :] = [100, 100, 150, 150]
expected_scores = -1 * np.ones((2, 300), dtype=keras.backend.floatx())
expected_scores[0, 0] = 1
expected_scores[1, 0] = 1
expected_labels = -1 * np.ones((2, 300), dtype=keras.backend.floatx())
expected_labels[0, 0] = 1
expected_labels[1, 0] = 0
# assert actual and expected are equal
np.testing.assert_array_equal(actual_boxes, expected_boxes)
np.testing.assert_array_equal(actual_scores, expected_scores)
np.testing.assert_array_equal(actual_labels, expected_labels)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from tensorflow import keras
import keras_retinanet.backend
import keras_retinanet.layers
import numpy as np
class TestAnchors(object):
def test_simple(self):
# create simple Anchors layer
anchors_layer = keras_retinanet.layers.Anchors(
size=32,
stride=8,
ratios=np.array([1], dtype=keras.backend.floatx()),
scales=np.array([1], dtype=keras.backend.floatx()),
)
# create fake features input (only shape is used anyway)
features = np.zeros((1, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)
# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = keras.backend.eval(anchors)
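# With size=32, stride=8 and a single ratio/scale there is one 32x32 anchor per
# feature-map cell. The 2x2 feature map gives cell centres at (4, 4), (12, 4),
# (4, 12) and (12, 12) (stride / 2 offsets), so each expected box below is
# simply (cx - 16, cy - 16, cx + 16, cy + 16).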
# expected anchor values
expected = np.array([[
[-12, -12, 20, 20],
[-4 , -12, 28, 20],
[-12, -4 , 20, 28],
[-4 , -4 , 28, 28],
]], dtype=keras.backend.floatx())
# test anchor values
np.testing.assert_array_equal(anchors, expected)
def test_mini_batch(self):
# create simple Anchors layer
anchors_layer = keras_retinanet.layers.Anchors(
size=32,
stride=8,
ratios=np.array([1], dtype=keras.backend.floatx()),
scales=np.array([1], dtype=keras.backend.floatx()),
)
# create fake features input with batch_size=2
features = np.zeros((2, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)
# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = keras.backend.eval(anchors)
# expected anchor values
expected = np.array([[
[-12, -12, 20, 20],
[-4 , -12, 28, 20],
[-12, -4 , 20, 28],
[-4 , -4 , 28, 28],
]], dtype=keras.backend.floatx())
expected = np.tile(expected, (2, 1, 1))
# test anchor values
np.testing.assert_array_equal(anchors, expected)
class TestUpsampleLike(object):
def test_simple(self):
# create simple UpsampleLike layer
upsample_like_layer = keras_retinanet.layers.UpsampleLike()
# create input source
source = np.zeros((1, 2, 2, 1), dtype=keras.backend.floatx())
source = keras.backend.variable(source)
target = np.zeros((1, 5, 5, 1), dtype=keras.backend.floatx())
expected = target
target = keras.backend.variable(target)
# compute output
actual = upsample_like_layer.call([source, target])
actual = keras.backend.eval(actual)
np.testing.assert_array_equal(actual, expected)
def test_mini_batch(self):
# create simple UpsampleLike layer
upsample_like_layer = keras_retinanet.layers.UpsampleLike()
# create input source
source = np.zeros((2, 2, 2, 1), dtype=keras.backend.floatx())
source = keras.backend.variable(source)
target = np.zeros((2, 5, 5, 1), dtype=keras.backend.floatx())
expected = target
target = keras.backend.variable(target)
# compute output
actual = upsample_like_layer.call([source, target])
actual = keras.backend.eval(actual)
np.testing.assert_array_equal(actual, expected)
class TestRegressBoxes(object):
def test_simple(self):
mean = [0, 0, 0, 0]
std = [0.2, 0.2, 0.2, 0.2]
# create simple RegressBoxes layer
regress_boxes_layer = keras_retinanet.layers.RegressBoxes(mean=mean, std=std)
# create input
anchors = np.array([[
[0 , 0 , 10 , 10 ],
[50, 50, 100, 100],
[20, 20, 40 , 40 ],
]], dtype=keras.backend.floatx())
anchors = keras.backend.variable(anchors)
regression = np.array([[
[0 , 0 , 0 , 0 ],
[0.1, 0.1, 0 , 0 ],
[0 , 0 , 0.1, 0.1],
]], dtype=keras.backend.floatx())
regression = keras.backend.variable(regression)
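# RegressBoxes is expected to scale each regression value by std (0.2 here) and
# by the anchor's width/height before adding it to the corresponding coordinate,
# e.g. for the anchor [50, 50, 100, 100] and regression [0.1, 0.1, 0, 0]:
#   x1' = 50 + 0.1 * 0.2 * 50 = 51, y1' = 51  ->  [51, 51, 100, 100]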
# compute output
actual = regress_boxes_layer.call([anchors, regression])
actual = keras.backend.eval(actual)
# compute expected output
expected = np.array([[
[0 , 0 , 10 , 10 ],
[51, 51, 100 , 100 ],
[20, 20, 40.4, 40.4],
]], dtype=keras.backend.floatx())
np.testing.assert_array_almost_equal(actual, expected, decimal=2)
def test_mini_batch(self):
mean = [0, 0, 0, 0]
std = [0.2, 0.2, 0.2, 0.2]
# create simple RegressBoxes layer
regress_boxes_layer = keras_retinanet.layers.RegressBoxes(mean=mean, std=std)
# create input
anchors = np.array([
[
[0 , 0 , 10 , 10 ], # 1
[50, 50, 100, 100], # 2
[20, 20, 40 , 40 ], # 3
],
[
[20, 20, 40 , 40 ], # 3
[0 , 0 , 10 , 10 ], # 1
[50, 50, 100, 100], # 2
],
], dtype=keras.backend.floatx())
anchors = keras.backend.variable(anchors)
regression = np.array([
[
[0 , 0 , 0 , 0 ], # 1
[0.1, 0.1, 0 , 0 ], # 2
[0 , 0 , 0.1, 0.1], # 3
],
[
[0 , 0 , 0.1, 0.1], # 3
[0 , 0 , 0 , 0 ], # 1
[0.1, 0.1, 0 , 0 ], # 2
],
], dtype=keras.backend.floatx())
regression = keras.backend.variable(regression)
# compute output
actual = regress_boxes_layer.call([anchors, regression])
actual = keras.backend.eval(actual)
# compute expected output
expected = np.array([
[
[0 , 0 , 10 , 10 ], # 1
[51, 51, 100 , 100 ], # 2
[20, 20, 40.4, 40.4], # 3
],
[
[20, 20, 40.4, 40.4], # 3
[0 , 0 , 10 , 10 ], # 1
[51, 51, 100 , 100 ], # 2
],
], dtype=keras.backend.floatx())
np.testing.assert_array_almost_equal(actual, expected, decimal=2)
"""
Copyright 2018 vidosits (https://github.com/vidosits/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import warnings
import pytest
import numpy as np
from tensorflow import keras
from keras_retinanet import losses
from keras_retinanet.models.densenet import DenseNetBackbone
parameters = ['densenet121']
@pytest.mark.parametrize("backbone", parameters)
def test_backbone(backbone):
# ignore warnings in this test
warnings.simplefilter('ignore')
num_classes = 10
inputs = np.zeros((1, 200, 400, 3), dtype=np.float32)
targets = [np.zeros((1, 14814, 5), dtype=np.float32), np.zeros((1, 14814, num_classes + 1))]
inp = keras.layers.Input(inputs[0].shape)
densenet_backbone = DenseNetBackbone(backbone)
model = densenet_backbone.retinanet(num_classes=num_classes, inputs=inp)
model.summary()
# compile model
model.compile(
loss={
'regression': losses.smooth_l1(),
'classification': losses.focal()
},
optimizer=keras.optimizers.Adam(learning_rate=1e-5, clipnorm=0.001))
model.fit(inputs, targets, batch_size=1)
"""
Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import warnings
import pytest
import numpy as np
from tensorflow import keras
from keras_retinanet import losses
from keras_retinanet.models.mobilenet import MobileNetBackbone
alphas = ['1.0']
parameters = []
for backbone in MobileNetBackbone.allowed_backbones:
for alpha in alphas:
parameters.append((backbone, alpha))
@pytest.mark.parametrize("backbone, alpha", parameters)
def test_backbone(backbone, alpha):
# ignore warnings in this test
warnings.simplefilter('ignore')
num_classes = 10
inputs = np.zeros((1, 1024, 363, 3), dtype=np.float32)
targets = [np.zeros((1, 68760, 5), dtype=np.float32), np.zeros((1, 68760, num_classes + 1))]
inp = keras.layers.Input(inputs[0].shape)
mobilenet_backbone = MobileNetBackbone(backbone='{}_{}'.format(backbone, alpha))
training_model = mobilenet_backbone.retinanet(num_classes=num_classes, inputs=inp)
training_model.summary()
# compile model
training_model.compile(
loss={
'regression': losses.smooth_l1(),
'classification': losses.focal()
},
optimizer=keras.optimizers.Adam(learning_rate=1e-5, clipnorm=0.001))
training_model.fit(inputs, targets, batch_size=1)
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import csv
import pytest
try:
from io import StringIO
except ImportError:
from StringIO import StringIO
from keras_retinanet.preprocessing import csv_generator
def csv_str(string):
if str == bytes:
string = string.decode('utf-8')
return csv.reader(StringIO(string))
def annotation(x1, y1, x2, y2, class_name):
return {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2, 'class': class_name}
def test_read_classes():
assert csv_generator._read_classes(csv_str('')) == {}
assert csv_generator._read_classes(csv_str('a,1')) == {'a': 1}
assert csv_generator._read_classes(csv_str('a,1\nb,2')) == {'a': 1, 'b': 2}
def test_read_classes_wrong_format():
with pytest.raises(ValueError):
try:
csv_generator._read_classes(csv_str('a,b,c'))
except ValueError as e:
assert str(e).startswith('line 1: format should be')
raise
with pytest.raises(ValueError):
try:
csv_generator._read_classes(csv_str('a,1\nb,c,d'))
except ValueError as e:
assert str(e).startswith('line 2: format should be')
raise
def test_read_classes_malformed_class_id():
with pytest.raises(ValueError):
try:
csv_generator._read_classes(csv_str('a,b'))
except ValueError as e:
assert str(e).startswith("line 1: malformed class ID:")
raise
with pytest.raises(ValueError):
try:
csv_generator._read_classes(csv_str('a,1\nb,c'))
except ValueError as e:
assert str(e).startswith('line 2: malformed class ID:')
raise
def test_read_classes_duplicate_name():
with pytest.raises(ValueError):
try:
csv_generator._read_classes(csv_str('a,1\nb,2\na,3'))
except ValueError as e:
assert str(e).startswith('line 3: duplicate class name')
raise
def test_read_annotations():
classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10}
annotations = csv_generator._read_annotations(csv_str(
'a.png,0,1,2,3,a' '\n'
'b.png,4,5,6,7,b' '\n'
'c.png,8,9,10,11,c' '\n'
'd.png,12,13,14,15,d' '\n'
), classes)
assert annotations == {
'a.png': [annotation( 0, 1, 2, 3, 'a')],
'b.png': [annotation( 4, 5, 6, 7, 'b')],
'c.png': [annotation( 8, 9, 10, 11, 'c')],
'd.png': [annotation(12, 13, 14, 15, 'd')],
}
def test_read_annotations_multiple():
classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10}
annotations = csv_generator._read_annotations(csv_str(
'a.png,0,1,2,3,a' '\n'
'b.png,4,5,6,7,b' '\n'
'a.png,8,9,10,11,c' '\n'
), classes)
assert annotations == {
'a.png': [
annotation(0, 1, 2, 3, 'a'),
annotation(8, 9, 10, 11, 'c'),
],
'b.png': [annotation(4, 5, 6, 7, 'b')],
}
def test_read_annotations_wrong_format():
classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10}
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,1,2,3,a'), classes)
except ValueError as e:
assert str(e).startswith("line 1: format should be")
raise
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str(
'a.png,0,1,2,3,a' '\n'
'a.png,1,2,3,a' '\n'
), classes)
except ValueError as e:
assert str(e).startswith("line 2: format should be")
raise
def test_read_annotations_wrong_x1():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,a,0,1,2,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: malformed x1:")
raise
def test_read_annotations_wrong_y1():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,0,a,1,2,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: malformed y1:")
raise
def test_read_annotations_wrong_x2():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,0,1,a,2,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: malformed x2:")
raise
def test_read_annotations_wrong_y2():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,0,1,2,a,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: malformed y2:")
raise
def test_read_annotations_wrong_class():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,0,1,2,3,g'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: unknown class name:")
raise
def test_read_annotations_invalid_bb_x():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,1,2,1,3,g'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: x2 (1) must be higher than x1 (1)")
raise
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,9,2,5,3,g'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: x2 (5) must be higher than x1 (9)")
raise
def test_read_annotations_invalid_bb_y():
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,1,2,3,2,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: y2 (2) must be higher than y1 (2)")
raise
with pytest.raises(ValueError):
try:
csv_generator._read_annotations(csv_str('a.png,1,8,3,5,a'), {'a': 1})
except ValueError as e:
assert str(e).startswith("line 1: y2 (5) must be higher than y1 (8)")
raise
def test_read_annotations_empty_image():
# Check that images without annotations are parsed.
assert csv_generator._read_annotations(csv_str('a.png,,,,,\nb.png,,,,,'), {'a': 1}) == {'a.png': [], 'b.png': []}
# Check that lines without annotations don't clear earlier annotations.
assert csv_generator._read_annotations(csv_str('a.png,0,1,2,3,a\na.png,,,,,'), {'a': 1}) == {'a.png': [annotation(0, 1, 2, 3, 'a')]}
"""
Copyright 2017-2018 Fizyr (https://fizyr.com)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from keras_retinanet.preprocessing.generator import Generator
import numpy as np
import pytest
class SimpleGenerator(Generator):
def __init__(self, bboxes, labels, num_classes=0, image=None):
assert(len(bboxes) == len(labels))
self.bboxes = bboxes
self.labels = labels
self.num_classes_ = num_classes
self.image = image
super(SimpleGenerator, self).__init__(group_method='none', shuffle_groups=False)
def num_classes(self):
return self.num_classes_
def load_image(self, image_index):
return self.image
def image_path(self, image_index):
return ''
def size(self):
return len(self.bboxes)
def load_annotations(self, image_index):
annotations = {'labels': self.labels[image_index], 'bboxes': self.bboxes[image_index]}
return annotations
class TestLoadAnnotationsGroup(object):
def test_simple(self):
input_bboxes_group = [
np.array([
[ 0, 0, 10, 10],
[150, 150, 350, 350]
]),
]
input_labels_group = [
np.array([
1,
3
]),
]
expected_bboxes_group = input_bboxes_group
expected_labels_group = input_labels_group
simple_generator = SimpleGenerator(input_bboxes_group, input_labels_group)
annotations = simple_generator.load_annotations_group(simple_generator.groups[0])
assert('bboxes' in annotations[0])
assert('labels' in annotations[0])
np.testing.assert_equal(expected_bboxes_group[0], annotations[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[0], annotations[0]['labels'])
def test_multiple(self):
input_bboxes_group = [
np.array([
[ 0, 0, 10, 10],
[150, 150, 350, 350]
]),
np.array([
[0, 0, 50, 50],
]),
]
input_labels_group = [
np.array([
1,
0
]),
np.array([
3
])
]
expected_bboxes_group = input_bboxes_group
expected_labels_group = input_labels_group
simple_generator = SimpleGenerator(input_bboxes_group, input_labels_group)
annotations_group_0 = simple_generator.load_annotations_group(simple_generator.groups[0])
annotations_group_1 = simple_generator.load_annotations_group(simple_generator.groups[1])
assert('bboxes' in annotations_group_0[0])
assert('bboxes' in annotations_group_1[0])
assert('labels' in annotations_group_0[0])
assert('labels' in annotations_group_1[0])
np.testing.assert_equal(expected_bboxes_group[0], annotations_group_0[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[0], annotations_group_0[0]['labels'])
np.testing.assert_equal(expected_bboxes_group[1], annotations_group_1[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[1], annotations_group_1[0]['labels'])
class TestFilterAnnotations(object):
def test_simple_filter(self):
input_bboxes_group = [
np.array([
[ 0, 0, 10, 10],
[150, 150, 50, 50]
]),
]
input_labels_group = [
np.array([
3,
1
]),
]
input_image = np.zeros((500, 500, 3))
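# The second input box, [150, 150, 50, 50], has x2 < x1 and y2 < y1 and is
# therefore invalid; filter_annotations should drop it (emitting a UserWarning)
# and keep only the first box and its label.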
expected_bboxes_group = [
np.array([
[0, 0, 10, 10],
]),
]
expected_labels_group = [
np.array([
3,
]),
]
simple_generator = SimpleGenerator(input_bboxes_group, input_labels_group)
annotations = simple_generator.load_annotations_group(simple_generator.groups[0])
# expect a UserWarning
with pytest.warns(UserWarning):
image_group, annotations_group = simple_generator.filter_annotations([input_image], annotations, simple_generator.groups[0])
np.testing.assert_equal(expected_bboxes_group[0], annotations_group[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[0], annotations_group[0]['labels'])
def test_multiple_filter(self):
input_bboxes_group = [
np.array([
[ 0, 0, 10, 10],
[150, 150, 50, 50],
[150, 150, 350, 350],
[350, 350, 150, 150],
[ 1, 1, 2, 2],
[ 2, 2, 1, 1]
]),
np.array([
[0, 0, -1, -1]
]),
np.array([
[-10, -10, 0, 0],
[-10, -10, -100, -100],
[ 10, 10, 100, 100]
]),
np.array([
[ 10, 10, 100, 100],
[ 10, 10, 600, 600]
]),
]
input_labels_group = [
np.array([
6,
5,
4,
3,
2,
1
]),
np.array([
0
]),
np.array([
10,
11,
12
]),
np.array([
105,
107
]),
]
input_image = np.zeros((500, 500, 3))
expected_bboxes_group = [
np.array([
[ 0, 0, 10, 10],
[150, 150, 350, 350],
[ 1, 1, 2, 2]
]),
np.zeros((0, 4)),
np.array([
[10, 10, 100, 100]
]),
np.array([
[ 10, 10, 100, 100]
]),
]
expected_labels_group = [
np.array([
6,
4,
2
]),
np.zeros((0,)),
np.array([
12
]),
np.array([
105
]),
]
simple_generator = SimpleGenerator(input_bboxes_group, input_labels_group)
# expect a UserWarning
annotations_group_0 = simple_generator.load_annotations_group(simple_generator.groups[0])
with pytest.warns(UserWarning):
image_group, annotations_group_0 = simple_generator.filter_annotations([input_image], annotations_group_0, simple_generator.groups[0])
annotations_group_1 = simple_generator.load_annotations_group(simple_generator.groups[1])
with pytest.warns(UserWarning):
image_group, annotations_group_1 = simple_generator.filter_annotations([input_image], annotations_group_1, simple_generator.groups[1])
annotations_group_2 = simple_generator.load_annotations_group(simple_generator.groups[2])
with pytest.warns(UserWarning):
image_group, annotations_group_2 = simple_generator.filter_annotations([input_image], annotations_group_2, simple_generator.groups[2])
np.testing.assert_equal(expected_bboxes_group[0], annotations_group_0[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[0], annotations_group_0[0]['labels'])
np.testing.assert_equal(expected_bboxes_group[1], annotations_group_1[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[1], annotations_group_1[0]['labels'])
np.testing.assert_equal(expected_bboxes_group[2], annotations_group_2[0]['bboxes'])
np.testing.assert_equal(expected_labels_group[2], annotations_group_2[0]['labels'])
def test_complete(self):
input_bboxes_group = [
np.array([
[ 0, 0, 50, 50],
[150, 150, 50, 50], # invalid bbox
], dtype=float)
]
input_labels_group = [
np.array([
5, # one object of class 5
3, # one object of class 3 with an invalid box
], dtype=float)
]
input_image = np.zeros((500, 500, 3), dtype=np.uint8)
simple_generator = SimpleGenerator(input_bboxes_group, input_labels_group, image=input_image, num_classes=6)
# expect a UserWarning
with pytest.warns(UserWarning):
_, [_, labels_batch] = simple_generator[0]
# test that only object with class 5 is present in labels_batch
labels = np.unique(np.argmax(labels_batch == 5, axis=2))
assert(len(labels) == 1 and labels[0] == 0), 'Expected only class 0 to be present, but got classes {}'.format(labels)
import os
import pytest
from PIL import Image
from keras_retinanet.utils import image
import numpy as np
_STUB_IMG_FNAME = 'stub-image.jpg'
@pytest.fixture(autouse=True)
def run_around_tests(tmp_path):
"""Create a temp image for test"""
rand_img = np.random.randint(0, 255, (3, 3, 3), dtype='uint8')
Image.fromarray(rand_img).save(os.path.join(tmp_path, _STUB_IMG_FNAME))
yield
def test_read_image_bgr(tmp_path):
stub_image_path = os.path.join(tmp_path, _STUB_IMG_FNAME)
original_img = np.asarray(Image.open(
stub_image_path).convert('RGB'))[:, :, ::-1]
loaded_image = image.read_image_bgr(stub_image_path)
# Assert images are equal
np.testing.assert_array_equal(original_img, loaded_image)
check-manifest
image-classifiers
efficientnet
# pytest
pytest-xdist
pytest-cov
pytest-flake8
# flake8
coverage
codecov
import keras_retinanet.losses
from tensorflow import keras
import numpy as np
import pytest
def test_smooth_l1():
regression = np.array([
[
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
]
], dtype=keras.backend.floatx())
regression = keras.backend.variable(regression)
regression_target = np.array([
[
[0, 0, 0, 1, 1],
[0, 0, 1, 0, 1],
[0, 0, 0.05, 0, 1],
[0, 0, 1, 0, 0],
]
], dtype=keras.backend.floatx())
regression_target = keras.backend.variable(regression_target)
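# Expected value below, assuming the loss uses its default sigma of 3 and is
# normalised by the number of positive anchors (anchor state 1 in the last
# column of the targets):
#   two diffs of 1   -> 1 - 0.5 / 9 each     (|x| >= 1 / sigma^2)
#   one diff of 0.05 -> 0.5 * 9 * 0.05 ** 2  (|x| <  1 / sigma^2)
#   the last anchor has state 0 and is ignored; the normaliser is 3.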
loss = keras_retinanet.losses.smooth_l1()(regression_target, regression)
loss = keras.backend.eval(loss)
assert loss == pytest.approx((((1 - 0.5 / 9) * 2 + (0.5 * 9 * 0.05 ** 2)) / 3))
import numpy as np
import configparser
from tensorflow import keras
from keras_retinanet.utils.anchors import anchors_for_shape, AnchorParameters
from keras_retinanet.utils.config import read_config_file, parse_anchor_parameters
def test_config_read():
config = read_config_file('tests/test-data/config/config.ini')
assert 'anchor_parameters' in config
assert 'sizes' in config['anchor_parameters']
assert 'strides' in config['anchor_parameters']
assert 'ratios' in config['anchor_parameters']
assert 'scales' in config['anchor_parameters']
assert config['anchor_parameters']['sizes'] == '32 64 128 256 512'
assert config['anchor_parameters']['strides'] == '8 16 32 64 128'
assert config['anchor_parameters']['ratios'] == '0.5 1 2 3'
assert config['anchor_parameters']['scales'] == '1 1.2 1.6'
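# For reference, the file read above presumably looks like this (values inferred
# from the assertions; the actual test-data file is not shown here):
#   [anchor_parameters]
#   sizes   = 32 64 128 256 512
#   strides = 8 16 32 64 128
#   ratios  = 0.5 1 2 3
#   scales  = 1 1.2 1.6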
def create_anchor_params_config():
config = configparser.ConfigParser()
config['anchor_parameters'] = {}
config['anchor_parameters']['sizes'] = '32 64 128 256 512'
config['anchor_parameters']['strides'] = '8 16 32 64 128'
config['anchor_parameters']['ratios'] = '0.5 1'
config['anchor_parameters']['scales'] = '1 1.2 1.6'
return config
def test_parse_anchor_parameters():
config = create_anchor_params_config()
anchor_params_parsed = parse_anchor_parameters(config)
sizes = [32, 64, 128, 256, 512]
strides = [8, 16, 32, 64, 128]
ratios = np.array([0.5, 1], keras.backend.floatx())
scales = np.array([1, 1.2, 1.6], keras.backend.floatx())
assert sizes == anchor_params_parsed.sizes
assert strides == anchor_params_parsed.strides
np.testing.assert_equal(ratios, anchor_params_parsed.ratios)
np.testing.assert_equal(scales, anchor_params_parsed.scales)
def test_anchors_for_shape_dimensions():
sizes = [32, 64, 128]
strides = [8, 16, 32]
ratios = np.array([0.5, 1, 2, 3], keras.backend.floatx())
scales = np.array([1, 1.2, 1.6], keras.backend.floatx())
anchor_params = AnchorParameters(sizes, strides, ratios, scales)
pyramid_levels = [3, 4, 5]
image_shape = (64, 64)
all_anchors = anchors_for_shape(image_shape, pyramid_levels=pyramid_levels, anchor_params=anchor_params)
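# For a 64x64 image, pyramid levels 3, 4 and 5 give feature maps of 8x8, 4x4 and
# 2x2 cells (64 + 16 + 4 = 84 locations); with 4 ratios x 3 scales = 12 anchors
# per location this yields 84 * 12 = 1008 anchors.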
assert all_anchors.shape == (1008, 4)
def test_anchors_for_shape_values():
sizes = [12]
strides = [8]
ratios = np.array([1, 2], keras.backend.floatx())
scales = np.array([1, 2], keras.backend.floatx())
anchor_params = AnchorParameters(sizes, strides, ratios, scales)
pyramid_levels = [3]
image_shape = (16, 16)
all_anchors = anchors_for_shape(image_shape, pyramid_levels=pyramid_levels, anchor_params=anchor_params)
# using almost_equal for floating point imprecisions
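# With a 16x16 image and pyramid level 3 the feature map is 2x2, so the anchor
# centres lie at 4 and 12 (stride / 2 and 3 * stride / 2). The 16 anchors are
# ordered with the scale varying fastest, then the ratio, then the x position of
# the cell, then the y position, which is the pattern checked below.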
np.testing.assert_almost_equal(all_anchors[0, :], [
strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[1, :], [
strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[2, :], [
strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[3, :], [
strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[4, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[5, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[6, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[7, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[8, :], [
strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[9, :], [
strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[10, :], [
strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[11, :], [
strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[12, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[13, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[14, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2,
], decimal=6)
np.testing.assert_almost_equal(all_anchors[15, :], [
strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
], decimal=6)
import numpy as np
from numpy.testing import assert_almost_equal
from math import pi
from keras_retinanet.utils.transform import (
colvec,
transform_aabb,
rotation, random_rotation,
translation, random_translation,
scaling, random_scaling,
shear, random_shear,
random_flip,
random_transform,
random_transform_generator,
change_transform_origin,
)
def test_colvec():
assert np.array_equal(colvec(0), np.array([[0]]))
assert np.array_equal(colvec(1, 2, 3), np.array([[1], [2], [3]]))
assert np.array_equal(colvec(-1, -2), np.array([[-1], [-2]]))
def test_rotation():
assert_almost_equal(colvec( 1, 0, 1), rotation(0.0 * pi).dot(colvec(1, 0, 1)))
assert_almost_equal(colvec( 0, 1, 1), rotation(0.5 * pi).dot(colvec(1, 0, 1)))
assert_almost_equal(colvec(-1, 0, 1), rotation(1.0 * pi).dot(colvec(1, 0, 1)))
assert_almost_equal(colvec( 0, -1, 1), rotation(1.5 * pi).dot(colvec(1, 0, 1)))
assert_almost_equal(colvec( 1, 0, 1), rotation(2.0 * pi).dot(colvec(1, 0, 1)))
assert_almost_equal(colvec( 0, 1, 1), rotation(0.0 * pi).dot(colvec(0, 1, 1)))
assert_almost_equal(colvec(-1, 0, 1), rotation(0.5 * pi).dot(colvec(0, 1, 1)))
assert_almost_equal(colvec( 0, -1, 1), rotation(1.0 * pi).dot(colvec(0, 1, 1)))
assert_almost_equal(colvec( 1, 0, 1), rotation(1.5 * pi).dot(colvec(0, 1, 1)))
assert_almost_equal(colvec( 0, 1, 1), rotation(2.0 * pi).dot(colvec(0, 1, 1)))
def test_random_rotation():
prng = np.random.RandomState(0)
for i in range(100):
assert_almost_equal(1, np.linalg.det(random_rotation(-i, i, prng)))
def test_translation():
assert_almost_equal(colvec( 1, 2, 1), translation(colvec( 0, 0)).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec( 4, 6, 1), translation(colvec( 3, 4)).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(-2, -2, 1), translation(colvec(-3, -4)).dot(colvec(1, 2, 1)))
def assert_is_translation(transform, min, max):
assert transform.shape == (3, 3)
assert np.array_equal(transform[:, 0:2], np.eye(3, 2))
assert transform[2, 2] == 1
assert np.greater_equal(transform[0:2, 2], min).all()
assert np.less( transform[0:2, 2], max).all()
def test_random_translation():
prng = np.random.RandomState(0)
min = (-10, -20)
max = (20, 10)
for i in range(100):
assert_is_translation(random_translation(min, max, prng), min, max)
def test_shear():
assert_almost_equal(colvec( 1, 2, 1), shear(0.0 * pi).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(-1, 0, 1), shear(0.5 * pi).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec( 1, -2, 1), shear(1.0 * pi).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec( 3, 0, 1), shear(1.5 * pi).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec( 1, 2, 1), shear(2.0 * pi).dot(colvec(1, 2, 1)))
def assert_is_shear(transform):
assert transform.shape == (3, 3)
assert np.array_equal(transform[:, 0], [1, 0, 0])
assert np.array_equal(transform[:, 2], [0, 0, 1])
assert transform[2, 1] == 0
# sin^2 + cos^2 == 1
assert_almost_equal(1, transform[0, 1] ** 2 + transform[1, 1] ** 2)
def test_random_shear():
prng = np.random.RandomState(0)
for i in range(100):
assert_is_shear(random_shear(-pi, pi, prng))
def test_scaling():
assert_almost_equal(colvec(1.0, 2, 1), scaling(colvec(1.0, 1.0)).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(0.0, 2, 1), scaling(colvec(0.0, 1.0)).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(1.0, 0, 1), scaling(colvec(1.0, 0.0)).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(0.5, 4, 1), scaling(colvec(0.5, 2.0)).dot(colvec(1, 2, 1)))
def assert_is_scaling(transform, min, max):
assert transform.shape == (3, 3)
assert np.array_equal(transform[2, :], [0, 0, 1])
assert np.array_equal(transform[:, 2], [0, 0, 1])
assert transform[1, 0] == 0
assert transform[0, 1] == 0
assert np.greater_equal(np.diagonal(transform)[:2], min).all()
assert np.less( np.diagonal(transform)[:2], max).all()
def test_random_scaling():
prng = np.random.RandomState(0)
min = (0.1, 0.2)
max = (20, 10)
for i in range(100):
assert_is_scaling(random_scaling(min, max, prng), min, max)
def assert_is_flip(transform):
assert transform.shape == (3, 3)
assert np.array_equal(transform[2, :], [0, 0, 1])
assert np.array_equal(transform[:, 2], [0, 0, 1])
assert transform[1, 0] == 0
assert transform[0, 1] == 0
assert abs(transform[0, 0]) == 1
assert abs(transform[1, 1]) == 1
def test_random_flip():
prng = np.random.RandomState(0)
for i in range(100):
assert_is_flip(random_flip(0.5, 0.5, prng))
def test_random_transform():
prng = np.random.RandomState(0)
for i in range(100):
transform = random_transform(prng=prng)
assert np.array_equal(transform, np.identity(3))
for i, transform in zip(range(100), random_transform_generator(prng=np.random.RandomState())):
assert np.array_equal(transform, np.identity(3))
def test_transform_aabb():
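# transform_aabb transforms all four corners of the box and returns the
# axis-aligned min/max of the result, so rotating [1, 2, 3, 4] by pi maps its
# corners to (-1, -2), (-3, -2), (-1, -4), (-3, -4), hence [-3, -4, -1, -2].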
assert np.array_equal([1, 2, 3, 4], transform_aabb(np.identity(3), [1, 2, 3, 4]))
assert_almost_equal([-3, -4, -1, -2], transform_aabb(rotation(pi), [1, 2, 3, 4]))
assert_almost_equal([ 2, 4, 4, 6], transform_aabb(translation([1, 2]), [1, 2, 3, 4]))
def test_change_transform_origin():
assert np.array_equal(change_transform_origin(translation([3, 4]), [1, 2]), translation([3, 4]))
assert_almost_equal(colvec(1, 2, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(1, 2, 1)))
assert_almost_equal(colvec(0, 0, 1), change_transform_origin(rotation(pi), [1, 2]).dot(colvec(2, 4, 1)))
assert_almost_equal(colvec(0, 0, 1), change_transform_origin(scaling([0.5, 0.5]), [-2, -4]).dot(colvec(2, 4, 1)))