From e5c2a82dcbae702e69283497a924ea3ca9a35a11 Mon Sep 17 00:00:00 2001 From: CANOE Date: Wed, 16 Mar 2022 10:11:54 +0800 Subject: [PATCH 01/12] Add browse_dataset for visualization (#98) * Add browse_dataset for visualization * Add docstring and fix the lint. * Put the assertion of the if branch --- mmrotate/core/visualization/image.py | 19 +++-- tools/misc/browse_dataset.py | 110 +++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 tools/misc/browse_dataset.py diff --git a/mmrotate/core/visualization/image.py b/mmrotate/core/visualization/image.py index 779275a12..bc6f8b1a5 100644 --- a/mmrotate/core/visualization/image.py +++ b/mmrotate/core/visualization/image.py @@ -59,21 +59,25 @@ def imshow_det_rbboxes(img, wait_time (int): Value of waitKey param. out_file (str or None): The filename to write the image. """ - assert bboxes.ndim == 2 + assert bboxes is not None and bboxes.ndim == 2 assert labels.ndim == 1 + img = imread(img) - scores = bboxes[:, -1] - inds = scores > score_thr - bboxes = bboxes[inds, :] - labels = labels[inds] + if score_thr > 0: + assert bboxes.shape[1] == 6 + scores = bboxes[:, -1] + inds = scores > score_thr + bboxes = bboxes[inds, :] + labels = labels[inds] bbox_color = (226, 43, 138) if bbox_color is None else color_val(bbox_color) text_color = (255, 255, 255) if text_color is None else color_val(text_color) for bbox, label in zip(bboxes, labels): - xc, yc, w, h, ag, score = bbox.tolist() + xc, yc, w, h, ag = bbox[:5] + score = bbox[5] if bboxes.shape[1] == 6 else None wx, wy = w / 2 * math.cos(ag), w / 2 * math.sin(ag) hx, hy = -h / 2 * math.sin(ag), h / 2 * math.cos(ag) p1 = (xc - wx - hx, yc - wy - hy) @@ -84,7 +88,8 @@ def imshow_det_rbboxes(img, cv2.drawContours(img, [ps], -1, bbox_color, thickness=thickness) label_text = class_names[ label] if class_names is not None else 'cls {}'.format(label) - label_text += '|{:.02f}'.format(score) + if score: + label_text += '|{:.02f}'.format(score) font = cv2.FONT_HERSHEY_COMPLEX text_size = cv2.getTextSize(label_text, font, font_scale, 1) text_width = text_size[0][0] diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py new file mode 100644 index 000000000..dd2d37b72 --- /dev/null +++ b/tools/misc/browse_dataset.py @@ -0,0 +1,110 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +from collections import Sequence +from pathlib import Path + +import mmcv +from mmcv import Config, DictAction +from mmdet.datasets.builder import build_dataset + +from mmrotate.core.visualization import imshow_det_rbboxes + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--skip-type', + type=str, + nargs='+', + default=['DefaultFormatBundle', 'Normalize', 'Collect'], + help='skip some useless pipeline') + parser.add_argument( + '--output-dir', + default=None, + type=str, + help='If there is no display interface, you can save it') + parser.add_argument('--not-show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=float, + default=2, + help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. 
If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def retrieve_data_cfg(config_path, skip_type, cfg_options): + """Retrieve the dataset config file. + + Args: + config_path (str): Path of the config file. + skip_type (list[str]): List of the useless pipeline to skip. + cfg_options (dict): dict of configs to merge from. + """ + + def skip_pipeline_steps(config): + config['pipeline'] = [ + x for x in config.pipeline if x['type'] not in skip_type + ] + + cfg = Config.fromfile(config_path) + if cfg_options is not None: + cfg.merge_from_dict(cfg_options) + train_data_cfg = cfg.data.train + while 'dataset' in train_data_cfg and train_data_cfg[ + 'type'] != 'MultiImageMixDataset': + train_data_cfg = train_data_cfg['dataset'] + + if isinstance(train_data_cfg, Sequence): + [skip_pipeline_steps(c) for c in train_data_cfg] + else: + skip_pipeline_steps(train_data_cfg) + + return cfg + + +def main(): + args = parse_args() + cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options) + + dataset = build_dataset(cfg.data.train) + + progress_bar = mmcv.ProgressBar(len(dataset)) + + for item in dataset: + filename = os.path.join(args.output_dir, + Path(item['filename']).name + ) if args.output_dir is not None else None + + gt_bboxes = item['gt_bboxes'] + gt_labels = item['gt_labels'] + + imshow_det_rbboxes( + item['img'], + gt_bboxes, + gt_labels, + class_names=dataset.CLASSES, + score_thr=0, + show=not args.not_show, + wait_time=args.show_interval, + out_file=filename, + bbox_color=dataset.PALETTE, + text_color=(200, 200, 200)) + + progress_bar.update() + + +if __name__ == '__main__': + main() From 30698917eea1a7d63dd05348e7de9f4cfdebe0a8 Mon Sep 17 00:00:00 2001 From: "MingJian.L" <45811724+matrixgame2018@users.noreply.github.com> Date: Wed, 16 Mar 2022 10:19:59 +0800 Subject: [PATCH 02/12] [Docs] Update customize_dataset.md (#65) * Update customize_dataset.md Translate the Doc to chinese * Update customize_dataset.md * Update customize_dataset.md * Delete customize_dataset.md * Create customize_dataset.md * Update customize_dataset.md * Update customize_dataset.md * Update customize_dataset.md * Update customize_dataset.md Fix somthing * Update customize_dataset.md Fix something mistakes --- docs/zh_cn/tutorials/customize_dataset.md | 142 +++++++++++----------- 1 file changed, 70 insertions(+), 72 deletions(-) diff --git a/docs/zh_cn/tutorials/customize_dataset.md b/docs/zh_cn/tutorials/customize_dataset.md index 90d6df9ec..7328e75b0 100644 --- a/docs/zh_cn/tutorials/customize_dataset.md +++ b/docs/zh_cn/tutorials/customize_dataset.md @@ -1,15 +1,18 @@ -# Tutorial 2: Customize Datasets +# 教程 2:自定义数据集 -## Support new data format +## 支持新的数据格式 -To support a new data format, you can convert them to existing formats (DOTA format). You could choose to convert them offline (before training by a script) or online (implement a new dataset and do the conversion at training). -In MMRotate, we recommend to convert the data into DOTA formats and do the conversion offline, thus you only need to modify the config's data annotation paths and classes after the conversion of your data. 
-### Reorganize new data formats to existing format +要支持新的数据格式,您可以将它们转换为现有的格式(DOTA 格式)。您可以选择离线(在通过脚本训练之前)或在线(实施新数据集并在训练时进行转换)进行转换。 +在 MMRotate 中,我们建议将数据转换为 DOTA 格式并离线进行转换,如此您只需在数据转换后修改 config 的数据标注路径和类别即可。 -The simplest way is to convert your dataset to existing dataset formats (DOTA). -The annotation txt files in DOTA format: +### 将新数据格式重构为现有格式 + + +最简单的方法是将数据集转换为现有数据集格式 (DOTA) 。 + +DOTA 格式的注解 txt 文件: ```text 184 2875 193 2923 146 2932 137 2885 plane 0 @@ -17,33 +20,44 @@ The annotation txt files in DOTA format: ... ``` -Each line represents an object and records it as a 10-dimensional array `A`. -- `A[0:8]`: Polygons with format `(x1, y1, x2, y2, x3, y3, x4, y4)`. -- `A[8]`: Category. -- `A[9]`: Difficulty. -After the data pre-processing, there are two steps for users to train the customized new dataset with existing format (e.g. DOTA format): +每行代表一个对象,并将其记录为一个 10 维数组 `A` 。 + +- `A[0:8]`: 多边形的格式 `(x1, y1, x2, y2, x3, y3, x4, y4)` 。 +- `A[8]`: 类别 +- `A[9]`: 困难 + + +在数据预处理之后,用户可以通过两个步骤来训练具有现有格式(例如 DOTA 格式)的自定义新数据集: + +1. 修改配置文件以使用自定义数据集。 +2. 检查自定义数据集的标注。 + + +下面给出两个例子展示上述两个步骤,它使用一个自定义的 5 类 COCO 格式的数据集来训练一个现有的 Cascade Mask R-CNN R50-FPN 检测器。 -1. Modify the config file for using the customized dataset. -2. Check the annotations of the customized dataset. -Here we give an example to show the above two steps, which uses a customized dataset of 5 classes with COCO format to train an existing Cascade Mask R-CNN R50-FPN detector. +#### 1. 修改配置文件以使用自定义数据集 -#### 1. Modify the config file for using the customized dataset -There are two aspects involved in the modification of config file: +配置文件的修改主要涉及两个方面: -1. The `data` field. Specifically, you need to explicitly add the `classes` fields in `data.train`, `data.val` and `data.test`. -2. The `num_classes` field in the `model` part. Explicitly over-write all the `num_classes` from default value (e.g. 80 in COCO) to your classes number. -In `configs/my_custom_config.py`: +1. `data` 部分。具体来说,您需要在 `data.train`, `data.val` 和 `data.test` 中显式添加 classes 字段。 + +2. `data` 属性变量。具体来说,特别是您需要在 `data.train`, `data.val` 和 `data.test` 中添加 classes 字段。 + +3. `model` 部分中的 ` num_classes` 属性变量。特别是将所有 num_classes 的默认值(例如 COCO 中的 80)覆盖到您的类别编号中。 + + +在 `configs/my_custom_config.py` : ```python -# the new config inherits the base configs to highlight the necessary modification +# 新配置继承了基础配置用于突出显示必要的修改 _base_ = './rotated_retinanet_hbb_r50_fpn_1x_dota_oc' -# 1. dataset settings +# 1. 数据集的设置 dataset_type = 'DOTADataset' classes = ('a', 'b', 'c', 'd', 'e') data = dict( @@ -51,57 +65,58 @@ data = dict( workers_per_gpu=2, train=dict( type=dataset_type, - # explicitly add your class names to the field `classes` + + # 注意将你的类名添加到字段 `classes` classes=classes, ann_file='path/to/your/train/annotation_data', img_prefix='path/to/your/train/image_data'), val=dict( type=dataset_type, - # explicitly add your class names to the field `classes` + + # 注意将你的类名添加到字段 `classes` classes=classes, ann_file='path/to/your/val/annotation_data', img_prefix='path/to/your/val/image_data'), test=dict( type=dataset_type, - # explicitly add your class names to the field `classes` + + # 注意将你的类名添加到字段 `classes` classes=classes, ann_file='path/to/your/test/annotation_data', img_prefix='path/to/your/test/image_data')) -# 2. model settings +# 2. 模型设置 model = dict( bbox_head=dict( type='RotatedRetinaHead', - # explicitly over-write all the `num_classes` field from default 15 to 5. + # 显式将所有 `num_classes` 字段从 15 重写为 5。。 num_classes=15)) ``` -#### 2. 
Check the annotations of the customized dataset

Assuming your customized dataset is DOTA format, make sure you have the correct annotations in the customized dataset:

- The `classes` fields in your config file should have exactly the same elements and the same order as the `A[8]` in the txt annotations. MMRotate automatically maps the discontinuous `id` in `categories` to continuous label indices, so the string order of `name` in the `categories` field affects the order of label indices. Meanwhile, the string order of `classes` in the config affects the label text during visualization of predicted bounding boxes.

#### 2. 查看自定义数据集的标注

假设您的自定义数据集是 DOTA 格式,请确保您在自定义数据集中具有正确的标注:

- 配置文件中的 `classes` 字段应该与 txt 标注的 `A[8]` 保持完全相同的元素和相同的顺序。MMRotate 会自动地将 `categories` 中不连续的 `id` 映射到连续的标签索引中,所以 `categories` 中 `name` 的字符串顺序会影响标签索引的顺序。同时,配置文件中 `classes` 的字符串顺序也会影响预测边界框可视化过程中的标签文本信息。

## Customize datasets by dataset wrappers

MMRotate also supports many dataset wrappers to mix datasets or modify the dataset distribution for training. Currently it supports three dataset wrappers as below:

- `RepeatDataset`: simply repeat the whole dataset.
- `ClassBalancedDataset`: repeat the dataset in a class-balanced manner.
- `ConcatDataset`: concatenate datasets.

## 通过封装器自定义数据集

MMRotate 还支持许多数据集封装器,用于混合数据集或修改数据集的分布以进行训练。目前它支持以下三个数据集封装器:

- `RepeatDataset`: 简单地重复整个数据集。
- `ClassBalancedDataset`: 以类别平衡的方式重复数据集。
- `ConcatDataset`: 拼接数据集。

### Repeat dataset

We use `RepeatDataset` as a wrapper to repeat the dataset. For example, suppose the original dataset is `Dataset_A`; to repeat it, the config looks like the following.

### 重复数据集

我们使用 `RepeatDataset` 作为封装器来重复数据集。例如,假设原始数据集是 `Dataset_A`,要重复它,配置信息如下所示:

```python
dataset_A_train = dict(
    type='RepeatDataset',
    times=N,
    dataset=dict(  # 这是 Dataset_A 的原始配置信息
        type='Dataset_A',
        ...
        pipeline=train_pipeline
    )
)
```

### Class balanced dataset

We use `ClassBalancedDataset` as a wrapper to repeat the dataset based on category frequency. The dataset to be repeated needs to implement `self.get_cat_ids(idx)` to support `ClassBalancedDataset`. For example, to repeat `Dataset_A` with `oversample_thr=1e-3`, the config looks like the following.

### 类别平衡数据集

我们使用 `ClassBalancedDataset` 作为封装器,根据类别频率重复数据集。被重复的数据集需要实现 `self.get_cat_ids(idx)` 方法以支持 `ClassBalancedDataset`。例如,要以 `oversample_thr=1e-3` 重复 `Dataset_A`,配置信息如下所示:

```python
dataset_A_train = dict(
    type='ClassBalancedDataset',
    oversample_thr=1e-3,
    dataset=dict(
        type='Dataset_A',
        ...
        pipeline=train_pipeline
    )
)
```

### Concatenate dataset

There are three ways to concatenate datasets.

### 拼接数据集

有三种方式对数据集进行拼接:

1. If the datasets you want to concatenate are of the same type with different annotation files, you can concatenate the dataset configs like the following.

   如果要拼接的数据集属于同一类型且具有不同的标注文件,则可以通过如下所示的配置信息来拼接数据集:

    ```python
    dataset_A_train = dict(
        type='Dataset_A',
        ann_file = ['anno_file_1', 'anno_file_2'],
        pipeline=train_pipeline
    )
    ```

   If the concatenated dataset is used for test or evaluation, this manner supports evaluating each dataset separately. To test the concatenated dataset as a whole, you can set `separate_eval=False` as below.

   如果拼接后的数据集用于测试或评估,这种方式支持对每个数据集分别进行评估。如果要将整个拼接数据集作为一个整体进行测试,您可以如下所示设置 `separate_eval=False`:

    ```python
    dataset_A_train = dict(
        type='Dataset_A',
        ann_file = ['anno_file_1', 'anno_file_2'],
        separate_eval=False,
        pipeline=train_pipeline
    )
    ```

2. 
In case the datasets you want to concatenate are different, you can concatenate the dataset configs like the following.

   如果您要拼接不同的数据集,您可以通过如下所示的方法对拼接数据集的配置信息进行设置:

    ```python
    dataset_A_train = dict()
    dataset_B_train = dict()

    data = dict(
        imgs_per_gpu=2,
        workers_per_gpu=2,
        train = [
            dataset_A_train,
            dataset_B_train
        ],
        val = dataset_A_val,
        test = dataset_A_test
    )
    ```

   If the concatenated dataset is used for test or evaluation, this manner also supports evaluating each dataset separately.

   如果拼接后的数据集用于测试或评估,这种方式同样支持对每个数据集分别进行评估。

3. We also support defining `ConcatDataset` explicitly as the following.

   我们也支持如下所示显式地定义 `ConcatDataset`:

    ```python
    dataset_A_val = dict()
    dataset_B_val = dict()

    data = dict(
        imgs_per_gpu=2,
        workers_per_gpu=2,
        train=dataset_A_train,
        val=dict(
            type='ConcatDataset',
            datasets=[dataset_A_val, dataset_B_val],
            separate_eval=False))
    ```

   This manner allows users to evaluate all the datasets as a single one by setting `separate_eval=False`.

   这种方式允许用户通过设置 `separate_eval=False`,将所有数据集作为单个数据集进行评估。

**Note:**

1. The option `separate_eval=False` assumes the datasets use `self.data_infos` during evaluation. Therefore, COCO datasets do not support this behavior, since COCO datasets do not fully rely on `self.data_infos` for evaluation. Combining different types of datasets and evaluating them as a whole is not tested, thus it is not suggested.
2. Evaluating `ClassBalancedDataset` and `RepeatDataset` is not supported, thus evaluating concatenated datasets of these types is also not supported.

**注意:**

1. 选项 `separate_eval=False` 假设各数据集在评估期间使用 `self.data_infos`。因为 COCO 数据集不完全依赖 `self.data_infos` 进行评估,所以 COCO 数据集并不支持这种设置。没有在组合不同类型的数据集并对其进行整体评估的场景下进行过测试,因此我们不建议这样操作。
2. 不支持评估 `ClassBalancedDataset` 和 `RepeatDataset`,因此也不支持评估由这些类型拼接组合而成的数据集。

A more complex example that repeats `Dataset_A` and `Dataset_B` by N and M times, respectively, and then concatenates the repeated datasets, is as follows.

一个更复杂的例子:分别将 `Dataset_A` 和 `Dataset_B` 重复 N 次和 M 次,然后将重复后的数据集拼接起来,如下所示。

```python
dataset_A_train = dict(
    type='RepeatDataset',
    times=N,
    dataset=dict(
        type='Dataset_A',
        ...
        pipeline=train_pipeline
    )
)
dataset_A_val = dict(
    ...
    pipeline=test_pipeline
)
dataset_A_test = dict(
    ...
    pipeline=test_pipeline
)
dataset_B_train = dict(
    type='RepeatDataset',
    times=M,
    dataset=dict(
        type='Dataset_B',
        ...
        pipeline=train_pipeline
    )
)
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train = [
        dataset_A_train,
        dataset_B_train
    ],
    val = dataset_A_val,
    test = dataset_A_test
)
```
From 47055d5b1b3d09e2cb67182a8442d0125622c8af Mon Sep 17 00:00:00 2001
From: Yue Zhou <592267829@qq.com>
Date: Wed, 16 Mar 2022 22:06:05 +0800
Subject: [PATCH 03/12] [Feature] Add multi machine dist_train (#143)

---
 docs/en/get_started.md    | 18 ++++++++++++++++++
 docs/zh_cn/get_started.md | 20 +++++++++++++++++++-
 tools/dist_test.sh        | 16 ++++++++++++++--
 tools/dist_train.sh       | 15 +++++++++++++--
 4 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index 4dba699f4..b0c5372e3 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -86,6 +86,24 @@ Difference between `resume-from` and `load-from`:

### Train with multiple machines

If you launch with multiple machines simply connected with ethernet, you can simply run the following commands:

On the first machine:

```shell
NNODES=2 NODE_RANK=0 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS
```

On the second machine:

```shell
NNODES=2 NODE_RANK=1 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS
```

Usually it is slow if you do not have high-speed networking like InfiniBand.
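For example, a hypothetical launch across two 8-GPU machines might look like this (the master address and the config path below are placeholders, not values from this patch):

```shell
# On machine 0, which acts as the master (10.0.0.1 stands in for its real IP):
NNODES=2 NODE_RANK=0 PORT=29500 MASTER_ADDR=10.0.0.1 \
  sh tools/dist_train.sh configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py 8

# On machine 1, pointing at the same master address and port:
NNODES=2 NODE_RANK=1 PORT=29500 MASTER_ADDR=10.0.0.1 \
  sh tools/dist_train.sh configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py 8
```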
+ +### Manage jobs with Slurm + If you run MMRotate on a cluster managed with [slurm](https://slurm.schedmd.com/), you can use the script `slurm_train.sh`. (This script also supports single machine training.) ```shell diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index ba6f1738f..dc6b8c3ea 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -85,7 +85,25 @@ python tools/train.py ${CONFIG_FILE} [optional arguments] `resume-from` 读取模型的权重和优化器的状态,并且 epoch 也会继承于指定的检查点。通常用于恢复意外中断的训练过程。 `load-from` 只读取模型的权重并且训练的 epoch 会从 0 开始。通常用于微调。 -### 多机多 GPU 训练 +### 使用多台机器训练 + +如果您想使用由 ethernet 连接起来的多台机器, 您可以使用以下命令: + +在第一台机器上: + +```shell +NNODES=2 NODE_RANK=0 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS +``` + +在第二台机器上: + +```shell +NNODES=2 NODE_RANK=1 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS +``` + +但是,如果您不使用高速网路连接这几台机器的话,训练将会非常慢。 + +### 使用 Slurm 来管理任务 如果您在 [slurm](https://slurm.schedmd.com/) 管理的集群上运行 MMRotate,您可以使用脚本 `slurm_train.sh` (此脚本还支持单机训练)。 diff --git a/tools/dist_test.sh b/tools/dist_test.sh index 3c74ec6ec..dea131b43 100644 --- a/tools/dist_test.sh +++ b/tools/dist_test.sh @@ -3,8 +3,20 @@ CONFIG=$1 CHECKPOINT=$2 GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ -python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ - $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/test.py \ + $CONFIG \ + $CHECKPOINT \ + --launcher pytorch \ + ${@:4} diff --git a/tools/dist_train.sh b/tools/dist_train.sh index 5b43fffbf..aa71bf4ae 100644 --- a/tools/dist_train.sh +++ b/tools/dist_train.sh @@ -2,8 +2,19 @@ CONFIG=$1 GPUS=$2 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ -python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ - $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/train.py \ + $CONFIG \ + --seed 0 \ + --launcher pytorch ${@:3} From f5da6f2e107d0c7474ab46d187eaff36db1dded4 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Thu, 17 Mar 2022 22:34:14 +0800 Subject: [PATCH 04/12] [Feature] Add different seeds to different ranks (#102) --- tools/train.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/train.py b/tools/train.py index e761e2a57..1cf9983ab 100644 --- a/tools/train.py +++ b/tools/train.py @@ -8,6 +8,7 @@ import mmcv import torch +import torch.distributed as dist from mmcv import Config, DictAction from mmcv.runner import get_dist_info, init_dist from mmcv.utils import get_git_hash @@ -47,6 +48,10 @@ def parse_args(): help='ids of gpus to use ' '(only applicable to non-distributed training)') parser.add_argument('--seed', type=int, default=None, help='random seed') + parser.add_argument( + '--diff_seed', + action='store_true', + help='Whether or not set different seeds for different ranks') parser.add_argument( '--deterministic', action='store_true', @@ -142,6 +147,7 @@ def main(): # set random 
seeds seed = init_random_seed(args.seed) + seed = seed + dist.get_rank() if args.diff_seed else seed logger.info(f'Set random seed to {seed}, ' f'deterministic: {args.deterministic}') set_random_seed(seed, deterministic=args.deterministic) From e85805a877ead7149329b09e914032a7ecabcf56 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Mon, 28 Mar 2022 09:45:47 +0800 Subject: [PATCH 05/12] Fix rbbox_overlaps (#155) * Update rotate_iou2d_calculator.py * refactor in-place operator * fix bug Co-authored-by: Wenwei Zhang <40779233+ZwwWayne@users.noreply.github.com> --- .../bbox/iou_calculators/rotate_iou2d_calculator.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py index 3c84c6e14..63bffaf80 100644 --- a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py +++ b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py @@ -70,10 +70,6 @@ def rbbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): assert (bboxes1.size(-1) == 5 or bboxes1.size(0) == 0) assert (bboxes2.size(-1) == 5 or bboxes2.size(0) == 0) - # resolve `rbbox_overlaps` abnormal when input rbbox is too small. - bboxes1[2:4].clamp_(min=1e-3) - bboxes2[2:4].clamp_(min=1e-3) - rows = bboxes1.size(0) cols = bboxes2.size(0) if is_aligned: @@ -82,4 +78,10 @@ def rbbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): if rows * cols == 0: return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) - return box_iou_rotated(bboxes1, bboxes2, mode, is_aligned) + # resolve `rbbox_overlaps` abnormal when input rbbox is too small. + clamped_bboxes1 = bboxes1.detach().clone() + clamped_bboxes2 = bboxes2.detach().clone() + clamped_bboxes1[:, 2:4].clamp_(min=1e-3) + clamped_bboxes2[:, 2:4].clamp_(min=1e-3) + + return box_iou_rotated(clamped_bboxes1, clamped_bboxes2, mode, is_aligned) From 4961f0467673d730653da395118c131401ae85f3 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Mon, 28 Mar 2022 09:48:22 +0800 Subject: [PATCH 06/12] Add .dev_scripts/gather_models.py (#162) * Create gather_models.py * update * Update gather_models.py --- .dev_scripts/gather_models.py | 265 ++++++++++++++++++++++++++++++++++ setup.cfg | 2 +- 2 files changed, 266 insertions(+), 1 deletion(-) create mode 100644 .dev_scripts/gather_models.py diff --git a/.dev_scripts/gather_models.py b/.dev_scripts/gather_models.py new file mode 100644 index 000000000..84e9a3de4 --- /dev/null +++ b/.dev_scripts/gather_models.py @@ -0,0 +1,265 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
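# Overview: this script walks a directory of finished experiments, strips the
# optimizer and EMA states from each final (or best) checkpoint, renames the
# file with a sha256 suffix, copies the matching logs and config, and writes
# model_info.json plus a per-folder metafile YAML.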
+import argparse +import glob +import json +import os.path as osp +import shutil +import subprocess +from collections import OrderedDict + +import mmcv +import torch +import yaml + + +def ordered_yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds): + + class OrderedDumper(Dumper): + pass + + def _dict_representer(dumper, data): + return dumper.represent_mapping( + yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items()) + + OrderedDumper.add_representer(OrderedDict, _dict_representer) + return yaml.dump(data, stream, OrderedDumper, **kwds) + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + + # remove ema state_dict + for key in list(checkpoint['state_dict']): + if key.startswith('ema_'): + checkpoint['state_dict'].pop(key) + + # if it is necessary to remove some sensitive data in checkpoint['meta'], + # add the code here. + if torch.__version__ >= '1.6': + torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) + else: + torch.save(checkpoint, out_file) + sha = subprocess.check_output(['sha256sum', out_file]).decode() + final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) + subprocess.Popen(['mv', out_file, final_file]) + return final_file + + +def get_final_epoch(config): + cfg = mmcv.Config.fromfile('./configs/' + config) + return cfg.runner.max_epochs + + +def get_best_epoch(exp_dir): + best_epoch_full_path = list( + sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1] + best_epoch_model_path = best_epoch_full_path.split('/')[-1] + best_epoch = best_epoch_model_path.split('_')[-1].split('.')[0] + return best_epoch_model_path, int(best_epoch) + + +def get_real_epoch(config): + cfg = mmcv.Config.fromfile('./configs/' + config) + epoch = cfg.runner.max_epochs + if cfg.data.train.type == 'RepeatDataset': + epoch *= cfg.data.train.times + return epoch + + +def get_final_results(log_json_path, epoch, results_lut): + result_dict = dict() + with open(log_json_path, 'r') as f: + for line in f.readlines(): + log_line = json.loads(line) + if 'mode' not in log_line.keys(): + continue + + if log_line['mode'] == 'train' and log_line['epoch'] == epoch: + result_dict['memory'] = log_line['memory'] + + if log_line['mode'] == 'val' and log_line['epoch'] == epoch: + result_dict.update({ + key: log_line[key] + for key in results_lut if key in log_line + }) + return result_dict + + +def get_dataset_name(config): + # If there are more dataset, add here. + name_map = dict( + HRSCDataset='HRSC 2016', SARDataset='SAR', DOTADataset='DOTA v1.0') + cfg = mmcv.Config.fromfile('./configs/' + config) + return name_map[cfg.dataset_type] + + +def convert_model_info_to_pwc(model_infos): + pwc_files = {} + for model in model_infos: + cfg_folder_name = osp.split(model['config'])[-2] + pwc_model_info = OrderedDict() + pwc_model_info['Name'] = osp.split(model['config'])[-1].split('.')[0] + pwc_model_info['In Collection'] = 'Please fill in Collection name' + pwc_model_info['Config'] = osp.join('configs', model['config']) + + # get metadata + memory = round(model['results']['memory'] / 1024, 1) + epochs = get_real_epoch(model['config']) + meta_data = OrderedDict() + meta_data['Training Memory (GB)'] = memory + meta_data['Epochs'] = epochs + pwc_model_info['Metadata'] = meta_data + + # get dataset name + dataset_name = get_dataset_name(model['config']) + + # get results + results = [] + # if there are more metrics, add here. 
+ if 'mAP' in model['results']: + metric = round(model['results']['mAP'] * 100, 1) + results.append( + OrderedDict( + Task='Object Detection', + Dataset=dataset_name, + Metrics={'box AP': metric})) + pwc_model_info['Results'] = results + + link_string = 'https://download.openmmlab.com/mmrotate/v0.1.0/' + link_string += '{}/{}'.format(model['config'].rstrip('.py'), + osp.split(model['model_path'])[-1]) + pwc_model_info['Weights'] = link_string + if cfg_folder_name in pwc_files: + pwc_files[cfg_folder_name].append(pwc_model_info) + else: + pwc_files[cfg_folder_name] = [pwc_model_info] + return pwc_files + + +def parse_args(): + parser = argparse.ArgumentParser(description='Gather benchmarked models') + parser.add_argument( + 'root', + type=str, + help='root path of benchmarked models to be gathered') + parser.add_argument( + 'out', type=str, help='output path of gathered models to be stored') + parser.add_argument( + '--best', + action='store_true', + help='whether to gather the best model.') + + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + models_root = args.root + models_out = args.out + mmcv.mkdir_or_exist(models_out) + + # find all models in the root directory to be gathered + raw_configs = list(mmcv.scandir('./configs', '.py', recursive=True)) + + # filter configs that is not trained in the experiments dir + used_configs = [] + for raw_config in raw_configs: + if osp.exists(osp.join(models_root, raw_config)): + used_configs.append(raw_config) + print(f'Find {len(used_configs)} models to be gathered') + + # find final_ckpt and log file for trained each config + # and parse the best performance + model_infos = [] + for used_config in used_configs: + exp_dir = osp.join(models_root, used_config) + # check whether the exps is finished + if args.best is True: + final_model, final_epoch = get_best_epoch(exp_dir) + else: + final_epoch = get_final_epoch(used_config) + final_model = 'epoch_{}.pth'.format(final_epoch) + + model_path = osp.join(exp_dir, final_model) + # skip if the model is still training + if not osp.exists(model_path): + continue + + # get the latest logs + log_json_path = list( + sorted(glob.glob(osp.join(exp_dir, '*.log.json'))))[-1] + log_txt_path = list(sorted(glob.glob(osp.join(exp_dir, '*.log'))))[-1] + cfg = mmcv.Config.fromfile('./configs/' + used_config) + results_lut = cfg.evaluation.metric + if not isinstance(results_lut, list): + results_lut = [results_lut] + model_performance = get_final_results(log_json_path, final_epoch, + results_lut) + + if model_performance is None: + continue + + model_time = osp.split(log_txt_path)[-1].split('.')[0] + model_infos.append( + dict( + config=used_config, + results=model_performance, + epochs=final_epoch, + model_time=model_time, + final_model=final_model, + log_json_path=osp.split(log_json_path)[-1])) + + # publish model for each checkpoint + publish_model_infos = [] + for model in model_infos: + model_publish_dir = osp.join(models_out, model['config'].rstrip('.py')) + mmcv.mkdir_or_exist(model_publish_dir) + + model_name = osp.split(model['config'])[-1].split('.')[0] + + model_name += '_' + model['model_time'] + publish_model_path = osp.join(model_publish_dir, model_name) + trained_model_path = osp.join(models_root, model['config'], + model['final_model']) + + # convert model + final_model_path = process_checkpoint(trained_model_path, + publish_model_path) + + # copy log + shutil.copy( + osp.join(models_root, model['config'], model['log_json_path']), + osp.join(model_publish_dir, 
f'{model_name}.log.json')) + shutil.copy( + osp.join(models_root, model['config'], + model['log_json_path'].rstrip('.json')), + osp.join(model_publish_dir, f'{model_name}.log')) + + # copy config to guarantee reproducibility + config_path = model['config'] + config_path = osp.join( + 'configs', + config_path) if 'configs' not in config_path else config_path + target_config_path = osp.split(config_path)[-1] + shutil.copy(config_path, osp.join(model_publish_dir, + target_config_path)) + + model['model_path'] = final_model_path + publish_model_infos.append(model) + + models = dict(models=publish_model_infos) + print(f'Totally gathered {len(publish_model_infos)} models') + mmcv.dump(models, osp.join(models_out, 'model_info.json')) + + pwc_files = convert_model_info_to_pwc(publish_model_infos) + for name in pwc_files: + with open(osp.join(models_out, name + '_metafile.yml'), 'w') as f: + ordered_yaml_dump(pwc_files[name], f, encoding='utf-8') + + +if __name__ == '__main__': + main() diff --git a/setup.cfg b/setup.cfg index ea404076c..6971d587e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ line_length = 79 multi_line_output = 0 known_standard_library = setuptools known_first_party = mmrotate -known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,terminaltables,torch +known_third_party = PIL,cv2,e2cnn,matplotlib,mmcv,mmdet,numpy,pytest,pytorch_sphinx_theme,terminaltables,torch,yaml no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY From 92d750590cbd973dd2dba852a4c6891838566964 Mon Sep 17 00:00:00 2001 From: yangxue Date: Tue, 29 Mar 2022 14:28:06 +0800 Subject: [PATCH 07/12] [Algorithm] Support Circular Smooth Label (CSL, ECCV'20) (#153) * [Algorithm] Support CSL * rm inplace operation in gwd * fix typo in redet config * update comments, delete useless code, and fix bugs * fix typo * add docstring * fix docstring typo * Organize comments * Add description to anchor generator * Delete useless code * Add more comments * fix urls * speed up csl head * update docstring * fix cls-pulse bug Co-authored-by: Wenwei Zhang <40779233+ZwwWayne@users.noreply.github.com> --- README.md | 1 + README_zh-CN.md | 1 + configs/csl/README.md | 43 ++ configs/csl/metafile.yml | 27 + ..._csl_gaussian_r50_fpn_fp16_1x_dota_le90.py | 22 + .../redet_re50_refpn_1x_dota_ms_rr_le90.py | 4 +- docs/en/model_zoo.md | 4 +- docs/zh_cn/model_zoo.md | 4 +- mmrotate/core/anchor/anchor_generator.py | 10 +- mmrotate/core/bbox/coder/__init__.py | 3 +- mmrotate/core/bbox/coder/angle_coder.py | 114 ++++ mmrotate/core/bbox/transforms.py | 8 +- mmrotate/datasets/pipelines/transforms.py | 2 +- mmrotate/models/dense_heads/__init__.py | 3 +- .../dense_heads/csl_rotated_retina_head.py | 579 ++++++++++++++++++ .../dense_heads/kfiou_rotate_retina_head.py | 3 +- .../models/dense_heads/oriented_rpn_head.py | 3 +- .../models/dense_heads/rotated_anchor_head.py | 7 +- .../models/dense_heads/rotated_rpn_head.py | 3 +- mmrotate/models/losses/__init__.py | 3 +- mmrotate/models/losses/gaussian_dist_loss.py | 26 + mmrotate/models/losses/smooth_focal_loss.py | 129 ++++ 22 files changed, 980 insertions(+), 19 deletions(-) create mode 100644 configs/csl/README.md create mode 100644 configs/csl/metafile.yml create mode 100644 configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py create mode 100644 mmrotate/core/bbox/coder/angle_coder.py create mode 100644 mmrotate/models/dense_heads/csl_rotated_retina_head.py create mode 100644 mmrotate/models/losses/smooth_focal_loss.py diff --git 
a/README.md b/README.md index 64e06cc6e..048206932 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,7 @@ A summary can be found in the [Model Zoo](docs/en/model_zoo.md) page. * [x] [Rotated RepPoints-OBB](configs/rotated_reppoints/README.md) (ICCV'2019) * [x] [RoI Transformer](configs/roi_trans/README.md) (CVPR'2019) * [x] [Gliding Vertex](configs/gliding_vertex/README.md) (TPAMI'2020) +* [x] [CSL](configs/csl/README.md) (ECCV'2020) * [x] [R3Det](configs/r3det/README.md) (AAAI'2021) * [x] [S2A-Net](configs/s2anet/README.md) (TGRS'2021) * [x] [ReDet](configs/redet/README.md) (CVPR'2021) diff --git a/README_zh-CN.md b/README_zh-CN.md index 4e5de4a04..e6cd17f16 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -100,6 +100,7 @@ MMRotate 也提供了其他更详细的教程: * [x] [Rotated RepPoints-OBB](configs/rotated_reppoints/README.md) (ICCV'2019) * [x] [RoI Transformer](configs/roi_trans/README.md) (CVPR'2019) * [x] [Gliding Vertex](configs/gliding_vertex/README.md) (TPAMI'2020) +* [x] [CSL](configs/csl/README.md) (ECCV'2020) * [x] [R3Det](configs/r3det/README.md) (AAAI'2021) * [x] [S2A-Net](configs/s2anet/README.md) (TGRS'2021) * [x] [ReDet](configs/redet/README.md) (CVPR'2021) diff --git a/configs/csl/README.md b/configs/csl/README.md new file mode 100644 index 000000000..972f9dfbb --- /dev/null +++ b/configs/csl/README.md @@ -0,0 +1,43 @@ +# CSL +> [Arbitrary-Oriented Object Detection with Circular Smooth Label](https://link.springer.com/chapter/10.1007/978-3-030-58598-3_40) + + +## Abstract + +
+ +
+ +Arbitrary-oriented object detection has recently attracted increasing attention in vision for their importance +in aerial imagery, scene text, and face etc. In this paper, we show that existing regression-based rotation detectors +suffer the problem of discontinuous boundaries, which is directly caused by angular periodicity or corner ordering. +By a careful study, we find the root cause is that the ideal predictions are beyond the defined range. We design a +new rotation detection baseline, to address the boundary problem by transforming angular prediction from a regression +problem to a classification task with little accuracy loss, whereby high-precision angle classification is devised in +contrast to previous works using coarse-granularity in rotation detection. We also propose a circular smooth label (CSL) +technique to handle the periodicity of the angle and increase the error tolerance to adjacent angles. We further +introduce four window functions in CSL and explore the effect of different window radius sizes on detection performance. +Extensive experiments and visual analysis on two large-scale public datasets for aerial images i.e. DOTA, HRSC2016, +as well as scene text dataset ICDAR2015 and MLT, show the effectiveness of our approach. + +## Results and models + +DOTA1.0 + +| Backbone | mAP | Angle | Window func. | Omega | lr schd | Mem (GB) | Inf Time (fps) | Aug | Batch Size | Configs | Download | +|:------------:|:----------:|:-----------:|:-----------:|:-----------:|:---------:|:---------:|:---------:|:---------:|:---------:|:---------:|:-------------:| +| ResNet50 (1024,1024,200) | 68.42 | le90 | - | - | 1x | 3.38 | 17.8 | - | 2 | [rotated_retinanet_obb_r50_fpn_1x_dota_le90](./rotated_retinanet_obb_r50_fpn_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90-c0097bc4.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90_20220128_130740.log.json) +| ResNet50 (1024,1024,200) | 68.79 | le90 | - | - | 1x | 2.36 | 25.9 | - | 2 | [rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90](./rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90-01de71b5.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90_20220303_183714.log.json) +| ResNet50 (1024,1024,200) | 69.51 | le90 | Gaussian | 4 | 1x | 2.60 | 24.0 | - | 2 | [rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90](./rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90-b4271aed.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90_20220321_010033.log.json) + + +## Citation +``` +@inproceedings{yang2020arbitrary, + title={Arbitrary-Oriented Object Detection with Circular Smooth Label}, + author={Yang, Xue and Yan, Junchi}, + booktitle={European Conference on Computer Vision}, + 
pages={677--694}, + year={2020} +} +``` diff --git a/configs/csl/metafile.yml b/configs/csl/metafile.yml new file mode 100644 index 000000000..46dbcda9c --- /dev/null +++ b/configs/csl/metafile.yml @@ -0,0 +1,27 @@ +Collections: +- Name: CSL + Metadata: + Training Data: DOTAv1.0 + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 1x Quadro RTX 8000 + Architecture: + - ResNet + Paper: + URL: https://link.springer.com/chapter/10.1007/978-3-030-58598-3_40 + Title: 'Arbitrary-Oriented Object Detection with Circular Smooth Label' + README: configs/csl/README.md + +Models: + - Name: rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90 + In Collection: csl + Config: configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py + Metadata: + Training Data: DOTAv1.0 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 69.51 + Weights: https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90-b4271aed.pth diff --git a/configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py b/configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py new file mode 100644 index 000000000..f7e8d06a8 --- /dev/null +++ b/configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py @@ -0,0 +1,22 @@ +_base_ = \ + ['../rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90.py'] + +angle_version = 'le90' +model = dict( + bbox_head=dict( + type='CSLRRetinaHead', + angle_coder=dict( + type='CSLCoder', + angle_version=angle_version, + omega=4, + window='gaussian', + radius=3), + loss_cls=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0), + loss_angle=dict( + type='SmoothFocalLoss', gamma=2.0, alpha=0.25, loss_weight=0.8))) diff --git a/configs/redet/redet_re50_refpn_1x_dota_ms_rr_le90.py b/configs/redet/redet_re50_refpn_1x_dota_ms_rr_le90.py index 6b12f69c6..a033006ba 100644 --- a/configs/redet/redet_re50_refpn_1x_dota_ms_rr_le90.py +++ b/configs/redet/redet_re50_refpn_1x_dota_ms_rr_le90.py @@ -1,6 +1,6 @@ -_base_ = ['./redet_re50_fpn_1x_dota_le90.py'] +_base_ = ['./redet_re50_refpn_1x_dota_le90.py'] -data_root = '/cluster/home/it_stu198/main/datasets/split_ms_dota1_0/' +data_root = 'datasets/split_ms_dota1_0/' angle_version = 'le90' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) diff --git a/docs/en/model_zoo.md b/docs/en/model_zoo.md index 55f08ad41..4a4612f25 100644 --- a/docs/en/model_zoo.md +++ b/docs/en/model_zoo.md @@ -5,6 +5,7 @@ - [Rotated RepPoints-OBB](../../configs/rotated_reppoints/README.md) (ICCV'2019) - [RoI Transformer](../../configs/roi_trans/README.md) (CVPR'2019) - [Gliding Vertex](../../configs/gliding_vertex/README.md) (TPAMI'2020) +- [CSL](../../configs/csl/README.md) (ECCV'2020) - [R3Det](../../configs/r3det/README.md) (AAAI'2021) - [S2A-Net](../../configs/s2anet/README.md) (TGRS'2021) - [ReDet](../../configs/redet/README.md) (CVPR'2021) @@ -26,6 +27,7 @@ | ResNet50 (1024,1024,200) | 68.42 | le90 | 1x | 3.38 | 16.9 | - | 2 | [rotated_retinanet_obb_r50_fpn_1x_dota_le90](../../configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py) | 
[model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90-c0097bc4.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90_20220128_130740.log.json) | ResNet50 (1024,1024,200) | 68.79 | le90 | 1x | 2.36 | 22.4 | - | 2 | [rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90](../../configs/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90-01de71b5.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90_20220303_183714.log.json) | ResNet50 (1024,1024,200) | 69.49 | le135 | 1x | 4.05 | 8.6 | - | 2 | [g_reppoints_r50_fpn_1x_dota_le135](../../configs/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135/g_reppoints_r50_fpn_1x_dota_le135-b840eed7.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135/g_reppoints_r50_fpn_1x_dota_le135_20220202_233631.log.json) +| ResNet50 (1024,1024,200) | 69.51 | le90 | 1x | 4.40 | 24.0 | - | 2 | [rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90](../../configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90-b4271aed.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90_20220321_010033.log.json) | ResNet50 (1024,1024,200) | 69.55 | oc | 1x | 3.39 | 15.5 | - | 2 | [rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc](../../configs/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc-41fd7805.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc_20220120_152421.log.json) | ResNet50 (1024,1024,200) | 69.60 | le90 | 1x | 3.38 | 15.1 | - | 2 | [rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90](../../configs/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90-03e02f75.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90_20220209_173225.log.json) | ResNet50 (1024,1024,200) | 69.63 | le135 | 1x | 3.45 | 16.1 | - | 2 | [cfa_r50_fpn_1x_dota_le135](../../configs/cfa/cfa_r50_fpn_1x_dota_le135.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/cfa/cfa_r50_fpn_1x_dota_le135/cfa_r50_fpn_1x_dota_le135-aed1cbc6.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/cfa/cfa_r50_fpn_1x_dota_le135/cfa_r50_fpn_1x_dota_le135_20220205_144859.log.json) @@ -57,4 +59,4 @@ - `MS` means 
multiple scale image split. - `RR` means random rotation. -The above models are trained with 1 * 1080Ti and inferred with 1 * 2080Ti. +The above models are trained with 1 * 1080Ti/2080Ti and inferred with 1 * 2080Ti. diff --git a/docs/zh_cn/model_zoo.md b/docs/zh_cn/model_zoo.md index 4ebf2cc0d..314918cf7 100644 --- a/docs/zh_cn/model_zoo.md +++ b/docs/zh_cn/model_zoo.md @@ -5,6 +5,7 @@ - [Rotated RepPoints-OBB](../../configs/rotated_reppoints/README.md) (ICCV'2019) - [RoI Transformer](../../configs/roi_trans/README.md) (CVPR'2019) - [Gliding Vertex](../../configs/gliding_vertex/README.md) (TPAMI'2020) +- [CSL](../../configs/csl/README.md) (ECCV'2020) - [R3Det](../../configs/r3det/README.md) (AAAI'2021) - [S2A-Net](../../configs/s2anet/README.md) (TGRS'2021) - [ReDet](../../configs/redet/README.md) (CVPR'2021) @@ -26,6 +27,7 @@ | ResNet50 (1024,1024,200) | 68.42 | le90 | 1x | 3.38 | 16.9 | - | 2 | [rotated_retinanet_obb_r50_fpn_1x_dota_le90](../../configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90-c0097bc4.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90/rotated_retinanet_obb_r50_fpn_1x_dota_le90_20220128_130740.log.json) | ResNet50 (1024,1024,200) | 69.49 | le135 | 1x | 4.05 | 8.6 | - | 2 | [g_reppoints_r50_fpn_1x_dota_le135](../../configs/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135/g_reppoints_r50_fpn_1x_dota_le135-b840eed7.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/g_reppoints/g_reppoints_r50_fpn_1x_dota_le135/g_reppoints_r50_fpn_1x_dota_le135_20220202_233631.log.json) | ResNet50 (1024,1024,200) | 68.79 | le90 | 1x | 2.36 | 22.4 | - | 2 | [rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90](../../configs/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90-01de71b5.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/rotated_retinanet/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_r50_fpn_fp16_1x_dota_le90_20220303_183714.log.json) +| ResNet50 (1024,1024,200) | 69.51 | le90 | 1x | 4.40 | 24.0 | - | 2 | [rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90](../../configs/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90-b4271aed.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/csl/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90/rotated_retinanet_obb_csl_gaussian_r50_fpn_fp16_1x_dota_le90_20220321_010033.log.json) | ResNet50 (1024,1024,200) | 69.55 | oc | 1x | 3.39 | 15.5 | - | 2 | [rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc](../../configs/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc-41fd7805.pth) | 
[log](https://download.openmmlab.com/mmrotate/v0.1.0/gwd/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc/rotated_retinanet_hbb_gwd_r50_fpn_1x_dota_oc_20220120_152421.log.json) | ResNet50 (1024,1024,200) | 69.60 | le90 | 1x | 3.38 | 15.1 | - | 2 | [rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90](../../configs/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90-03e02f75.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/kfiou/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90/rotated_retinanet_hbb_kfiou_r50_fpn_1x_dota_le90_20220209_173225.log.json) | ResNet50 (1024,1024,200) | 69.63 | le135 | 1x | 3.45 | 16.1 | - | 2 | [cfa_r50_fpn_1x_dota_le135](../../configs/cfa/cfa_r50_fpn_1x_dota_le135.py) | [model](https://download.openmmlab.com/mmrotate/v0.1.0/cfa/cfa_r50_fpn_1x_dota_le135/cfa_r50_fpn_1x_dota_le135-aed1cbc6.pth) | [log](https://download.openmmlab.com/mmrotate/v0.1.0/cfa/cfa_r50_fpn_1x_dota_le135/cfa_r50_fpn_1x_dota_le135_20220205_144859.log.json) @@ -57,4 +59,4 @@ - `MS` 表示多尺度图像增强。 - `RR` 表示随机旋转增强。 -上述模型都是使用 1 * 1080ti 训练得到的,并且在 1 * 2080ti 上进行推理测试。 +上述模型都是使用 1 * 1080ti/2080ti 训练得到的,并且在 1 * 2080ti 上进行推理测试。 diff --git a/mmrotate/core/anchor/anchor_generator.py b/mmrotate/core/anchor/anchor_generator.py index dec8f31dc..bdb378368 100644 --- a/mmrotate/core/anchor/anchor_generator.py +++ b/mmrotate/core/anchor/anchor_generator.py @@ -8,7 +8,10 @@ @ROTATED_ANCHOR_GENERATORS.register_module() class RotatedAnchorGenerator(AnchorGenerator): - """Standard rotate anchor generator for 2D anchor-based detectors.""" + """Fake rotate anchor generator for 2D anchor-based detectors. + + Horizontal bounding box represented by (x,y,w,h,theta). + """ def single_level_grid_priors(self, featmap_size, @@ -34,6 +37,11 @@ def single_level_grid_priors(self, anchors = super(RotatedAnchorGenerator, self).single_level_grid_priors( featmap_size, level_idx, dtype=dtype, device=device) + # The correct usage is: + # from ..bbox.transforms import hbb2obb + # anchors = hbb2obb(anchors, self.angle_version) + # instead of rudely setting the angle to all 0. + # However, the experiment shows that the performance has decreased. num_anchors = anchors.size(0) xy = (anchors[:, 2:] + anchors[:, :2]) / 2 wh = anchors[:, 2:] - anchors[:, :2] diff --git a/mmrotate/core/bbox/coder/__init__.py b/mmrotate/core/bbox/coder/__init__.py index 24ed61710..cfd7fb1a1 100644 --- a/mmrotate/core/bbox/coder/__init__.py +++ b/mmrotate/core/bbox/coder/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .angle_coder import CSLCoder from .delta_midpointoffset_rbbox_coder import MidpointOffsetCoder from .delta_xywha_hbbox_coder import DeltaXYWHAHBBoxCoder from .delta_xywha_rbbox_coder import DeltaXYWHAOBBoxCoder @@ -6,5 +7,5 @@ __all__ = [ 'DeltaXYWHAOBBoxCoder', 'DeltaXYWHAHBBoxCoder', 'MidpointOffsetCoder', - 'GVFixCoder', 'GVRatioCoder' + 'GVFixCoder', 'GVRatioCoder', 'CSLCoder' ] diff --git a/mmrotate/core/bbox/coder/angle_coder.py b/mmrotate/core/bbox/coder/angle_coder.py new file mode 100644 index 000000000..b84000a5e --- /dev/null +++ b/mmrotate/core/bbox/coder/angle_coder.py @@ -0,0 +1,114 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
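# Usage sketch for the coder defined below (with omega=1 and 'le90', angles in
# [-pi/2, pi/2) map to 180 one-degree bins):
#   coder = CSLCoder('le90', omega=1, window='gaussian', radius=6)
#   smooth = coder.encode(angle_targets)  # (N, 1) radians -> (N, 180) labels
#   angle = coder.decode(angle_preds)     # (N, 180) scores -> radians (argmax)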
+import math + +import torch +from mmdet.core.bbox.coder.base_bbox_coder import BaseBBoxCoder + +from ..builder import ROTATED_BBOX_CODERS + + +@ROTATED_BBOX_CODERS.register_module() +class CSLCoder(BaseBBoxCoder): + """Circular Smooth Label Coder. + + `Circular Smooth Label (CSL) + `_ . + + Args: + angle_version (str): Angle definition. + omega (float, optional): Angle discretization granularity. + Default: 1. + window (str, optional): Window function. Default: gaussian. + radius (int/float): window radius, int type for + ['triangle', 'rect', 'pulse'], float type for + ['gaussian']. Default: 6. + """ + + def __init__(self, angle_version, omega=1, window='gaussian', radius=6): + super().__init__() + self.angle_version = angle_version + assert angle_version in ['oc', 'le90', 'le135'] + assert window in ['gaussian', 'triangle', 'rect', 'pulse'] + self.angle_range = 90 if angle_version == 'oc' else 180 + self.angle_offset_dict = {'oc': 0, 'le90': 90, 'le135': 45} + self.angle_offset = self.angle_offset_dict[angle_version] + self.omega = omega + self.window = window + self.radius = radius + self.coding_len = int(self.angle_range // omega) + + def encode(self, angle_targets): + """Circular Smooth Label Encoder. + + Args: + angle_targets (Tensor): Angle offset for each scale level + Has shape (num_anchors * H * W, 1) + + Returns: + list[Tensor]: The csl encoding of angle offset for each + scale level. Has shape (num_anchors * H * W, coding_len) + """ + + # radius to degree + angle_targets_deg = angle_targets * (180 / math.pi) + # empty label + smooth_label = torch.zeros_like(angle_targets).repeat( + 1, self.coding_len) + angle_targets_deg = (angle_targets_deg + + self.angle_offset) / self.omega + # Float to Int + angle_targets_long = angle_targets_deg.long() + + if self.window == 'pulse': + radius_range = angle_targets_long % self.coding_len + smooth_value = 1.0 + elif self.window == 'rect': + base_radius_range = torch.arange( + -self.radius, self.radius, device=angle_targets_long.device) + radius_range = (base_radius_range + + angle_targets_long) % self.coding_len + smooth_value = 1.0 + elif self.window == 'triangle': + base_radius_range = torch.arange( + -self.radius, self.radius, device=angle_targets_long.device) + radius_range = (base_radius_range + + angle_targets_long) % self.coding_len + smooth_value = 1.0 - torch.abs( + (1 / self.radius) * base_radius_range) + + elif self.window == 'gaussian': + base_radius_range = torch.arange( + -self.angle_range // 2, + self.angle_range // 2, + device=angle_targets_long.device) + + radius_range = (base_radius_range + + angle_targets_long) % self.coding_len + smooth_value = torch.exp(-torch.pow(base_radius_range, 2) / + (2 * self.radius**2)) + + else: + raise NotImplementedError + + if isinstance(smooth_value, torch.Tensor): + smooth_value = smooth_value.unsqueeze(0).repeat( + smooth_label.size(0), 1) + + return smooth_label.scatter(1, radius_range, smooth_value) + + def decode(self, angle_preds): + """Circular Smooth Label Decoder. + + Args: + angle_preds (Tensor): The csl encoding of angle offset + for each scale level. + Has shape (num_anchors * H * W, coding_len) + + Returns: + list[Tensor]: Angle offset for each scale level. 
+ Has shape (num_anchors * H * W, 1) + """ + angle_cls_inds = torch.argmax(angle_preds, dim=1) + angle_pred = ((angle_cls_inds + 0.5) * + self.omega) % self.angle_range - self.angle_offset + return angle_pred * (math.pi / 180) diff --git a/mmrotate/core/bbox/transforms.py b/mmrotate/core/bbox/transforms.py index 239eb2e7a..21bdfd3dc 100644 --- a/mmrotate/core/bbox/transforms.py +++ b/mmrotate/core/bbox/transforms.py @@ -573,10 +573,10 @@ def hbb2obb_oc(hbboxes): Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ - x = (hbboxes[:, 0::4] + hbboxes[:, 2::4]) * 0.5 - y = (hbboxes[:, 1::4] + hbboxes[:, 3::4]) * 0.5 - w = hbboxes[:, 2::4] - hbboxes[:, 0::4] - h = hbboxes[:, 3::4] - hbboxes[:, 1::4] + x = (hbboxes[..., 0] + hbboxes[..., 2]) * 0.5 + y = (hbboxes[..., 1] + hbboxes[..., 3]) * 0.5 + w = hbboxes[..., 2] - hbboxes[..., 0] + h = hbboxes[..., 3] - hbboxes[..., 1] theta = x.new_zeros(*x.shape) rbboxes = torch.stack([x, y, h, w, theta + np.pi / 2], dim=-1) return rbboxes diff --git a/mmrotate/datasets/pipelines/transforms.py b/mmrotate/datasets/pipelines/transforms.py index b04484f47..3a4510844 100644 --- a/mmrotate/datasets/pipelines/transforms.py +++ b/mmrotate/datasets/pipelines/transforms.py @@ -84,7 +84,7 @@ def bbox_flip(self, bboxes, img_shape, direction): if self.version == 'oc': rotated_flag = (bboxes[:, 4] != np.pi / 2) flipped[rotated_flag, 4] = np.pi / 2 - bboxes[rotated_flag, 4] - flipped[rotated_flag, 2] = bboxes[rotated_flag, 3], + flipped[rotated_flag, 2] = bboxes[rotated_flag, 3] flipped[rotated_flag, 3] = bboxes[rotated_flag, 2] else: flipped[:, 4] = norm_angle(np.pi - bboxes[:, 4], self.version) diff --git a/mmrotate/models/dense_heads/__init__.py b/mmrotate/models/dense_heads/__init__.py index 3d28b1afa..0adcbdb8a 100644 --- a/mmrotate/models/dense_heads/__init__.py +++ b/mmrotate/models/dense_heads/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .csl_rotated_retina_head import CSLRRetinaHead from .kfiou_odm_refine_head import KFIoUODMRefineHead from .kfiou_rotate_retina_head import KFIoURRetinaHead from .kfiou_rotate_retina_refine_head import KFIoURRetinaRefineHead @@ -15,5 +16,5 @@ 'RotatedAnchorHead', 'RotatedRetinaHead', 'RotatedRPNHead', 'OrientedRPNHead', 'RotatedRetinaRefineHead', 'ODMRefineHead', 'KFIoURRetinaHead', 'KFIoURRetinaRefineHead', 'KFIoUODMRefineHead', - 'RotatedRepPointsHead', 'SAMRepPointsHead' + 'RotatedRepPointsHead', 'SAMRepPointsHead', 'CSLRRetinaHead' ] diff --git a/mmrotate/models/dense_heads/csl_rotated_retina_head.py b/mmrotate/models/dense_heads/csl_rotated_retina_head.py new file mode 100644 index 000000000..2cc19fc88 --- /dev/null +++ b/mmrotate/models/dense_heads/csl_rotated_retina_head.py @@ -0,0 +1,579 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +import torch +import torch.nn as nn +from mmcv.runner import force_fp32 +from mmdet.core import images_to_levels, multi_apply, unmap + +from mmrotate.core import build_bbox_coder, multiclass_nms_rotated +from ... import obb2hbb, rotated_anchor_inside_flags +from ..builder import ROTATED_HEADS, build_loss +from .rotated_retina_head import RotatedRetinaHead + + +@ROTATED_HEADS.register_module() +class CSLRRetinaHead(RotatedRetinaHead): + """Rotational Anchor-based refine head. + + Args: + use_encoded_angle (bool): Decide whether to use encoded angle or + gt angle as target. Default: True. + shield_reg_angle (bool): Decide whether to shield the angle loss from + reg branch. Default: False. + angle_coder (dict): Config of angle coder. 
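            E.g. the `CSLCoder` above; its `coding_len` decides the number
            of output channels of the angle classification branch.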
+ loss_angle (dict): Config of angle classification loss. + init_cfg (dict or list[dict], optional): Initialization config dict. + """ # noqa: W605 + + def __init__(self, + use_encoded_angle=True, + shield_reg_angle=False, + angle_coder=dict( + type='CSLCoder', + angle_version='le90', + omega=1, + window='gaussian', + radius=6), + loss_angle=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0), + init_cfg=dict( + type='Normal', + layer='Conv2d', + std=0.01, + override=[ + dict( + type='Normal', + name='retina_cls', + std=0.01, + bias_prob=0.01), + dict( + type='Normal', + name='retina_angle_cls', + std=0.01, + bias_prob=0.01), + ]), + **kwargs): + self.angle_coder = build_bbox_coder(angle_coder) + self.coding_len = self.angle_coder.coding_len + super(CSLRRetinaHead, self).__init__(**kwargs, init_cfg=init_cfg) + self.shield_reg_angle = shield_reg_angle + self.loss_angle = build_loss(loss_angle) + self.use_encoded_angle = use_encoded_angle + + def _init_layers(self): + """Initialize layers of the head.""" + super(CSLRRetinaHead, self)._init_layers() + self.retina_angle_cls = nn.Conv2d( + self.feat_channels, + self.num_anchors * self.coding_len, + 3, + padding=1) + + def forward_single(self, x): + """Forward feature of a single scale level. + + Args: + x (torch.Tensor): Features of a single scale level. + + Returns: + tuple: + cls_score (torch.Tensor): Cls scores for a single scale level + the channels number is num_anchors * num_classes. + bbox_pred (torch.Tensor): Box energies / deltas for a single + scale level, the channels number is num_anchors * 5. + angle_cls (torch.Tensor): Angle for a single scale level + the channels number is num_anchors * coding_len. + """ + cls_feat = x + reg_feat = x + for cls_conv in self.cls_convs: + cls_feat = cls_conv(cls_feat) + for reg_conv in self.reg_convs: + reg_feat = reg_conv(reg_feat) + cls_score = self.retina_cls(cls_feat) + bbox_pred = self.retina_reg(reg_feat) + angle_cls = self.retina_angle_cls(reg_feat) + return cls_score, bbox_pred, angle_cls + + def loss_single(self, cls_score, bbox_pred, angle_cls, anchors, labels, + label_weights, bbox_targets, bbox_weights, angle_targets, + angle_weights, num_total_samples): + """Compute loss of a single scale level. + + Args: + cls_score (torch.Tensor): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W). + bbox_pred (torch.Tensor): Box energies / deltas for each scale + level with shape (N, num_anchors * 5, H, W). + anchors (torch.Tensor): Box reference for each scale level with + shape (N, num_total_anchors, 5). + labels (torch.Tensor): Labels of each anchors with shape + (N, num_total_anchors). + label_weights (torch.Tensor): Label weights of each anchor with + shape (N, num_total_anchors) + bbox_targets (torch.Tensor): BBox regression targets of each anchor + weight shape (N, num_total_anchors, 5). + bbox_weights (torch.Tensor): BBox regression loss weights of each + anchor with shape (N, num_total_anchors, 5). + angle_targets (torch.Tensor): Angle classification targets of + each anchor weight shape (N, num_total_anchors, coding_len). + angle_weights (torch.Tensor): Angle classification loss weights + of each anchor with shape (N, num_total_anchors, 1). + num_total_samples (int): If sampling, num total samples equal to + the number of total anchors; Otherwise, it is the number of + positive anchors. + + Returns: + loss_cls (torch.Tensor): cls. loss for each scale level. + loss_bbox (torch.Tensor): reg. loss for each scale level. 
+ loss_angle (torch.Tensor): angle cls. loss for each scale level. + """ + # Classification loss + labels = labels.reshape(-1) + label_weights = label_weights.reshape(-1) + cls_score = cls_score.permute(0, 2, 3, + 1).reshape(-1, self.cls_out_channels) + loss_cls = self.loss_cls( + cls_score, labels, label_weights, avg_factor=num_total_samples) + # Regression loss + bbox_targets = bbox_targets.reshape(-1, 5) + bbox_weights = bbox_weights.reshape(-1, 5) + # Shield angle in reg. branch + if self.shield_reg_angle: + bbox_weights[:, -1] = 0. + bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5) + if self.reg_decoded_bbox: + anchors = anchors.reshape(-1, 5) + bbox_pred = self.bbox_coder.decode(anchors, bbox_pred) + + loss_bbox = self.loss_bbox( + bbox_pred, + bbox_targets, + bbox_weights, + avg_factor=num_total_samples) + + angle_cls = angle_cls.permute(0, 2, 3, 1).reshape(-1, self.coding_len) + angle_targets = angle_targets.reshape(-1, self.coding_len) + angle_weights = angle_weights.reshape(-1, 1) + + loss_angle = self.loss_angle( + angle_cls, + angle_targets, + weight=angle_weights, + avg_factor=num_total_samples) + + return loss_cls, loss_bbox, loss_angle + + @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses')) + def loss(self, + cls_scores, + bbox_preds, + angle_clses, + gt_bboxes, + gt_labels, + img_metas, + gt_bboxes_ignore=None): + """Compute losses of the head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Box energies / deltas for each scale + level with shape (N, num_anchors * 5, H, W) + angle_clses (list[Tensor]): Box angles for each scale + level with shape (N, num_anchors * coding_len, H, W) + gt_bboxes (list[Tensor]): Ground truth bboxes for each image with + shape (num_gts, 5) in [cx, cy, w, h, a] format. + gt_labels (list[Tensor]): class indices corresponding to each box + img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + gt_bboxes_ignore (None | list[Tensor]): specify which bounding + boxes can be ignored when computing the loss. Default: None + + Returns: + dict[str, Tensor]: A dictionary of loss components. 
+ """ + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.anchor_generator.num_levels + + device = cls_scores[0].device + + anchor_list, valid_flag_list = self.get_anchors( + featmap_sizes, img_metas, device=device) + label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 + cls_reg_targets = self.get_targets( + anchor_list, + valid_flag_list, + gt_bboxes, + img_metas, + gt_bboxes_ignore_list=gt_bboxes_ignore, + gt_labels_list=gt_labels, + label_channels=label_channels) + if cls_reg_targets is None: + return None + (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list, + num_total_pos, num_total_neg, angel_target_list, + angel_weight_list) = cls_reg_targets + num_total_samples = ( + num_total_pos + num_total_neg if self.sampling else num_total_pos) + + # Anchor number of multi levels + num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]] + # Concat all level anchors and flags to a single tensor + concat_anchor_list = [] + for i, _ in enumerate(anchor_list): + concat_anchor_list.append(torch.cat(anchor_list[i])) + all_anchor_list = images_to_levels(concat_anchor_list, + num_level_anchors) + + losses_cls, losses_bbox, losses_angle = multi_apply( + self.loss_single, + cls_scores, + bbox_preds, + angle_clses, + all_anchor_list, + labels_list, + label_weights_list, + bbox_targets_list, + bbox_weights_list, + angel_target_list, + angel_weight_list, + num_total_samples=num_total_samples) + return dict( + loss_cls=losses_cls, + loss_bbox=losses_bbox, + loss_angle=losses_angle) + + def _get_targets_single(self, + flat_anchors, + valid_flags, + gt_bboxes, + gt_bboxes_ignore, + gt_labels, + img_meta, + label_channels=1, + unmap_outputs=True): + """Compute regression and classification targets for anchors in a + single image. + + Args: + flat_anchors (torch.Tensor): Multi-level anchors of the image, + which are concatenated into a single tensor of shape + (num_anchors, 5) + valid_flags (torch.Tensor): Multi level valid flags of the image, + which are concatenated into a single tensor of + shape (num_anchors,). + gt_bboxes (torch.Tensor): Ground truth bboxes of the image, + shape (num_gts, 5). + img_meta (dict): Meta info of the image. + gt_bboxes_ignore (torch.Tensor): Ground truth bboxes to be + ignored, shape (num_ignored_gts, 5). + img_meta (dict): Meta info of the image. + gt_labels (torch.Tensor): Ground truth labels of each box, + shape (num_gts,). + label_channels (int): Channel of label. Default: 1. + unmap_outputs (bool): Whether to map outputs back to the original + set of anchors. Default: True. 
+ + Returns: + tuple: + labels_list (list[Tensor]): Labels of each level + label_weights_list (list[Tensor]): Label weights of each level + bbox_targets_list (list[Tensor]): BBox targets of each level + bbox_weights_list (list[Tensor]): BBox weights of each level + angle_targets_list (list[Tensor]): Angle targets of each level + angle_weights_list (list[Tensor]): Angle weights of each level + num_total_pos (int): Number of positive samples in all images + num_total_neg (int): Number of negative samples in all images + """ + inside_flags = rotated_anchor_inside_flags( + flat_anchors, valid_flags, img_meta['img_shape'][:2], + self.train_cfg.allowed_border) + if not inside_flags.any(): + return (None, ) * 9 + # Assign gt and sample anchors + anchors = flat_anchors[inside_flags, :] + + if self.assign_by_circumhbbox is not None: + gt_bboxes_assign = obb2hbb(gt_bboxes, self.assign_by_circumhbbox) + assign_result = self.assigner.assign( + anchors, gt_bboxes_assign, gt_bboxes_ignore, + None if self.sampling else gt_labels) + else: + assign_result = self.assigner.assign( + anchors, gt_bboxes, gt_bboxes_ignore, + None if self.sampling else gt_labels) + + sampling_result = self.sampler.sample(assign_result, anchors, + gt_bboxes) + + num_valid_anchors = anchors.shape[0] + bbox_targets = torch.zeros_like(anchors) + bbox_weights = torch.zeros_like(anchors) + angle_targets = torch.zeros_like(bbox_targets[:, 4:5]) + angle_weights = torch.zeros_like(bbox_targets[:, 4:5]) + labels = anchors.new_full((num_valid_anchors, ), + self.num_classes, + dtype=torch.long) + label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) + + pos_inds = sampling_result.pos_inds + neg_inds = sampling_result.neg_inds + if len(pos_inds) > 0: + if not self.reg_decoded_bbox: + pos_bbox_targets = self.bbox_coder.encode( + sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes) + else: + pos_bbox_targets = sampling_result.pos_gt_bboxes + bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_weights[pos_inds, :] = 1.0 + + if self.use_encoded_angle: + # Get encoded angle as target + angle_targets[pos_inds, :] = pos_bbox_targets[:, 4:5] + else: + # Get gt angle as target + angle_targets[pos_inds, :] = \ + sampling_result.pos_gt_bboxes[:, 4:5] + # Angle encoder + angle_targets = self.angle_coder.encode(angle_targets) + angle_weights[pos_inds, :] = 1.0 + + if gt_labels is None: + # Only rpn gives gt_labels as None + # Foreground is the first class since v2.5.0 + labels[pos_inds] = 0 + else: + labels[pos_inds] = gt_labels[ + sampling_result.pos_assigned_gt_inds] + if self.train_cfg.pos_weight <= 0: + label_weights[pos_inds] = 1.0 + else: + label_weights[pos_inds] = self.train_cfg.pos_weight + if len(neg_inds) > 0: + label_weights[neg_inds] = 1.0 + + # Map up to original set of anchors + if unmap_outputs: + num_total_anchors = flat_anchors.size(0) + labels = unmap( + labels, num_total_anchors, inside_flags, + fill=self.num_classes) # fill bg label + label_weights = unmap(label_weights, num_total_anchors, + inside_flags) + bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) + bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) + angle_targets = unmap(angle_targets, num_total_anchors, + inside_flags) + angle_weights = unmap(angle_weights, num_total_anchors, + inside_flags) + + return (labels, label_weights, bbox_targets, bbox_weights, pos_inds, + neg_inds, sampling_result, angle_targets, angle_weights) + + def _get_bboxes_single(self, + cls_score_list, + bbox_pred_list, + angle_cls_list, + 
mlvl_anchors,
+                            img_shape,
+                            scale_factor,
+                            cfg,
+                            rescale=False,
+                            with_nms=True):
+        """Transform outputs for a single batch item into bbox predictions.
+
+        Args:
+            cls_score_list (list[Tensor]): Box scores for a single scale
+                level. Has shape (num_anchors * num_classes, H, W).
+            bbox_pred_list (list[Tensor]): Box energies / deltas for a single
+                scale level with shape (num_anchors * 5, H, W).
+            angle_cls_list (list[Tensor]): Angle deltas for a single
+                scale level with shape (num_anchors * coding_len, H, W).
+            mlvl_anchors (list[Tensor]): Box reference for a single scale
+                level with shape (num_total_anchors, 5).
+            img_shape (tuple[int]): Shape of the input image,
+                (height, width, 3).
+            scale_factor (ndarray): Scale factor of the image, arranged as
+                (w_scale, h_scale, w_scale, h_scale).
+            cfg (mmcv.Config): Test / postprocessing configuration,
+                if None, test_cfg would be used.
+            rescale (bool): If True, return boxes in original image space.
+                Default: False.
+            with_nms (bool): If True, do nms before returning boxes.
+                Default: True.
+
+        Returns:
+            Tensor: Labeled boxes in shape (n, 6), where the first 5 columns
+                are bounding box positions (cx, cy, w, h, a) and the
+                6-th column is a score between 0 and 1.
+        """
+        cfg = self.test_cfg if cfg is None else cfg
+        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
+        mlvl_bboxes = []
+        mlvl_scores = []
+        for cls_score, bbox_pred, angle_cls, anchors in zip(
+                cls_score_list, bbox_pred_list, angle_cls_list, mlvl_anchors):
+            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+            cls_score = cls_score.permute(
+                1, 2, 0).reshape(-1, self.cls_out_channels)
+            if self.use_sigmoid_cls:
+                scores = cls_score.sigmoid()
+            else:
+                scores = cls_score.softmax(-1)
+            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 5)
+
+            angle_cls = angle_cls.permute(1, 2, 0).reshape(
+                -1, self.coding_len).sigmoid()
+
+            nms_pre = cfg.get('nms_pre', -1)
+            if scores.shape[0] > nms_pre > 0:
+                # Get maximum scores for foreground classes.
+ if self.use_sigmoid_cls: + max_scores, _ = scores.max(dim=1) + else: + # Remind that we set FG labels to [0, num_class-1] + # since mmdet v2.0 + # BG cat_id: num_class + max_scores, _ = scores[:, :-1].max(dim=1) + _, topk_inds = max_scores.topk(nms_pre) + anchors = anchors[topk_inds, :] + bbox_pred = bbox_pred[topk_inds, :] + scores = scores[topk_inds, :] + angle_cls = angle_cls[topk_inds, :] + + # Angle decoder + angle_pred = self.angle_coder.decode(angle_cls) + + if self.use_encoded_angle: + bbox_pred[..., -1] = angle_pred + bboxes = self.bbox_coder.decode( + anchors, bbox_pred, max_shape=img_shape) + else: + bboxes = self.bbox_coder.decode( + anchors, bbox_pred, max_shape=img_shape) + bboxes[..., -1] = angle_pred + + mlvl_bboxes.append(bboxes) + mlvl_scores.append(scores) + mlvl_bboxes = torch.cat(mlvl_bboxes) + if rescale: + # Angle should not be rescaled + mlvl_bboxes[:, :4] = mlvl_bboxes[:, :4] / mlvl_bboxes.new_tensor( + scale_factor) + mlvl_scores = torch.cat(mlvl_scores) + if self.use_sigmoid_cls: + # Add a dummy background class to the backend when using sigmoid + # Remind that we set FG labels to [0, num_class-1] since mmdet v2.0 + # BG cat_id: num_class + padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1) + mlvl_scores = torch.cat([mlvl_scores, padding], dim=1) + + if with_nms: + det_bboxes, det_labels = multiclass_nms_rotated( + mlvl_bboxes, mlvl_scores, cfg.score_thr, cfg.nms, + cfg.max_per_img) + return det_bboxes, det_labels + else: + return mlvl_bboxes, mlvl_scores + + @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'angle_clses')) + def get_bboxes(self, + cls_scores, + bbox_preds, + angle_clses, + img_metas, + cfg=None, + rescale=False, + with_nms=True): + """Transform network output for a batch into bbox predictions. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Box energies / deltas for each scale + level with shape (N, num_anchors * 5, H, W) + angle_clses (list[Tensor]): Box angles for each scale + level with shape (N, num_anchors * coding_len, H, W) + img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + cfg (mmcv.Config | None): Test / postprocessing configuration, + if None, test_cfg would be used + rescale (bool): If True, return boxes in original image space. + Default: False. + with_nms (bool): If True, do nms before return boxes. + Default: True. + + Returns: + list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. + The first item is an (n, 6) tensor, where the first 5 columns + are bounding box positions (cx, cy, w, h, a) and the + 6-th column is a score between 0 and 1. The second item is a + (n,) tensor where each item is the predicted class label of the + corresponding box. 
+ + Example: + >>> import mmcv + >>> self = AnchorHead( + >>> num_classes=9, + >>> in_channels=1, + >>> anchor_generator=dict( + >>> type='AnchorGenerator', + >>> scales=[8], + >>> ratios=[0.5, 1.0, 2.0], + >>> strides=[4,])) + >>> img_metas = [{'img_shape': (32, 32, 3), 'scale_factor': 1}] + >>> cfg = mmcv.Config(dict( + >>> score_thr=0.00, + >>> nms=dict(type='nms', iou_thr=1.0), + >>> max_per_img=10)) + >>> feat = torch.rand(1, 1, 3, 3) + >>> cls_score, bbox_pred = self.forward_single(feat) + >>> # Note the input lists are over different levels, not images + >>> cls_scores, bbox_preds = [cls_score], [bbox_pred] + >>> result_list = self.get_bboxes(cls_scores, bbox_preds, + >>> img_metas, cfg) + >>> det_bboxes, det_labels = result_list[0] + >>> assert len(result_list) == 1 + >>> assert det_bboxes.shape[1] == 5 + >>> assert len(det_bboxes) == len(det_labels) == cfg.max_per_img + """ + assert len(cls_scores) == len(bbox_preds) + num_levels = len(cls_scores) + + device = cls_scores[0].device + featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)] + mlvl_anchors = self.anchor_generator.grid_priors( + featmap_sizes, device=device) + + result_list = [] + for img_id, _ in enumerate(img_metas): + cls_score_list = [ + cls_scores[i][img_id].detach() for i in range(num_levels) + ] + bbox_pred_list = [ + bbox_preds[i][img_id].detach() for i in range(num_levels) + ] + angle_cls_list = [ + angle_clses[i][img_id].detach() for i in range(num_levels) + ] + img_shape = img_metas[img_id]['img_shape'] + scale_factor = img_metas[img_id]['scale_factor'] + if with_nms: + # Some heads don't support with_nms argument + proposals = self._get_bboxes_single(cls_score_list, + bbox_pred_list, + angle_cls_list, + mlvl_anchors, img_shape, + scale_factor, cfg, rescale) + else: + proposals = self._get_bboxes_single(cls_score_list, + bbox_pred_list, + angle_cls_list, + mlvl_anchors, img_shape, + scale_factor, cfg, rescale, + with_nms) + result_list.append(proposals) + return result_list diff --git a/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py b/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py index 19c56f512..a1448b42c 100644 --- a/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py +++ b/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py @@ -83,7 +83,8 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - dict[str, Tensor]: A dictionary of loss components. + loss_cls (torch.Tensor): cls. loss for each scale level. + loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/oriented_rpn_head.py b/mmrotate/models/dense_heads/oriented_rpn_head.py index ca35c0e31..823a8ed6c 100644 --- a/mmrotate/models/dense_heads/oriented_rpn_head.py +++ b/mmrotate/models/dense_heads/oriented_rpn_head.py @@ -155,7 +155,8 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - dict[str, Tensor]: A dictionary of loss components. + loss_cls (torch.Tensor): cls. loss for each scale level. + loss_bbox (torch.Tensor): reg. loss for each scale level. 
""" # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/rotated_anchor_head.py b/mmrotate/models/dense_heads/rotated_anchor_head.py index 3dad51205..0b7f46bd0 100644 --- a/mmrotate/models/dense_heads/rotated_anchor_head.py +++ b/mmrotate/models/dense_heads/rotated_anchor_head.py @@ -187,7 +187,7 @@ def _get_targets_single(self, Args: flat_anchors (torch.Tensor): Multi-level anchors of the image, which are concatenated into a single tensor of shape - (num_anchors ,4) + (num_anchors, 5) valid_flags (torch.Tensor): Multi level valid flags of the image, which are concatenated into a single tensor of shape (num_anchors,). @@ -296,7 +296,7 @@ def get_targets(self, anchor_list (list[list[Tensor]]): Multi level anchors of each image. The outer list indicates images, and the inner list corresponds to feature levels of the image. Each element of - the inner list is a tensor of shape (num_anchors, 4). + the inner list is a tensor of shape (num_anchors, 5). valid_flag_list (list[list[Tensor]]): Multi level valid flags of each image. The outer list indicates images, and the inner list corresponds to feature levels of the image. Each element of @@ -405,7 +405,8 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - dict[str, Tensor]: A dictionary of loss components. + loss_cls (torch.Tensor): cls. loss for each scale level. + loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/rotated_rpn_head.py b/mmrotate/models/dense_heads/rotated_rpn_head.py index 632acbb1e..eb6606867 100644 --- a/mmrotate/models/dense_heads/rotated_rpn_head.py +++ b/mmrotate/models/dense_heads/rotated_rpn_head.py @@ -274,7 +274,8 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - dict[str, Tensor]: A dictionary of loss components. + loss_cls (torch.Tensor): cls. loss for each scale level. + loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/losses/__init__.py b/mmrotate/models/losses/__init__.py index b7050fbde..8594630e3 100644 --- a/mmrotate/models/losses/__init__.py +++ b/mmrotate/models/losses/__init__.py @@ -4,8 +4,9 @@ from .gaussian_dist_loss_v1 import GDLoss_v1 from .kf_iou_loss import KFLoss from .kld_reppoints_loss import KLDRepPointsLoss +from .smooth_focal_loss import SmoothFocalLoss __all__ = [ 'GDLoss', 'GDLoss_v1', 'KFLoss', 'ConvexGIoULoss', 'BCConvexGIoULoss', - 'KLDRepPointsLoss' + 'KLDRepPointsLoss', 'SmoothFocalLoss' ] diff --git a/mmrotate/models/losses/gaussian_dist_loss.py b/mmrotate/models/losses/gaussian_dist_loss.py index 1976f0c46..638d1a870 100644 --- a/mmrotate/models/losses/gaussian_dist_loss.py +++ b/mmrotate/models/losses/gaussian_dist_loss.py @@ -91,6 +91,31 @@ def postprocess(distance, fun='log1p', tau=1.0): @weighted_loss def gwd_loss(pred, target, fun='log1p', tau=1.0, alpha=1.0, normalize=True): """Gaussian Wasserstein distance loss. 
+ Derivation and simplification: + Given any positive-definite symmetrical 2*2 matrix Z: + :math:`Tr(Z^{1/2}) = λ_1^{1/2} + λ_2^{1/2}` + where :math:`λ_1` and :math:`λ_2` are the eigen values of Z + Meanwhile we have: + :math:`Tr(Z) = λ_1 + λ_2` + + :math:`det(Z) = λ_1 * λ_2` + Combination with following formula: + :math:`(λ_1^{1/2}+λ_2^{1/2})^2 = λ_1+λ_2+2 *(λ_1 * λ_2)^{1/2}` + Yield: + :math:`Tr(Z^{1/2}) = (Tr(Z) + 2 * (det(Z))^{1/2})^{1/2}` + For gwd loss the frustrating coupling part is: + :math:`Tr((Σ_p^{1/2} * Σ_t * Σp^{1/2})^{1/2})` + Assuming :math:`Z = Σ_p^{1/2} * Σ_t * Σ_p^{1/2}` then: + :math:`Tr(Z) = Tr(Σ_p^{1/2} * Σ_t * Σ_p^{1/2}) + = Tr(Σ_p^{1/2} * Σ_p^{1/2} * Σ_t) + = Tr(Σ_p * Σ_t)` + :math:`det(Z) = det(Σ_p^{1/2} * Σ_t * Σ_p^{1/2}) + = det(Σ_p^{1/2}) * det(Σ_t) * det(Σ_p^{1/2}) + = det(Σ_p * Σ_t)` + and thus we can rewrite the coupling part as: + :math:`Tr(Z^{1/2}) = (Tr(Z) + 2 * (det(Z))^{1/2})^{1/2}` + :math:`Tr((Σ_p^{1/2} * Σ_t * Σ_p^{1/2})^{1/2}) + = (Tr(Σ_p * Σ_t) + 2 * (det(Σ_p * Σ_t))^{1/2})^{1/2}` Args: pred (torch.Tensor): Predicted bboxes. @@ -102,6 +127,7 @@ def gwd_loss(pred, target, fun='log1p', tau=1.0, alpha=1.0, normalize=True): Returns: loss (torch.Tensor) + """ xy_p, Sigma_p = pred xy_t, Sigma_t = target diff --git a/mmrotate/models/losses/smooth_focal_loss.py b/mmrotate/models/losses/smooth_focal_loss.py new file mode 100644 index 000000000..f05c9a42c --- /dev/null +++ b/mmrotate/models/losses/smooth_focal_loss.py @@ -0,0 +1,129 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F +from mmdet.models import weight_reduce_loss + +from ..builder import ROTATED_LOSSES + + +def smooth_focal_loss(pred, + target, + weight=None, + gamma=2.0, + alpha=0.25, + reduction='mean', + avg_factor=None): + """Smooth Focal Loss proposed in Circular Smooth Label (CSL). + + `Circular Smooth Label (CSL) + `_ . + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 0.25. + reduction (str, optional): The reduction method used to + override the original reduction method of the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + + Returns: + torch.Tensor: The calculated loss + """ + + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * + (1 - target)) * pt.pow(gamma) + loss = F.binary_cross_entropy_with_logits( + pred, target, reduction='none') * focal_weight + if weight is not None: + if weight.shape != loss.shape: + if weight.size(0) == loss.size(0): + # For most cases, weight is of shape (num_priors, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + else: + # Sometimes, weight per anchor per class is also needed. e.g. + # in FSAF. But it may be flattened of shape + # (num_priors x num_class, ), while loss is still of shape + # (num_priors, num_class). 
+ assert weight.numel() == loss.numel() + weight = weight.view(loss.size(0), -1) + assert weight.ndim == loss.ndim + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + +@ROTATED_LOSSES.register_module() +class SmoothFocalLoss(nn.Module): + + def __init__(self, + gamma=2.0, + alpha=0.25, + reduction='mean', + loss_weight=1.0): + """Smooth Focal Loss. + + Args: + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 0.25. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + + Returns: + loss (torch.Tensor) + """ + super(SmoothFocalLoss, self).__init__() + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + """Forward function. + + Args: + pred (torch.Tensor): The prediction. + target (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + reduction_override (str, optional): The reduction method used to + override the original reduction method of the loss. + Options are "none", "mean" and "sum". + + Returns: + torch.Tensor: The calculated loss + """ + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + + loss_cls = self.loss_weight * smooth_focal_loss( + pred, + target, + weight, + gamma=self.gamma, + alpha=self.alpha, + reduction=reduction, + avg_factor=avg_factor) + + return loss_cls From 96de045156934f741484c95faedb4e18985c1821 Mon Sep 17 00:00:00 2001 From: jbwang1997 Date: Wed, 30 Mar 2022 19:38:04 +0800 Subject: [PATCH 08/12] [Fix] Improve the arguments of img_split.py (#168) * Normalize img_split.py * Update README.md * Update train.py diff_seed --- tools/data/dota/README.md | 8 ++--- tools/data/dota/split/img_split.py | 29 ++++++--------- .../split/split_configs/dota1_0/ms_train.json | 34 ------------------ .../split_configs/dota1_0/ms_trainval.json | 36 ------------------- .../split/split_configs/dota1_0/ss_test.json | 30 ---------------- .../split/split_configs/dota1_0/ss_train.json | 32 ----------------- .../split_configs/dota1_0/ss_trainval.json | 34 ------------------ .../split/split_configs/dota1_5/ms_test.json | 32 ----------------- .../split/split_configs/dota1_5/ms_train.json | 34 ------------------ .../split_configs/dota1_5/ms_trainval.json | 36 ------------------- .../split/split_configs/dota1_5/ss_test.json | 30 ---------------- .../split/split_configs/dota1_5/ss_train.json | 32 ----------------- .../split_configs/dota1_5/ss_trainval.json | 34 ------------------ .../split/split_configs/dota2_0/ms_test.json | 32 ----------------- .../split/split_configs/dota2_0/ms_train.json | 34 ------------------ .../split_configs/dota2_0/ms_trainval.json | 36 ------------------- .../split/split_configs/dota2_0/ss_train.json | 32 ----------------- .../split_configs/dota2_0/ss_trainval.json | 34 ------------------ .../split_configs/{dota1_0 => }/ms_test.json | 8 ++--- .../{dota2_0/ms_val.json => ms_train.json} | 10 ++---- .../{dota1_5/ms_val.json => ms_trainval.json} | 
12 +++---
 .../split_configs/{dota1_0 => }/ms_val.json   | 10 ++----
 .../split_configs/{dota2_0 => }/ss_test.json  |  8 ++---
 .../{dota2_0/ss_val.json => ss_train.json}    | 10 ++----
 .../{dota1_5/ss_val.json => ss_trainval.json} | 12 +++---
 .../split_configs/{dota1_0 => }/ss_val.json   | 10 ++----
 tools/train.py                                |  2 +-
 27 files changed, 41 insertions(+), 610 deletions(-)
 delete mode 100644 tools/data/dota/split/split_configs/dota1_0/ms_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_0/ms_trainval.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_0/ss_test.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_0/ss_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_0/ss_trainval.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ms_test.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ms_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ms_trainval.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ss_test.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ss_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota1_5/ss_trainval.json
 delete mode 100644 tools/data/dota/split/split_configs/dota2_0/ms_test.json
 delete mode 100644 tools/data/dota/split/split_configs/dota2_0/ms_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota2_0/ms_trainval.json
 delete mode 100644 tools/data/dota/split/split_configs/dota2_0/ss_train.json
 delete mode 100644 tools/data/dota/split/split_configs/dota2_0/ss_trainval.json
 rename tools/data/dota/split/split_configs/{dota1_0 => }/ms_test.json (63%)
 rename tools/data/dota/split/split_configs/{dota2_0/ms_val.json => ms_train.json} (59%)
 rename tools/data/dota/split/split_configs/{dota1_5/ms_val.json => ms_trainval.json} (58%)
 rename tools/data/dota/split/split_configs/{dota1_0 => }/ms_val.json (59%)
 rename tools/data/dota/split/split_configs/{dota2_0 => }/ss_test.json (61%)
 rename tools/data/dota/split/split_configs/{dota2_0/ss_val.json => ss_train.json} (57%)
 rename tools/data/dota/split/split_configs/{dota1_5/ss_val.json => ss_trainval.json} (57%)
 rename tools/data/dota/split/split_configs/{dota1_0 => }/ss_val.json (57%)

diff --git a/tools/data/dota/README.md b/tools/data/dota/README.md
index 3e759902f..d8045e53c 100644
--- a/tools/data/dota/README.md
+++ b/tools/data/dota/README.md
@@ -34,19 +34,19 @@ Please crop the original images into 1024×1024 patches with an overlap of 200 b

 ```shell
 python tools/data/dota/split/img_split.py --base-json \
-  tools/data/dota/split/split_configs/dota1_0/ss_trainval.json
+  tools/data/dota/split/split_configs/ss_trainval.json

 python tools/data/dota/split/img_split.py --base-json \
-  tools/data/dota/split/split_configs/dota1_0/ss_test.json
+  tools/data/dota/split/split_configs/ss_test.json
 ```

 If you want to get a multi-scale dataset, you can run the following command.

 ```shell
 python tools/data/dota/split/img_split.py --base-json \
-  tools/data/dota/split/split_configs/dota1_0/ms_trainval.json
+  tools/data/dota/split/split_configs/ms_trainval.json

 python tools/data/dota/split/img_split.py --base-json \
-  tools/data/dota/split/split_configs/dota1_0/ms_test.json
+  tools/data/dota/split/split_configs/ms_test.json
 ```

 Please change the `img_dirs` and `ann_dirs` in json. (Forked from [BboxToolkit](https://github.com/jbwang1997/BboxToolkit), which is faster than DOTA_Devkit.)
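For intuition about the settings in these configs: with `sizes=[1024]` and `gaps=[200]`, windows are laid out every `size - gap = 824` pixels along each axis, and the last window is clamped to the image border. The sketch below illustrates that arithmetic only — `window_starts` is a hypothetical helper, not part of `img_split.py`:

```python
# Hypothetical sketch of the sliding-window placement used when splitting
# DOTA images; `window_starts` is illustrative, not the real implementation.
def window_starts(extent, size=1024, gap=200):
    """Start coordinates of the crop windows along one image axis."""
    step = size - gap  # consecutive windows overlap by `gap` pixels
    if extent <= size:
        return [0]
    starts = list(range(0, extent - size, step))
    starts.append(extent - size)  # clamp the last window to the border
    return starts


print(window_starts(2500))  # [0, 824, 1476]: three 1024-px windows
```

The `gaps` value is thus the minimum overlap between neighbouring patches, which is why objects near a patch border still appear whole in at least one patch and per-patch results can later be merged.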
diff --git a/tools/data/dota/split/img_split.py b/tools/data/dota/split/img_split.py index f535652f0..63279d3c4 100644 --- a/tools/data/dota/split/img_split.py +++ b/tools/data/dota/split/img_split.py @@ -30,7 +30,7 @@ def add_parser(parser): """Add arguments.""" parser.add_argument( - '--base_json', + '--base-json', type=str, default=None, help='json config file for split images') @@ -39,25 +39,17 @@ def add_parser(parser): # argument for loading data parser.add_argument( - '--load_type', type=str, default=None, help='loading function type') - parser.add_argument( - '--img_dirs', + '--img-dirs', nargs='+', type=str, default=None, help='images dirs, must give a value') parser.add_argument( - '--ann_dirs', + '--ann-dirs', nargs='+', type=str, default=None, help='annotations dirs, optional') - parser.add_argument( - '--classes', - nargs='+', - type=str, - default=None, - help='the classes and order for loading data') # argument for splitting image parser.add_argument( @@ -79,21 +71,21 @@ def add_parser(parser): default=[1.], help='same as DOTA devkit rate, but only change windows size') parser.add_argument( - '--img_rate_thr', + '--img-rate-thr', type=float, default=0.6, help='the minimal rate of image in window and window') parser.add_argument( - '--iof_thr', + '--iof-thr', type=float, default=0.7, help='the minimal iof between a object and a window') parser.add_argument( - '--no_padding', + '--no-padding', action='store_true', help='not padding patches in regular size') parser.add_argument( - '--padding_value', + '--padding-value', nargs='+', type=int, default=[0], @@ -101,12 +93,12 @@ def add_parser(parser): # argument for saving parser.add_argument( - '--save_dir', + '--save-dir', type=str, default='.', help='to save pkl and split images') parser.add_argument( - '--save_ext', + '--save-ext', type=str, default='.png', help='the extension of saving images') @@ -127,10 +119,9 @@ def parse_args(): not hasattr(action, 'default'): continue action.default = prior_config[action.dest] - args = parser.parse_args() + args = parser.parse_args() # assert arguments - assert args.load_type is not None, "argument load_type can't be None" assert args.img_dirs is not None, "argument img_dirs can't be None" assert args.ann_dirs is None or len(args.ann_dirs) == len(args.img_dirs) assert len(args.sizes) == len(args.gaps) diff --git a/tools/data/dota/split/split_configs/dota1_0/ms_train.json b/tools/data/dota/split/split_configs/dota1_0/ms_train.json deleted file mode 100644 index c44ecd9bd..000000000 --- a/tools/data/dota/split/split_configs/dota1_0/ms_train.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota", - "img_dirs": [ - "data/DOTA1_0/train/images/" - ], - "ann_dirs": [ - "data/DOTA1_0/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota1_0/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_0/ms_trainval.json b/tools/data/dota/split/split_configs/dota1_0/ms_trainval.json deleted file mode 100644 index a9504a486..000000000 --- a/tools/data/dota/split/split_configs/dota1_0/ms_trainval.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota", - "img_dirs": [ - "/cluster/home/it_stu198/main/datasets/DOTA/train/images/", - 
"/cluster/home/it_stu198/main/datasets/DOTA/val/images/" - ], - "ann_dirs": [ - "/cluster/home/it_stu198/main/datasets/DOTA/train/labelTxt/", - "/cluster/home/it_stu198/main/datasets/DOTA/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "/cluster/home/it_stu198/main/datasets/split_ms_dota1_0/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_0/ss_test.json b/tools/data/dota/split/split_configs/dota1_0/ss_test.json deleted file mode 100644 index 569aeff3e..000000000 --- a/tools/data/dota/split/split_configs/dota1_0/ss_test.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota", - "img_dirs": [ - "/cluster/home/it_stu198/main/datasets/DOTA/test/images/" - ], - "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "/cluster/home/it_stu198/main/datasets/split_1024_dota1_0/test/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_0/ss_train.json b/tools/data/dota/split/split_configs/dota1_0/ss_train.json deleted file mode 100644 index 8ec1d6c1a..000000000 --- a/tools/data/dota/split/split_configs/dota1_0/ss_train.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota", - "img_dirs": [ - "data/DOTA1_0/train/images/" - ], - "ann_dirs": [ - "data/DOTA1_0/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota1_0/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_0/ss_trainval.json b/tools/data/dota/split/split_configs/dota1_0/ss_trainval.json deleted file mode 100644 index 94168a830..000000000 --- a/tools/data/dota/split/split_configs/dota1_0/ss_trainval.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota", - "img_dirs": [ - "/cluster/home/it_stu198/main/datasets/DOTA/train/images/", - "/cluster/home/it_stu198/main/datasets/DOTA/val/images/" - ], - "ann_dirs": [ - "/cluster/home/it_stu198/main/datasets/DOTA/train/labelTxt/", - "/cluster/home/it_stu198/main/datasets/DOTA/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "/cluster/home/it_stu198/main/datasets/split_1024_dota1_0/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ms_test.json b/tools/data/dota/split/split_configs/dota1_5/ms_test.json deleted file mode 100644 index 5ec74d11c..000000000 --- a/tools/data/dota/split/split_configs/dota1_5/ms_test.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/test/images/" - ], - "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - 
"sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota1_5/test/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ms_train.json b/tools/data/dota/split/split_configs/dota1_5/ms_train.json deleted file mode 100644 index cf0be5fb6..000000000 --- a/tools/data/dota/split/split_configs/dota1_5/ms_train.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/train/images/" - ], - "ann_dirs": [ - "data/DOTA1_5/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota1_5/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ms_trainval.json b/tools/data/dota/split/split_configs/dota1_5/ms_trainval.json deleted file mode 100644 index faeb5b5aa..000000000 --- a/tools/data/dota/split/split_configs/dota1_5/ms_trainval.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/train/images/", - "data/DOTA1_5/val/images/" - ], - "ann_dirs": [ - "data/DOTA1_5/train/labelTxt/", - "data/DOTA1_5/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota1_5/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ss_test.json b/tools/data/dota/split/split_configs/dota1_5/ss_test.json deleted file mode 100644 index 795aabbe7..000000000 --- a/tools/data/dota/split/split_configs/dota1_5/ss_test.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/test/images/" - ], - "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota1_5/test/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ss_train.json b/tools/data/dota/split/split_configs/dota1_5/ss_train.json deleted file mode 100644 index 7abd069d8..000000000 --- a/tools/data/dota/split/split_configs/dota1_5/ss_train.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/train/images/" - ], - "ann_dirs": [ - "data/DOTA1_5/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota1_5/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_5/ss_trainval.json b/tools/data/dota/split/split_configs/dota1_5/ss_trainval.json deleted file mode 100644 index f7f70ab9d..000000000 --- 
a/tools/data/dota/split/split_configs/dota1_5/ss_trainval.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota1.5", - "img_dirs": [ - "data/DOTA1_5/train/images/", - "data/DOTA1_5/val/images/" - ], - "ann_dirs": [ - "data/DOTA1_5/train/labelTxt/", - "data/DOTA1_5/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota1_5/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota2_0/ms_test.json b/tools/data/dota/split/split_configs/dota2_0/ms_test.json deleted file mode 100644 index 265e0ae72..000000000 --- a/tools/data/dota/split/split_configs/dota2_0/ms_test.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota2", - "img_dirs": [ - "data/DOTA2_0/test/images/" - ], - "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota2_0/test/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota2_0/ms_train.json b/tools/data/dota/split/split_configs/dota2_0/ms_train.json deleted file mode 100644 index d48bb30c2..000000000 --- a/tools/data/dota/split/split_configs/dota2_0/ms_train.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota2", - "img_dirs": [ - "data/DOTA2_0/train/images/" - ], - "ann_dirs": [ - "data/DOTA2_0/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota2_0/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota2_0/ms_trainval.json b/tools/data/dota/split/split_configs/dota2_0/ms_trainval.json deleted file mode 100644 index cdb6c91a2..000000000 --- a/tools/data/dota/split/split_configs/dota2_0/ms_trainval.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota2", - "img_dirs": [ - "data/DOTA2_0/train/images/", - "data/DOTA2_0/val/images/" - ], - "ann_dirs": [ - "data/DOTA2_0/train/labelTxt/", - "data/DOTA2_0/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 500 - ], - "rates": [ - 0.5, - 1.0, - 1.5 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ms_dota2_0/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota2_0/ss_train.json b/tools/data/dota/split/split_configs/dota2_0/ss_train.json deleted file mode 100644 index 4421721a8..000000000 --- a/tools/data/dota/split/split_configs/dota2_0/ss_train.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota2", - "img_dirs": [ - "data/DOTA2_0/train/images/" - ], - "ann_dirs": [ - "data/DOTA2_0/train/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - 
"iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota2_0/train/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota2_0/ss_trainval.json b/tools/data/dota/split/split_configs/dota2_0/ss_trainval.json deleted file mode 100644 index 23079319b..000000000 --- a/tools/data/dota/split/split_configs/dota2_0/ss_trainval.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "nproc": 10, - "load_type": "dota2", - "img_dirs": [ - "data/DOTA2_0/train/images/", - "data/DOTA2_0/val/images/" - ], - "ann_dirs": [ - "data/DOTA2_0/train/labelTxt/", - "data/DOTA2_0/val/labelTxt/" - ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", - "sizes": [ - 1024 - ], - "gaps": [ - 200 - ], - "rates": [ - 1.0 - ], - "img_rate_thr": 0.6, - "iof_thr": 0.7, - "no_padding": false, - "padding_value": [ - 104, - 116, - 124 - ], - "save_dir": "data/split_ss_dota2_0/trainval/", - "save_ext": ".png" -} diff --git a/tools/data/dota/split/split_configs/dota1_0/ms_test.json b/tools/data/dota/split/split_configs/ms_test.json similarity index 63% rename from tools/data/dota/split/split_configs/dota1_0/ms_test.json rename to tools/data/dota/split/split_configs/ms_test.json index 351a1623c..0e4edc1da 100644 --- a/tools/data/dota/split/split_configs/dota1_0/ms_test.json +++ b/tools/data/dota/split/split_configs/ms_test.json @@ -1,13 +1,9 @@ { "nproc": 10, - "load_type": "dota", "img_dirs": [ - "data/DOTA1_0/test/images/" + "data/DOTA/test/images/" ], "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -27,6 +23,6 @@ 116, 124 ], - "save_dir": "data/split_ms_dota1_0/test/", + "save_dir": "data/split_ms_dota/test/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota2_0/ms_val.json b/tools/data/dota/split/split_configs/ms_train.json similarity index 59% rename from tools/data/dota/split/split_configs/dota2_0/ms_val.json rename to tools/data/dota/split/split_configs/ms_train.json index 2ca492500..f1c98bb69 100644 --- a/tools/data/dota/split/split_configs/dota2_0/ms_val.json +++ b/tools/data/dota/split/split_configs/ms_train.json @@ -1,15 +1,11 @@ { "nproc": 10, - "load_type": "dota2", "img_dirs": [ - "data/DOTA2_0/val/images/" + "data/DOTA/train/images/" ], "ann_dirs": [ - "data/DOTA2_0/val/labelTxt/" + "data/DOTA/train/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -29,6 +25,6 @@ 116, 124 ], - "save_dir": "data/split_ms_dota2_0/val/", + "save_dir": "data/split_ms_dota/train/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota1_5/ms_val.json b/tools/data/dota/split/split_configs/ms_trainval.json similarity index 58% rename from tools/data/dota/split/split_configs/dota1_5/ms_val.json rename to tools/data/dota/split/split_configs/ms_trainval.json index 1c00033f7..bd50967e6 100644 --- a/tools/data/dota/split/split_configs/dota1_5/ms_val.json +++ b/tools/data/dota/split/split_configs/ms_trainval.json @@ -1,15 +1,13 @@ { "nproc": 10, - "load_type": "dota1.5", "img_dirs": [ - "data/DOTA1_5/val/images/" + "data/DOTA/train/images/", + "data/DOTA/val/images/" ], "ann_dirs": [ - "data/DOTA1_5/val/labelTxt/" + "data/DOTA/train/labelTxt/", + "data/DOTA/val/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -29,6 +27,6 @@ 116, 124 ], - "save_dir": "data/split_ms_dota1_5/val/", + "save_dir": "data/split_ms_dota/trainval/", 
"save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota1_0/ms_val.json b/tools/data/dota/split/split_configs/ms_val.json similarity index 59% rename from tools/data/dota/split/split_configs/dota1_0/ms_val.json rename to tools/data/dota/split/split_configs/ms_val.json index 16fbeadd2..c3ba777c8 100644 --- a/tools/data/dota/split/split_configs/dota1_0/ms_val.json +++ b/tools/data/dota/split/split_configs/ms_val.json @@ -1,15 +1,11 @@ { "nproc": 10, - "load_type": "dota", "img_dirs": [ - "data/DOTA1_0/val/images/" + "data/DOTA/val/images/" ], "ann_dirs": [ - "data/DOTA1_0/val/labelTxt/" + "data/DOTA/val/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -29,6 +25,6 @@ 116, 124 ], - "save_dir": "data/split_ms_dota1_0/val/", + "save_dir": "data/split_ms_dota/val/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota2_0/ss_test.json b/tools/data/dota/split/split_configs/ss_test.json similarity index 61% rename from tools/data/dota/split/split_configs/dota2_0/ss_test.json rename to tools/data/dota/split/split_configs/ss_test.json index 0432e4c6f..e5fe2b2c0 100644 --- a/tools/data/dota/split/split_configs/dota2_0/ss_test.json +++ b/tools/data/dota/split/split_configs/ss_test.json @@ -1,13 +1,9 @@ { "nproc": 10, - "load_type": "dota2", "img_dirs": [ - "data/DOTA2_0/test/images/" + "data/DOTA/test/images/" ], "ann_dirs": null, - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -25,6 +21,6 @@ 116, 124 ], - "save_dir": "data/split_ss_dota2_0/test/", + "save_dir": "data/split_ss_dota/test/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota2_0/ss_val.json b/tools/data/dota/split/split_configs/ss_train.json similarity index 57% rename from tools/data/dota/split/split_configs/dota2_0/ss_val.json rename to tools/data/dota/split/split_configs/ss_train.json index ab66c59c7..1e0604d93 100644 --- a/tools/data/dota/split/split_configs/dota2_0/ss_val.json +++ b/tools/data/dota/split/split_configs/ss_train.json @@ -1,15 +1,11 @@ { "nproc": 10, - "load_type": "dota2", "img_dirs": [ - "data/DOTA2_0/val/images/" + "data/DOTA/train/images/" ], "ann_dirs": [ - "data/DOTA2_0/val/labelTxt/" + "data/DOTA/train/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -27,6 +23,6 @@ 116, 124 ], - "save_dir": "data/split_ss_dota2_0/val/", + "save_dir": "data/split_ss_dota/train/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota1_5/ss_val.json b/tools/data/dota/split/split_configs/ss_trainval.json similarity index 57% rename from tools/data/dota/split/split_configs/dota1_5/ss_val.json rename to tools/data/dota/split/split_configs/ss_trainval.json index 06e8fe529..087224278 100644 --- a/tools/data/dota/split/split_configs/dota1_5/ss_val.json +++ b/tools/data/dota/split/split_configs/ss_trainval.json @@ -1,15 +1,13 @@ { "nproc": 10, - "load_type": "dota1.5", "img_dirs": [ - "data/DOTA1_5/val/images/" + "data/DOTA/train/images/", + "data/DOTA/val/images/" ], "ann_dirs": [ - "data/DOTA1_5/val/labelTxt/" + "data/DOTA/train/labelTxt/", + "data/DOTA/val/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -27,6 +25,6 @@ 116, 124 ], - "save_dir": "data/split_ss_dota1_5/val/", + "save_dir": "data/split_ss_dota/trainval/", "save_ext": ".png" } diff --git a/tools/data/dota/split/split_configs/dota1_0/ss_val.json 
b/tools/data/dota/split/split_configs/ss_val.json similarity index 57% rename from tools/data/dota/split/split_configs/dota1_0/ss_val.json rename to tools/data/dota/split/split_configs/ss_val.json index ccd577760..59c903b92 100644 --- a/tools/data/dota/split/split_configs/dota1_0/ss_val.json +++ b/tools/data/dota/split/split_configs/ss_val.json @@ -1,15 +1,11 @@ { "nproc": 10, - "load_type": "dota", "img_dirs": [ - "data/DOTA1_0/val/images/" + "data/DOTA/val/images/" ], "ann_dirs": [ - "data/DOTA1_0/val/labelTxt/" + "data/DOTA/val/labelTxt/" ], - "classes": null, - "prior_annfile": null, - "merge_type": "addition", "sizes": [ 1024 ], @@ -27,6 +23,6 @@ 116, 124 ], - "save_dir": "data/split_ss_dota1_0/val/", + "save_dir": "data/split_ss_dota/val/", "save_ext": ".png" } diff --git a/tools/train.py b/tools/train.py index 1cf9983ab..86f39ef36 100644 --- a/tools/train.py +++ b/tools/train.py @@ -49,7 +49,7 @@ def parse_args(): '(only applicable to non-distributed training)') parser.add_argument('--seed', type=int, default=None, help='random seed') parser.add_argument( - '--diff_seed', + '--diff-seed', action='store_true', help='Whether or not set different seeds for different ranks') parser.add_argument( From a362cd3617f155c3e73f4702e83b8d5270d090b7 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Wed, 30 Mar 2022 22:44:24 +0800 Subject: [PATCH 09/12] Update from-scratch install script in install.md (#166) * update readme * Update .gitignore * update * update * Update .gitignore * delete torchvision version * update * update --- docs/en/install.md | 41 +++++++++++++++++------------------------ docs/zh_cn/install.md | 39 ++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 47 deletions(-) diff --git a/docs/en/install.md b/docs/en/install.md index 9afaa3777..383fac78d 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -21,6 +21,23 @@ If mmcv and mmcv-full are both installed, there will be `ModuleNotFoundError`. ## Installation +### A from-scratch setup script + +Assuming that you already have CUDA 10.1 installed, here is a full script for setting up MMRotate with conda. +You can refer to the step-by-step installation instructions in the next section. + +```shell +conda create -n open-mmlab python=3.7 pytorch==1.7.0 cudatoolkit=10.1 torchvision -c pytorch -y +conda activate open-mmlab +pip install openmim +mim install mmcv-full +mim install mmdet +git clone https://github.com/open-mmlab/mmrotate.git +cd mmrotate +pip install -r requirements/build.txt +pip install -v -e . +``` + ### Prepare environment 1. Create a conda virtual environment and activate it. @@ -126,30 +143,6 @@ Run it with docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmrotate/data mmrotate ``` -### A from-scratch setup script - -Assuming that you already have CUDA 10.1 installed, here is a full script for setting up MMDetection with conda. - -```shell -conda create -n openmmlab python=3.7 -y -conda activate openmmlab - -conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.1 -c pytorch - -# install the latest mmcv -pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html - -# install mmdetection -pip install mmdet - -# install mmrotate -git clone https://github.com/open-mmlab/mmrotate.git -cd mmrotate -pip install -r requirements/build.txt -pip install -v -e . # or "python setup.py develop" -``` - - ## Verification To verify whether MMRotate is installed correctly, we can run the demo code and inference a demo image. 
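As a quick sanity check, something along these lines should run end to end (the config and checkpoint filenames below are placeholders — substitute a pair you have actually downloaded from the MMRotate model zoo):

```python
from mmdet.apis import inference_detector, init_detector

import mmrotate  # noqa: F401  # importing mmrotate registers the rotated models

# Placeholder paths: substitute a config/checkpoint pair you have downloaded.
config_file = 'oriented_rcnn_r50_fpn_1x_dota_le90.py'
checkpoint_file = 'oriented_rcnn_r50_fpn_1x_dota_le90.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')
# `result` is a list with one (n, 6) array per class:
# [cx, cy, w, h, angle, score] for each detected rotated box.
```

If the import and the inference call both complete without errors, the installed CUDA, mmcv-full, mmdet, and mmrotate versions are consistent.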
diff --git a/docs/zh_cn/install.md b/docs/zh_cn/install.md index 1e2f40c14..ce4bb75e0 100644 --- a/docs/zh_cn/install.md +++ b/docs/zh_cn/install.md @@ -20,6 +20,22 @@ MMRotate 和 MMCV, MMDet 版本兼容性如下所示,需要安装正确的版 ## 安装流程 +### 从零开始设置脚本 + +假设当前已经成功安装 CUDA 10.1,这里提供了一个完整的基于 conda 安装 MMRotate 的脚本。您可以参考下一节中的分步安装说明。 + +```shell +conda create -n open-mmlab python=3.7 pytorch==1.7.0 cudatoolkit=10.1 torchvision -c pytorch -y +conda activate open-mmlab +pip install openmim +mim install mmcv-full +mim install mmdet +git clone https://github.com/open-mmlab/mmrotate.git +cd mmrotate +pip install -r requirements/build.txt +pip install -v -e . +``` + ### 准备环境 1. 使用 conda 新建虚拟环境,并进入该虚拟环境; @@ -132,29 +148,6 @@ docker build -t mmrotate docker/ docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmrotate/data mmrotate ``` -### 从零开始设置脚本 - -假设当前已经成功安装 CUDA 10.1,这里提供了一个完整的基于 conda 安装 MMDetection 的脚本: - -```shell -conda create -n openmmlab python=3.7 -y -conda activate openmmlab - -conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.1 -c pytorch - -# 安装最新版本的 mmcv -pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.7.0/index.html - -# 安装 mmdetection -pip install mmdet - -# 安装 mmrotate -git clone https://github.com/open-mmlab/mmrotate.git -cd mmrotate -pip install -r requirements/build.txt -pip install -v -e . # or "python setup.py develop" -``` - ## 验证 From 27ad53861455aef82e6bf4d73b052a7ff6daa8b7 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Fri, 1 Apr 2022 16:03:26 +0800 Subject: [PATCH 10/12] fix bug (#175) --- mmrotate/datasets/hrsc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mmrotate/datasets/hrsc.py b/mmrotate/datasets/hrsc.py index 66ea41a4d..f2caeeb98 100644 --- a/mmrotate/datasets/hrsc.py +++ b/mmrotate/datasets/hrsc.py @@ -108,9 +108,9 @@ def load_annotations(self, ann_file): for obj in root.findall('HRSC_Objects/HRSC_Object'): if self.classwise: class_id = obj.find('Class_ID').text - if class_id not in self.CLASSES_ID: + label = self.catid2label.get(class_id) + if label is None: continue - label = self.catid2label[class_id] else: label = 0 From d11241fe3ec3972ee4f992008103847fe87d8c15 Mon Sep 17 00:00:00 2001 From: Yue Zhou <592267829@qq.com> Date: Fri, 1 Apr 2022 22:42:04 +0800 Subject: [PATCH 11/12] Fix bug in docstring. (#137) * Update rotated_anchor_head.py * fix lint * update docstring * Update * update docstring * fix link in docs. * update * add install in reademe * Revert "add install in reademe" This reverts commit e0a0f09923e48490f289a11373cbf7d9d59a718f. 
* Update .gitignore * fix docstring error * fix readthedocs api bug * fix hyperlink * Update smooth_focal_loss.py * fix lint * fix bugs * update return in docstring * fix bugs * update api.rst * update assigner * fix bugs * update * update * fix bugs in return * add download link in demo * fix typos --- .gitignore | 4 +- demo/huge_image_demo.py | 3 +- demo/image_demo.py | 7 +- docs/en/api.rst | 80 +++++++++++++++---- docs/en/get_started.md | 4 +- docs/en/install.md | 2 +- docs/en/model_zoo.md | 30 +++---- docs/zh_cn/api.rst | 80 +++++++++++++++---- docs/zh_cn/get_started.md | 6 +- docs/zh_cn/install.md | 2 +- docs/zh_cn/model_zoo.md | 30 +++---- .../core/bbox/assigners/convex_assigner.py | 5 ++ .../bbox/assigners/max_convex_iou_assigner.py | 14 +++- mmrotate/core/bbox/assigners/sas_assigner.py | 13 ++- .../coder/delta_midpointoffset_rbbox_coder.py | 31 +++---- .../bbox/coder/delta_xywha_hbbox_coder.py | 20 ++--- .../bbox/coder/delta_xywha_rbbox_coder.py | 30 +++---- .../core/bbox/coder/gliding_vertex_coder.py | 15 +++- .../rotate_iou2d_calculator.py | 2 + mmrotate/core/bbox/transforms.py | 25 ++++++ mmrotate/core/bbox/utils/gmm.py | 10 ++- mmrotate/core/evaluation/eval_map.py | 1 + .../core/post_processing/bbox_nms_rotated.py | 21 ++++- mmrotate/datasets/dota.py | 20 ++--- mmrotate/models/backbones/re_resnet.py | 1 + .../dense_heads/csl_rotated_retina_head.py | 44 +++++----- .../dense_heads/kfiou_odm_refine_head.py | 29 ++++--- .../dense_heads/kfiou_rotate_retina_head.py | 26 +++--- .../kfiou_rotate_retina_refine_head.py | 32 ++++---- .../models/dense_heads/odm_refine_head.py | 40 +++++----- .../models/dense_heads/oriented_rpn_head.py | 24 +++--- .../models/dense_heads/rotated_anchor_head.py | 55 ++++++++----- .../dense_heads/rotated_reppoints_head.py | 54 +++++++++---- .../models/dense_heads/rotated_retina_head.py | 37 +++++---- .../dense_heads/rotated_retina_refine_head.py | 37 ++++----- .../models/dense_heads/rotated_rpn_head.py | 3 +- .../models/dense_heads/sam_reppoints_head.py | 29 ++++--- mmrotate/models/dense_heads/utils.py | 16 +++- mmrotate/models/detectors/oriented_rcnn.py | 5 +- mmrotate/models/detectors/r3det.py | 4 +- mmrotate/models/detectors/redet.py | 6 +- mmrotate/models/detectors/roi_transformer.py | 8 +- .../models/detectors/rotate_faster_rcnn.py | 4 +- .../models/detectors/rotated_reppoints.py | 2 +- .../models/detectors/rotated_retinanet.py | 4 +- mmrotate/models/detectors/s2anet.py | 9 ++- mmrotate/models/detectors/single_stage.py | 6 +- mmrotate/models/detectors/utils.py | 20 ++++- mmrotate/models/losses/convex_giou_loss.py | 8 +- mmrotate/models/losses/smooth_focal_loss.py | 35 ++++---- mmrotate/models/necks/re_fpn.py | 26 +++++- .../roi_heads/bbox_heads/gv_bbox_head.py | 2 - .../roi_heads/bbox_heads/rotated_bbox_head.py | 2 - .../models/roi_heads/gv_ratio_roi_head.py | 10 +-- .../roi_heads/oriented_standard_roi_head.py | 10 +-- .../rotate_single_level_roi_extractor.py | 2 +- .../roi_heads/rotate_standard_roi_head.py | 11 +-- mmrotate/models/utils/enn.py | 45 ++++++++++- mmrotate/models/utils/orconv.py | 18 ++++- mmrotate/models/utils/ripool.py | 7 +- mmrotate/utils/logger.py | 1 + mmrotate/utils/misc.py | 3 + requirements/readthedocs.txt | 1 + tools/analysis_tools/benchmark.py | 58 +++++++------- 64 files changed, 761 insertions(+), 428 deletions(-) diff --git a/.gitignore b/.gitignore index e8019d30b..7af248056 100644 --- a/.gitignore +++ b/.gitignore @@ -66,7 +66,9 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +docs/en/_build/ +docs/zh_cn/_build/ 
+src # PyBuilder target/ diff --git a/demo/huge_image_demo.py b/demo/huge_image_demo.py index 075942fc6..68afd9d88 100644 --- a/demo/huge_image_demo.py +++ b/demo/huge_image_demo.py @@ -3,10 +3,11 @@ Example: ``` +wget -P checkpoint https://download.openmmlab.com/mmrotate/v0.1.0/oriented_rcnn/oriented_rcnn_r50_fpn_1x_dota_le90/oriented_rcnn_r50_fpn_1x_dota_le90-6d2b2ce0.pth # noqa: E501, E261. python demo/huge_image_demo.py \ demo/dota_demo.jpg \ configs/oriented_rcnn/oriented_rcnn_r50_fpn_1x_dota_v3.py \ - work_dirs/oriented_rcnn_r50_fpn_1x_dota_v3/epoch_12.pth \ + checkpoint/oriented_rcnn_r50_fpn_1x_dota_le90-6d2b2ce0.pth \ ``` """ # noqa diff --git a/demo/image_demo.py b/demo/image_demo.py index aaa5363e8..25c2ecd7a 100644 --- a/demo/image_demo.py +++ b/demo/image_demo.py @@ -2,11 +2,14 @@ """Inference on single image. Example: + + ``` +wget -P checkpoint https://download.openmmlab.com/mmrotate/v0.1.0/oriented_rcnn/oriented_rcnn_r50_fpn_1x_dota_le90/oriented_rcnn_r50_fpn_1x_dota_le90-6d2b2ce0.pth # noqa: E501, E261. python demo/image_demo.py \ demo/demo.jpg \ - configs/oriented_rcnn/oriented_rcnn_r50_fpn_1x_dota_v3.py \ - work_dirs/oriented_rcnn_r50_fpn_1x_dota_v3/epoch_12.pth \ + configs/oriented_rcnn/oriented_rcnn_r50_fpn_1x_dota_le90.py \ + checkpoint/oriented_rcnn_r50_fpn_1x_dota_le90-6d2b2ce0.pth \ demo/vis.jpg ``` """ # noqa diff --git a/docs/en/api.rst b/docs/en/api.rst index 4ed91a232..e5234ee68 100644 --- a/docs/en/api.rst +++ b/docs/en/api.rst @@ -1,29 +1,70 @@ -mmrotate -================= +mmrotate.apis +-------------- +.. automodule:: mmrotate.apis + :members: -mmrotate.models ------------------------------- -.. automodule:: mmrotate.models +mmrotate.core +-------------- + +anchor +^^^^^^^^^^ +.. automodule:: mmrotate.core.anchor :members: -backbones +bbox ^^^^^^^^^^ -.. automodule:: mmrotate.models.backbones +.. automodule:: mmrotate.core.bbox :members: -dense_heads +patch ^^^^^^^^^^ -.. automodule:: mmrotate.models.dense_heads +.. automodule:: mmrotate.core.patch + :members: + +evaluation +^^^^^^^^^^ +.. automodule:: mmrotate.core.evaluation + :members: + +post_processing +^^^^^^^^^^^^^^^ +.. automodule:: mmrotate.core.post_processing + :members: + +mmrotate.datasets +----------------- + +datasets +^^^^^^^^^^ +.. automodule:: mmrotate.datasets + :members: + +pipelines +^^^^^^^^^^ +.. automodule:: mmrotate.datasets.pipelines :members: +mmrotate.models +--------------- + detectors ^^^^^^^^^^ .. automodule:: mmrotate.models.detectors :members: -losses +backbones ^^^^^^^^^^ -.. automodule:: mmrotate.models.losses +.. automodule:: mmrotate.models.backbones + :members: + +necks +^^^^^^^^^^^^ +.. automodule:: mmrotate.models.necks + :members: + +dense_heads +^^^^^^^^^^^^ +.. automodule:: mmrotate.models.dense_heads :members: roi_heads @@ -31,12 +72,17 @@ roi_heads .. automodule:: mmrotate.models.roi_heads :members: -mmrotate.core ------------------------------- -.. automodule:: mmrotate.core +losses +^^^^^^^^^^ +.. automodule:: mmrotate.models.losses :members: -mmrotate.datasets ------------------------------- -.. automodule:: mmrotate.datasets +utils +^^^^^^^^^^ +.. automodule:: mmrotate.models.utils + :members: + +mmrotate.utils +-------------- +..
automodule:: mmrotate.utils :members: diff --git a/docs/en/get_started.md b/docs/en/get_started.md index b0c5372e3..5e581b1b4 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -20,7 +20,7 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments] --la Examples: -Inference RotatedRetinaNet on DOTA-1.0 dataset, which can generate compressed files for online [submission](https://captain-whu.github.io/DOTA/evaluation.html). (Please change the [data_root](../../configs/_base_/datasets/dotav1.py) firstly.) +Inference RotatedRetinaNet on DOTA-1.0 dataset, which can generate compressed files for online [submission](https://captain-whu.github.io/DOTA/evaluation.html). (Please change the [data_root](https://github.com/open-mmlab/mmrotate/tree/main/configs/_base_/datasets/dotav1.py) firstly.) ```shell python ./tools/test.py \ configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py \ @@ -35,7 +35,7 @@ or --eval-options submission_dir=work_dirs/Task1_results ``` -You can change the test set path in the [data_root](../../configs/_base_/datasets/dotav1.py) to the val set or trainval set for the offline evaluation. +You can change the test set path in the [data_root](https://github.com/open-mmlab/mmrotate/tree/main/configs/_base_/datasets/dotav1.py) to the val set or trainval set for the offline evaluation. ```shell python ./tools/test.py \ configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py \ diff --git a/docs/en/install.md b/docs/en/install.md index 383fac78d..ad1bf281a 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -102,7 +102,7 @@ Or you can still install MMRotate manually: 3. Install MMRotate. - You can simply install mmrotate with the following command: + You can simply install MMRotate with the following command: ```shell pip install mmrotate diff --git a/docs/en/model_zoo.md b/docs/en/model_zoo.md index 4a4612f25..33d5f209f 100644 --- a/docs/en/model_zoo.md +++ b/docs/en/model_zoo.md @@ -1,21 +1,21 @@ ## Benchmark and Model Zoo -- [Rotated RetinaNet-OBB/HBB](../../configs/rotated_retinanet/README.md) (ICCV'2017) -- [Rotated FasterRCNN-OBB](../../configs/rotated_faster_rcnn/README.md) (TPAMI'2017) -- [Rotated RepPoints-OBB](../../configs/rotated_reppoints/README.md) (ICCV'2019) -- [RoI Transformer](../../configs/roi_trans/README.md) (CVPR'2019) -- [Gliding Vertex](../../configs/gliding_vertex/README.md) (TPAMI'2020) -- [CSL](../../configs/csl/README.md) (ECCV'2020) -- [R3Det](../../configs/r3det/README.md) (AAAI'2021) -- [S2A-Net](../../configs/s2anet/README.md) (TGRS'2021) -- [ReDet](../../configs/redet/README.md) (CVPR'2021) -- [Beyond Bounding-Box](../../configs/cfa/README.md) (CVPR'2021) -- [Oriented R-CNN](../../configs/oriented_rcnn/README.md) (ICCV'2021) -- [GWD](../../configs/gwd/README.md) (ICML'2021) -- [KLD](../../configs/kld/README.md) (NeurIPS'2021) +- [Rotated RetinaNet-OBB/HBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_retinanet/README.md) (ICCV'2017) +- [Rotated FasterRCNN-OBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_faster_rcnn/README.md) (TPAMI'2017) +- [Rotated RepPoints-OBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_reppoints/README.md) (ICCV'2019) +- [RoI Transformer](https://github.com/open-mmlab/mmrotate/tree/main/configs/roi_trans/README.md) (CVPR'2019) +- [Gliding Vertex](https://github.com/open-mmlab/mmrotate/tree/main/configs/gliding_vertex/README.md) (TPAMI'2020) +- 
[CSL](https://github.com/open-mmlab/mmrotate/tree/main/configs/csl/README.md) (ECCV'2020) +- [R3Det](https://github.com/open-mmlab/mmrotate/tree/main/configs/r3det/README.md) (AAAI'2021) +- [S2A-Net](https://github.com/open-mmlab/mmrotate/tree/main/configs/s2anet/README.md) (TGRS'2021) +- [ReDet](https://github.com/open-mmlab/mmrotate/tree/main/configs/redet/README.md) (CVPR'2021) +- [Beyond Bounding-Box](https://github.com/open-mmlab/mmrotate/tree/main/configs/cfa/README.md) (CVPR'2021) +- [Oriented R-CNN](https://github.com/open-mmlab/mmrotate/tree/main/configs/oriented_rcnn/README.md) (ICCV'2021) +- [GWD](https://github.com/open-mmlab/mmrotate/tree/main/configs/gwd/README.md) (ICML'2021) +- [KLD](https://github.com/open-mmlab/mmrotate/tree/main/configs/kld/README.md) (NeurIPS'2021) - [SASM](configs/sasm_reppoints/README.md) (AAAI'2022) -- [KFIoU](../../configs/kfiou/README.md) (arXiv) -- [G-Rep](../../configs/g_reppoints/README.md) (stay tuned) +- [KFIoU](https://github.com/open-mmlab/mmrotate/tree/main/configs/kfiou/README.md) (arXiv) +- [G-Rep](https://github.com/open-mmlab/mmrotate/tree/main/configs/g_reppoints/README.md) (stay tuned) ### Results on DOTA v1.0 diff --git a/docs/zh_cn/api.rst b/docs/zh_cn/api.rst index 4ed91a232..e5234ee68 100644 --- a/docs/zh_cn/api.rst +++ b/docs/zh_cn/api.rst @@ -1,29 +1,70 @@ -mmrotate -================= +mmrotate.apis +-------------- +.. automodule:: mmrotate.apis + :members: -mmrotate.models ------------------------------- -.. automodule:: mmrotate.models +mmrotate.core +-------------- + +anchor +^^^^^^^^^^ +.. automodule:: mmrotate.core.anchor :members: -backbones +bbox ^^^^^^^^^^ -.. automodule:: mmrotate.models.backbones +.. automodule:: mmrotate.core.bbox :members: -dense_heads +patch ^^^^^^^^^^ -.. automodule:: mmrotate.models.dense_heads +.. automodule:: mmrotate.core.patch + :members: + +evaluation +^^^^^^^^^^ +.. automodule:: mmrotate.core.evaluation + :members: + +post_processing +^^^^^^^^^^^^^^^ +.. automodule:: mmrotate.core.post_processing + :members: + +mmrotate.datasets +----------------- + +datasets +^^^^^^^^^^ +.. automodule:: mmrotate.datasets + :members: + +pipelines +^^^^^^^^^^ +.. automodule:: mmrotate.datasets.pipelines :members: +mmrotate.models +--------------- + detectors ^^^^^^^^^^ .. automodule:: mmrotate.models.detectors :members: -losses +backbones ^^^^^^^^^^ -.. automodule:: mmrotate.models.losses +.. automodule:: mmrotate.models.backbones + :members: + +necks +^^^^^^^^^^^^ +.. automodule:: mmrotate.models.necks + :members: + +dense_heads +^^^^^^^^^^^^ +.. automodule:: mmrotate.models.dense_heads :members: roi_heads @@ -31,12 +72,17 @@ roi_heads .. automodule:: mmrotate.models.roi_heads :members: -mmrotate.core ------------------------------- -.. automodule:: mmrotate.core +losses +^^^^^^^^^^ +.. automodule:: mmrotate.models.losses :members: -mmrotate.datasets ------------------------------- -.. automodule:: mmrotate.datasets +utils +^^^^^^^^^^ +.. automodule:: mmrotate.models.utils + :members: + +mmrotate.utils +-------------- +..
automodule:: mmrotate.utils :members: diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index dc6b8c3ea..69fb54a50 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -1,4 +1,4 @@ -## Test a model +## 测试一个模型 - 单个 GPU - 单个节点多个 GPU @@ -20,7 +20,7 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments] --la 例子: -在 DOTA-1.0 数据集推理 RotatedRetinaNet,可以生成压缩文件用于在线[提交](https://captain-whu.github.io/DOTA/evaluation.html)。(首先请修改 [data_root](../../configs/_base_/datasets/dotav1.py)) +在 DOTA-1.0 数据集推理 RotatedRetinaNet 并生成压缩文件用于在线[提交](https://captain-whu.github.io/DOTA/evaluation.html) (首先请修改 [data_root](https://github.com/open-mmlab/mmrotate/tree/main/configs/_base_/datasets/dotav1.py))。 ```shell python ./tools/test.py \ configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py \ @@ -35,7 +35,7 @@ python ./tools/test.py \ --eval-options submission_dir=work_dirs/Task1_results ``` -您可以修改 [data_root](../../configs/_base_/datasets/dotav1.py) 中测试集的路径为验证集或训练集路径用于离线的验证。 +您可以修改 [data_root](https://github.com/open-mmlab/mmrotate/tree/main/configs/_base_/datasets/dotav1.py) 中测试集的路径为验证集或训练集路径用于离线的验证。 ```shell python ./tools/test.py \ configs/rotated_retinanet/rotated_retinanet_obb_r50_fpn_1x_dota_le90.py \ diff --git a/docs/zh_cn/install.md b/docs/zh_cn/install.md index ce4bb75e0..e1ae9493d 100644 --- a/docs/zh_cn/install.md +++ b/docs/zh_cn/install.md @@ -110,7 +110,7 @@ MIM 能够自动地安装 OpenMMLab 的项目以及对应的依赖包。 3. 安装 MMRotate. - 你可以直接通过如下命令从 pip 安装使用 mmrotate: + 你可以直接通过如下命令从 pip 安装使用 MMRotate: ```shell pip install mmrotate diff --git a/docs/zh_cn/model_zoo.md b/docs/zh_cn/model_zoo.md index 314918cf7..865e6855b 100644 --- a/docs/zh_cn/model_zoo.md +++ b/docs/zh_cn/model_zoo.md @@ -1,21 +1,21 @@ ## 基准和模型库 -- [Rotated RetinaNet-OBB/HBB](../../configs/rotated_retinanet/README.md) (ICCV'2017) -- [Rotated FasterRCNN-OBB](../../configs/rotated_faster_rcnn/README.md) (TPAMI'2017) -- [Rotated RepPoints-OBB](../../configs/rotated_reppoints/README.md) (ICCV'2019) -- [RoI Transformer](../../configs/roi_trans/README.md) (CVPR'2019) -- [Gliding Vertex](../../configs/gliding_vertex/README.md) (TPAMI'2020) -- [CSL](../../configs/csl/README.md) (ECCV'2020) -- [R3Det](../../configs/r3det/README.md) (AAAI'2021) -- [S2A-Net](../../configs/s2anet/README.md) (TGRS'2021) -- [ReDet](../../configs/redet/README.md) (CVPR'2021) -- [Beyond Bounding-Box](../../configs/cfa/README.md) (CVPR'2021) -- [Oriented R-CNN](../../configs/oriented_rcnn/README.md) (ICCV'2021) -- [GWD](../../configs/gwd/README.md) (ICML'2021) -- [KLD](../../configs/kld/README.md) (NeurIPS'2021) +- [Rotated RetinaNet-OBB/HBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_retinanet/README.md) (ICCV'2017) +- [Rotated FasterRCNN-OBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_faster_rcnn/README.md) (TPAMI'2017) +- [Rotated RepPoints-OBB](https://github.com/open-mmlab/mmrotate/tree/main/configs/rotated_reppoints/README.md) (ICCV'2019) +- [RoI Transformer](https://github.com/open-mmlab/mmrotate/tree/main/configs/roi_trans/README.md) (CVPR'2019) +- [Gliding Vertex](https://github.com/open-mmlab/mmrotate/tree/main/configs/gliding_vertex/README.md) (TPAMI'2020) +- [CSL](https://github.com/open-mmlab/mmrotate/tree/main/configs/csl/README.md) (ECCV'2020) +- [R3Det](https://github.com/open-mmlab/mmrotate/tree/main/configs/r3det/README.md) (AAAI'2021) +- [S2A-Net](https://github.com/open-mmlab/mmrotate/tree/main/configs/s2anet/README.md) (TGRS'2021) +- 
[ReDet](https://github.com/open-mmlab/mmrotate/tree/main/configs/redet/README.md) (CVPR'2021) +- [Beyond Bounding-Box](https://github.com/open-mmlab/mmrotate/tree/main/configs/cfa/README.md) (CVPR'2021) +- [Oriented R-CNN](https://github.com/open-mmlab/mmrotate/tree/main/configs/oriented_rcnn/README.md) (ICCV'2021) +- [GWD](https://github.com/open-mmlab/mmrotate/tree/main/configs/gwd/README.md) (ICML'2021) +- [KLD](https://github.com/open-mmlab/mmrotate/tree/main/configs/kld/README.md) (NeurIPS'2021) - [SASM](configs/sasm_reppoints/README.md) (AAAI'2022) -- [KFIoU](../../configs/kfiou/README.md) (arXiv) -- [G-Rep](../../configs/g_reppoints/README.md) (stay tuned) +- [KFIoU](https://github.com/open-mmlab/mmrotate/tree/main/configs/kfiou/README.md) (arXiv) +- [G-Rep](https://github.com/open-mmlab/mmrotate/tree/main/configs/g_reppoints/README.md) (stay tuned) ### DOTA v1.0 数据集上的结果 diff --git a/mmrotate/core/bbox/assigners/convex_assigner.py b/mmrotate/core/bbox/assigners/convex_assigner.py index 621b2d8eb..65053ef6f 100644 --- a/mmrotate/core/bbox/assigners/convex_assigner.py +++ b/mmrotate/core/bbox/assigners/convex_assigner.py @@ -14,6 +14,7 @@ class ConvexAssigner(BaseAssigner): - 0: negative sample, no assigned gt - positive integer: positive sample, index (1-based) of assigned gt + Args: scale (float): IoU threshold for positive bboxes. pos_num (float): find the nearest pos_num points to gt center in this @@ -29,6 +30,7 @@ def get_horizontal_bboxes(self, gt_rbboxes): Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). + Returns: gt_rect_bboxes (torch.Tensor): The horizontal bboxes, shape (k, 4). """ @@ -54,6 +56,7 @@ def assign(self, """Assign gt to bboxes. The assignment is done in following steps + 1. compute iou between all bbox (bbox of all pyramid levels) and gt 2. compute center distance between all bbox and gt 3. on each pyramid level, for each gt, select k bbox whose center @@ -64,12 +67,14 @@ def assign(self, 5. select these candidates whose iou are greater than or equal to the threshold as positive 6. limit the positive sample's center in gt + Args: points (torch.Tensor): Points to be assigned, shape(n, 18). gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). gt_rbboxes_ignore (Tensor, optional): Ground truth polygons that are labelled as `ignored`, e.g., crowd boxes in COCO. gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ). + Returns: :obj:`AssignResult`: The assign result. """ diff --git a/mmrotate/core/bbox/assigners/max_convex_iou_assigner.py b/mmrotate/core/bbox/assigners/max_convex_iou_assigner.py index db41a09fd..ebd8605dd 100644 --- a/mmrotate/core/bbox/assigners/max_convex_iou_assigner.py +++ b/mmrotate/core/bbox/assigners/max_convex_iou_assigner.py @@ -15,6 +15,7 @@ class MaxConvexIoUAssigner(BaseAssigner): - -1: negative sample, no assigned gt - semi-positive integer: positive sample, index (0-based) of assigned gt + Args: pos_iou_thr (float): IoU threshold for positive bboxes. neg_iou_thr (float or tuple): IoU threshold for negative bboxes. @@ -61,6 +62,7 @@ def assign( """Assign gt to bboxes. The assignment is done in following steps + 1. compute iou between all bbox (bbox of all pyramid levels) and gt 2. compute center distance between all bbox and gt 3. on each pyramid level, for each gt, select k bbox whose center @@ -71,6 +73,7 @@ def assign( 5. select these candidates whose iou are greater than or equal to the threshold as positive 6. 
limit the positive sample's center in gt + Args: points (torch.Tensor): Points to be assigned, shape(n, 18). gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). @@ -79,6 +82,7 @@ def assign( gt_rbboxes_ignore (Tensor, optional): Ground truth polygons that are labelled as `ignored`, e.g., crowd boxes in COCO. gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ). + Returns: :obj:`AssignResult`: The assign result. """ @@ -121,10 +125,12 @@ def assign_wrt_overlaps(self, overlaps, gt_labels=None): """Assign w.r.t. the overlaps of bboxes with gts. + Args: overlaps (torch.Tensor): Overlaps between k gt_bboxes and n bboxes, shape(k, n). gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ). + Returns: :obj:`AssignResult`: The assign result. """ @@ -196,13 +202,15 @@ def assign_wrt_overlaps(self, overlaps, gt_labels=None): num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels) def convex_overlaps(self, gt_rbboxes, points): - """ Compute overlaps between polygons and points + """Compute overlaps between polygons and points. + Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). points (torch.Tensor): Points to be assigned, shape(n, 18). + Returns: - overlaps (torch.Tensor): Overlaps between k gt_bboxes and n bboxes, - shape(k, n). + overlaps (torch.Tensor): Overlaps between k gt_bboxes and n \ + bboxes, shape(k, n). """ overlaps = convex_iou(points, gt_rbboxes) overlaps = overlaps.transpose(1, 0) diff --git a/mmrotate/core/bbox/assigners/sas_assigner.py b/mmrotate/core/bbox/assigners/sas_assigner.py index 5dcaaf798..f491f398b 100644 --- a/mmrotate/core/bbox/assigners/sas_assigner.py +++ b/mmrotate/core/bbox/assigners/sas_assigner.py @@ -8,10 +8,12 @@ def convex_overlaps(gt_rbboxes, points): - """ Compute overlaps between polygons and points + """Compute overlaps between polygons and points. + Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). points (torch.Tensor): Points to be assigned, shape(n, 18). + Returns: overlaps (torch.Tensor): Overlaps between k gt_bboxes and n bboxes, shape(k, n). @@ -23,10 +25,11 @@ def convex_overlaps(gt_rbboxes, points): def get_horizontal_bboxes(gt_rbboxes): - """get_horizontal_bboxes from polygons. + """Get horizontal bboxes from polygons. Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). + Returns: gt_rect_bboxes (torch.Tensor): The horizontal bboxes, shape (k, 4). """ @@ -43,9 +46,11 @@ def get_horizontal_bboxes(gt_rbboxes): def AspectRatio(gt_rbboxes): - """compute the aspect ratio of all gts + """Compute the aspect ratio of all gts. + Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). + Returns: ratios (torch.Tensor): The aspect ratio of gt_rbboxes, shape (k, 1). """ @@ -71,6 +76,7 @@ class SASAssigner(BaseAssigner): - 0: negative sample, no assigned gt - positive integer: positive sample, index (1-based) of assigned gt + Args: scale (float): IoU threshold for positive bboxes. pos_num (float): find the nearest pos_num points to gt center in this @@ -101,7 +107,6 @@ def assign(self, the threshold as positive 6. limit the positive sample's center in gt - Args: bboxes (torch.Tensor): Bounding boxes to be assigned, shape(n, 4). 
num_level_bboxes (List): num of bboxes in each level diff --git a/mmrotate/core/bbox/coder/delta_midpointoffset_rbbox_coder.py b/mmrotate/core/bbox/coder/delta_midpointoffset_rbbox_coder.py index 62ff2fa61..c6831a8ab 100644 --- a/mmrotate/core/bbox/coder/delta_midpointoffset_rbbox_coder.py +++ b/mmrotate/core/bbox/coder/delta_midpointoffset_rbbox_coder.py @@ -11,11 +11,10 @@ @ROTATED_BBOX_CODERS.register_module() class MidpointOffsetCoder(BaseBBoxCoder): - """Mid point offset coder. + """Mid point offset coder. This coder encodes bbox (x1, y1, x2, y2) into \ + delta (dx, dy, dw, dh, da, db) and decodes delta (dx, dy, dw, dh, da, db) \ + back to original bbox (x1, y1, x2, y2). - This coder encodes bbox (x1, y1, x2, y2) into delta - (dx, dy, dw, dh, da, db) and decodes delta (dx, dy, dw, dh, da, db) - back to original bbox (x1, y1, x2, y2). Args: target_means (Sequence[float]): Denormalizing means of target for delta coordinates @@ -58,12 +57,13 @@ def decode(self, max_shape=None, wh_ratio_clip=16 / 1000): """Apply transformation `pred_bboxes` to `bboxes`. + Args: bboxes (torch.Tensor): Basic boxes. Shape (B, N, 4) or (N, 4) pred_bboxes (torch.Tensor): Encoded offsets with respect to each roi. Has shape (B, N, 5) or (N, 5). - Note N = num_anchors * W * H - when rois is a grid of anchors.Offset encoding follows [1]_. + Note N = num_anchors * W * H when rois is a grid of anchors. + max_shape (Sequence[int] or torch.Tensor or Sequence[ Sequence[int]],optional): Maximum bounds for boxes, specifies (H, W, C) or (H, W). If bboxes shape is (B, N, 6), then @@ -90,12 +90,12 @@ def bbox2delta(proposals, means=(0., 0., 0., 0., 0., 0.), stds=(1., 1., 1., 1., 1., 1.), version='oc'): - """Compute deltas of proposals w.r.t. + """Compute deltas of proposals w.r.t. gt. - gt. We usually compute the deltas of x, y, w, h, a, b of proposals w.r.t ground - truth bboxes to get regression target. - This is the inverse function of :func:`delta2bbox`. + truth bboxes to get regression target. This is the inverse function of + :func:`delta2bbox`. + Args: proposals (torch.Tensor): Boxes to be transformed, shape (N, ..., 4) gt (torch.Tensor): Gt bboxes to be used as base, shape (N, ..., 5) @@ -157,15 +157,18 @@ def delta2bbox(rois, wh_ratio_clip=16 / 1000, version='oc'): """Apply deltas to shift/scale base boxes. - Typically the rois are anchor or proposed bounding boxes and the deltas are - network outputs used to shift/scale those boxes. - This is the inverse function of :func:`bbox2delta`. + + Typically the rois are anchor or proposed bounding boxes and the deltas + are network outputs used to shift/scale those boxes. This is the inverse + function of :func:`bbox2delta`. + + Args: rois (torch.Tensor): Boxes to be transformed. Has shape (N, 4). deltas (torch.Tensor): Encoded offsets relative to each roi. Has shape (N, num_classes * 4) or (N, 4). Note N = num_base_anchors * W * H, when rois is a grid of - anchors. Offset encoding follows [1]_. + anchors. means (Sequence[float]): Denormalizing means for delta coordinates. Default (0., 0., 0., 0., 0., 0.). stds (Sequence[float]): Denormalizing standard deviation for delta diff --git a/mmrotate/core/bbox/coder/delta_xywha_hbbox_coder.py b/mmrotate/core/bbox/coder/delta_xywha_hbbox_coder.py index 63a551056..0d99a4f22 100644 --- a/mmrotate/core/bbox/coder/delta_xywha_hbbox_coder.py +++ b/mmrotate/core/bbox/coder/delta_xywha_hbbox_coder.py @@ -13,8 +13,8 @@ class DeltaXYWHAHBBoxCoder(BaseBBoxCoder): """Delta XYWHA HBBox coder. 
this coder encodes bbox (x1, y1, x2, y2) into delta (dx, dy, dw, dh, da) - and decodes delta (dx, dy, dw, dh, da) back to original bbox - (cx, cy, w, h, a). + and decodes delta (dx, dy, dw, dh, da) back to original bbox + (cx, cy, w, h, a). Args: target_means (Sequence[float]): Denormalizing means of target for @@ -30,8 +30,8 @@ class DeltaXYWHAHBBoxCoder(BaseBBoxCoder): add_ctr_clamp (bool): Whether to add center clamp, when added, the predicted box is clamped is its center is too far away from the original anchor's center. Only used by YOLOF. Default False. - ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. - Default 32. + ctr_clamp (int): the maximum pixel shift to clamp. Only used by + YOLOF. Default 32. """ def __init__(self, @@ -87,7 +87,7 @@ def decode(self, pred_bboxes (torch.Tensor): Encoded offsets with respect to each roi. Has shape (B, N, num_classes * 5) or (B, N, 5) or (N, num_classes * 5) or (N, 5). Note N = num_anchors * W * H - when rois is a grid of anchors.Offset encoding follows [1]_. + when rois is a grid of anchors. max_shape (Sequence[int] or torch.Tensor or Sequence[ Sequence[int]],optional): Maximum bounds for boxes, specifies (H, W, C) or (H, W). If bboxes shape is (B, N, 5), then @@ -190,8 +190,8 @@ def delta2bbox(rois, angle_range='oc', norm_factor=None, edge_swap=False): - """Apply deltas to shift/scale base boxes. Typically the rois are anchor or - proposed bounding boxes and the deltas are network outputs used to + """Apply deltas to shift/scale base boxes. Typically the rois are anchor + or proposed bounding boxes and the deltas are network outputs used to shift/scale those boxes. This is the inverse function of :func:`bbox2delta`. @@ -200,7 +200,7 @@ def delta2bbox(rois, deltas (torch.Tensor): Encoded offsets relative to each roi. Has shape (N, num_classes * 5) or (N, 5). Note N = num_base_anchors * W * H, when rois is a grid of - anchors. Offset encoding follows [1]_. + anchors. means (Sequence[float]): Denormalizing means for delta coordinates. Default (0., 0., 0., 0., 0.). stds (Sequence[float]): Denormalizing standard deviation for delta @@ -210,8 +210,8 @@ def delta2bbox(rois, add_ctr_clamp (bool): Whether to add center clamp, when added, the predicted box is clamped is its center is too far away from the original anchor's center. Only used by YOLOF. Default False. - ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. - Default 32. + ctr_clamp (int): the maximum pixel shift to clamp. Only used by + YOLOF. Default 32. angle_range (str, optional): Angle representations. Defaults to 'oc'. norm_factor (None|float, optional): Regularization factor of angle. edge_swap (bool, optional): Whether swap the edge if w < h. diff --git a/mmrotate/core/bbox/coder/delta_xywha_rbbox_coder.py b/mmrotate/core/bbox/coder/delta_xywha_rbbox_coder.py index 6c38acdc3..e14ced676 100644 --- a/mmrotate/core/bbox/coder/delta_xywha_rbbox_coder.py +++ b/mmrotate/core/bbox/coder/delta_xywha_rbbox_coder.py @@ -29,8 +29,8 @@ class DeltaXYWHAOBBoxCoder(BaseBBoxCoder): add_ctr_clamp (bool): Whether to add center clamp, when added, the predicted box is clamped is its center is too far away from the original anchor's center. Only used by YOLOF. Default False. - ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. - Default 32. + ctr_clamp (int): the maximum pixel shift to clamp. Only used by + YOLOF. Default 32. """ def __init__(self, @@ -83,14 +83,14 @@ def decode(self, Args: bboxes (torch.Tensor): Basic boxes. 
Shape (B, N, 5) or (N, 5) - pred_bboxes (torch.Tensor): Encoded offsets with respect to each - roi. Has shape (B, N, num_classes * 5) or (B, N, 5) or - (N, num_classes * 5) or (N, 5). Note N = num_anchors * W * H - when rois is a grid of anchors.Offset encoding follows [1]_. - max_shape (Sequence[int] or torch.Tensor or Sequence[ - Sequence[int]],optional): Maximum bounds for boxes, specifies - (H, W, C) or (H, W). If bboxes shape is (B, N, 5), then - the max_shape should be a Sequence[Sequence[int]] + pred_bboxes (torch.Tensor): Encoded offsets with respect to each \ + roi. Has shape (B, N, num_classes * 5) or (B, N, 5) or \ + (N, num_classes * 5) or (N, 5). Note N = num_anchors * W * H \ + when rois is a grid of anchors. + max_shape (Sequence[int] or torch.Tensor or Sequence[ \ + Sequence[int]],optional): Maximum bounds for boxes, specifies \ + (H, W, C) or (H, W). If bboxes shape is (B, N, 5), then \ + the max_shape should be a Sequence[Sequence[int]] \ and the length of max_shape should also be B. wh_ratio_clip (float, optional): The allowed ratio between width and height. @@ -189,8 +189,8 @@ def delta2bbox(rois, norm_factor=None, edge_swap=False, proj_xy=False): - """Apply deltas to shift/scale base boxes. Typically the rois are anchor or - proposed bounding boxes and the deltas are network outputs used to + """Apply deltas to shift/scale base boxes. Typically the rois are anchor + or proposed bounding boxes and the deltas are network outputs used to shift/scale those boxes. This is the inverse function of :func:`bbox2delta`. @@ -199,7 +199,7 @@ def delta2bbox(rois, deltas (torch.Tensor): Encoded offsets relative to each roi. Has shape (N, num_classes * 5) or (N, 5). Note N = num_base_anchors * W * H, when rois is a grid of - anchors. Offset encoding follows [1]_. + anchors. means (Sequence[float]): Denormalizing means for delta coordinates. Default (0., 0., 0., 0., 0.). stds (Sequence[float]): Denormalizing standard deviation for delta @@ -214,8 +214,8 @@ def delta2bbox(rois, add_ctr_clamp (bool): Whether to add center clamp, when added, the predicted box is clamped is its center is too far away from the original anchor's center. Only used by YOLOF. Default False. - ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF. - Default 32. + ctr_clamp (int): the maximum pixel shift to clamp. Only used by + YOLOF. Default 32. angle_range (str, optional): Angle representations. Defaults to 'oc'. norm_factor (None|float, optional): Regularization factor of angle. edge_swap (bool, optional): Whether swap the edge if w < h. diff --git a/mmrotate/core/bbox/coder/gliding_vertex_coder.py b/mmrotate/core/bbox/coder/gliding_vertex_coder.py index 505bec6b9..377ffa24b 100644 --- a/mmrotate/core/bbox/coder/gliding_vertex_coder.py +++ b/mmrotate/core/bbox/coder/gliding_vertex_coder.py @@ -13,6 +13,7 @@ class GVFixCoder(BaseBBoxCoder): this coder encodes bbox (cx, cy, w, h, a) into delta (dt, dr, dd, dl) and decodes delta (dt, dr, dd, dl) back to original bbox (cx, cy, w, h, a). + Args: angle_range (str, optional): Angle representations. Defaults to 'oc'. """ @@ -27,6 +28,7 @@ def encode(self, rbboxes): Args: rbboxes (torch.Tensor): Source boxes, e.g., object proposals. + Returns: torch.Tensor: Box transformation deltas """ @@ -66,12 +68,14 @@ def encode(self, rbboxes): def decode(self, hbboxes, fix_deltas): """Apply transformation `fix_deltas` to `boxes`. + Args: hbboxes (torch.Tensor): Basic boxes. 
Shape (B, N, 4) or (N, 4) - fix_deltas (torch.Tensor): Encoded offsets with respect to each - roi. Has shape (B, N, num_classes * 4) or (B, N, 4) or - (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H - when rois is a grid of anchors.Offset encoding follows [1]_. + fix_deltas (torch.Tensor): Encoded offsets with respect to each \ + roi. Has shape (B, N, num_classes * 4) or (B, N, 4) or \ + (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H \ + when rois is a grid of anchors. + Returns: torch.Tensor: Decoded boxes. """ @@ -100,6 +104,7 @@ class GVRatioCoder(BaseBBoxCoder): """Gliding vertex ratio coder. this coder encodes bbox (cx, cy, w, h, a) into delta (ratios). + Args: angle_range (str, optional): Angle representations. Defaults to 'oc'. """ @@ -113,6 +118,7 @@ def encode(self, rbboxes): Args: rbboxes (torch.Tensor): Source boxes, e.g., object proposals. + Returns: torch.Tensor: Box transformation deltas """ @@ -145,6 +151,7 @@ def decode(self, bboxes, bboxes_pred): Args: bboxes (torch.Tensor) bboxes_pred (torch.Tensor) + Returns: NotImplementedError """ diff --git a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py index 63bffaf80..756eeb117 100644 --- a/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py +++ b/mmrotate/core/bbox/iou_calculators/rotate_iou2d_calculator.py @@ -15,6 +15,7 @@ def __call__(self, is_aligned=False, version='oc'): """Calculate IoU between 2D bboxes. + Args: bboxes1 (torch.Tensor): bboxes have shape (m, 5) in format, or shape (m, 6) in @@ -62,6 +63,7 @@ def rbbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): Default "iou". is_aligned (bool, optional): If True, then m and n must be equal. Default False. 
+ Returns: Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,) """ diff --git a/mmrotate/core/bbox/transforms.py b/mmrotate/core/bbox/transforms.py index 21bdfd3dc..01d51651e 100644 --- a/mmrotate/core/bbox/transforms.py +++ b/mmrotate/core/bbox/transforms.py @@ -270,6 +270,7 @@ def poly2obb_le135(polys): Args: polys (torch.Tensor): [x0,y0,x1,y1,x2,y2,x3,y3] + Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ @@ -302,6 +303,7 @@ def poly2obb_le90(polys): Args: polys (torch.Tensor): [x0,y0,x1,y1,x2,y2,x3,y3] + Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ @@ -334,6 +336,7 @@ def poly2obb_np_oc(poly): Args: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3] + Returns: obbs (ndarray): [x_ctr,y_ctr,w,h,angle] """ @@ -359,6 +362,7 @@ def poly2obb_np_le135(poly): Args: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3] + Returns: obbs (ndarray): [x_ctr,y_ctr,w,h,angle] """ @@ -391,6 +395,7 @@ def poly2obb_np_le90(poly): Args: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3] + Returns: obbs (ndarray): [x_ctr,y_ctr,w,h,angle] """ @@ -418,6 +423,7 @@ def obb2poly_oc(rboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: polys (torch.Tensor): [x0,y0,x1,y1,x2,y2,x3,y3] """ @@ -442,6 +448,7 @@ def obb2poly_le135(rboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: polys (torch.Tensor): [x0,y0,x1,y1,x2,y2,x3,y3] """ @@ -469,6 +476,7 @@ def obb2poly_le90(rboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: polys (torch.Tensor): [x0,y0,x1,y1,x2,y2,x3,y3] """ @@ -496,6 +504,7 @@ def obb2hbb_oc(rbboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_ctr,y_ctr,w,h,pi/2] """ @@ -518,6 +527,7 @@ def obb2hbb_le135(rotatex_boxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_ctr,y_ctr,w,h,-pi/2] """ @@ -545,6 +555,7 @@ def obb2hbb_le90(obboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_ctr,y_ctr,w,h,-pi/2] """ @@ -570,6 +581,7 @@ def hbb2obb_oc(hbboxes): Args: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] + Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ @@ -587,6 +599,7 @@ def hbb2obb_le135(hbboxes): Args: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] + Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ @@ -606,6 +619,7 @@ def hbb2obb_le90(hbboxes): Args: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] + Returns: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] """ @@ -625,6 +639,7 @@ def obb2xyxy_oc(rbboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] """ @@ -652,6 +667,7 @@ def obb2xyxy_le135(rotatex_boxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] """ @@ -671,6 +687,7 @@ def obb2xyxy_le90(obboxes): Args: obbs (torch.Tensor): [x_ctr,y_ctr,w,h,angle] + Returns: hbbs (torch.Tensor): [x_lt,y_lt,x_rb,y_rb] """ @@ -690,6 +707,7 @@ def obb2poly_np_oc(rbboxes): Args: obbs (ndarray): [x_ctr,y_ctr,w,h,angle,score] + Returns: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3,score] """ @@ -717,6 +735,7 @@ def obb2poly_np_le135(rrects): Args: obbs (ndarray): [x_ctr,y_ctr,w,h,angle,score] + Returns: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3,score] """ @@ -743,6 +762,7 @@ def obb2poly_np_le90(obboxes): Args: obbs (ndarray): [x_ctr,y_ctr,w,h,angle,score] + Returns: polys (ndarray): [x0,y0,x1,y1,x2,y2,x3,y3,score] """ @@ -769,6 +789,7 @@ def cal_line_length(point1, point2): Args: point1 (List): [x,y] point2 (List): [x,y] 
+ Returns: length (float) """ @@ -782,6 +803,7 @@ def get_best_begin_point_single(coordinate): Args: coordinate (List): [x1, y1, x2, y2, x3, y3, x4, y4, score] + Returns: reorder coordinate (List): [x1, y1, x2, y2, x3, y3, x4, y4, score] """ @@ -816,6 +838,7 @@ def get_best_begin_point(coordinates): Args: coordinate (ndarray): shape(n, 9). + Returns: reorder coordinate (ndarray): shape(n, 9). """ @@ -830,6 +853,7 @@ def norm_angle(angle, angle_range): Args: angle (ndarray): shape(n, ). angle_range (Str): angle representations. + Returns: angle (ndarray): shape(n, ). """ @@ -849,6 +873,7 @@ def dist_torch(point1, point2): Args: point1 (torch.Tensor): shape(n, 2). point2 (torch.Tensor): shape(n, 2). + Returns: distance (torch.Tensor): shape(n, 1). """ diff --git a/mmrotate/core/bbox/utils/gmm.py b/mmrotate/core/bbox/utils/gmm.py index 07454ed0a..20ebf9b32 100644 --- a/mmrotate/core/bbox/utils/gmm.py +++ b/mmrotate/core/bbox/utils/gmm.py @@ -191,8 +191,8 @@ def estimate_log_prob(self, x): x (torch.Tensor): (T, n, d) or (T, n, 1, d) Returns: - torch.Tensor: log-likelihood probability that samples belong to the - k-th Gaussian with dimensions (T, n, k, 1). + torch.Tensor: log-likelihood probability that samples belong to \ + the k-th Gaussian with dimensions (T, n, k, 1). """ x = self.check_size(x) @@ -225,9 +225,10 @@ def log_resp_step(self, x): Returns: tuple: - log_prob_norm (torch.Tensor): the mean of the mean of the + + log_prob_norm (torch.Tensor): the mean of the mean of the \ logarithms of the probabilities. - log_resp (torch.Tensor): log-responses that indicate the + log_resp (torch.Tensor): log-responses that indicate the \ posterior belief. """ x = self.check_size(x) @@ -250,6 +251,7 @@ def EM_step(self, x, log_resp): Returns: tuple: + pi (torch.Tensor): (T, k, 1) mu (torch.Tensor): (T, k, d) var (torch.Tensor): (T, k, d) or (T, k, d, d) diff --git a/mmrotate/core/evaluation/eval_map.py b/mmrotate/core/evaluation/eval_map.py index 8c2d05c09..583f85fdf 100644 --- a/mmrotate/core/evaluation/eval_map.py +++ b/mmrotate/core/evaluation/eval_map.py @@ -255,6 +255,7 @@ def print_map_summary(mean_ap, A table will be printed to show the gts/dets/recall/AP of each class and the mAP. + Args: mean_ap (float): Calculated from `eval_map()`. results (list[dict]): Calculated from `eval_map()`. diff --git a/mmrotate/core/post_processing/bbox_nms_rotated.py b/mmrotate/core/post_processing/bbox_nms_rotated.py index 99f59b9de..8dc9cb744 100644 --- a/mmrotate/core/post_processing/bbox_nms_rotated.py +++ b/mmrotate/core/post_processing/bbox_nms_rotated.py @@ -18,7 +18,7 @@ def multiclass_nms_rotated(multi_bboxes, contains scores of the background class, but this will be ignored. score_thr (float): bbox threshold, bboxes with scores lower than it will not be considered. - nms (float): NMS + nms (float): Config of NMS. max_num (int, optional): if there are more than max_num bboxes after NMS, only top max_num will be kept. Default to -1. score_factors (Tensor, optional): The factors multiplied to scores @@ -27,7 +27,7 @@ def multiclass_nms_rotated(multi_bboxes, bboxes. Default to False. Returns: - tuple: (dets, labels, indices (optional)), tensors of shape (k, 5), + tuple (dets, labels, indices (optional)): tensors of shape (k, 5), \ (k), and (k). Dets are boxes with scores. Labels are 0-based. 
""" num_classes = multi_scores.size(1) - 1 @@ -88,6 +88,23 @@ def multiclass_nms_rotated(multi_bboxes, def aug_multiclass_nms_rotated(merged_bboxes, merged_labels, score_thr, nms, max_num, classes): + """NMS for aug multi-class bboxes. + + Args: + multi_bboxes (torch.Tensor): shape (n, #class*5) or (n, 5) + multi_scores (torch.Tensor): shape (n, #class), where the last column + contains scores of the background class, but this will be ignored. + score_thr (float): bbox threshold, bboxes with scores lower than it + will not be considered. + nms (float): Config of NMS. + max_num (int, optional): if there are more than max_num bboxes after + NMS, only top max_num will be kept. Default to -1. + classes (int): number of classes. + + Returns: + tuple (dets, labels): tensors of shape (k, 5), and (k). Dets are boxes + with scores. Labels are 0-based. + """ bboxes, labels = [], [] for cls in range(classes): diff --git a/mmrotate/datasets/dota.py b/mmrotate/datasets/dota.py index 5b504051d..6f8912488 100644 --- a/mmrotate/datasets/dota.py +++ b/mmrotate/datasets/dota.py @@ -52,7 +52,7 @@ def __len__(self): def load_annotations(self, ann_folder): """ - Params: + Args: ann_folder: folder that contains DOTA v1 annotations txt files """ cls_map = {c: i @@ -210,7 +210,7 @@ def evaluate(self, def merge_det(self, results, nproc=4): """Merging patch bboxes into full image. - Params: + Args: results (list): Testing results of the dataset. nproc (int): number of process. Default: 4. """ @@ -252,7 +252,7 @@ def merge_det(self, results, nproc=4): def _results2submission(self, id_list, dets_list, out_folder=None): """Generate the submission of full images. - Params: + Args: id_list (list): Id of images. dets_list (list): Detection results of per class. out_folder (str, optional): Folder of submission. @@ -296,14 +296,16 @@ def format_results(self, results, submission_dir=None, nproc=4, **kwargs): Args: results (list): Testing results of the dataset. submission_dir (str, optional): The folder that contains submission - files. - If not specified, a temp folder will be created. Default: None. + files. If not specified, a temp folder will be created. + Default: None. nproc (int, optional): number of process. Returns: - tuple: (result_files, tmp_dir), result_files is a dict containing - the json filepaths, tmp_dir is the temporal directory created - for saving json files when submission_dir is not specified. + tuple: + + - result_files (dict): a dict containing the json filepaths + - tmp_dir (str): the temporal directory created for saving \ + json files when submission_dir is not specified. """ nproc = min(nproc, os.cpu_count()) assert isinstance(results, list), 'results must be a list' @@ -330,7 +332,7 @@ def format_results(self, results, submission_dir=None, nproc=4, **kwargs): def _merge_func(info, CLASSES, iou_thr): """Merging patch bboxes into full image. - Params: + Args: CLASSES (list): Label category. iou_thr (float): Threshold of IoU. """ diff --git a/mmrotate/models/backbones/re_resnet.py b/mmrotate/models/backbones/re_resnet.py index a81654baf..1ce13cb37 100644 --- a/mmrotate/models/backbones/re_resnet.py +++ b/mmrotate/models/backbones/re_resnet.py @@ -35,6 +35,7 @@ class BasicBlock(enn.EquivariantModule): Default: None norm_cfg (dict): dictionary to construct and config norm layer. Default: dict(type='BN') + init_cfg (dict or list[dict], optional): Initialization config dict. 
""" def __init__(self, diff --git a/mmrotate/models/dense_heads/csl_rotated_retina_head.py b/mmrotate/models/dense_heads/csl_rotated_retina_head.py index 2cc19fc88..0c2896c1c 100644 --- a/mmrotate/models/dense_heads/csl_rotated_retina_head.py +++ b/mmrotate/models/dense_heads/csl_rotated_retina_head.py @@ -78,12 +78,13 @@ def forward_single(self, x): x (torch.Tensor): Features of a single scale level. Returns: - tuple: - cls_score (torch.Tensor): Cls scores for a single scale level - the channels number is num_anchors * num_classes. - bbox_pred (torch.Tensor): Box energies / deltas for a single - scale level, the channels number is num_anchors * 5. - angle_cls (torch.Tensor): Angle for a single scale level + tuple (torch.Tensor): + + - cls_score (torch.Tensor): Cls scores for a single scale \ + level the channels number is num_anchors * num_classes. + - bbox_pred (torch.Tensor): Box energies / deltas for a \ + single scale level, the channels number is num_anchors * 5. + - angle_cls (torch.Tensor): Angle for a single scale level \ the channels number is num_anchors * coding_len. """ cls_feat = x @@ -126,9 +127,12 @@ def loss_single(self, cls_score, bbox_pred, angle_cls, anchors, labels, positive anchors. Returns: - loss_cls (torch.Tensor): cls. loss for each scale level. - loss_bbox (torch.Tensor): reg. loss for each scale level. - loss_angle (torch.Tensor): angle cls. loss for each scale level. + tuple (torch.Tensor): + + - loss_cls (torch.Tensor): cls. loss for each scale level. + - loss_bbox (torch.Tensor): reg. loss for each scale level. + - loss_angle (torch.Tensor): angle cls. loss for each scale \ + level. """ # Classification loss labels = labels.reshape(-1) @@ -278,15 +282,19 @@ def _get_targets_single(self, set of anchors. Default: True. Returns: - tuple: - labels_list (list[Tensor]): Labels of each level - label_weights_list (list[Tensor]): Label weights of each level - bbox_targets_list (list[Tensor]): BBox targets of each level - bbox_weights_list (list[Tensor]): BBox weights of each level - angle_targets_list (list[Tensor]): Angle targets of each level - angle_weights_list (list[Tensor]): Angle weights of each level - num_total_pos (int): Number of positive samples in all images - num_total_neg (int): Number of negative samples in all images + tuple (list[Tensor]): + + - labels_list (list[Tensor]): Labels of each level + - label_weights_list (list[Tensor]): Label weights of each \ + level + - bbox_targets_list (list[Tensor]): BBox targets of each level + - bbox_weights_list (list[Tensor]): BBox weights of each level + - angle_targets_list (list[Tensor]): Angle targets of each \ + level + - angle_weights_list (list[Tensor]): Angle weights of each + level + - num_total_pos (int): Number of positive samples in all images + - num_total_neg (int): Number of negative samples in all images """ inside_flags = rotated_anchor_inside_flags( flat_anchors, valid_flags, img_meta['img_shape'][:2], diff --git a/mmrotate/models/dense_heads/kfiou_odm_refine_head.py b/mmrotate/models/dense_heads/kfiou_odm_refine_head.py index 3161f7247..5c8bbbe18 100644 --- a/mmrotate/models/dense_heads/kfiou_odm_refine_head.py +++ b/mmrotate/models/dense_heads/kfiou_odm_refine_head.py @@ -10,7 +10,11 @@ @ROTATED_HEADS.register_module() class KFIoUODMRefineHead(KFIoURRetinaHead): - """Rotational Anchor-based refine head. + """Rotated Anchor-based refine head for KFIoU. 
It's a part of the Oriented + Detection Module (ODM), which produces orientation-sensitive features for + classification and orientation-invariant features for localization. The + difference from `ODMRefineHead` is that its loss_bbox requires bbox_pred, + bbox_targets, pred_decode and targets_decode as inputs. Args: num_classes (int): Number of categories excluding the background @@ -109,11 +113,12 @@ def forward_single(self, x): x (torch.Tensor): Features of a single scale level. Returns: - tuple: - cls_score (torch.Tensor): Cls scores for a single scale level - the channels number is num_anchors * num_classes. - bbox_pred (torch.Tensor): Box energies / deltas for a single - scale level, the channels number is num_anchors * 4. + tuple (torch.Tensor): + + - cls_score (torch.Tensor): Cls scores for a single scale \ + level the channels number is num_anchors * num_classes. + - bbox_pred (torch.Tensor): Box energies / deltas for a \ + single scale level, the channels number is num_anchors * 4. """ or_feat = self.or_conv(x) reg_feat = or_feat @@ -138,8 +143,9 @@ def get_anchors(self, featmap_sizes, img_metas, device='cuda'): Returns: tuple: - anchor_list (list[Tensor]): Anchors of each image - valid_flag_list (list[Tensor]): Valid flags of each image + + - anchor_list (list[Tensor]): Anchors of each image + - valid_flag_list (list[Tensor]): Valid flags of each image """ anchor_list = [[ bboxes_img_lvl.clone().detach() for bboxes_img_lvl in bboxes_img @@ -183,7 +189,7 @@ def get_bboxes(self, cfg=None, rescale=False, rois=None): - """Transform network output for a batch into labeled boxes.s. + """Transform network output for a batch into labeled boxes. Args: cls_scores (list[Tensor]): Box scores for each scale level @@ -192,10 +198,9 @@ def get_bboxes(self, level with shape (N, num_anchors * 5, H, W) img_metas (list[dict]): size / scale info for each image cfg (mmcv.Config): test / postprocessing configuration - rois (list[list[Tensor]]): input rbboxes of each level of each - image. - rois output by former stages and are to be refined rescale (bool): if True, return boxes in original image space + rois (list[list[Tensor]]): input rbboxes of each level of each + image. rois output by former stages and are to be refined. Returns: list[tuple[Tensor, Tensor]]: each item in result_list is 2-tuple. diff --git a/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py b/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py index a1448b42c..adb2f593b 100644 --- a/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py +++ b/mmrotate/models/dense_heads/kfiou_rotate_retina_head.py @@ -5,24 +5,20 @@ @ROTATED_HEADS.register_module() class KFIoURRetinaHead(RotatedRetinaHead): - """Rotational Anchor-based refine head. + """Rotated Anchor-based head for KFIoU. The difference from `RRetinaHead` + is that its loss_bbox requires bbox_pred, bbox_targets, pred_decode and + targets_decode as inputs. Args: num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - feat_channels (int): Number of hidden channels. Used in child classes. + stacked_convs (int, optional): Number of stacked convolutions. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. anchor_generator (dict): Config dict for anchor generator - bbox_coder (dict): Config of bounding box coder. 
- reg_decoded_bbox (bool): If true, the regression loss would be - applied on decoded bounding boxes. Default: False - background_label (int | None): Label ID of background, set as 0 for - RPN and num_classes for other heads. It will automatically set as - num_classes if None is given. - loss_cls (dict): Config of classification loss. - loss_bbox (dict): Config of localization loss. - train_cfg (dict): Training config of anchor head. - test_cfg (dict): Testing config of anchor head. init_cfg (dict or list[dict], optional): Initialization config dict. """ # noqa: W605 @@ -83,8 +79,10 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - loss_cls (torch.Tensor): cls. loss for each scale level. - loss_bbox (torch.Tensor): reg. loss for each scale level. + tuple (torch.Tensor): + + - loss_cls (torch.Tensor): cls. loss for each scale level. + - loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/kfiou_rotate_retina_refine_head.py b/mmrotate/models/dense_heads/kfiou_rotate_retina_refine_head.py index 2d8f4428e..0c5fa718e 100644 --- a/mmrotate/models/dense_heads/kfiou_rotate_retina_refine_head.py +++ b/mmrotate/models/dense_heads/kfiou_rotate_retina_refine_head.py @@ -8,24 +8,21 @@ @ROTATED_HEADS.register_module() class KFIoURRetinaRefineHead(KFIoURRetinaHead): - """Rotational Anchor-based refine head. + """Rotational Anchor-based refine head. The difference from + `RRetinaRefineHead` is that its loss_bbox requires bbox_pred, bbox_targets, + pred_decode and targets_decode as inputs. Args: num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - feat_channels (int): Number of hidden channels. Used in child classes. + stacked_convs (int, optional): Number of stacked convolutions. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. anchor_generator (dict): Config dict for anchor generator bbox_coder (dict): Config of bounding box coder. - reg_decoded_bbox (bool): If true, the regression loss would be - applied on decoded bounding boxes. Default: False - background_label (int | None): Label ID of background, set as 0 for - RPN and num_classes for other heads. It will automatically set as - num_classes if None is given. - loss_cls (dict): Config of classification loss. - loss_bbox (dict): Config of localization loss. - train_cfg (dict): Training config of anchor head. - test_cfg (dict): Testing config of anchor head. init_cfg (dict or list[dict], optional): Initialization config dict. """ # noqa: W605 @@ -79,8 +76,8 @@ def refine_bboxes(self, cls_scores, bbox_preds, rois): image. rois output by former stages and are to be refined Returns: - list[list[Tensor]]: best or refined rbboxes of each level of each - image. + list[list[Tensor]]: best or refined rbboxes of each level of each \ + image. 
""" num_levels = len(cls_scores) assert num_levels == len(bbox_preds) @@ -117,9 +114,10 @@ def get_anchors(self, featmap_sizes, img_metas, device='cuda'): device (torch.device | str): Device for returned tensors Returns: - tuple: - anchor_list (list[Tensor]): Anchors of each image - valid_flag_list (list[Tensor]): Valid flags of each image + tuple (list[Tensor]): + + - anchor_list (list[Tensor]): Anchors of each image + - valid_flag_list (list[Tensor]): Valid flags of each image """ anchor_list = [[ bboxes_img_lvl.clone().detach() for bboxes_img_lvl in bboxes_img @@ -162,7 +160,7 @@ def get_bboxes(self, cfg=None, rescale=False, rois=None): - """Transform network output for a batch into labeled boxes.s. + """Transform network output for a batch into labeled boxes. Args: cls_scores (list[Tensor]): Box scores for each scale level diff --git a/mmrotate/models/dense_heads/odm_refine_head.py b/mmrotate/models/dense_heads/odm_refine_head.py index 0190b9c02..9346f5cdf 100644 --- a/mmrotate/models/dense_heads/odm_refine_head.py +++ b/mmrotate/models/dense_heads/odm_refine_head.py @@ -10,24 +10,20 @@ @ROTATED_HEADS.register_module() class ODMRefineHead(RotatedRetinaHead): - """Rotational Anchor-based refine head. + """Rotated Anchor-based refine head. It's a part of the Oriented Detection + Module (ODM), which produces orientation-sensitive features for + classification and orientation-invariant features for localization. Args: num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - feat_channels (int): Number of hidden channels. Used in child classes. + stacked_convs (int, optional): Number of stacked convolutions. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. anchor_generator (dict): Config dict for anchor generator - bbox_coder (dict): Config of bounding box coder. - reg_decoded_bbox (bool): If true, the regression loss would be - applied on decoded bounding boxes. Default: False - background_label (int | None): Label ID of background, set as 0 for - RPN and num_classes for other heads. It will automatically set as - num_classes if None is given. - loss_cls (dict): Config of classification loss. - loss_bbox (dict): Config of localization loss. - train_cfg (dict): Training config of anchor head. - test_cfg (dict): Testing config of anchor head. init_cfg (dict or list[dict], optional): Initialization config dict. """ # noqa: W605 @@ -109,11 +105,12 @@ def forward_single(self, x): x (torch.Tensor): Features of a single scale level. Returns: - tuple: - cls_score (torch.Tensor): Cls scores for a single scale level - the channels number is num_anchors * num_classes. - bbox_pred (torch.Tensor): Box energies / deltas for a single - scale level, the channels number is num_anchors * 4. + tuple (torch.Tensor): + + - cls_score (torch.Tensor): Cls scores for a single scale \ + level the channels number is num_anchors * num_classes. + - bbox_pred (torch.Tensor): Box energies / deltas for a \ + single scale level, the channels number is num_anchors * 4. 
""" or_feat = self.or_conv(x) reg_feat = or_feat @@ -137,9 +134,10 @@ def get_anchors(self, featmap_sizes, img_metas, device='cuda'): device (torch.device | str): Device for returned tensors Returns: - tuple: - anchor_list (list[Tensor]): Anchors of each image - valid_flag_list (list[Tensor]): Valid flags of each image + tuple (list[Tensor]): + + - anchor_list (list[Tensor]): Anchors of each image + - valid_flag_list (list[Tensor]): Valid flags of each image """ anchor_list = [[ bboxes_img_lvl.clone().detach() for bboxes_img_lvl in bboxes_img @@ -182,7 +180,7 @@ def get_bboxes(self, cfg=None, rescale=False, rois=None): - """Transform network output for a batch into labeled boxes.s. + """Transform network output for a batch into labeled boxes. Args: cls_scores (list[Tensor]): Box scores for each scale level diff --git a/mmrotate/models/dense_heads/oriented_rpn_head.py b/mmrotate/models/dense_heads/oriented_rpn_head.py index 823a8ed6c..953d333af 100644 --- a/mmrotate/models/dense_heads/oriented_rpn_head.py +++ b/mmrotate/models/dense_heads/oriented_rpn_head.py @@ -13,7 +13,7 @@ @ROTATED_HEADS.register_module() class OrientedRPNHead(RotatedRPNHead): - """Oriented RPN head for rotated bboxes.""" + """Oriented RPN head for Oriented R-CNN.""" def _init_layers(self): """Initialize layers of the head.""" @@ -54,13 +54,15 @@ def _get_targets_single(self, set of anchors. Returns: - tuple: - labels_list (list[Tensor]): Labels of each level - label_weights_list (list[Tensor]): Label weights of each level - bbox_targets_list (list[Tensor]): BBox targets of each level - bbox_weights_list (list[Tensor]): BBox weights of each level - num_total_pos (int): Number of positive samples in all images - num_total_neg (int): Number of negative samples in all images + tuple (list[Tensor]): + + - labels_list (list[Tensor]): Labels of each level + - label_weights_list (list[Tensor]): Label weights of each \ + level + - bbox_targets_list (list[Tensor]): BBox targets of each level + - bbox_weights_list (list[Tensor]): BBox weights of each level + - num_total_pos (int): Number of positive samples in all images + - num_total_neg (int): Number of negative samples in all images """ inside_flags = rotated_anchor_inside_flags( flat_anchors, valid_flags, img_meta['img_shape'][:2], @@ -155,8 +157,10 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - loss_cls (torch.Tensor): cls. loss for each scale level. - loss_bbox (torch.Tensor): reg. loss for each scale level. + tuple (torch.Tensor): + + - loss_cls (torch.Tensor): cls. loss for each scale level. + - loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/rotated_anchor_head.py b/mmrotate/models/dense_heads/rotated_anchor_head.py index 0b7f46bd0..6aed0623c 100644 --- a/mmrotate/models/dense_heads/rotated_anchor_head.py +++ b/mmrotate/models/dense_heads/rotated_anchor_head.py @@ -17,7 +17,7 @@ @ROTATED_HEADS.register_module() class RotatedAnchorHead(BaseDenseHead): - """Anchor-based head (RotatedRPN, RotatedRetinaNet, etc.). + """Rotated Anchor-based head (RotatedRPN, RotatedRetinaNet, etc.). Args: num_classes (int): Number of categories excluding the background @@ -28,6 +28,10 @@ class RotatedAnchorHead(BaseDenseHead): bbox_coder (dict): Config of bounding box coder. reg_decoded_bbox (bool): If true, the regression loss would be applied on decoded bounding boxes. 
Default: False + assign_by_circumhbbox (str): If None, assigner will assign according to + the IoU between anchor and GT (OBB), called RetinaNet-OBB. + If angle definition method, assigner will assign according to the + IoU between anchor and GT's circumbox (HBB), called RetinaNet-HBB. loss_cls (dict): Config of classification loss. loss_bbox (dict): Config of localization loss. train_cfg (dict): Training config of anchor head. @@ -113,11 +117,12 @@ def forward_single(self, x): x (torch.Tensor): Features of a single scale level. Returns: - tuple: - cls_score (torch.Tensor): Cls scores for a single scale level - the channels number is num_anchors * num_classes. - bbox_pred (torch.Tensor): Box energies / deltas for a single - scale level, the channels number is num_anchors * 5. + tuple (torch.Tensor): + + - cls_score (torch.Tensor): Cls scores for a single scale \ + level the channels number is num_anchors * num_classes. + - bbox_pred (torch.Tensor): Box energies / deltas for a \ + single scale level, the channels number is num_anchors * 5. """ cls_score = self.conv_cls(x) bbox_pred = self.conv_reg(x) @@ -133,11 +138,11 @@ def forward(self, feats): Returns: tuple: A tuple of classification scores and bbox prediction. - - cls_scores (list[Tensor]): Classification scores for all - scale levels, each is a 4D-tensor, the channels number + - cls_scores (list[Tensor]): Classification scores for all \ + scale levels, each is a 4D-tensor, the channels number \ is num_anchors * num_classes. - - bbox_preds (list[Tensor]): Box energies / deltas for all - scale levels, each is a 4D-tensor, the channels number + - bbox_preds (list[Tensor]): Box energies / deltas for all \ + scale levels, each is a 4D-tensor, the channels number \ is num_anchors * 5. """ return multi_apply(self.forward_single, feats) @@ -151,9 +156,10 @@ def get_anchors(self, featmap_sizes, img_metas, device='cuda'): device (torch.device | str): Device for returned tensors Returns: - tuple: - anchor_list (list[Tensor]): Anchors of each image. - valid_flag_list (list[Tensor]): Valid flags of each image. + tuple (list[Tensor]): + + - anchor_list (list[Tensor]): Anchors of each image. + - valid_flag_list (list[Tensor]): Valid flags of each image. """ num_imgs = len(img_metas) @@ -204,13 +210,15 @@ def _get_targets_single(self, set of anchors. Returns: - tuple: - labels_list (list[Tensor]): Labels of each level - label_weights_list (list[Tensor]): Label weights of each level - bbox_targets_list (list[Tensor]): BBox targets of each level - bbox_weights_list (list[Tensor]): BBox weights of each level - num_total_pos (int): Number of positive samples in all images - num_total_neg (int): Number of negative samples in all images + tuple (list[Tensor]): + + - labels_list (list[Tensor]): Labels of each level + - label_weights_list (list[Tensor]): Label weights of each \ + level + - bbox_targets_list (list[Tensor]): BBox targets of each level + - bbox_weights_list (list[Tensor]): BBox weights of each level + - num_total_pos (int): Number of positive samples in all images + - num_total_neg (int): Number of negative samples in all images """ inside_flags = rotated_anchor_inside_flags( flat_anchors, valid_flags, img_meta['img_shape'][:2], @@ -322,6 +330,7 @@ def get_targets(self, images. - num_total_neg (int): Number of negative samples in all \ images. + additional_returns: This function enables user-defined returns from `self._get_targets_single`. These returns are currently refined to properties at each feature map (i.e. having HxW dimension). 
@@ -405,8 +414,10 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - loss_cls (torch.Tensor): cls. loss for each scale level. - loss_bbox (torch.Tensor): reg. loss for each scale level. + tuple (torch.Tensor): + + - loss_cls (torch.Tensor): cls. loss for each scale level. + - loss_bbox (torch.Tensor): reg. loss for each scale level. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/rotated_reppoints_head.py b/mmrotate/models/dense_heads/rotated_reppoints_head.py index 2fb3fff19..84d9b318e 100644 --- a/mmrotate/models/dense_heads/rotated_reppoints_head.py +++ b/mmrotate/models/dense_heads/rotated_reppoints_head.py @@ -43,7 +43,7 @@ class RotatedRepPointsHead(BaseDenseHead): center_init (bool, optional): Whether to use center point assignment. transform_method (str, optional): The methods to transform RepPoints to bbox. - use_reassign (bool, optional): Whether to use cfa. + use_reassign (bool, optional): Whether to reassign samples. topk (int, optional): Number of the highest topk points. Defaults to 9. anti_factor (float, optional): Feature anti-aliasing coefficient. version (str, optional): Angle representations. Defaults to 'oc'. @@ -378,6 +378,7 @@ def get_targets(self, unmap_outputs=True): """Compute corresponding GT box and classification targets for proposals. + Args: proposals_list (list[list]): Multi level points/bboxes of each image. @@ -395,14 +396,20 @@ def get_targets(self, set of anchors. Returns: - tuple: + tuple (list[Tensor]): + - labels_list (list[Tensor]): Labels of each level. - - label_weights_list (list[Tensor]): Label weights of each level. # noqa: E501 + - label_weights_list (list[Tensor]): Label weights of each \ + level. - bbox_gt_list (list[Tensor]): Ground truth bbox of each level. - - proposal_list (list[Tensor]): Proposals(points/bboxes) of each level. # noqa: E501 - - proposal_weights_list (list[Tensor]): Proposal weights of each level. # noqa: E501 - - num_total_pos (int): Number of positive samples in all images. # noqa: E501 - - num_total_neg (int): Number of negative samples in all images. # noqa: E501 + - proposal_list (list[Tensor]): Proposals(points/bboxes) of \ + each level. + - proposal_weights_list (list[Tensor]): Proposal weights of \ + each level. + - num_total_pos (int): Number of positive samples in all \ + images. + - num_total_neg (int): Number of negative samples in all \ + images. """ assert stage in ['init', 'refine'] num_imgs = len(img_metas) @@ -464,6 +471,7 @@ def get_cfa_targets(self, unmap_outputs=True): """Compute corresponding GT box and classification targets for proposals. + Args: proposals_list (list[list]): Multi level points/bboxes of each image. @@ -483,12 +491,17 @@ def get_cfa_targets(self, Returns: tuple: - all_labels (list[Tensor]): Labels of each level. - - all_label_weights (list[Tensor]): Label weights of each level. # noqa: E501 + - all_label_weights (list[Tensor]): Label weights of each \ + level. - all_bbox_gt (list[Tensor]): Ground truth bbox of each level. - - all_proposals (list[Tensor]): Proposals(points/bboxes) of each level. # noqa: E501 - - all_proposal_weights (list[Tensor]): Proposal weights of each level. # noqa: E501 - - pos_inds (list[Tensor]): Index of positive samples in all images. # noqa: E501 - - gt_inds (list[Tensor]): Index of ground truth bbox in all images. # noqa: E501 + - all_proposals (list[Tensor]): Proposals(points/bboxes) of \ + each level. 
+ - all_proposal_weights (list[Tensor]): Proposal weights of \ + each level. + - pos_inds (list[Tensor]): Index of positive samples in all \ + images. + - gt_inds (list[Tensor]): Index of ground truth bbox in all \ + images. """ assert stage in ['init', 'refine'] num_imgs = len(img_metas) @@ -596,6 +609,7 @@ def loss(self, img_metas, gt_bboxes_ignore=None): """Loss function of CFA head.""" + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.prior_generator.num_levels label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1 @@ -804,6 +818,7 @@ def get_pos_loss(self, cls_score, pts_pred, label, bbox_gt, label_weight, (num_anchors, 4). pos_inds (Tensor): Index of all positive samples got from first assign process. + Returns: Tensor: Losses of all positive samples in single image. """ @@ -841,6 +856,7 @@ def reassign(self, num_proposals_each_level=None, num_level=None): """CFA reassign process. + Args: pos_losses (Tensor): Losses of all positive samples in single image. @@ -862,13 +878,14 @@ def reassign(self, Returns: tuple: Usually returns a tuple containing learning targets. - - label (Tensor): classification target of each anchor after + + - label (Tensor): classification target of each anchor after \ paa assign, with shape (num_anchors,) - - label_weight (Tensor): Classification loss weight of each + - label_weight (Tensor): Classification loss weight of each \ anchor after paa assign, with shape (num_anchors). - - convex_weight (Tensor): Bbox weight of each anchor with shape - (num_anchors, 4). - - pos_normalize_term (list): pos normalize term for refine + - convex_weight (Tensor): Bbox weight of each anchor with \ + shape (num_anchors, 4). + - pos_normalize_term (list): pos normalize term for refine \ points losses. """ if len(pos_inds) == 0: @@ -999,6 +1016,7 @@ def get_bboxes(self, with_nms=True, **kwargs): """Transform network outputs of a batch into bbox results. + Args: cls_scores (list[Tensor]): Classification scores for all scale levels, each is a 4D-tensor, has shape @@ -1016,6 +1034,7 @@ def get_bboxes(self, Default False. with_nms (bool): If True, do nms before return boxes. Default True. + Returns: list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple. The first item is an (n, 6) tensor, where the first 4 columns @@ -1084,6 +1103,7 @@ def _get_bboxes_single(self, mlvl_scores, else return mlvl_bboxes, mlvl_scores and mlvl_score_factor. Usually with_nms is False is used for aug test. If with_nms is True, then return the following format + - det_bboxes (Tensor): Predicted bboxes with shape \ [num_bboxes, 5], where the first 4 columns are bounding \ box positions (cx, cy, w, h, a) and the 5-th \ diff --git a/mmrotate/models/dense_heads/rotated_retina_head.py b/mmrotate/models/dense_heads/rotated_retina_head.py index cc6bcaa90..ef4ab011c 100644 --- a/mmrotate/models/dense_heads/rotated_retina_head.py +++ b/mmrotate/models/dense_heads/rotated_retina_head.py @@ -15,17 +15,18 @@ class RotatedRetinaHead(RotatedAnchorHead): The head contains two subnetworks. The first classifies anchor boxes and the second regresses deltas for the anchors. 
- Example: - >>> import torch - >>> self = RetinaHead(11, 7) - >>> x = torch.rand(1, 7, 32, 32) - >>> cls_score, bbox_pred = self.forward_single(x) - >>> # Each anchor predicts a score for each class except background - >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors - >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors - >>> assert cls_per_anchor == (self.num_classes) - >>> assert box_per_anchor == 4 - """ + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + stacked_convs (int, optional): Number of stacked convolutions. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. + anchor_generator (dict): Config dict for anchor generator + init_cfg (dict or list[dict], optional): Initialization config dict. + """ # noqa: W605 def __init__(self, num_classes, @@ -49,6 +50,7 @@ def __init__(self, std=0.01, bias_prob=0.01)), **kwargs): + self.stacked_convs = stacked_convs self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg @@ -99,11 +101,12 @@ def forward_single(self, x): x (torch.Tensor): Features of a single scale level. Returns: - tuple: - cls_score (torch.Tensor): Cls scores for a single scale level - the channels number is num_anchors * num_classes. - bbox_pred (torch.Tensor): Box energies / deltas for a single - scale level, the channels number is num_anchors * 4. + tuple (torch.Tensor): + + - cls_score (torch.Tensor): Cls scores for a single scale \ + level the channels number is num_anchors * num_classes. + - bbox_pred (torch.Tensor): Box energies / deltas for a \ + single scale level, the channels number is num_anchors * 4. """ cls_feat = x reg_feat = x @@ -129,7 +132,7 @@ def filter_bboxes(self, cls_scores, bbox_preds): level with shape (N, num_anchors * 5, H, W) Returns: - list[list[Tensor]]: best or refined rbboxes of each level + list[list[Tensor]]: best or refined rbboxes of each level \ of each image. """ num_levels = len(cls_scores) diff --git a/mmrotate/models/dense_heads/rotated_retina_refine_head.py b/mmrotate/models/dense_heads/rotated_retina_refine_head.py index 35a21b396..f58f6e3e0 100644 --- a/mmrotate/models/dense_heads/rotated_retina_refine_head.py +++ b/mmrotate/models/dense_heads/rotated_retina_refine_head.py @@ -8,24 +8,20 @@ @ROTATED_HEADS.register_module() class RotatedRetinaRefineHead(RotatedRetinaHead): - """Rotational Anchor-based refine head. + """Rotated Anchor-based refine head. Args: num_classes (int): Number of categories excluding the background category. in_channels (int): Number of channels in the input feature map. - feat_channels (int): Number of hidden channels. Used in child classes. + stacked_convs (int, optional): Number of stacked convolutions. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. anchor_generator (dict): Config dict for anchor generator bbox_coder (dict): Config of bounding box coder. - reg_decoded_bbox (bool): If true, the regression loss would be - applied on decoded bounding boxes. Default: False - background_label (int | None): Label ID of background, set as 0 for - RPN and num_classes for other heads. It will automatically set as - num_classes if None is given. - loss_cls (dict): Config of classification loss. - loss_bbox (dict): Config of localization loss. 
- train_cfg (dict): Training config of anchor head. - test_cfg (dict): Testing config of anchor head. + init_cfg (dict or list[dict], optional): Initialization config dict. """ # noqa: W605 def __init__(self, @@ -75,12 +71,11 @@ def refine_bboxes(self, cls_scores, bbox_preds, rois): bbox_preds (list[Tensor]): Box energies / deltas for each scale level with shape (N, 5, H, W) rois (list[list[Tensor]]): input rbboxes of each level of each - image. - rois output by former stages and are to be refined + image. rois output by former stages and are to be refined Returns: - list[list[Tensor]]: best or refined rbboxes of each level of each - image. + list[list[Tensor]]: best or refined rbboxes of each level of each \ + image. """ num_levels = len(cls_scores) assert num_levels == len(bbox_preds) @@ -118,9 +113,10 @@ def get_anchors(self, featmap_sizes, img_metas, device='cuda'): device (torch.device | str): Device for returned tensors Returns: - tuple: - anchor_list (list[Tensor]): Anchors of each image - valid_flag_list (list[Tensor]): Valid flags of each image + tuple (list[Tensor]): + + - anchor_list (list[Tensor]): Anchors of each image + - valid_flag_list (list[Tensor]): Valid flags of each image """ anchor_list = [[ bboxes_img_lvl.clone().detach() for bboxes_img_lvl in bboxes_img @@ -163,7 +159,7 @@ def get_bboxes(self, cfg=None, rescale=False, rois=None): - """Transform network output for a batch into labeled boxes.s. + """Transform network output for a batch into labeled boxes. Args: cls_scores (list[Tensor]): Box scores for each scale level @@ -173,8 +169,7 @@ def get_bboxes(self, img_metas (list[dict]): size / scale info for each image cfg (mmcv.Config): test / postprocessing configuration rois (list[list[Tensor]]): input rbboxes of each level of each - image. - rois output by former stages and are to be refined + image. rois output by former stages and are to be refined rescale (bool): if True, return boxes in original image space Returns: diff --git a/mmrotate/models/dense_heads/rotated_rpn_head.py b/mmrotate/models/dense_heads/rotated_rpn_head.py index eb6606867..632acbb1e 100644 --- a/mmrotate/models/dense_heads/rotated_rpn_head.py +++ b/mmrotate/models/dense_heads/rotated_rpn_head.py @@ -274,8 +274,7 @@ def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights, positive anchors. Returns: - loss_cls (torch.Tensor): cls. loss for each scale level. - loss_bbox (torch.Tensor): reg. loss for each scale level. + dict[str, Tensor]: A dictionary of loss components. """ # classification loss labels = labels.reshape(-1) diff --git a/mmrotate/models/dense_heads/sam_reppoints_head.py b/mmrotate/models/dense_heads/sam_reppoints_head.py index 47e570adf..91ec6592e 100644 --- a/mmrotate/models/dense_heads/sam_reppoints_head.py +++ b/mmrotate/models/dense_heads/sam_reppoints_head.py @@ -18,7 +18,7 @@ @ROTATED_HEADS.register_module() class SAMRepPointsHead(BaseDenseHead): - """CFA head. + """Rotated RepPoints head for SASM. Args: num_classes (int): Number of classes. @@ -441,6 +441,7 @@ def get_targets(self, unmap_outputs=True): """Compute corresponding GT box and classification targets for proposals. + Args: proposals_list (list[list]): Multi level points/bboxes of each image. @@ -458,19 +459,20 @@ def get_targets(self, set of anchors. Returns: - tuple: + tuple (list[Tensor]): + - labels_list (list[Tensor]): Labels of each level. - - label_weights_list (list[Tensor]): Label weights of each - level. + - label_weights_list (list[Tensor]): Label weights of each \ + level. 
- bbox_gt_list (list[Tensor]): Ground truth bbox of each level.
- proposal_list (list[Tensor]): Proposals(points/bboxes) of \
each level.
- proposal_weights_list (list[Tensor]): Proposal weights of \
each level.
- num_total_pos (int): Number of positive samples in all \
images.
- num_total_neg (int): Number of negative samples in all \
images.
"""
assert stage in ['init', 'refine']
num_imgs = len(img_metas)
@@ -685,6 +687,7 @@ def get_bboxes(self,
 with_nms=True,
 **kwargs):
 """Transform network outputs of a batch into bbox results.
+
 Args:
 cls_scores (list[Tensor]): Classification scores for all
 scale levels, each is a 4D-tensor, has shape
@@ -745,6 +748,7 @@ def _get_bboxes_single(self,
 with_nms=True,
 **kwargs):
 """Transform outputs of a single image into bbox predictions.
+
 Args:
 cls_score_list (list[Tensor]): Box scores from all scale levels of
 a single image, each item has shape
@@ -772,6 +776,7 @@ def _get_bboxes_single(self,
 mlvl_scores, else return mlvl_bboxes, mlvl_scores and
 mlvl_score_factor. Usually with_nms is False is used for aug
 test. If with_nms is True, then return the following format
+
 - det_bboxes (Tensor): Predicted bboxes with shape \
 [num_bboxes, 5], where the first 4 columns are bounding \
 box positions (cx, cy, w, h, a) and the 5-th \
diff --git a/mmrotate/models/dense_heads/utils.py b/mmrotate/models/dense_heads/utils.py
index 8b9d8e5c9..ca6274122 100644
--- a/mmrotate/models/dense_heads/utils.py
+++ b/mmrotate/models/dense_heads/utils.py
@@ -4,10 +4,16 @@
 
 
 def points_center_pts(RPoints, y_first=True):
-    '''
-    RPoints:[:, 18] the lists of Pointsets (9 points)
-    center_pts: the mean_center coordination of Pointsets
-    '''
+    """Compute center point of Pointsets.
+
+    Args:
+        RPoints (torch.Tensor): the lists of Pointsets, shape (k, 18).
+        y_first (bool, optional): If True, the sequence of Pointsets is (y,x).
+
+    Returns:
+        center_pts (torch.Tensor): the mean center coordinate of Pointsets,
+            shape (k, 2).
+    """
 RPoints = RPoints.reshape(-1, 9, 2)
 
 if y_first:
@@ -28,6 +34,7 @@ def convex_overlaps(gt_bboxes, points):
 Args:
 gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8).
 points (torch.Tensor): Points to be assigned, shape(n, 18).
+
 Returns:
 overlaps (torch.Tensor): Overlaps between k gt_bboxes and n bboxes,
 shape(k, n).
@@ -50,6 +57,7 @@ def levels_to_images(mlvl_tensor, flatten=False):
 corresponding level. Each element is of shape (N, C, H, W)
 flatten (bool, optional): if shape of mlvl_tensor is (N, C, H, W)
 set False, if shape of mlvl_tensor is (N, H, W, C) set True.
+
 Returns:
 list[torch.Tensor]: A list that contains N tensors and each tensor
 is of shape (num_elements, C)
diff --git a/mmrotate/models/detectors/oriented_rcnn.py b/mmrotate/models/detectors/oriented_rcnn.py
index 3aae368c5..442a8e63c 100644
--- a/mmrotate/models/detectors/oriented_rcnn.py
+++ b/mmrotate/models/detectors/oriented_rcnn.py
@@ -5,10 +5,9 @@
 
 @ROTATED_DETECTORS.register_module()
 class OrientedRCNN(RotatedTwoStageDetector):
- """Implementation of `Oriented R-CNN for Object Detection.
+ """Implementation of `Oriented R-CNN for Object Detection.`__ - `_ + __ https://openaccess.thecvf.com/content/ICCV2021/papers/Xie_Oriented_R-CNN_for_Object_Detection_ICCV_2021_paper.pdf # noqa: E501, E261. """ def __init__(self, diff --git a/mmrotate/models/detectors/r3det.py b/mmrotate/models/detectors/r3det.py index 523d33234..05f05d729 100644 --- a/mmrotate/models/detectors/r3det.py +++ b/mmrotate/models/detectors/r3det.py @@ -119,8 +119,8 @@ def simple_test(self, img, img_meta, rescale=False): Defaults to False. Returns: - list[list[np.ndarray]]: BBox results of each image and classes. - The outer list corresponds to each image. The inner list + list[list[np.ndarray]]: BBox results of each image and classes. \ + The outer list corresponds to each image. The inner list \ corresponds to each class. """ x = self.extract_feat(img) diff --git a/mmrotate/models/detectors/redet.py b/mmrotate/models/detectors/redet.py index dfdf5e057..7a4f11de7 100644 --- a/mmrotate/models/detectors/redet.py +++ b/mmrotate/models/detectors/redet.py @@ -6,11 +6,9 @@ @ROTATED_DETECTORS.register_module() class ReDet(RotatedTwoStageDetector): """Implementation of `ReDet: A Rotation-equivariant Detector for Aerial - Object Detection. + Object Detection.`__ - `_ + __ https://openaccess.thecvf.com/content/CVPR2021/papers/Han_ReDet_A_Rotation-Equivariant_Detector_for_Aerial_Object_Detection_CVPR_2021_paper.pdf # noqa: E501, E261. """ def __init__(self, diff --git a/mmrotate/models/detectors/roi_transformer.py b/mmrotate/models/detectors/roi_transformer.py index 9f8fd85cf..b6b08e708 100644 --- a/mmrotate/models/detectors/roi_transformer.py +++ b/mmrotate/models/detectors/roi_transformer.py @@ -6,13 +6,9 @@ @ROTATED_DETECTORS.register_module() class RoITransformer(RotatedTwoStageDetector): """Implementation of `Learning RoI Transformer for Oriented Object - Detection in Aerial Images. + Detection in Aerial Images.`__ - `_ + __ https://openaccess.thecvf.com/content_CVPR_2019/papers/Ding_Learning_RoI_Transformer_for_Oriented_Object_Detection_in_Aerial_Images_CVPR_2019_paper.pdf#:~:text=The%20core%20idea%20of%20RoI%20Transformer%20is%20to,embed-%20ded%20into%20detectors%20for%20oriented%20object%20detection # noqa: E501, E261. """ def __init__(self, diff --git a/mmrotate/models/detectors/rotate_faster_rcnn.py b/mmrotate/models/detectors/rotate_faster_rcnn.py index 623e64eae..d7ec1bf3f 100644 --- a/mmrotate/models/detectors/rotate_faster_rcnn.py +++ b/mmrotate/models/detectors/rotate_faster_rcnn.py @@ -5,9 +5,9 @@ @ROTATED_DETECTORS.register_module() class RotatedFasterRCNN(RotatedTwoStageDetector): - """Implementation of Rotated `Faster R-CNN. 
+ """Implementation of Rotated `Faster R-CNN.`__ - `_ + __ https://arxiv.org/abs/1506.01497 """ def __init__(self, diff --git a/mmrotate/models/detectors/rotated_reppoints.py b/mmrotate/models/detectors/rotated_reppoints.py index 7386a2755..795fac545 100644 --- a/mmrotate/models/detectors/rotated_reppoints.py +++ b/mmrotate/models/detectors/rotated_reppoints.py @@ -5,7 +5,7 @@ @ROTATED_DETECTORS.register_module() class RotatedRepPoints(RotatedSingleStageDetector): - """Implementation of RepPoints.""" + """Implementation of Rotated RepPoints.""" def __init__(self, backbone, diff --git a/mmrotate/models/detectors/rotated_retinanet.py b/mmrotate/models/detectors/rotated_retinanet.py index bb3963226..257bec37a 100644 --- a/mmrotate/models/detectors/rotated_retinanet.py +++ b/mmrotate/models/detectors/rotated_retinanet.py @@ -5,9 +5,9 @@ @ROTATED_DETECTORS.register_module() class RotatedRetinaNet(RotatedSingleStageDetector): - """Implementation of Rotated `RetinaNet. + """Implementation of Rotated `RetinaNet.`__ - `_ + __ https://arxiv.org/abs/1708.02002 """ def __init__(self, diff --git a/mmrotate/models/detectors/s2anet.py b/mmrotate/models/detectors/s2anet.py index 65dee95da..c590a293b 100644 --- a/mmrotate/models/detectors/s2anet.py +++ b/mmrotate/models/detectors/s2anet.py @@ -7,7 +7,10 @@ @ROTATED_DETECTORS.register_module() class S2ANet(RotatedBaseDetector): - """Rotated Refinement RetinaNet.""" + """Implementation of `Align Deep Features for Oriented Object Detection.`__ + + __ https://ieeexplore.ieee.org/document/9377550 + """ def __init__(self, backbone, @@ -108,8 +111,8 @@ def simple_test(self, img, img_meta, rescale=False): Defaults to False. Returns: - list[list[np.ndarray]]: BBox results of each image and classes. - The outer list corresponds to each image. The inner list + list[list[np.ndarray]]: BBox results of each image and classes. \ + The outer list corresponds to each image. The inner list \ corresponds to each class. """ x = self.extract_feat(img) diff --git a/mmrotate/models/detectors/single_stage.py b/mmrotate/models/detectors/single_stage.py index 7a7fb05f5..c703ce8ef 100644 --- a/mmrotate/models/detectors/single_stage.py +++ b/mmrotate/models/detectors/single_stage.py @@ -92,8 +92,8 @@ def simple_test(self, img, img_metas, rescale=False): Defaults to False. Returns: - list[list[np.ndarray]]: BBox results of each image and classes. - The outer list corresponds to each image. The inner list + list[list[np.ndarray]]: BBox results of each image and classes. \ + The outer list corresponds to each image. The inner list \ corresponds to each class. """ x = self.extract_feat(img) @@ -121,7 +121,7 @@ def aug_test(self, imgs, img_metas, rescale=False): Defaults to False. Returns: - list[list[np.ndarray]]: BBox results of each image and classes. + list[list[np.ndarray]]: BBox results of each image and classes. \ The outer list corresponds to each image. The inner list corresponds to each class. """ diff --git a/mmrotate/models/detectors/utils.py b/mmrotate/models/detectors/utils.py index 5e7a6acba..f0c88dae1 100644 --- a/mmrotate/models/detectors/utils.py +++ b/mmrotate/models/detectors/utils.py @@ -6,9 +6,14 @@ class AlignConv(nn.Module): - """Implementation of `Align Deep Features for Oriented Object Detection. + """Align Conv of `S2ANet`. - `_ + Args: + in_channels (int): Number of input channels. + featmap_strides (list): The strides of featmap. + kernel_size (int, optional): The size of kernel. + stride (int, optional): Stride of the convolution. 
Default: None + deform_groups (int, optional): Number of deformable group partitions. """ def __init__(self, @@ -129,7 +134,16 @@ def forward(self, x, rbboxes): class FeatureRefineModule(nn.Module): - """Feature refine module.""" + """Feature refine module for `R3Det`. + + Args: + in_channels (int): Number of input channels. + featmap_strides (list): The strides of featmap. + conv_cfg (dict, optional): Config dict for convolution layer. + Default: None. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. + """ def __init__(self, in_channels, diff --git a/mmrotate/models/losses/convex_giou_loss.py b/mmrotate/models/losses/convex_giou_loss.py index b704ce4d1..83bb930e9 100644 --- a/mmrotate/models/losses/convex_giou_loss.py +++ b/mmrotate/models/losses/convex_giou_loss.py @@ -70,10 +70,12 @@ class ConvexGIoULoss(nn.Module): Computing the Convex GIoU loss between a set of predicted convexes and target convexes. + Args: reduction (str, optional): The reduction method of the loss. Defaults to 'mean'. loss_weight (float, optional): The weight of loss. Defaults to 1.0. + Return: torch.Tensor: Loss tensor. """ @@ -264,10 +266,12 @@ class BCConvexGIoULoss(nn.Module): Computing the BCConvex GIoU loss between a set of predicted convexes and target convexes. + Args: reduction (str, optional): The reduction method of the loss. Defaults to 'mean'. loss_weight (float, optional): The weight of loss. Defaults to 1.0. + Return: torch.Tensor: Loss tensor. """ @@ -308,9 +312,11 @@ def forward(self, def AspectRatio(gt_rbboxes): - """compute the aspect ratio of all gts + """Compute the aspect ratio of all gts. + Args: gt_rbboxes (torch.Tensor): Groundtruth polygons, shape (k, 8). + Returns: ratios (torch.Tensor): The aspect ratio of gt_rbboxes, shape (k, 1). """ diff --git a/mmrotate/models/losses/smooth_focal_loss.py b/mmrotate/models/losses/smooth_focal_loss.py index f05c9a42c..1f3993a72 100644 --- a/mmrotate/models/losses/smooth_focal_loss.py +++ b/mmrotate/models/losses/smooth_focal_loss.py @@ -15,9 +15,6 @@ def smooth_focal_loss(pred, avg_factor=None): """Smooth Focal Loss proposed in Circular Smooth Label (CSL). - `Circular Smooth Label (CSL) - `_ . - Args: pred (torch.Tensor): The prediction. target (torch.Tensor): The learning label of the prediction. @@ -64,27 +61,29 @@ def smooth_focal_loss(pred, @ROTATED_LOSSES.register_module() class SmoothFocalLoss(nn.Module): + """Smooth Focal Loss. Implementation of `Circular Smooth Label (CSL).`__ + + __ https://link.springer.com/chapter/10.1007/978-3-030-58598-3_40 + + Args: + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float, optional): A balanced form for Focal Loss. + Defaults to 0.25. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + + Returns: + loss (torch.Tensor) + """ def __init__(self, gamma=2.0, alpha=0.25, reduction='mean', loss_weight=1.0): - """Smooth Focal Loss. - - Args: - gamma (float, optional): The gamma for calculating the modulating - factor. Defaults to 2.0. - alpha (float, optional): A balanced form for Focal Loss. - Defaults to 0.25. - reduction (str, optional): The method used to reduce the loss into - a scalar. Defaults to 'mean'. Options are "none", "mean" and - "sum". - loss_weight (float, optional): Weight of loss. Defaults to 1.0. 
-
- Returns:
- loss (torch.Tensor)
- """
super(SmoothFocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
diff --git a/mmrotate/models/necks/re_fpn.py b/mmrotate/models/necks/re_fpn.py
index 21dcbd14a..a907197cd 100644
--- a/mmrotate/models/necks/re_fpn.py
+++ b/mmrotate/models/necks/re_fpn.py
@@ -12,7 +12,31 @@
 
 
 class ConvModule(enn.EquivariantModule):
- """ConvModule."""
+ """ConvModule.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale).
+ kernel_size (int, optional): The size of kernel.
+ stride (int, optional): Stride of the convolution. Default: 1.
+ padding (int or tuple): Zero-padding added to both sides of the input.
+ Default: 0.
+ dilation (int or tuple): Spacing between kernel elements. Default: 1.
+ groups (int): Number of blocked connections from input
+ channels to output channels. Default: 1.
+ bias (bool): If True, adds a learnable bias to the output.
+ Default: False.
+ conv_cfg (dict, optional): Config dict for convolution layer.
+ Default: None.
+ norm_cfg (dict, optional): Config dict for normalization layer.
+ Default: None.
+ activation (str, optional): Activation layer in ConvModule.
+ Default: None.
+ inplace (bool): Can optionally do the operation in-place.
+ order (tuple[str]): The order of conv/norm/activation layers. It is a
+ sequence of "conv", "norm" and "act". Common examples are
+ ("conv", "norm", "act") and ("act", "conv", "norm").
+ """
 
 def __init__(self,
 in_channels,
diff --git a/mmrotate/models/roi_heads/bbox_heads/gv_bbox_head.py b/mmrotate/models/roi_heads/bbox_heads/gv_bbox_head.py
index 47a1f541e..559aea38a 100644
--- a/mmrotate/models/roi_heads/bbox_heads/gv_bbox_head.py
+++ b/mmrotate/models/roi_heads/bbox_heads/gv_bbox_head.py
@@ -391,8 +391,6 @@ def loss(self,
 reduction_override (str, optional): The reduction method used to
 override the original reduction method of the loss.
 Defaults to None.
-
- Returns:
 """
 losses = dict()
 if cls_score is not None:
diff --git a/mmrotate/models/roi_heads/bbox_heads/rotated_bbox_head.py b/mmrotate/models/roi_heads/bbox_heads/rotated_bbox_head.py
index 8275fd5aa..3d0296553 100644
--- a/mmrotate/models/roi_heads/bbox_heads/rotated_bbox_head.py
+++ b/mmrotate/models/roi_heads/bbox_heads/rotated_bbox_head.py
@@ -304,8 +304,6 @@ def loss(self,
 reduction_override (str, optional): The reduction method used to
 override the original reduction method of the loss.
 Defaults to None.
-
- Returns:
 """
 losses = dict()
 if cls_score is not None:
diff --git a/mmrotate/models/roi_heads/gv_ratio_roi_head.py b/mmrotate/models/roi_heads/gv_ratio_roi_head.py
index a2dd6be7d..86c58afd0 100644
--- a/mmrotate/models/roi_heads/gv_ratio_roi_head.py
+++ b/mmrotate/models/roi_heads/gv_ratio_roi_head.py
@@ -102,11 +102,11 @@ def simple_test_bboxes(self,
 Default: False.
 
 Returns:
- tuple[list[Tensor], list[Tensor]]: The first list contains
- the boxes of the corresponding image in a batch, each
- tensor has the shape (num_boxes, 5) and last dimension
- 5 represent (cx, cy, w, h, a, score). Each Tensor
- in the second list is the labels with shape (num_boxes, ).
+ tuple[list[Tensor], list[Tensor]]: The first list contains \
+ the boxes of the corresponding image in a batch, each \
+ tensor has the shape (num_boxes, 6) and last dimension \
+ 6 represent (cx, cy, w, h, a, score). Each Tensor \
+ in the second list is the labels with shape (num_boxes, ). \
 The length of both lists should be equal to batch_size.
 """
diff --git a/mmrotate/models/roi_heads/oriented_standard_roi_head.py b/mmrotate/models/roi_heads/oriented_standard_roi_head.py
index 490b0c82b..57dfcba46 100644
--- a/mmrotate/models/roi_heads/oriented_standard_roi_head.py
+++ b/mmrotate/models/roi_heads/oriented_standard_roi_head.py
@@ -122,11 +122,11 @@ def simple_test_bboxes(self,
 Default: False.
 
 Returns:
- tuple[list[Tensor], list[Tensor]]: The first list contains
- the boxes of the corresponding image in a batch, each
- tensor has the shape (num_boxes, 5) and last dimension
- 5 represent (cx, cy, w, h, a, score). Each Tensor
- in the second list is the labels with shape (num_boxes, ).
+ tuple[list[Tensor], list[Tensor]]: The first list contains \
+ the boxes of the corresponding image in a batch, each \
+ tensor has the shape (num_boxes, 6) and last dimension \
+ 6 represent (cx, cy, w, h, a, score). Each Tensor \
+ in the second list is the labels with shape (num_boxes, ). \
 The length of both lists should be equal to batch_size.
 """
diff --git a/mmrotate/models/roi_heads/roi_extractors/rotate_single_level_roi_extractor.py b/mmrotate/models/roi_heads/roi_extractors/rotate_single_level_roi_extractor.py
index 06cca05c1..542a9c4e1 100644
--- a/mmrotate/models/roi_heads/roi_extractors/rotate_single_level_roi_extractor.py
+++ b/mmrotate/models/roi_heads/roi_extractors/rotate_single_level_roi_extractor.py
@@ -52,7 +52,7 @@ def build_roi_layers(self, layer_cfg, featmap_strides):
 coordinate system.
 
 Returns:
- nn.ModuleList: The RoI extractor modules for each level feature
+ nn.ModuleList: The RoI extractor modules for each level feature \
 map.
 """
diff --git a/mmrotate/models/roi_heads/rotate_standard_roi_head.py b/mmrotate/models/roi_heads/rotate_standard_roi_head.py
index 15caa8b1d..fb1548024 100644
--- a/mmrotate/models/roi_heads/rotate_standard_roi_head.py
+++ b/mmrotate/models/roi_heads/rotate_standard_roi_head.py
@@ -279,12 +279,13 @@ def simple_test_bboxes(self,
 rcnn_test_cfg (obj:`ConfigDict`): `test_cfg` of R-CNN.
 rescale (bool): If True, return boxes in original image space.
 Default: False.
+
 Returns:
- tuple[list[Tensor], list[Tensor]]: The first list contains
- the boxes of the corresponding image in a batch, each
- tensor has the shape (num_boxes, 5) and last dimension
- 5 represent (tl_x, tl_y, br_x, br_y, score). Each Tensor
- in the second list is the labels with shape (num_boxes, ).
+ tuple[list[Tensor], list[Tensor]]: The first list contains \
+ the boxes of the corresponding image in a batch, each \
+ tensor has the shape (num_boxes, 5) and last dimension \
+ 5 represent (tl_x, tl_y, br_x, br_y, score). Each Tensor \
+ in the second list is the labels with shape (num_boxes, ). \
 The length of both lists should be equal to batch_size.
 """
diff --git a/mmrotate/models/utils/enn.py b/mmrotate/models/utils/enn.py
index 229fb50b7..ddacbc04f 100644
--- a/mmrotate/models/utils/enn.py
+++ b/mmrotate/models/utils/enn.py
@@ -42,7 +42,21 @@ def ennConv(inplanes,
 groups=1,
 bias=False,
 dilation=1):
- """enn convolution."""
+ """enn convolution.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale).
+ kernel_size (int, optional): The size of kernel.
+ stride (int, optional): Stride of the convolution. Default: 1.
+ padding (int or tuple): Zero-padding added to both sides of the input.
+ Default: 0.
+ groups (int): Number of blocked connections from input
+ channels to output channels. Default: 1.
+ bias (bool): If True, adds a learnable bias to the output.
+ Default: False.
+ dilation (int or tuple): Spacing between kernel elements. Default: 1.
+ """
 in_type = build_enn_divide_feature(inplanes)
 out_type = build_enn_divide_feature(outplanes)
 return enn.R2Conv(
@@ -67,7 +81,22 @@ def ennTrivialConv(inplanes,
 groups=1,
 bias=False,
 dilation=1):
- """enn convolution with trivial input featurn."""
+ """enn convolution with trivial input feature.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale).
+ kernel_size (int, optional): The size of kernel.
+ stride (int, optional): Stride of the convolution. Default: 1.
+ padding (int or tuple): Zero-padding added to both sides of the input.
+ Default: 0.
+ groups (int): Number of blocked connections from input
+ channels to output channels. Default: 1.
+ bias (bool): If True, adds a learnable bias to the output.
+ Default: False.
+ dilation (int or tuple): Spacing between kernel elements. Default: 1.
+ """
+
 in_type = build_enn_trivial_feature(inplanes)
 out_type = build_enn_divide_feature(outplanes)
 return enn.R2Conv(
@@ -95,7 +124,17 @@ def ennAvgPool(inplanes,
 stride=None,
 padding=0,
 ceil_mode=False):
- """enn Average Pooling."""
+ """enn Average Pooling.
+
+ Args:
+ inplanes (int): The number of input channels.
+ kernel_size (int, optional): The size of kernel.
+ stride (int, optional): Stride of the pooling. Default: None.
+ padding (int or tuple): Zero-padding added to both sides of the input.
+ Default: 0.
+ ceil_mode (bool, optional): if True, keep information in the corners of
+ the feature map.
+ """
 in_type = build_enn_divide_feature(inplanes)
 return enn.PointwiseAvgPool(
 in_type,
diff --git a/mmrotate/models/utils/orconv.py b/mmrotate/models/utils/orconv.py
index bd6473c31..5e00ac7b7 100644
--- a/mmrotate/models/utils/orconv.py
+++ b/mmrotate/models/utils/orconv.py
@@ -11,7 +11,23 @@
 
 
 class ORConv2d(Conv2d):
- """Oriented 2-D convolution."""
+ """Oriented 2-D convolution.
+
+ Args:
+ in_channels (List[int]): Number of input channels per scale.
+ out_channels (int): Number of output channels (used at each scale).
+ kernel_size (int, optional): The size of kernel.
+ arf_config (tuple, optional): a tuple consisting of nOrientation and
+ nRotation.
+ stride (int, optional): Stride of the convolution. Default: 1.
+ padding (int or tuple): Zero-padding added to both sides of the input.
+ Default: 0.
+ dilation (int or tuple): Spacing between kernel elements. Default: 1.
+ groups (int): Number of blocked connections from input
+ channels to output channels. Default: 1.
+ bias (bool): If True, adds a learnable bias to the output.
+ Default: False.
+ """
 
 def __init__(self,
 in_channels,
diff --git a/mmrotate/models/utils/ripool.py b/mmrotate/models/utils/ripool.py
index ee10f0879..804b410fe 100644
--- a/mmrotate/models/utils/ripool.py
+++ b/mmrotate/models/utils/ripool.py
@@ -3,7 +3,12 @@
 
 
 class RotationInvariantPooling(nn.Module):
- """Rotating invariant pooling module."""
+ """Rotation-invariant pooling module.
+
+ Args:
+ nInputPlane (int): The number of input planes.
+ nOrientation (int, optional): The number of oriented channels.
+ """ def __init__(self, nInputPlane, nOrientation=8): super(RotationInvariantPooling, self).__init__() diff --git a/mmrotate/utils/logger.py b/mmrotate/utils/logger.py index e3f1c8e3a..705107bbc 100644 --- a/mmrotate/utils/logger.py +++ b/mmrotate/utils/logger.py @@ -11,6 +11,7 @@ def get_root_logger(log_file=None, log_level=logging.INFO): log_file (str, optional): File path of log. Defaults to None. log_level (int, optional): The level of logger. Defaults to logging.INFO. + Returns: :obj:`logging.Logger`: The obtained logger """ diff --git a/mmrotate/utils/misc.py b/mmrotate/utils/misc.py index 0cc219410..db6873669 100644 --- a/mmrotate/utils/misc.py +++ b/mmrotate/utils/misc.py @@ -6,12 +6,15 @@ def find_latest_checkpoint(path, suffix='pth'): """Find the latest checkpoint from the working directory. + Args: path(str): The path to find checkpoints. suffix(str): File extension. Defaults to pth. + Returns: latest_path(str | None): File path of the latest checkpoint. + References: .. [1] https://github.com/microsoft/SoftTeacher /blob/main/ssod/utils/patch.py diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt index 0b7bd7320..3b938773d 100644 --- a/requirements/readthedocs.txt +++ b/requirements/readthedocs.txt @@ -1,3 +1,4 @@ +e2cnn mmcv mmdet torch diff --git a/tools/analysis_tools/benchmark.py b/tools/analysis_tools/benchmark.py index aafcf8908..0cab8c317 100644 --- a/tools/analysis_tools/benchmark.py +++ b/tools/analysis_tools/benchmark.py @@ -61,18 +61,20 @@ def parse_args(): def measure_inference_speed(cfg, checkpoint, max_iter, log_interval, is_fuse_conv_bn, use_fp16): - """ inference speed statistics - Args: - cfg (object): Test config object. - checkpoint (str): Checkpoint file path. - max_iter (int): Num of max iter. - log_interval (int): Interval of logging. - is_fuse_conv_bn (bool): Whether to fuse conv and bn, - this will slightly increase the inference speed - use_fp16 (bool): Whether to use fp16 to inference. - Returns: - fps (float): Average speed of inference (fps). - """ + """Inference speed statistics. + + Args: + cfg (object): Test config object. + checkpoint (str): Checkpoint file path. + max_iter (int): Num of max iter. + log_interval (int): Interval of logging. + is_fuse_conv_bn (bool): Whether to fuse conv and bn, + this will slightly increase the inference speed + use_fp16 (bool): Whether to use fp16 to inference. + + Returns: + fps (float): Average speed of inference (fps). + """ # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True @@ -155,21 +157,23 @@ def repeat_measure_inference_speed(cfg, is_fuse_conv_bn, use_fp16, repeat_num=1): - """ repeat to inference several times and take the average - Args: - cfg (object): Test config object. - checkpoint (str): Checkpoint file path. - max_iter (int): Num of max iter. - log_interval (int): Interval of logging. - is_fuse_conv_bn (bool): Whether to fuse conv and bn, - this will slightly increase the inference speed - use_fp16 (bool): Whether to use fp16 to inference. - repeat_num (int): Number of repeat times of measurement - for averaging the results. - Returns: - fps (float of list(float)): Inference speed(fps) or - list of inference speed(fps) for repeating measurements. - """ + """Repeat to inference several times and take the average. + + Args: + cfg (object): Test config object. + checkpoint (str): Checkpoint file path. + max_iter (int): Num of max iter. + log_interval (int): Interval of logging. 
+ is_fuse_conv_bn (bool): Whether to fuse conv and bn, + this will slightly increase the inference speed + use_fp16 (bool): Whether to use fp16 to inference. + repeat_num (int): Number of repeat times of measurement + for averaging the results. + + Returns: + fps (float of list(float)): Inference speed(fps) or + list of inference speed(fps) for repeating measurements. + """ assert repeat_num >= 1 fps_list = [] From 44b0aecb05453dae9cf6b94ce03bce0c97d3b8de Mon Sep 17 00:00:00 2001 From: jbwang1997 Date: Fri, 1 Apr 2022 22:44:59 +0800 Subject: [PATCH 12/12] Bump version to v0.2.0 (#170) * Bump vision to v0.2.0 * Update vision in changelog * Update changelog * Update changelog.md * Update changelog.md * Update pr 175 --- README.md | 12 ++++-------- README_zh-CN.md | 12 ++++-------- docs/en/changelog.md | 27 +++++++++++++++++++++++++++ docs/en/install.md | 1 + docs/zh_cn/install.md | 1 + mmrotate/version.py | 2 +- 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 048206932..0fc91566d 100644 --- a/README.md +++ b/README.md @@ -64,14 +64,10 @@ https://user-images.githubusercontent.com/10410257/154433305-416d129b-60c8-44c7- ## Changelog -**0.1.1** was released in 14/3/2022: - -- Add [colab tutorial](demo/MMRotate_Tutorial.ipynb) for beginners (#66) -- Support [huge image inference](deom/huge_image_demo.py) (#34) -- Support HRSC Dataset (#96) -- Support mixed precision training (#72) -- Add inference speed statistics [tool](tools/analysis_tools/benchmark.py) (#86) -- Add confusion matrix analysis [tool](tools/analysis_tools/confusion_matrix.py) (#93) +**0.2.0** was released in 30/3/2022: + +- Support Circular Smooth Label (CSL, ECCV'20) (#153) +- Add [browse_dataset](tools/misc/browse_dataset.py) tool (#98) Please refer to [changelog.md](docs/en/changelog.md) for details and release history. diff --git a/README_zh-CN.md b/README_zh-CN.md index e6cd17f16..c2ba863e4 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -61,14 +61,10 @@ https://user-images.githubusercontent.com/10410257/154433305-416d129b-60c8-44c7- ## 更新日志 -最新的 **0.1.1** 版本已经在 2022.03.14 发布: - -- 为初学者添加了 [Colab 教程](demo/MMRotate_Tutorial.ipynb) -- 支持了[大图推理](deom/huge_image_demo.py) -- 支持了 HRSC 遥感数据集 -- 支持了混合精度训练 -- 添加了推理速度[统计工具](tools/analysis_tools/benchmark.py) -- 添加了混淆矩阵[分析工具](tools/analysis_tools/confusion_matrix.py). +最新的 **0.2.0** 版本已经在 2022.03.14 发布: + +- 支持了 Circular Sommth Label (CSL, ECCV'20) 模型 (#153) +- 增加了[数据集浏览工具](tools/misc/browse_dataset.py) (#98) 如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/en/changelog.md)。 diff --git a/docs/en/changelog.md b/docs/en/changelog.md index 3de1f066c..f2a91c557 100644 --- a/docs/en/changelog.md +++ b/docs/en/changelog.md @@ -1,5 +1,32 @@ ## Changelog +### v0.2.0 (30/3/2022) + +#### New Features + +- Support Circular Smooth Label (CSL, ECCV'20) (#153) +- Support multiple machines dist_train (#143) +- Add [browse_dataset](tools/misc/browse_dataset.py) tool (#98) +- Add [gather_models](.dev_scripts/gather_models.py) script (#162) + +#### Bug Fixes + +- Remove in-place operations in rbbox_overlaps (#155) +- Fix bug in docstring. (#137) +- Fix bug in HRSCDataset with `clasesswise=ture` (#175) + +#### Improvements + +- Add Chinese translation of `docs/zh_cn/tutorials/customize_dataset.md` (#65) +- Add different seeds to different ranks (#102) +- Update from-scratch install script in install.md (#166) +- Improve the arguments of all mmrotate scripts (#168) + +#### Contributors + +A total of 6 developers contributed to this release. 
+Thanks @zytx121 @yangxue0827 @ZwwWayne @jbwang1997 @canoe-Z @matrixgame2018 + ### v0.1.1 (14/3/2022) #### New Features diff --git a/docs/en/install.md b/docs/en/install.md index ad1bf281a..b70aa5034 100644 --- a/docs/en/install.md +++ b/docs/en/install.md @@ -14,6 +14,7 @@ Compatible MMCV, MMClassification and MMDetection versions are shown as below. P | MMRotate version | MMCV version | MMDetection version | |:-------------------:|:-----------------:|:---------------------------------:| | master | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | +| 0.1.1 | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | | 0.1.0 | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | **Note:** You need to run `pip uninstall mmcv` first if you have mmcv installed. diff --git a/docs/zh_cn/install.md b/docs/zh_cn/install.md index e1ae9493d..bc9a5432a 100644 --- a/docs/zh_cn/install.md +++ b/docs/zh_cn/install.md @@ -14,6 +14,7 @@ MMRotate 和 MMCV, MMDet 版本兼容性如下所示,需要安装正确的版 | MMRotate 版本 | MMCV 版本 | MMDetection 版本 | |:-------------------:|:-----------------:|:---------------------------------:| | master | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | +| 0.1.1 | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | | 0.1.0 | mmcv-full>=1.4.5 | mmdet >= 2.19.0 | **注意:**如果已经安装了 mmcv,首先需要使用 `pip uninstall mmcv` 卸载已安装的 mmcv,如果同时安装了 mmcv 和 mmcv-full,将会报 `ModuleNotFoundError` 错误。 diff --git a/mmrotate/version.py b/mmrotate/version.py index ac2265ae6..277ceeea9 100644 --- a/mmrotate/version.py +++ b/mmrotate/version.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -__version__ = '0.1.1' +__version__ = '0.2.0' short_version = __version__
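To illustrate the `points_center_pts` behaviour documented in the patch above, here is a minimal standalone sketch. It is not the library code; the function name `center_of_pointsets` and the random input are hypothetical. It shows why the mean centre of a (k, 18) point-set tensor comes out with shape (k, 2):

```python
import torch


def center_of_pointsets(rpoints, y_first=True):
    """Sketch: average the 9 (x, y) points of each point set."""
    pts = rpoints.reshape(-1, 9, 2)  # (k, 18) -> (k, 9, 2)
    if y_first:
        # Points are stored as (y, x) pairs; swap to (x, y) before averaging.
        pts = pts[..., [1, 0]]
    return pts.mean(dim=1)  # one (x, y) centre per point set -> (k, 2)


rpoints = torch.rand(4, 18)  # 4 hypothetical point sets of 9 points each
print(center_of_pointsets(rpoints).shape)  # torch.Size([4, 2])
```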