Skip to content

Commit e396db7

Browse files
committed
pre-release pose
1 parent 5a72c32 commit e396db7

9 files changed

+174
-20
lines changed

pose_estimation/README.md

+8-6
Original file line number | Diff line number | Diff line change
@@ -35,12 +35,14 @@ bash get_flops.sh /path/to/config --shape 256 192
3535

3636
| Backbone | Pretrain | Input Size | Params | FLOPs | Epoch | mAP | Config | Download |
3737
|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
38-
| MogaNet-T | ImageNet-1K | 256x192 | 8.1M | 2.15G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_256x192.py) | log / model |
39-
| MogaNet-T | ImageNet-1K | 384x288 | 8.1M | 4.85G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_384x288.py) | log / model |
40-
| MogaNet-S | ImageNet-1K | 256x192 | 29.0M | 5.99G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_s_coco_256x192.py) | log / model |
41-
| MogaNet-S | ImageNet-1K | 384x288 | 29.0M | 13.48G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_s_coco_384x288.py) | log / model |
42-
| MogaNet-B | ImageNet-1K | 256x192 | 47.4M | 10.85G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_256x192.py) | log / model |
43-
| MogaNet-B | ImageNet-1K | 384x288 | 47.4M | 24.42G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_384x288.py) | log / model |
38+
| MogaNet-XT | ImageNet-1K | 256x192 | 5.6M | 1.84G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_xt_coco_256x192.py) | log / model |
39+
| MogaNet-XT | ImageNet-1K | 384x288 | 5.6M | 4.15G | 210 | | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_xt_coco_384x288.py) | log / model |
40+
| MogaNet-T | ImageNet-1K | 256x192 | 8.1M | 2.15G | 210 | 73.2 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_256x192.py) | log / model |
41+
| MogaNet-T | ImageNet-1K | 384x288 | 8.1M | 4.85G | 210 | 75.7 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_384x288.py) | log / model |
42+
| MogaNet-S | ImageNet-1K | 256x192 | 29.0M | 5.99G | 210 | 74.8 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_s_coco_256x192.py) | log / model |
43+
| MogaNet-S | ImageNet-1K | 384x288 | 29.0M | 13.48G | 210 | 76.4 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_s_coco_384x288.py) | log / model |
44+
| MogaNet-B | ImageNet-1K | 256x192 | 47.4M | 10.85G | 210 | 75.3 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_256x192.py) | log / model |
45+
| MogaNet-B | ImageNet-1K | 384x288 | 47.4M | 24.42G | 210 | 77.3 | [config](https://github.com/Westlake-AI/MogaNet/tree/main/pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_384x288.py) | log / model |
4446

4547
## Training
4648

pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_256x192.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
arch="base", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.2,
41+
drop_path_rate=0.3,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),

pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_b_coco_384x288.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
arch="base", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.2,
41+
drop_path_rate=0.3,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),

pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_s_coco_384x288.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
optimizer = dict(
88
type='Adam',
9-
lr=1e-3, # lr=5e-4,
9+
lr=1e-3,
1010
)
1111
optimizer_config = dict(grad_clip=None)
1212
# learning policy
@@ -38,7 +38,7 @@
3838
arch="small", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.1,
41+
drop_path_rate=0.2,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),

pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_256x192.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
arch="tiny", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.1,
41+
drop_path_rate=0.15,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),

pose_estimation/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/moganet_t_coco_384x288.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
arch="tiny", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.1,
41+
drop_path_rate=0.15,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),
@@ -148,3 +148,6 @@
148148
pipeline=test_pipeline,
149149
dataset_info={{_base_.dataset_info}}),
150150
)
151+
152+
# fp16 settings
153+
fp16 = dict(loss_scale='dynamic')
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,150 @@
1+
_base_ = [
2+
'../../../../_base_/default_runtime.py',
3+
'../../../../_base_/datasets/coco.py'
4+
]
5+
evaluation = dict(interval=10, metric='mAP', save_best='AP')
6+
7+
optimizer = dict(
8+
type='Adam',
9+
lr=1e-3,
10+
)
11+
optimizer_config = dict(grad_clip=None)
12+
# learning policy
13+
lr_config = dict(
14+
policy='step',
15+
warmup='linear',
16+
warmup_iters=500,
17+
warmup_ratio=0.001,
18+
step=[170, 200])
19+
total_epochs = 210
20+
channel_cfg = dict(
21+
num_output_channels=17,
22+
dataset_joints=17,
23+
dataset_channel=[
24+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
25+
],
26+
inference_channel=[
27+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
28+
])
29+
30+
# model settings
31+
norm_cfg = dict(type='SyncBN', requires_grad=True)
32+
model = dict(
33+
type='TopDown',
34+
pretrained="https://github.com/Westlake-AI/MogaNet/releases/download/"
35+
"moganet-in1k-weights/moganet_xtiny_sz224_8xbs128_ep300.pth.tar",
36+
backbone=dict(
37+
type='MogaNet_feat',
38+
arch="x-tiny", # modify 'arch' for various architectures
39+
init_value=1e-5,
40+
frozen_stages=1,
41+
drop_path_rate=0.1,
42+
stem_norm_cfg=norm_cfg,
43+
conv_norm_cfg=norm_cfg,
44+
out_indices=(0, 1, 2, 3),
45+
),
46+
keypoint_head=dict(
47+
type='TopdownHeatmapSimpleHead',
48+
in_channels=192, # modify 'in_channels' for various architectures
49+
out_channels=channel_cfg['num_output_channels'],
50+
in_index=3,
51+
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
52+
train_cfg=dict(),
53+
test_cfg=dict(
54+
flip_test=True,
55+
post_process='default',
56+
shift_heatmap=True,
57+
modulate_kernel=11))
58+
59+
data_cfg = dict(
60+
image_size=[192, 256],
61+
heatmap_size=[48, 64],
62+
num_output_channels=channel_cfg['num_output_channels'],
63+
num_joints=channel_cfg['dataset_joints'],
64+
dataset_channel=channel_cfg['dataset_channel'],
65+
inference_channel=channel_cfg['inference_channel'],
66+
soft_nms=False,
67+
nms_thr=1.0,
68+
oks_thr=0.9,
69+
vis_thr=0.2,
70+
use_gt_bbox=False,
71+
det_bbox_thr=0.0,
72+
bbox_file='data/coco/person_detection_results/'
73+
'COCO_val2017_detections_AP_H_56_person.json',
74+
)
75+
76+
train_pipeline = [
77+
dict(type='LoadImageFromFile'),
78+
dict(type='TopDownGetBboxCenterScale', padding=1.25),
79+
dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
80+
dict(type='TopDownRandomFlip', flip_prob=0.5),
81+
dict(
82+
type='TopDownHalfBodyTransform',
83+
num_joints_half_body=8,
84+
prob_half_body=0.3),
85+
dict(
86+
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
87+
dict(type='TopDownAffine'),
88+
dict(type='ToTensor'),
89+
dict(
90+
type='NormalizeTensor',
91+
mean=[0.485, 0.456, 0.406],
92+
std=[0.229, 0.224, 0.225]),
93+
dict(type='TopDownGenerateTarget', sigma=2),
94+
dict(
95+
type='Collect',
96+
keys=['img', 'target', 'target_weight'],
97+
meta_keys=[
98+
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
99+
'rotation', 'bbox_score', 'flip_pairs'
100+
]),
101+
]
102+
103+
val_pipeline = [
104+
dict(type='LoadImageFromFile'),
105+
dict(type='TopDownGetBboxCenterScale', padding=1.25),
106+
dict(type='TopDownAffine'),
107+
dict(type='ToTensor'),
108+
dict(
109+
type='NormalizeTensor',
110+
mean=[0.485, 0.456, 0.406],
111+
std=[0.229, 0.224, 0.225]),
112+
dict(
113+
type='Collect',
114+
keys=['img'],
115+
meta_keys=[
116+
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
117+
'flip_pairs'
118+
]),
119+
]
120+
121+
test_pipeline = val_pipeline
122+
123+
data_root = 'data/coco'
124+
data = dict(
125+
samples_per_gpu=32,
126+
workers_per_gpu=2,
127+
val_dataloader=dict(samples_per_gpu=32),
128+
test_dataloader=dict(samples_per_gpu=32),
129+
train=dict(
130+
type='TopDownCocoDataset',
131+
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
132+
img_prefix=f'{data_root}/train2017/',
133+
data_cfg=data_cfg,
134+
pipeline=train_pipeline,
135+
dataset_info={{_base_.dataset_info}}),
136+
val=dict(
137+
type='TopDownCocoDataset',
138+
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
139+
img_prefix=f'{data_root}/val2017/',
140+
data_cfg=data_cfg,
141+
pipeline=val_pipeline,
142+
dataset_info={{_base_.dataset_info}}),
143+
test=dict(
144+
type='TopDownCocoDataset',
145+
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
146+
img_prefix=f'{data_root}/val2017/',
147+
data_cfg=data_cfg,
148+
pipeline=test_pipeline,
149+
dataset_info={{_base_.dataset_info}}),
150+
)
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66

77
optimizer = dict(
88
type='Adam',
9-
lr=1e-3, # lr=5e-4,
9+
lr=1e-3,
1010
)
1111
optimizer_config = dict(grad_clip=None)
1212
# learning policy
@@ -32,20 +32,20 @@
3232
model = dict(
3333
type='TopDown',
3434
pretrained="https://github.com/Westlake-AI/MogaNet/releases/download/"
35-
"moganet-in1k-weights/moganet_small_sz224_8xbs128_ep300.pth.tar",
35+
"moganet-in1k-weights/moganet_xtiny_sz224_8xbs128_ep300.pth.tar",
3636
backbone=dict(
3737
type='MogaNet_feat',
38-
arch="small", # modify 'arch' for various architectures
38+
arch="x-tiny", # modify 'arch' for various architectures
3939
init_value=1e-5,
4040
frozen_stages=1,
41-
drop_path_rate=0.2,
41+
drop_path_rate=0.1,
4242
stem_norm_cfg=norm_cfg,
4343
conv_norm_cfg=norm_cfg,
4444
out_indices=(0, 1, 2, 3),
4545
),
4646
keypoint_head=dict(
4747
type='TopdownHeatmapSimpleHead',
48-
in_channels=512, # modify 'in_channels' for various architectures
48+
in_channels=192, # modify 'in_channels' for various architectures
4949
out_channels=channel_cfg['num_output_channels'],
5050
in_index=3,
5151
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
@@ -148,6 +148,3 @@
148148
pipeline=test_pipeline,
149149
dataset_info={{_base_.dataset_info}}),
150150
)
151-
152-
# # fp16 settings
153-
# fp16 = dict(loss_scale='dynamic')

train.py

+2
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,8 @@
181181
help='patience epochs for Plateau LR scheduler (default: 10')
182182
parser.add_argument('--decay_rate', '--dr', type=float, default=0.1, metavar='RATE',
183183
help='LR decay rate (default: 0.1)')
184+
parser.add_argument('--update_freq', type=int, default=1,
185+
help='gradient accumulation intervals (default: 1)')
184186

185187
# Augmentation & regularization parameters
186188
parser.add_argument('--no_aug', action='store_true', default=False,

0 commit comments

Comments
 (0)