| SHA1 | Message | Date |
|---|---|---|
| ad0a705489 | !2952 update release notes for r0.3.1<br>Merge pull request !2952 from guozhijian/update_r0.3.1_release_info | 5 years ago |
| d3fbc1523b | update r0.3.1 release notes | 5 years ago |
| 1c9ed09fd0 | !2812 Remove submodule akg.<br>Merge pull request !2812 from TronZhang/r0.3_no_akg | 5 years ago |
| 1ccedcde65 | !2826 add libtiff notice info to r0.3<br>Merge pull request !2826 from xulei/r0.3 | 5 years ago |
| 921e7de987 | add libtiff notice to r0.3 | 5 years ago |
| d5d9e92336 | remove submodule akg | 5 years ago |
| 9343746ef7 | !2607 Modify long description format of whl package<br>Merge pull request !2607 from zhoufeng/modify-long-description-format | 5 years ago |
| e2593466fc | Modify long description format of whl package<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 5 years ago |
| 2e5a76e0df | !2606 Update version to 0.3.1<br>Merge pull request !2606 from zhoufeng/update-version-to-0.3.1 | 5 years ago |
| 067b619034 | Update version to 0.3.1<br>Signed-off-by: zhoufeng <zhoufeng54@huawei.com> | 5 years ago |
| 5e5c66e300 | !2540 Move LayerNormGrad split pass ahead of kernel select<br>Merge pull request !2540 from huanghui/r0.3 | 5 years ago |
| d8969d243e | !2568 update run_train.sh of mobilenetv2_quant && resnet50_quant<br>Merge pull request !2568 from wandongdong/r0.3 | 5 years ago |
| f03e88c26f | update run_train.sh | 5 years ago |
| 8c30045178 | !2553 add mindrecord to mobilenetv2_quant && resnet50_quant<br>Merge pull request !2553 from wandongdong/r0.3 | 5 years ago |
| 095d86e16f | !2556 fix: change field name from 'data' to 'image' - sync<br>Merge pull request !2556 from guozhijian/fix_field_to_image_in_mindrecord | 5 years ago |
| 862bc22b38 | fix: change field name from data to image in mindrecord for imagenet | 5 years ago |
| a6590d1866 | add mindrecord | 5 years ago |
| 3f0a350d68 | !2510 THOR ops modified<br>Merge pull request !2510 from zongha/r0.3 | 5 years ago |
| e936d5cd4b | place layernormgrad split pass before kernel select | 5 years ago |
| 46700bec69 | !2500 add output activation quant in mobilenetv2 and resnet50<br>Merge pull request !2500 from chenzupeng/r0.3 | 5 years ago |
| 0920094f81 | fine img2col_impl<br>2nd 3rd | 5 years ago |
| bf0673003b | add dense quant activation fake | 5 years ago |
| 3a40ac6521 | !2435 fix perchannel num_channels not set bug and adjust quant.py params order<br>Merge pull request !2435 from 王东旭/r0.3 | 5 years ago |
| f110c7616b | fix perchannel num_channels not set bug and adjust quant.py params order | 5 years ago |
| 3e3cbbba0f | !2447 async save checkpoint to file<br>Merge pull request !2447 from changzherui/asyn_ckpt_r0.3 | 5 years ago |
| e368d0524b | !2455 add perchannel quant train<br>Merge pull request !2455 from chenzupeng/r0.3 | 5 years ago |
| 966f05231d | async save checkpoint to file, merge to r0.3 | 5 years ago |
| e9ee59c7ad | add perchannel quant train | 5 years ago |
| 4bbd4414c4 | !1734 deal with resnet50_THOR train print many ERROR&WARNING log and produce many ir file<br>Merge pull request !1734 from zongha/r0.3 | 5 years ago |
| cf7c60a5ed | !2431 update README<br>Merge pull request !2431 from panfengfeng/update_readme | 5 years ago |
| 87cc57d3aa | update readme | 5 years ago |
| 2d35511d7c | !2423 Adapt module akg's change<br>Merge pull request !2423 from TronZhang/r0.3 | 5 years ago |
| fded8732ea | !2422 reshuffle all data and shard again when use MindDataset distribute<br>Merge pull request !2422 from guozhijian/add_full_reshuffle_per_epoch | 5 years ago |
| 109a21d520 | Adapt change of module akg | 5 years ago |
| 0f380b559e | enhance: add full reshuffle per epoch and fix: random_device failed | 5 years ago |
| e519317622 | !2407 change Q.BNTrainingReduce() to P.BNTrainingReduce()<br>Merge pull request !2407 from chenzhongming/r0.3 | 5 years ago |
| 2fab2492bc | change Q.BNTrainingReduce() to P.BNTrainingReduce() | 5 years ago |
| 11caa3aec8 | !2340 fix random_crop_resize_2<br>Merge pull request !2340 from panfengfeng/fix_random_crop_resize_2 | 5 years ago |
| 25827a8619 | fix random_crop_and_resize | 5 years ago |
| 91c856e5ee | !2334 remove dataset send from data exec for r0.3<br>Merge pull request !2334 from wangnan39/do_not_send_data_duriing_model_init | 5 years ago |
| 20049bbea6 | send data after model init | 5 years ago |
| cb6211f25d | !2291 remove _quant_op.py from __init__.py<br>Merge pull request !2291 from chenzhongming/r0.3 | 5 years ago |
| 24d61337c0 | !2302 improve summary performance<br>Merge pull request !2302 from Margaret_wangrui/r0.3 | 5 years ago |
| 69b32e4dca | improve summary performance | 5 years ago |
| 9be52e0a1b | remove _quant_op.py from __init__.py | 5 years ago |
| 53d7e622f9 | !2261 modify log level from warning to info<br>Merge pull request !2261 from jjfeing/r0.3_log | 5 years ago |
| 1127ace7ec | !2228 cache get_dataset_size value<br>Merge pull request !2228 from yanghaitao/yht_get_dataset_size | 5 years ago |
| ab39708929 | !2099 fix summary nodes memory reuse refcount<br>Merge pull request !2099 from laiyongqiang/r0.3_summary | 5 years ago |
| 147d0cde07 | !2277 fix arithmetic simplify<br>Merge pull request !2277 from xianwz/r0.3 | 5 years ago |
| 9c70861343 | fix arithmetic simplify | 5 years ago |
| e78e819b7c | modify log level from warning to info | 5 years ago |
| f3bb991ce9 | !2232 split correction_mul ops<br>Merge pull request !2232 from wandongdong/r0.3 | 5 years ago |
| dcb90588b0 | !2248 bind summary nodes to KernelGraph in order to memory reuse<br>Merge pull request !2248 from Margaret_wangrui/r0.3 | 5 years ago |
| c742384a39 | split correction_mul op | 5 years ago |
| 038040750d | store get dataset size | 5 years ago |
| 476671b1cf | !2196 fix log level too high: conversion of const tensor is normal<br>Merge pull request !2196 from TronZhang/fix_log_level | 5 years ago |
| c749f513ac | !2195 [r0.3 branch] fix FakeQuantPerLayer/FakeQuantPerLayerGrad symmetric=True calculation error bug<br>Merge pull request !2195 from 王东旭/r0.3 | 5 years ago |
| 7995189c72 | fix FakeQuantPerLayer/FakeQuantPerLayerGrad symmetric bug and remove BNTrainingReduceGrad/BNTrainingUpdateGrad | 5 years ago |
| 6f5303f0d9 | bind summary nodes to KernelGraph in order to memory reuse | 5 years ago |
| ac7197d33e | fix log level: const tensor conversion is normal | 5 years ago |
| 8d0691aaf9 | fix summary nodes memory reuse refcount | 5 years ago |
| 1e90e7be05 | !2172 fix some info<br>Merge pull request !2172 from guozhijian/sync_fix_info | 5 years ago |
| 5e2953247f | fix: verify info | 5 years ago |
| ff500c678e | !2122 add set_dataset_size for MindDataset<br>Merge pull request !2122 from guozhijian/add_set_dataset_size_for_minddataset | 5 years ago |
| 488b74e92f | 1. add set_dataset_size for MindDataset 2. modify parameter dupe_factor from 5 to 10 | 5 years ago |
| fba21459a7 | !2115 change readme.md<br>Merge pull request !2115 from chenzhongming/r0.3 | 5 years ago |
| 6d04e1a8e5 | !2115 change readme.md<br>Merge pull request !2115 from chenzhongming/r0.3 | 5 years ago |
| d6bd690d34 | change readme.md | 5 years ago |
| 9fc00ca521 | !2031 add sync between hcom<br>Merge pull request !2031 from gukecai/add-event-for-hcom | 5 years ago |
| 7c77bb8782 | !2104 change mobilenet V2 readme.md<br>Merge pull request !2104 from chenzhongming/r0.3 | 5 years ago |
| 077d21f055 | change mobilenet V2 readme. | 5 years ago |
| da9530f7f7 | !2090 resnet quant dataset aug change<br>Merge pull request !2090 from panfengfeng/resnet_quant_data_aug_change | 5 years ago |
| 690db9a515 | resnet_quant data aug change | 5 years ago |
| 653519630a | !2079 Feat(GraphKernel): Init GraphKernel.<br>Merge pull request !2079 from gongchen/r0.3_graph_kernel_clean | 5 years ago |
| 3c2f4df87c | !2087 data aug changes from c to py<br>Merge pull request !2087 from panfengfeng/mobilenetv2_data_aug_change | 5 years ago |
| 62fae9befa | !2082 MindDataset with padded mode print reshuffle error info<br>Merge pull request !2082 from guozhijian/fix_padded_log_error | 5 years ago |
| e20d687e7a | using py_transform for data aug. | 5 years ago |
| 23d103a122 | !2085 remove unused code in quant train<br>Merge pull request !2085 from chenzupeng/r0.3 | 5 years ago |
| 52a90f2587 | remove unused code in quant train | 5 years ago |
| e21a0aad69 | !2073 add resnet50 quant model<br>Merge pull request !2073 from wandongdong/r0.3 | 5 years ago |
| 13a2d6d49e | Init GraphKernel.<br>- It provides a unified style to express graph and kernel for user. - It provides a unified IR to represent graph and kernel for developer. - It breaks the boundary between graph and kernel. - It provides more opportunities to do compile optimization. | 5 years ago |
| f3ebc7319c | fix: MindDataset padded log error | 5 years ago |
| df65f16812 | add resnet50_quant | 5 years ago |
| dc9a51aad5 | !2070 adapt quantization aware train for r0.3<br>Merge pull request !2070 from chenzupeng/r0.3 | 5 years ago |
| cc497424fc | adapt for mobilenetV2 quantization aware train in r0.3 | 5 years ago |
| b3f09b1d45 | !1995 remove the useless transdata and cast connected with control depend<br>Merge pull request !1995 from lianliguang/r0.3 | 5 years ago |
| f05da3aae9 | !1948 fix resnet50 distribute bug<br>Merge pull request !1948 from zhaoting/r0.3 | 5 years ago |
| 0ac5911910 | remove the useless transdata and cast connected with control depend | 5 years ago |
| fb65a1a929 | !2049 update mobilenetv2 scripts<br>Merge pull request !2049 from panfengfeng/update_mobilenetv2_codes | 5 years ago |
| 7d965477a1 | !2041 add mobilenetV2 quant<br>Merge pull request !2041 from chenzhongming/r0.3 | 5 years ago |
| 68c3c73fab | update mobilenetV2 dataset codes | 5 years ago |
| 7ffcc606c9 | !2035 add example for zhwiki, CLUERNER2020 and enwiki to mindrecord<br>Merge pull request !2035 from guozhijian/add_zhwiki_enwiki_preprocess | 5 years ago |
| aa4c4f51ac | !2025 fix remove reshape pair pass<br>Merge pull request !2025 from liubuyu/r0.3 | 5 years ago |
| 854e16f0f8 | !2033 fix mindrecord seekg failed<br>Merge pull request !2033 from guozhijian/fix_seekg_failed | 5 years ago |
| 60dc921186 | add mobilenetV2 quant | 5 years ago |
| 16e9da5ae5 | enhance: add example for zhwiki, CLUERNER2020 and enwiki to mindrecord | 5 years ago |
| a48a97208b | fix: mindrecord seekg failed when shift raw page | 5 years ago |
| e3145f18b0 | fix remove reshape pair pass | 5 years ago |
| c4abebafcc | add sync between hcom | 5 years ago |
| 0e4fab2368 | !2011 fake quant debug<br>Merge pull request !2011 from chenzhongming/r0.3 | 5 years ago |
| 5a26546b56 | fake quant debug | 5 years ago |
| a40e9e6fae | !2001 fix MindDataset distribute shuffle error<br>Merge pull request !2001 from guozhijian/fix_MindDataset_distribute_bug | 5 years ago |
| 07f7d1ae62 | fix: MindDataset distribute shuffle bug | 5 years ago |
| 9944abe99d | !1963 bug fix in fake quant training in r0.3<br>Merge pull request !1963 from chenzhongming/r0.3 | 5 years ago |
| bb58ea35b9 | bug fix in fake quant training in r0.3 | 5 years ago |
| eaaacfea4c | !1941 Add order function in group params in r0.3<br>Merge pull request !1941 from ghzl/v3-add-oder-parameters-in-group-functions | 5 years ago |
| 676e717edf | !1952 use VisitKernelWithReturnType instead of VisitKernel to get node's input in mem_reuse<br>Merge pull request !1952 from laiyongqiang/r0.3mem | 5 years ago |
| 9bdf017379 | use VisitKernelWithReturnType instead of VisitKernel to get node's input | 5 years ago |
| b37184050f | fix resnet50 distribute bug | 5 years ago |
| 4d92e2b579 | Revert "Revert "add pattern AdjustAllReduceMulAdduse the old opadd test case for bugtemp fix try""<br>This reverts commit | 5 years ago |
| ba125f9673 | !1925 bug fix in fake quant<br>Merge pull request !1925 from chenzhongming/r0.3 | 5 years ago |
| e0fa277a05 | fix bug in fake quant grad | 5 years ago |
| eac1f93ee4 | !1889 add dropout special kernel selected rules<br>Merge pull request !1889 from lianliguang/r0.3 | 5 years ago |
| 40e1e3843f | !1894 fix lars weight decay computation error<br>Merge pull request !1894 from gziyan/upstream/r0.3 | 5 years ago |
| fdb2a915b9 | fix weight decay in lars | 5 years ago |
| 159119cb2a | add dropout special kernel selected rules | 5 years ago |
| f213c3a6ad | add order function in group params | 5 years ago |
| 1f34378b9c | !1837 [MD] support padding samples in minddataset<br>Merge pull request !1837 from liyong126/r0.3_mindrecord_padded_samples | 5 years ago |
| d915d46d79 | pad samples in mindrecord | 5 years ago |
| 6ce8a4ab20 | !1836 update register info of BiasAddGrad and modify adam optimizer&softmax_grad to match fusion rules<br>Merge pull request !1836 from shibeiji/r0.3 | 5 years ago |
| 188c9feca4 | update register info of BiasAddGrad and modify adam optimizer&softmax_grad to match fusion rules | 5 years ago |
| 29deeca343 | !1818 Add SoftmaxGradExt fusion pass from master to r0.3<br>Merge pull request !1818 from huanghui/r0.3-softmaxgradext | 5 years ago |
| cc582f5e30 | add SoftmaxGradExt fusion pass | 5 years ago |
| bbdc44a0cc | !1646 reorder independent nodes for stream parallel<br>Merge pull request !1646 from gukecai/r0.3 | 5 years ago |
| c8d31b0889 | !1754 Add 5 patterns for AdamApplyOneWithDecay fusion pass<br>Merge pull request !1754 from huanghui/r0.3 | 5 years ago |
| 3baa52717f | !1795 fix compile bugs in mobilenetv2 quant aware training for r0.3<br>Merge pull request !1795 from wandongdong/r0.3 | 5 years ago |
| 5485976f61 | fix compile bugs for quant | 5 years ago |
| 2109bb68b3 | !1756 modify widedeep<br>Merge pull request !1756 from wukesong/r0.3-add-widedeep | 5 years ago |
| 07166d11af | !1751 fixed SoftmaxGradExt<br>Merge pull request !1751 from jiangjinsheng/r0.3 | 5 years ago |
| 05afc22ffa | add 5 new patterns for AdamApplyOneWithDecayRule fusion pass | 5 years ago |
| bb4b06946f | modify widedeep | 5 years ago |
| 022d391e3c | fixed SoftmaxGradExt | 5 years ago |
| 4cff81ee2d | !1733 change some settings in SSD<br>Merge pull request !1733 from zhaoting/SSD_t | 5 years ago |
| ac12df82d2 | change some settings in SSD | 5 years ago |
| 9cb129ac99 | !1720 add reducemean's special kernel filter rule<br>Merge pull request !1720 from lianliguang/r0.3 | 5 years ago |
| 5adcbf6e23 | !1727 move add graph manager to gpu session<br>Merge pull request !1727 from zyli2020/r0.3 | 5 years ago |
| 491ba51b8b | set save graphs False and add bprop for op cholesky trsm<br>Change thor log level | 5 years ago |
| db6bb720df | !1716 fix bug introduced by gpu support<br>Merge pull request !1716 from gengdongjie/r0.3 | 5 years ago |
| c0aa7602e0 | move add graph manager to gpu session | 5 years ago |
| ba48964f2a | add reduce mean kernel filter function | 5 years ago |
| 0a4a449e8f | !1711 fix log1p<br>Merge pull request !1711 from jiangjinsheng/r0.3 | 5 years ago |
| 4f50cb3a9b | fix bug introduced by gpu support | 5 years ago |
| 5d0cc35792 | !1567 lstm&transpose_r0.3<br>Merge pull request !1567 from baihuawei/lstm_r0.3 | 5 years ago |
| b85c310ea1 | add lstm & transpose | 5 years ago |
| a9de8012df | fixed log1p | 5 years ago |
| d85262e03c | !1686 update r0.3 release notes<br>Merge pull request !1686 from guozhijian/update_r0.3_release_notes_branch | 5 years ago |
| 22158fc703 | update r0.3 release notes and install path | 5 years ago |
| e3a7f8f21c | !1698 bugfix: get nullptr from graph manager<br>Merge pull request !1698 from zyli2020/r0.3 | 5 years ago |
| df04230e13 | fix get nullptr when use graph manager | 5 years ago |
| 20d26b17f8 | !1684 dataset: repair get_sampler_size problem<br>Merge pull request !1684 from ms_yan/r0.3_sampler | 5 years ago |
| 85012ceedd | !1677 TopK fusion pass bug fix<br>Merge pull request !1677 from linqingke/r0.3 | 5 years ago |
| 527f1d70ce | !1680 fix resource release bug of memory swap<br>Merge pull request !1680 from zyli2020/r0.3 | 5 years ago |
| cab2612c23 | !1662 fix get_dataset_size error for GeneratorDataset<br>Merge pull request !1662 from yanghaitao/yht_generator_get_dataset_size_r0.3 | 5 years ago |
| a339fac777 | topk bug fix | 5 years ago |
| 723c66bb66 | !1683 modify dataset.py and add auto parallel split<br>Merge pull request !1683 from wanghua/r0.3 | 5 years ago |
| 298ff4adc1 | modify dataset.py and add auto parallel split | 5 years ago |
| 09fd47a256 | repair get_sampler_size problem | 5 years ago |
| e0510928f1 | !1676 GPU fix resnet script<br>Merge pull request !1676 from VectorSL/r0.3 | 5 years ago |
| 9205271347 | !1671 Add DeepLabV3 network<br>Merge pull request !1671 from z00378171/r0.3 | 5 years ago |
| 13bda4caf1 | fix resource release bug of memory swap | 5 years ago |
| 8d68bd874e | reorder independent nodes | 5 years ago |
| 60dadd6d21 | gpu fix resnet script | 5 years ago |
| 0e4574af6b | !1656 fix bug for mobilenet in model_zoo<br>Merge pull request !1656 from SanjayChan/r0.3 | 5 years ago |
| 3ae2f8d12c | !1664 revert parameter set kernel build info<br>Merge pull request !1664 from lianliguang/r0.3 | 5 years ago |
| a74e238e21 | !1664 revert parameter set kernel build info<br>Merge pull request !1664 from lianliguang/r0.3 | 5 years ago |
| 0a97cb8acd | add deeplabv3 to model zoo | 5 years ago |
| 344f2ef4df | revert don't set parameter's format when it has been set before | 5 years ago |
| 6f3758f313 | !1657 add readme<br>Merge pull request !1657 from zongha/r0.3 | 5 years ago |
| 1187411af1 | a | 5 years ago |
| f7acf0ed6f | !1633 modify ssd script for merging backbone<br>Merge pull request !1633 from chengxb7532/r0.3 | 5 years ago |
| e658eb7f24 | bug fix | 5 years ago |
| 5cb99aadf5 | !1645 ModelZoo WideDeep r0.3<br>Merge pull request !1645 from yao_yf/WideDeep_ModelZoo_r0.3 | 5 years ago |
| dc5b04846f | !1566 sync lstm ops code from master to r0.3<br>Merge pull request !1566 from sunsuodong/r0.3_lstmops | 5 years ago |
| 72a166ff8c | !1624 Remove WARNING log in pynative mode<br>Merge pull request !1624 from caifubi/r0.3 | 5 years ago |
| 894e329218 | add the readme | 5 years ago |
| 4ac88b6bcc | modelzoo_widedeep_r0.3 | 5 years ago |
| 67fecef6a8 | !1651 GPU fix example scripts resnet r0.3<br>Merge pull request !1651 from VectorSL/r0.3 | 5 years ago |
| c32c17bbad | !1644 dataset: re-fix some format problem in take and split<br>Merge pull request !1644 from ms_yan/r0.3_doc | 5 years ago |
| a9db68db3a | fix gpu resnet script | 5 years ago |
| 4fb3ab7882 | modify ssd script for merging backbone | 5 years ago |
| b2f0135224 | !1629 add cpu stridedslice<br>Merge pull request !1629 from kisnwang/r0.3 | 5 years ago |
| 5a1fba5103 | repair api format problem | 5 years ago |
| 69dd996278 | !1620 Add protection in cross entropy kernel.<br>Merge pull request !1620 from ZPaC/r0.3 | 5 years ago |
| 9dd3c1f77d | !1621 upload fasterrcnn scripts<br>Merge pull request !1621 from meixiaowei/r0.3 | 5 years ago |
| ba39d53c22 | sync lstm ops code from master to r0.3 | 5 years ago |
| b47847167d | !1630 Add DeepFM scripts<br>Merge pull request !1630 from yangyongjie/r0.3 | 5 years ago |
| 41e179cc51 | !1640 Fix lenet hang problem on windows<br>Merge pull request !1640 from xiefangqi/fix_lenet_windows_hang | 5 years ago |
| 01d9ce3e5d | !1622 change mobilenet file struct.<br>Merge pull request !1622 from SanjayChan/r0.3 | 5 years ago |
| 2c42665e90 | fix lenet hang problem on windows | 5 years ago |
| 803a91596a | !1614 LSTM network adapt to cpu target.<br>Merge pull request !1614 from caojian05/ms_r0.3_dev | 5 years ago |
| 8f79f0cce8 | add DeepFM | 5 years ago |
| 808d5947d5 | add cpu strided slice | 5 years ago |
| 9274daec9c | !1610 fix subset random sampler error<br>Merge pull request !1610 from yanghaitao/yht_subsetrandomsampler_r0.3 | 5 years ago |
| ce57e02db3 | !1562 don't set parameter's format when it has been set before<br>Merge pull request !1562 from lianliguang/r0.3 | 5 years ago |
| 07724c7080 | !1608 add get_dataset_size for CelebADataset<br>Merge pull request !1608 from yanghaitao/yht_celeba_get_dataset_size_r0.3 | 5 years ago |
| 9853294aaa | change mobilenet struct | 5 years ago |
| 6c491b8d3e | Only release runtime resource in GRAPH_MODE | 5 years ago |
| 24fb17895a | upload fasterrcnn scripts | 5 years ago |
| 42641f17ab | Add protection in cross entropy kernel. | 5 years ago |
| a8efea5c81 | !1588 GPU update resnet50 script in example<br>Merge pull request !1588 from VectorSL/r0.3 | 5 years ago |
| 600d052ac1 | LSTM network adapt to cpu target. | 5 years ago |
| 6599cc1aca | !1579 rectify pretrained path and revert AdjustAllReduceMulAdduse<br>Merge pull request !1579 from gengdongjie/r0.3 | 5 years ago |
| a14be2254b | !1594 refine data copy in multi-graph<br>Merge pull request !1594 from zyli2020/r0.3 | 5 years ago |
| 1289c3e4db | !1592 bug fix while evaluation<br>Merge pull request !1592 from SanjayChan/r0.3 | 5 years ago |
| b70b2da675 | !1582 add topk and randomchoicewithmask op data type for aicpu<br>Merge pull request !1582 from yanzhenxiang2020/r03_add_datatype | 5 years ago |
| 02914ba0b9 | !1581 fix flatten grad error with reshape<br>Merge pull request !1581 from zhaozhenlong/fix-issue-flatten-grad | 5 years ago |
| 085d8f1233 | don't set parameter's format when it has been set before | 5 years ago |
| a379c668f5 | fix subsetrandomsampler | 5 years ago |
| 00a4e188b7 | !1590 dataset: fix some format problem in take and split<br>Merge pull request !1590 from ms_yan/r0.3_format | 5 years ago |
| 415afe09f5 | add get_dataset_size to celebadataset | 5 years ago |
| 94872b7678 | !1570 Check the size of topk input names before converting input to attr<br>Merge pull request !1570 from YuJianfeng/r0.3 | 5 years ago |
| 2f936166c9 | !1575 VocDataset support split ops<br>Merge pull request !1575 from xiefangqi/md_voc_support_split | 5 years ago |
| 76befd5703 | !1577 fix reshape reshape case in auto parallel for r0.3<br>Merge pull request !1577 from yao_yf/fix_reshape_reshape_r0.3_ | 5 years ago |
| 78909200ed | !1589 fix bert performance<br>Merge pull request !1589 from chenhaozhe/fix-bert-in-r0.3 | 5 years ago |
| 32dbbc1de2 | refine data copy in multi-graph | 5 years ago |
| 97610885d0 | bug fix while evaluation | 5 years ago |
| 27712eafaf | repair some format problem in API | 5 years ago |
| 04bc2a938e | fix performance of bert | 5 years ago |
| b5ce6c55a5 | gpu update example resnet | 5 years ago |
| 63479f8e7c | !1574 fix tfreadop hang<br>Merge pull request !1574 from yanghaitao/yht_tfreadop_equal_rows_hang_r0.3 | 5 years ago |
| 6cd15ea553 | use reshape as flatten grad | 5 years ago |
| d5af2f23b2 | add topk and randomchoicewithmask data type for aicpu | 5 years ago |
| 217d801c12 | bugfix for resnet50_imagenet pretrained_ckpt | 5 years ago |
| 135e90b135 | Revert "add pattern AdjustAllReduceMulAdduse the old opadd test case for bugtemp fix try"<br>This reverts commit | 5 years ago |
| 431bc8bf4b | !1553 change hook function grad input to tuple<br>Merge pull request !1553 from wangqiuliang/r0.3 | 5 years ago |
| 771a88d490 | !1569 fix multi-graph run out of device resource<br>Merge pull request !1569 from caifubi/r0.3 | 5 years ago |
| b298c515a6 | !1559 Voc dataset support split ops<br>Merge pull request !1559 from xiefangqi/xfq_voc_support_split | 5 years ago |
| dcb91b0ef6 | fix reshape reshape case in auto parallel | 5 years ago |
| f30928f084 | fix tfreaderop hang | 5 years ago |
| 1fb2cce274 | Check the size of topk input names before converting input to attr | 5 years ago |
| f6ad679ef9 | fix multi-graph device resource run out bug | 5 years ago |
| 0f22140331 | !1548 [session] make manager for every graph<br>Merge pull request !1548 from chenfei_mindspore/r0.3 | 5 years ago |
| a5e66e159e | change hook grad input to tuple | 5 years ago |
| fce296eb38 | make manager for every graph | 5 years ago |
| e5c45bd339 | !1538 add custom tbe ops for quant aware training<br>Merge pull request !1538 from wandongdong/r0.3 | 5 years ago |
| af1fde399b | !1509 dataset: PR1457 fix 3 bug reports for split<br>Merge pull request !1509 from ms_yan/r0.3_split | 5 years ago |
| 1deb091c0f | !1529 support tensor set item the number value type is similar as tensor dtype<br>Merge pull request !1529 from zhangbuxue/support_tensor_setitem_the_number_value_type_and_tensor_dtype_not_same | 5 years ago |
| 0a52fd052b | add custom tbe ops for quant aware training | 5 years ago |
| cf20b3443c | !1514 fix ssd run failed problem<br>Merge pull request !1514 from chengxb7532/r0.3 | 5 years ago |
| a0e552e75c | !1524 fix compilation order<br>Merge pull request !1524 from panfengfeng/fix_compilation_order | 5 years ago |
| ffdb11f548 | !1526 Move graph_map_schema.py to example directory<br>Merge pull request !1526 from heleiwang/r0.3_mv | 5 years ago |
| fac36e6a1a | !1527 THOR ops master -> r0.3<br>Merge pull request !1527 from zongha/r0.3 | 5 years ago |
| 642761c2b1 | adapt Second order optimization ops<br>for thor ops for impl of 2nd-order and format for format for pylint 2nd for pylint 3rd for pylint 4th for pylint 5th for pylint nth for comments for debug for DEBUG for DEBUG for DEBUG for DEBUG for well performance for pylint for te chip for pylint for pylint nth for modification of comments | 5 years ago |
| cfe87d9563 | !1519 [Data] Updated UA, RandSharp and RandColor parameter check, Updated UA code and description.<br>Merge pull request !1519 from xulei/r0.3 | 5 years ago |
| 5ab32c33e4 | support tensor set item the number value type is similar as tensor dtype not same | 5 years ago |
| e056799467 | !1517 Add check for empty group parameters<br>Merge pull request !1517 from ghzl/incubator-add-check-group-params-empty | 5 years ago |
| 2be75b0c74 | mv graph_map_schema.py to example | 5 years ago |
| d0d7864ccc | fix compilation order | 5 years ago |
| 9228384304 | fixed bug for split, RandomSampler and some other cleanup<br>Cleanup dataset UT: restore config support | 5 years ago |
| bdd9aec368 | !1463 Updated UA, RandSharp and RandColor parameter check, Updated UA code and description.<br>Merge pull request !1463 from alashkari/ua-ops-v2 | 5 years ago |
| 75c1e7f6af | add check for group parameters | 5 years ago |
| c6f309e125 | !1507 remove print<br>Merge pull request !1507 from zhangbuxue/remove_print | 5 years ago |
| c77ac8aa0b | add mobilenet file for ssd net | 5 years ago |
| c821cc1ebb | remove print | 5 years ago |
| 6be8929f62 | !1496 revert decoupled of 1313<br>Merge pull request !1496 from lianliguang/revert_decoupled | 5 years ago |
| f51a745931 | !1486 add train and eval script for LSTM<br>Merge pull request !1486 from caojian05/ms_r0.3_dev | 5 years ago |
| d3c848fc09 | Revert "!1313 decoupled of insert transdata and deal ref and split transdata"<br>This reverts commit 9ceea1263655ae1916031aeb2944579e614d7d01, reversing changes made to | 5 years ago |
| 702fcbbe99 | !1467 Pynative can not add cell hook<br>Merge pull request !1467 from JoyLvliang/r0.3 | 5 years ago |
| 7013a9918a | !1485 Fix fusion condition of transpose and reshape<br>Merge pull request !1485 from YuJianfeng/r0.3 | 5 years ago |
| f95052fd65 | Fix fusion condition of transpose and reshape | 5 years ago |
| 1fae83d746 | add train and eval script for LSTM | 5 years ago |
| 11303142b1 | pynative-cell-hook-grad-abnormal | 5 years ago |
| 5157063cbb | !1470 Fix log and comment errors in graphdata<br>Merge pull request !1470 from heleiwang/r0.3_fix_log | 5 years ago |
| d210fbb7e9 | !1471 Fix input check in graphdata<br>Merge pull request !1471 from heleiwang/r0.3_fix_input_check | 5 years ago |
| b0a354830b | fix input check | 5 years ago |
| 0ca8daa1a2 | fix log error | 5 years ago |
| 47039a6d98 | !1449 fix kernel select<br>Merge pull request !1449 from liubuyu/r0.3 | 5 years ago |
| 74cdb91151 | !1458 remove old buffer fusion pass<br>Merge pull request !1458 from Etone.Chan/Resnet50 | 5 years ago |
| 6f6fc75ba5 | bug fix | 5 years ago |
| 24e5387973 | remove old buffer fusion pass | 5 years ago |
| 51a50e17b7 | !1429 update version from 0.2 to 0.3<br>Merge pull request !1429 from guozhijian/r0.3_update_version | 5 years ago |
| 3d6802007a | update version from 0.2 to 0.3 | 5 years ago |
````diff
@@ -13,3 +13,6 @@
 [submodule "graphengine"]
 	path = graphengine
 	url = https://gitee.com/mindspore/graphengine.git
+[submodule "akg"]
+	path = akg
+	url = https://gitee.com/mindspore/akg.git
@@ -89,4 +89,4 @@ if (ENABLE_TESTCASES)
     add_subdirectory(tests)
 endif()
-include(cmake/package.cmake)
+include(cmake/package.cmake)
@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.
 <img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>
-For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).
+For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.3.0-alpha/architecture.html).
 ### Automatic Differentiation
@@ -76,7 +76,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an example
 1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.
 ```
-pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
+pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
 ```
 2. Run the following command to verify the install.
@@ -133,8 +133,8 @@ currently the containerized build options are supported as follows:
 For `CPU` backend, you can directly pull and run the latest stable image using the below command:
 ```
-docker pull mindspore/mindspore-cpu:0.2.0-alpha
-docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash
+docker pull mindspore/mindspore-cpu:0.3.0-alpha
+docker run -it mindspore/mindspore-cpu:0.3.0-alpha /bin/bash
 ```
 * GPU
@@ -151,8 +151,8 @@ currently the containerized build options are supported as follows:
 Then you can pull and run the latest stable image using the below command:
 ```
-docker pull mindspore/mindspore-gpu:0.2.0-alpha
-docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
+docker pull mindspore/mindspore-gpu:0.3.0-alpha
+docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.3.0-alpha /bin/bash
 ```
 To test if the docker image works, please execute the python code below and check the output:
@@ -187,7 +187,7 @@ please check out [docker](docker/README.md) repo for the details.
 ## Quickstart
-See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
+See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.3.0-alpha/quick_start/quick_start.html)
 to implement the image classification.
 ## Docs
````
````diff
@@ -1,3 +1,88 @@
+# Release 0.3.1-alpha
+## Major Features and Improvements
+### Ascend 910 Training and Inference Framework
+* Frontend and User Interface
+    * Independent model init interface.
+* Data processing, augmentation, and save format
+    * Support sample padding for minddataset.
+## Bugfixes
+* Python API
+    * Fix bugs in the lars optimizer([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
+* Data processing
+    * Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))
+# Release 0.3.0-alpha
+## Major Features and Improvements
+### Ascend 910 Training and Inference Framework
+* New models
+    * DeepFM: a factorization-machine based neural network for CTR prediction on Criteo dataset.
+    * DeepLabV3: significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2007 semantic image segmentation benchmark.
+    * Faster-RCNN: towards real-time object detection with region proposal networks on COCO 2017 dataset.
+    * SSD: a single stage object detection methods on COCO 2017 dataset.
+    * GoogLeNet: a deep convolutional neural network architecture codenamed Inception V1 for classification and detection on CIFAR-10 dataset.
+    * Wide&Deep: jointly trained wide linear models and deep neural networks for recommender systems on Criteo dataset.
+* Frontend and User Interface
+    * Complete numpy advanced indexing method. Supports value and assignment through tensor index.
+    * Some optimizers support separating parameter groups. Different parameter groups can set different `learning_rate` and `weight_decay`.
+    * Support setting submodule's logging level independently, e.g. you can set logging level of module `A` to warning and set logging level of module `B` to info.
+    * Support weights to be compiled according to shape to solve the problem of large memory overhead.
+    * Add some operators implement and grammar support in pynative mode. To be consistent with graph mode.
+* User interfaces change log
+    * Learning rate and weight decay making group params([!637](https://gitee.com/mindspore/mindspore/pulls/637))
+    * Support weights to be compiled according to shape([!1015](https://gitee.com/mindspore/mindspore/pulls/1015))
+    * delete some context param([!1100](https://gitee.com/mindspore/mindspore/pulls/1100))
+    * ImageSummary/ScalarSummary/TensorSummary/HistogramSummary([!1329](https://gitee.com/mindspore/mindspore/pulls/1329))([!1425](https://gitee.com/mindspore/mindspore/pulls/1425))
+* Executor and Performance Optimization
+    * Support doing evaluation while in training process, so that the accuracy of training can be easily obtained.
+    * Enable second-order optimization for resnet50, which can achieve 75.9% accuracy in 45 epochs (Resnet50 @ImageNet).
+    * Optimize pynative implementation and improve it's execution performance.
+    * Optimize summary record implementation and improve its performance.
+* Data processing, augmentation, and save format
+    * Support simple text processing, such as tokenizer/buildvocab/lookup.
+    * Support padding batch.
+    * Support split or concat dataset.
+    * Support MindDataset reading from file list.
+### Other Hardware Support
+* GPU platform
+    * New models supported: MobileNetV2, MobileNetV3.
+    * Support mixed precision training.
+    * Support device memory swapping.
+## Bugfixes
+* Python API
+    * An exception to the broadcast input data type check([!712](https://gitee.com/mindspore/mindspore/pulls/712))
+    * Fix issues assignsub return value 0([!1036](https://gitee.com/mindspore/mindspore/pulls/1036))
+    * Fix issue Conv2dBackpropInput bprop should return 3 instead of 2 items([!1001](https://gitee.com/mindspore/mindspore/pulls/1001))
+    * Fix sens shape error of TrainOneStepWithLossScaleCell([!1050](https://gitee.com/mindspore/mindspore/pulls/1050))
+    * Fix BatchNormGrad operator([!1344](https://gitee.com/mindspore/mindspore/pulls/1344))
+* Executor
+    * Fix dropout,topK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
+    * Fix memory leaks after execution in PyNatvie mode ([!1201](https://gitee.com/mindspore/mindspore/pulls/1201)).
+    * Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1252)).
+    * Fix SSD network when Select failed, cann't find kernel info([!1449](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1449)).
+    * Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1367)).
+    * Fix input memory size of 'assign' op unequal in control sink mode when assigning a data from one child graph to another child graph([!802](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/802)).
+    * Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/989)).
+* GPU platform
+    * Fix summary for gradient collection ([!1364](https://gitee.com/mindspore/mindspore/pulls/1364))
+    * Fix the slice operator ([!1489](https://gitee.com/mindspore/mindspore/pulls/1489))
+* Data processing
+    * Fix memory problems of GeneratorDataset of sub-process ([!907](https://gitee.com/mindspore/mindspore/pulls/907))
+    * Fix getting data timeout when training the cifar10 dataset under the lenet([!1391](https://gitee.com/mindspore/mindspore/pulls/1391))
+## Contributors
+Thanks goes to these wonderful people:
+Alexey Shevlyakov, Amir Lashkari, anthony, baihuawei, biffex, buxue, caifubi, candanzg, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenzomi, chujinjin, cristoval, dengwentao, eric, etone-chan, fary86, gaojing, gengdongjie, gongchen, guohongzilong, guozhijian, heleiwang, hesham, He Wei, Hoai Linh Tran, hongxing, huangdongrun, huanghui, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jonwe, jonyguo, Junhan Hu, Kang, kingfo, kswang, laiyongqiang, leopz, lichenever, lihongkang, limingqi107, liubuyu, liuliyan2, liuwenhao4, liuxiao, liuxiao, liyong, lizhenyu, lvliang, Margaret_wangrui, meixiaowei, ms_yan, Nat Sutyanyong, ougongchang, panfengfeng, panyifeng, Peilin Wang, peixu_ren, qianlong, rick_sanchez, seatea, sheng, shijianning, simson, sunsuodong, Tinazhang, VectorSL, wandongdong, wangcong, wanghua, wangnan39, Wei Luning, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuxuejian, Xiaoda Zhang, xiefangqi, xulei2020, Yang, yangjie159, yangruoqi713, yangyongjie, yangzhenzhang, Yanjun Peng, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yujianfeng, YuJianfeng, yvetteliu, zhangdengcheng, Zhang Qinghua, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, zhouyuanshen, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang
+Contributions of any kind are welcome!
 # Release 0.2.0-alpha
 ## Major Features and Improvements
````
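The 0.3.0 notes above call out optimizer parameter groups with per-group `learning_rate` and `weight_decay` (see !637, plus the order-params work in !1941). Below is a minimal sketch of what that API looks like; the toy network and the name-based weight/bias split are illustrative assumptions, not code from the release:

```python
import mindspore.nn as nn

# Hypothetical network; any Cell with trainable parameters works.
net = nn.Dense(16, 10)

# Partition parameters: weights get weight decay, biases get their own lr.
weights = [p for p in net.trainable_params() if 'weight' in p.name]
biases = [p for p in net.trainable_params() if 'bias' in p.name]

group_params = [{'params': weights, 'weight_decay': 0.01},
                {'params': biases, 'lr': 0.01},
                {'order_params': net.trainable_params()}]  # fixed update order (!1941)

# Groups without an explicit 'lr' fall back to the optimizer-level value.
opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9)
```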
````diff
@@ -3053,6 +3053,61 @@ Copyright 2003 Google Inc.
 Copyright 2009 Google Inc.
 Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+Software: libtiff 4.1.0
+Copyright notice:
+Copyright © 2015 Open Microscopy Environment / University of Dundee
+Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) 1990-1997 Sam Leffler
+Copyright (c) 1991-1997 Silicon Graphics, Inc.
+Copyright (c) 1988-1997 Sam Leffler
+Copyright (c) 1991-1997 Sam Leffler
+Use and Copyright
+Copyright (C) 1990, 1995 Frank D. Cringle.
+Copyright (c) 1994-1997 Sam Leffler
+Copyright (c) 1994-1997 Silicon Graphics, Inc.
+Copyright (c) 1997 Greg Ward Larson
+Copyright (c) 1997 Silicon Graphics, Inc.
+Copyright (c) 2010, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) Joris Van Damme <info@awaresystems.be>
+Copyright (c) AWare Systems <http://www.awaresystems.be/>
+Copyright (c) 1996-1997 Sam Leffler
+Copyright (c) 1996 Pixar
+Copyright (c) 1995-1997 Sam Leffler
+Copyright (c) 1995-1997 Silicon Graphics, Inc.
+Copyright (c) 1988-1996 Sam Leffler
+Copyright (c) 1991-1996 Silicon Graphics, Inc.
+Copyright (c) 1992-1997 Sam Leffler
+Copyright (c) 1992-1997 Silicon Graphics, Inc.
+Copyright (c) 2018, Mapbox
+Copyright (c) 2017, Planet Labs
+Copyright (c) 1990 by Sun Microsystems, Inc.
+Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts.
+Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts.
+Copyright (c) 2002, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) 2003 Ross Finlayson
+Additions (c) Richard Nolde 2006-2010
+Copyright (c) 2003, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) 2000, Frank Warmerdam
+Copyright (c) 1987, 1993, 1994
+Copyright (c) 1989, 1993
+Copyright (c) 2009 Frank Warmerdam
+Copyright (c) 1987, 1993
+Copyright (c) 2005 The DragonFly Project. All rights reserved.
+Copyright (c) 2003 Citrus Project,
+All rights reserved.
+Copyright (c) 1990, 1993
+Copyright (c) 1996 Mike Johnson
+Copyright (c) 1996 BancTec AB
+Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) 2012, Frank Warmerdam <warmerdam@pobox.com>
+Copyright (c) 2019, Even Rouault <even.rouault at spatialys.com>
+Copyright (c) 2007, Frank Warmerdam <warmerdam@pobox.com>
+Copyright (c) 2019, Thomas Bernard <miniupnp@free.fr>
+Copyright (c) 2008, Andrey Kiselev <dron@ak4719.spb.edu>
+Copyright (c) 1999, Frank Warmerdam
+Copyright (c) 1991-1996 Sam Leffler
+Copyright (c) 1996 USAF Phillips Laboratory
 Software: opencv 4.2.0
 Copyright notice:
 Copyright (C) 2016, NVIDIA Corporation, all rights reserved.
````
````diff
@@ -25,7 +25,7 @@ usage()
     echo "Usage:"
     echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
     echo "              [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
-    echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
+    echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I]"
     echo ""
     echo "Options:"
     echo "    -d Debug mode"
@@ -52,7 +52,6 @@ usage()
     echo "    -M Enable MPI and NCCL for GPU training, default on"
     echo "    -V Specify the minimum required cuda version, default CUDA 9.2"
     echo "    -I Compile predict, default off"
-    echo "    -K Compile with AKG, default off"
 }
@@ -91,7 +90,6 @@ checkopts()
     COMPILE_PREDICT="off"
     USE_GLOG="on"
     PREDICT_PLATFORM=""
-    ENABLE_AKG="off"
     # Process the options
     while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K' opt
@@ -230,10 +228,6 @@ checkopts()
                 exit 1
             fi
             ;;
-        K)
-            ENABLE_AKG="on"
-            echo "enable compile with akg"
-            ;;
         *)
             echo "Unknown option ${opt}!"
             usage
@@ -307,9 +301,6 @@ build_mindspore()
     if [[ "X$USE_GLOG" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DUSE_GLOG=ON"
     fi
-    if [[ "X$ENABLE_AKG" = "Xon" ]]; then
-        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON"
-    fi
     echo "${CMAKE_ARGS}"
     if [[ "X$INC_BUILD" = "Xoff" ]]; then
         cmake ${CMAKE_ARGS} ../..
@@ -433,9 +424,9 @@ build_predict()
     cd "${BASEPATH}/predict/output/"
     if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
-        tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
+        tar -cf MSPredict-0.3.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
     elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
-        tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
+        tar -cf MSPredict-0.3.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
     fi
     echo "success to build predict project!"
 }
@@ -16,7 +16,6 @@ option(ENABLE_DUMP_PROTO "Enable dump anf graph to file in ProtoBuffer format, d
 option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
 option(ENABLE_DUMP_IR "Enable dump funciton graph ir, default on" ON)
 option(ENABLE_MPI "enable mpi" OFF)
-option(ENABLE_AKG "enable akg" OFF)
 if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
     if (WIN32)
````
````diff
@@ -0,0 +1,67 @@
+FROM ubuntu:18.04
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV PATH /usr/local/bin:$PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install MindSpore cpu whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
````
````diff
@@ -0,0 +1,83 @@
+FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
+MAINTAINER leonwanghui <leon.wanghui@huawei.com>
+
+# Set env
+ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
+ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
+ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
+ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH
+
+# Install base tools
+RUN apt update \
+    && DEBIAN_FRONTEND=noninteractive apt install -y \
+    vim \
+    wget \
+    curl \
+    xz-utils \
+    net-tools \
+    openssh-client \
+    git \
+    ntpdate \
+    tzdata \
+    tcl \
+    sudo \
+    bash-completion
+
+# Install compile tools
+RUN DEBIAN_FRONTEND=noninteractive apt install -y \
+    gcc \
+    g++ \
+    zlibc \
+    make \
+    libgmp-dev \
+    patch \
+    autoconf \
+    libtool \
+    automake \
+    flex \
+    libnccl2=2.4.8-1+cuda10.1 \
+    libnccl-dev=2.4.8-1+cuda10.1
+
+# Set bash
+RUN echo "dash dash/sh boolean false" | debconf-set-selections
+RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash
+
+# Install python (v3.7.5)
+RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
+    libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
+    && cd /tmp \
+    && wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
+    && tar -xvf v3.7.5.tar.gz \
+    && cd /tmp/cpython-3.7.5 \
+    && mkdir -p ${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -f /usr/local/bin/python \
+    && rm -f /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
+    && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && rm -rf /tmp/cpython-3.7.5 \
+    && rm -f /tmp/v3.7.5.tar.gz
+
+# Set pip source
+RUN mkdir -pv /root/.pip \
+    && echo "[global]" > /root/.pip/pip.conf \
+    && echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
+    && echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf
+
+# Install openmpi (v3.1.5)
+RUN cd /tmp \
+    && wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
+    && tar -xvf openmpi-3.1.5.tar.gz \
+    && cd /tmp/openmpi-3.1.5 \
+    && mkdir -p ${OMPI_ROOT_PATH} \
+    && ./configure --prefix=${OMPI_ROOT_PATH} \
+    && make -j4 \
+    && make install -j4 \
+    && rm -rf /tmp/openmpi-3.1.5 \
+    && rm -f /tmp/openmpi-3.1.5.tar.gz
+
+# Install MindSpore cuda-10.1 whl package
+RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.3.0-cp37-cp37m-linux_x86_64.whl
````
````diff
@@ -52,7 +52,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
     ds = ds.map(input_columns="input_ids", operations=type_cast_op)
     # apply batch operations
     ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
-    ds = ds.repeat(repeat_count)
+    ds = ds.repeat(new_repeat_count)
     logger.info("data size: {}".format(ds.get_dataset_size()))
     logger.info("repeatcount: {}".format(ds.get_repeat_count()))
     return ds, new_repeat_count
````
````diff
@@ -81,6 +81,11 @@ def run_pretrain():
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
                                           device_num=device_num)
+        from mindspore.parallel._auto_parallel_context import auto_parallel_context
+        if bert_net_cfg.num_hidden_layers == 12:
+            auto_parallel_context().set_all_reduce_fusion_split_indices([28, 55, 82, 109, 136, 163, 190, 205])
+        elif bert_net_cfg.num_hidden_layers == 24:
+            auto_parallel_context().set_all_reduce_fusion_split_indices([38, 93, 148, 203, 258, 313, 368, 397])
         D.init()
         rank = args_opt.device_id % device_num
     else:
````
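For context on the hunk above: `set_all_reduce_fusion_split_indices` controls how gradient AllReduce operations are fused in data-parallel training. The indices mark where the flat list of parameter gradients is split into fusion buckets, so the hard-coded lists tune bucket boundaries for the 12- and 24-layer BERT configs. A minimal sketch with made-up indices and device count; the `ParallelMode` import path is an assumption about the 0.3-era layout:

```python
from mindspore import context
from mindspore.train.parallel_utils import ParallelMode  # assumed 0.3-era location; moved later
from mindspore.parallel._auto_parallel_context import auto_parallel_context

context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                  mirror_mean=True, device_num=8)
# Gradients for parameters 0..9 fuse into one AllReduce, 10..19 into a second,
# and the remainder into a final bucket (indices here are illustrative only).
auto_parallel_context().set_all_reduce_fusion_split_indices([10, 20])
```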
````diff
@@ -26,8 +26,8 @@ import os
 import pickle
 ######## mindrecord_schema begin ##########
-mindrecord_schema = {"label": {"type": "int64"},
-                     "data": {"type": "bytes"},
+mindrecord_schema = {"label": {"type": "int32"},
+                     "image": {"type": "bytes"},
                      "file_name": {"type": "string"}}
 ######## mindrecord_schema end ##########
@@ -121,5 +121,5 @@ def mindrecord_dict_data(task_id):
         if not image_bytes:
             print("The image file: {} is invalid.".format(file_name))
             continue
-        data["data"] = image_bytes
+        data["image"] = image_bytes
         yield data
````
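The field rename above propagates to anything that writes or reads these records. A minimal sketch of the write/read round trip with the updated schema; the file name and sample payload are placeholders, and the `dataset_file` keyword follows the 0.3-era `MindDataset` signature (later versions renamed it `dataset_files`):

```python
import mindspore.dataset as ds
from mindspore.mindrecord import FileWriter

schema = {"label": {"type": "int32"},
          "image": {"type": "bytes"},
          "file_name": {"type": "string"}}

writer = FileWriter(file_name="imagenet.mindrecord", shard_num=1)  # placeholder path
writer.add_schema(schema, "imagenet with image/label fields")
writer.write_raw_data([{"label": 0,
                        "image": b"...jpeg bytes...",  # placeholder payload
                        "file_name": "00001.jpg"}])
writer.commit()

# Consumers select columns by the new field name.
data_set = ds.MindDataset(dataset_file="imagenet.mindrecord",
                          columns_list=["image", "label"])
```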
````diff
@@ -0,0 +1,132 @@
+# DeepFM Description
+This is an example of training DeepFM with Criteo dataset in MindSpore.
+[Paper](https://arxiv.org/pdf/1703.04247.pdf) Huifeng Guo, Ruiming Tang, Yunming Ye, Zhenguo Li, Xiuqiang He
+# Model architecture
+The overall network architecture of DeepFM is show below:
+[Link](https://arxiv.org/pdf/1703.04247.pdf)
+# Requirements
+- Install [MindSpore](https://www.mindspore.cn/install/en).
+- Download the criteo dataset for pre-training. Extract and clean text in the dataset with [WikiExtractor](https://github.com/attardi/wikiextractor). Convert the dataset to TFRecord format and move the files to a specified path.
+- For more information, please check the resources below:
+    - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html)
+    - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html)
+# Script description
+## Script and sample code
+```python
+├── deepfm
+    ├── README.md
+    ├── scripts
+    │   ├──run_train.sh
+    │   ├──run_eval.sh
+    ├── src
+    │   ├──config.py
+    │   ├──dataset.py
+    │   ├──callback.py
+    │   ├──deepfm.py
+    ├── train.py
+    ├── eval.py
+```
+## Training process
+### Usage
+- sh run_train.sh [DEVICE_NUM] [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PAHT]
+- python train.py --dataset_path [DATASET_PATH]
+### Launch
+```
+# distribute training example
+sh scripts/run_distribute_train.sh 8 /opt/dataset/criteo /opt/mindspore_hccl_file.json
+# standalone training example
+sh scripts/run_standalone_train.sh 0 /opt/dataset/criteo
+or
+python train.py --dataset_path /opt/dataset/criteo > output.log 2>&1 &
+```
+### Result
+Training result will be stored in the example path.
+Checkpoints will be stored at `./checkpoint` by default,
+and training log will be redirected to `./output.log` by default,
+and loss log will be redirected to `./loss.log` by default,
+and eval log will be redirected to `./auc.log` by default.
+## Eval process
+### Usage
+- sh run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH]
+### Launch
+```
+# infer example
+sh scripts/run_eval.sh 0 ~/criteo/eval/ ~/train/deepfm-15_41257.ckpt
+```
+> checkpoint can be produced in training process.
+### Result
+Inference result will be stored in the example path, you can find result like the followings in `auc.log`.
+```
+2020-05-27 20:51:35 AUC: 0.80577889065281, eval time: 35.55999s.
+```
+# Model description
+## Performance
+### Training Performance
+| Parameters                 | DeepFM                                                 |
+| -------------------------- | ------------------------------------------------------|
+| Model Version              |                                                        |
+| Resource                   | Ascend 910, cpu:2.60GHz 96cores, memory:1.5T           |
+| uploaded Date              | 05/27/2020                                             |
+| MindSpore Version          | 0.2.0                                                  |
+| Dataset                    | Criteo                                                 |
+| Training Parameters        | src/config.py                                          |
+| Optimizer                  | Adam                                                   |
+| Loss Function              | SoftmaxCrossEntropyWithLogits                          |
+| outputs                    |                                                        |
+| Loss                       | 0.4234                                                 |
+| Accuracy                   | AUC[0.8055]                                            |
+| Total time                 | 91 min                                                 |
+| Params (M)                 |                                                        |
+| Checkpoint for Fine tuning |                                                        |
+| Model for inference        |                                                        |
+#### Inference Performance
+| Parameters                 |                               |                           |
+| -------------------------- | ----------------------------- | ------------------------- |
+| Model Version              |                               |                           |
+| Resource                   | Ascend 910                    | Ascend 310                |
+| uploaded Date              | 05/27/2020                    | 05/27/2020                |
+| MindSpore Version          | 0.2.0                         | 0.2.0                     |
+| Dataset                    | Criteo                        |                           |
+| batch_size                 | 1000                          |                           |
+| outputs                    |                               |                           |
+| Accuracy                   | AUC[0.8055]                   |                           |
+| Speed                      |                               |                           |
+| Total time                 | 35.559s                       |                           |
+| Model for inference        |                               |                           |
+# ModelZoo Homepage
+[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
````
````diff
@@ -0,0 +1,14 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
````
| @@ -0,0 +1,66 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_criteo.""" | |||
| import os | |||
| import sys | |||
| import time | |||
| import argparse | |||
| from mindspore import context | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from src.deepfm import ModelBuilder, AUCMetric | |||
| from src.config import DataConfig, ModelConfig, TrainConfig | |||
| from src.dataset import create_dataset | |||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||
| parser = argparse.ArgumentParser(description='CTR Prediction') | |||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| args_opt, _ = parser.parse_known_args() | |||
# Default to device 0 when DEVICE_ID is not set in the environment.
device_id = int(os.getenv('DEVICE_ID', '0'))
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id) | |||
| def add_write(file_path, print_str): | |||
| with open(file_path, 'a+', encoding='utf-8') as file_out: | |||
| file_out.write(print_str + '\n') | |||
| if __name__ == '__main__': | |||
| data_config = DataConfig() | |||
| model_config = ModelConfig() | |||
| train_config = TrainConfig() | |||
ds_eval = create_dataset(args_opt.dataset_path, train_mode=False,
epochs=1, batch_size=train_config.batch_size,
data_type=DataType(data_config.data_format))
| model_builder = ModelBuilder(ModelConfig, TrainConfig) | |||
| train_net, eval_net = model_builder.get_train_eval_net() | |||
| train_net.set_train() | |||
| eval_net.set_train(False) | |||
| auc_metric = AUCMetric() | |||
| model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) | |||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||
| load_param_into_net(eval_net, param_dict) | |||
| start = time.time() | |||
| res = model.eval(ds_eval) | |||
| eval_time = time.time() - start | |||
| time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) | |||
| out_str = f'{time_str} AUC: {list(res.values())[0]}, eval time: {eval_time}s.' | |||
| print(out_str) | |||
add_write('./auc.log', out_str)
| @@ -0,0 +1,44 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "Please run the script as: " | |||
| echo "sh scripts/run_distribute_train.sh DEVICE_NUM DATASET_PATH MINDSPORE_HCCL_CONFIG_PAHT" | |||
| echo "for example: sh scripts/run_distribute_train.sh 8 /dataset_path /rank_table_8p.json" | |||
| echo "After running the script, the network runs in the background, The log will be generated in logx/output.log" | |||
| export RANK_SIZE=$1 | |||
| DATA_URL=$2 | |||
| export MINDSPORE_HCCL_CONFIG_PAHT=$3 | |||
| for ((i=0; i<RANK_SIZE;i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf log$i | |||
| mkdir ./log$i | |||
| cp *.py ./log$i | |||
| cp -r src ./log$i | |||
| cd ./log$i || exit | |||
| echo "start training for rank $i, device $DEVICE_ID" | |||
| env > env.log | |||
| python -u train.py \ | |||
| --dataset_path=$DATA_URL \ | |||
| --ckpt_path="checkpoint" \ | |||
| --eval_file_name='auc.log' \ | |||
| --loss_file_name='loss.log' \ | |||
| --do_eval=True > output.log 2>&1 & | |||
| cd ../ | |||
| done | |||
| @@ -0,0 +1,32 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "Please run the script as: " | |||
| echo "sh scripts/run_eval.sh DEVICE_ID DATASET_PATH CHECKPOINT_PATH" | |||
| echo "for example: sh scripts/run_eval.sh 0 /dataset_path /checkpoint_path" | |||
| echo "After running the script, the network runs in the background, The log will be generated in ms_log/eval_output.log" | |||
| export DEVICE_ID=$1 | |||
| DATA_URL=$2 | |||
| CHECKPOINT_PATH=$3 | |||
| mkdir -p ms_log | |||
| CUR_DIR=`pwd` | |||
| export GLOG_log_dir=${CUR_DIR}/ms_log | |||
| export GLOG_logtostderr=0 | |||
| python -u eval.py \ | |||
| --dataset_path=$DATA_URL \ | |||
| --checkpoint_path=$CHECKPOINT_PATH > ms_log/eval_output.log 2>&1 & | |||
| @@ -0,0 +1,34 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "Please run the script as: " | |||
| echo "sh scripts/run_standalone_train.sh DEVICE_ID DATASET_PATH" | |||
| echo "for example: sh scripts/run_standalone_train.sh 0 /dataset_path" | |||
| echo "After running the script, the network runs in the background, The log will be generated in ms_log/output.log" | |||
| export DEVICE_ID=$1 | |||
| DATA_URL=$2 | |||
| mkdir -p ms_log | |||
| CUR_DIR=`pwd` | |||
| export GLOG_log_dir=${CUR_DIR}/ms_log | |||
| export GLOG_logtostderr=0 | |||
| python -u train.py \ | |||
| --dataset_path=$DATA_URL \ | |||
| --ckpt_path="checkpoint" \ | |||
| --eval_file_name='auc.log' \ | |||
| --loss_file_name='loss.log' \ | |||
| --do_eval=True > ms_log/output.log 2>&1 & | |||
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,107 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Defined callback for DeepFM. | |||
| """ | |||
| import time | |||
| from mindspore.train.callback import Callback | |||
| def add_write(file_path, out_str): | |||
| with open(file_path, 'a+', encoding='utf-8') as file_out: | |||
| file_out.write(out_str + '\n') | |||
| class EvalCallBack(Callback): | |||
| """ | |||
| Monitor the loss in training. | |||
| If the loss is NAN or INF terminating training. | |||
| Note | |||
| If per_print_times is 0 do not print loss. | |||
| """ | |||
| def __init__(self, model, eval_dataset, auc_metric, eval_file_path): | |||
| super(EvalCallBack, self).__init__() | |||
| self.model = model | |||
| self.eval_dataset = eval_dataset | |||
| self.aucMetric = auc_metric | |||
| self.aucMetric.clear() | |||
| self.eval_file_path = eval_file_path | |||
| def epoch_end(self, run_context): | |||
| start_time = time.time() | |||
| out = self.model.eval(self.eval_dataset) | |||
| eval_time = int(time.time() - start_time) | |||
| time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) | |||
| out_str = "{} EvalCallBack metric{}; eval_time{}s".format( | |||
| time_str, out.values(), eval_time) | |||
| print(out_str) | |||
| add_write(self.eval_file_path, out_str) | |||
| class LossCallBack(Callback): | |||
| """ | |||
| Monitor the loss in training. | |||
| If the loss is NAN or INF terminating training. | |||
| Note | |||
| If per_print_times is 0 do not print loss. | |||
| Args | |||
| loss_file_path (str) The file absolute path, to save as loss_file; | |||
| per_print_times (int) Print loss every times. Default 1. | |||
| """ | |||
| def __init__(self, loss_file_path, per_print_times=1): | |||
| super(LossCallBack, self).__init__() | |||
| if not isinstance(per_print_times, int) or per_print_times < 0: | |||
| raise ValueError("print_step must be int and >= 0.") | |||
| self.loss_file_path = loss_file_path | |||
| self._per_print_times = per_print_times | |||
| def step_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| loss = cb_params.net_outputs.asnumpy() | |||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | |||
| cur_num = cb_params.cur_step_num | |||
| if self._per_print_times != 0 and cur_num % self._per_print_times == 0: | |||
| with open(self.loss_file_path, "a+") as loss_file: | |||
| time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) | |||
| loss_file.write("{} epoch: {} step: {}, loss is {}\n".format( | |||
| time_str, cb_params.cur_epoch_num, cur_step_in_epoch, loss)) | |||
| print("epoch: {} step: {}, loss is {}\n".format( | |||
| cb_params.cur_epoch_num, cur_step_in_epoch, loss)) | |||
| class TimeMonitor(Callback): | |||
| """ | |||
| Time monitor for calculating cost of each epoch. | |||
| Args | |||
| data_size (int) step size of an epoch. | |||
| """ | |||
| def __init__(self, data_size): | |||
| super(TimeMonitor, self).__init__() | |||
| self.data_size = data_size | |||
| def epoch_begin(self, run_context): | |||
| self.epoch_time = time.time() | |||
| def epoch_end(self, run_context): | |||
| epoch_mseconds = (time.time() - self.epoch_time) * 1000 | |||
| per_step_mseconds = epoch_mseconds / self.data_size | |||
| print("epoch time: {0}, per step time: {1}".format(epoch_mseconds, per_step_mseconds), flush=True) | |||
| def step_begin(self, run_context): | |||
| self.step_time = time.time() | |||
| def step_end(self, run_context): | |||
| step_mseconds = (time.time() - self.step_time) * 1000 | |||
| print(f"step time {step_mseconds}", flush=True) | |||
| @@ -0,0 +1,62 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
Network config settings, used in train.py and eval.py.
| """ | |||
| class DataConfig: | |||
| """ | |||
| Define parameters of dataset. | |||
| """ | |||
| data_vocab_size = 184965 | |||
| train_num_of_parts = 21 | |||
| test_num_of_parts = 3 | |||
| batch_size = 1000 | |||
| data_field_size = 39 | |||
| # dataset format, 1: mindrecord, 2: tfrecord, 3: h5 | |||
| data_format = 2 | |||
| class ModelConfig: | |||
| """ | |||
| Define parameters of model. | |||
| """ | |||
| batch_size = DataConfig.batch_size | |||
| data_field_size = DataConfig.data_field_size | |||
| data_vocab_size = DataConfig.data_vocab_size | |||
| data_emb_dim = 80 | |||
| deep_layer_args = [[400, 400, 512], "relu"] | |||
| init_args = [-0.01, 0.01] | |||
| weight_bias_init = ['normal', 'normal'] | |||
| keep_prob = 0.9 | |||
| class TrainConfig: | |||
| """ | |||
| Define parameters of training. | |||
| """ | |||
| batch_size = DataConfig.batch_size | |||
| l2_coef = 1e-6 | |||
| learning_rate = 1e-5 | |||
| epsilon = 1e-8 | |||
| loss_scale = 1024.0 | |||
| train_epochs = 15 | |||
| save_checkpoint = True | |||
| ckpt_file_name_prefix = "deepfm" | |||
| save_checkpoint_steps = 1 | |||
| keep_checkpoint_max = 15 | |||
| eval_callback = True | |||
| loss_callback = True | |||
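# A minimal sketch of wiring these configs into the data pipeline (the dataset
# path is illustrative; train.py does the same from command-line arguments):
#   from src.config import DataConfig, TrainConfig
#   from src.dataset import create_dataset, DataType
#   ds = create_dataset("/opt/dataset/criteo", train_mode=True,
#                       epochs=TrainConfig.train_epochs,
#                       batch_size=TrainConfig.batch_size,
#                       data_type=DataType(DataConfig.data_format))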
| @@ -0,0 +1,299 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Create train or eval dataset. | |||
| """ | |||
| import os | |||
| import math | |||
| from enum import Enum | |||
| import pandas as pd | |||
| import numpy as np | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.common.dtype as mstype | |||
| from .config import DataConfig | |||
| class DataType(Enum): | |||
| """ | |||
| Enumerate supported dataset format. | |||
| """ | |||
| MINDRECORD = 1 | |||
| TFRECORD = 2 | |||
| H5 = 3 | |||
| class H5Dataset(): | |||
| """ | |||
| Create dataset with H5 format. | |||
| Args: | |||
| data_path (str): Dataset directory. | |||
train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
train_num_of_parts (int): Number of training data files (default=21).
test_num_of_parts (int): Number of test data files (default=3).
| """ | |||
| max_length = 39 | |||
| def __init__(self, data_path, train_mode=True, | |||
| train_num_of_parts=DataConfig.train_num_of_parts, | |||
| test_num_of_parts=DataConfig.test_num_of_parts): | |||
| self._hdf_data_dir = data_path | |||
| self._is_training = train_mode | |||
| if self._is_training: | |||
| self._file_prefix = 'train' | |||
| self._num_of_parts = train_num_of_parts | |||
| else: | |||
| self._file_prefix = 'test' | |||
| self._num_of_parts = test_num_of_parts | |||
| self.data_size = self._bin_count(self._hdf_data_dir, self._file_prefix, self._num_of_parts) | |||
| print("data_size: {}".format(self.data_size)) | |||
| def _bin_count(self, hdf_data_dir, file_prefix, num_of_parts): | |||
| size = 0 | |||
| for part in range(num_of_parts): | |||
| _y = pd.read_hdf(os.path.join(hdf_data_dir, f'{file_prefix}_output_part_{str(part)}.h5')) | |||
| size += _y.shape[0] | |||
| return size | |||
| def _iterate_hdf_files_(self, num_of_parts=None, | |||
| shuffle_block=False): | |||
| """ | |||
| iterate among hdf files(blocks). when the whole data set is finished, the iterator restarts | |||
| from the beginning, thus the data stream will never stop | |||
| :param train_mode: True or false,false is eval_mode, | |||
| this file iterator will go through the train set | |||
| :param num_of_parts: number of files | |||
| :param shuffle_block: shuffle block files at every round | |||
| :return: input_hdf_file_name, output_hdf_file_name, finish_flag | |||
| """ | |||
| parts = np.arange(num_of_parts) | |||
| while True: | |||
| if shuffle_block: | |||
| for _ in range(int(shuffle_block)): | |||
| np.random.shuffle(parts) | |||
| for i, p in enumerate(parts): | |||
| yield os.path.join(self._hdf_data_dir, f'{self._file_prefix}_input_part_{str(p)}.h5'), \ | |||
| os.path.join(self._hdf_data_dir, f'{self._file_prefix}_output_part_{str(p)}.h5'), \ | |||
| i + 1 == len(parts) | |||
| def _generator(self, X, y, batch_size, shuffle=True): | |||
| """ | |||
| should be accessed only in private | |||
| :param X: | |||
| :param y: | |||
| :param batch_size: | |||
| :param shuffle: | |||
| :return: | |||
| """ | |||
| number_of_batches = np.ceil(1. * X.shape[0] / batch_size) | |||
| counter = 0 | |||
| finished = False | |||
| sample_index = np.arange(X.shape[0]) | |||
| if shuffle: | |||
| for _ in range(int(shuffle)): | |||
| np.random.shuffle(sample_index) | |||
| assert X.shape[0] > 0 | |||
| while True: | |||
| batch_index = sample_index[batch_size * counter: batch_size * (counter + 1)] | |||
| X_batch = X[batch_index] | |||
| y_batch = y[batch_index] | |||
| counter += 1 | |||
| yield X_batch, y_batch, finished | |||
| if counter == number_of_batches: | |||
| counter = 0 | |||
| finished = True | |||
| def batch_generator(self, batch_size=1000, | |||
| random_sample=False, shuffle_block=False): | |||
| """ | |||
| :param train_mode: True or false,false is eval_mode, | |||
| :param batch_size | |||
| :param num_of_parts: number of files | |||
| :param random_sample: if True, will shuffle | |||
| :param shuffle_block: shuffle file blocks at every round | |||
| :return: | |||
| """ | |||
| for hdf_in, hdf_out, _ in self._iterate_hdf_files_(self._num_of_parts, | |||
| shuffle_block): | |||
| start = stop = None | |||
| X_all = pd.read_hdf(hdf_in, start=start, stop=stop).values | |||
| y_all = pd.read_hdf(hdf_out, start=start, stop=stop).values | |||
| data_gen = self._generator(X_all, y_all, batch_size, | |||
| shuffle=random_sample) | |||
| finished = False | |||
| while not finished: | |||
| X, y, finished = data_gen.__next__() | |||
| X_id = X[:, 0:self.max_length] | |||
| X_va = X[:, self.max_length:] | |||
| yield np.array(X_id.astype(dtype=np.int32)), \ | |||
| np.array(X_va.astype(dtype=np.float32)), \ | |||
| np.array(y.astype(dtype=np.float32)) | |||
| def _get_h5_dataset(directory, train_mode=True, epochs=1, batch_size=1000): | |||
| """ | |||
| Get dataset with h5 format. | |||
| Args: | |||
| directory (str): Dataset directory. | |||
train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
epochs (int): Dataset epoch size (default=1).
batch_size (int): Dataset batch size (default=1000).
| Returns: | |||
| Dataset. | |||
| """ | |||
| data_para = {'batch_size': batch_size} | |||
| if train_mode: | |||
| data_para['random_sample'] = True | |||
| data_para['shuffle_block'] = True | |||
| h5_dataset = H5Dataset(data_path=directory, train_mode=train_mode) | |||
| numbers_of_batch = math.ceil(h5_dataset.data_size / batch_size) | |||
| def _iter_h5_data(): | |||
| train_eval_gen = h5_dataset.batch_generator(**data_para) | |||
| for _ in range(0, numbers_of_batch, 1): | |||
| yield train_eval_gen.__next__() | |||
| ds = de.GeneratorDataset(_iter_h5_data, ["ids", "weights", "labels"]) | |||
| ds.set_dataset_size(numbers_of_batch) | |||
| ds = ds.repeat(epochs) | |||
| return ds | |||
| def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=1000, | |||
| line_per_sample=1000, rank_size=None, rank_id=None): | |||
| """ | |||
| Get dataset with mindrecord format. | |||
| Args: | |||
| directory (str): Dataset directory. | |||
train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
epochs (int): Dataset epoch size (default=1).
batch_size (int): Dataset batch size (default=1000).
line_per_sample (int): Number of samples packed per record line (default=1000).
rank_size (int): Number of devices; not needed for a single device (default=None).
rank_id (int): ID of the device; not needed for a single device (default=None).
| Returns: | |||
| Dataset. | |||
| """ | |||
| file_prefix_name = 'train_input_part.mindrecord' if train_mode else 'test_input_part.mindrecord' | |||
| file_suffix_name = '00' if train_mode else '0' | |||
| shuffle = train_mode | |||
| if rank_size is not None and rank_id is not None: | |||
| ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name), | |||
| columns_list=['feat_ids', 'feat_vals', 'label'], | |||
| num_shards=rank_size, shard_id=rank_id, shuffle=shuffle, | |||
| num_parallel_workers=8) | |||
| else: | |||
| ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name), | |||
| columns_list=['feat_ids', 'feat_vals', 'label'], | |||
| shuffle=shuffle, num_parallel_workers=8) | |||
| ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True) | |||
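# Each record packs `line_per_sample` samples, so batching
# batch_size / line_per_sample records and flattening below yields exactly
# `batch_size` samples per step, reshaped back to (batch_size, 39).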
| ds = ds.map(operations=(lambda x, y, z: (np.array(x).flatten().reshape(batch_size, 39), | |||
| np.array(y).flatten().reshape(batch_size, 39), | |||
| np.array(z).flatten().reshape(batch_size, 1))), | |||
| input_columns=['feat_ids', 'feat_vals', 'label'], | |||
| columns_order=['feat_ids', 'feat_vals', 'label'], | |||
| num_parallel_workers=8) | |||
| ds = ds.repeat(epochs) | |||
| return ds | |||
| def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, | |||
| line_per_sample=1000, rank_size=None, rank_id=None): | |||
| """ | |||
| Get dataset with tfrecord format. | |||
| Args: | |||
| directory (str): Dataset directory. | |||
train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
epochs (int): Dataset epoch size (default=1).
batch_size (int): Dataset batch size (default=1000).
line_per_sample (int): Number of samples packed per record line (default=1000).
rank_size (int): Number of devices; not needed for a single device (default=None).
rank_id (int): ID of the device; not needed for a single device (default=None).
| Returns: | |||
| Dataset. | |||
| """ | |||
| dataset_files = [] | |||
file_prefix_name = 'train' if train_mode else 'test'
| shuffle = train_mode | |||
| for (dir_path, _, filenames) in os.walk(directory): | |||
| for filename in filenames: | |||
if file_prefix_name in filename and 'tfrecord' in filename:
| dataset_files.append(os.path.join(dir_path, filename)) | |||
| schema = de.Schema() | |||
| schema.add_column('feat_ids', de_type=mstype.int32) | |||
| schema.add_column('feat_vals', de_type=mstype.float32) | |||
| schema.add_column('label', de_type=mstype.float32) | |||
| if rank_size is not None and rank_id is not None: | |||
| ds = de.TFRecordDataset(dataset_files=dataset_files, shuffle=shuffle, | |||
| schema=schema, num_parallel_workers=8, | |||
| num_shards=rank_size, shard_id=rank_id, | |||
| shard_equal_rows=True) | |||
| else: | |||
| ds = de.TFRecordDataset(dataset_files=dataset_files, shuffle=shuffle, | |||
| schema=schema, num_parallel_workers=8) | |||
| ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True) | |||
| ds = ds.map(operations=(lambda x, y, z: ( | |||
| np.array(x).flatten().reshape(batch_size, 39), | |||
| np.array(y).flatten().reshape(batch_size, 39), | |||
| np.array(z).flatten().reshape(batch_size, 1))), | |||
| input_columns=['feat_ids', 'feat_vals', 'label'], | |||
| columns_order=['feat_ids', 'feat_vals', 'label'], | |||
| num_parallel_workers=8) | |||
| ds = ds.repeat(epochs) | |||
| return ds | |||
| def create_dataset(directory, train_mode=True, epochs=1, batch_size=1000, | |||
| data_type=DataType.TFRECORD, line_per_sample=1000, | |||
| rank_size=None, rank_id=None): | |||
| """ | |||
| Get dataset. | |||
| Args: | |||
| directory (str): Dataset directory. | |||
train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
epochs (int): Dataset epoch size (default=1).
batch_size (int): Dataset batch size (default=1000).
data_type (DataType): Type of the dataset, one of H5, TFRECORD, MINDRECORD (default=TFRECORD).
line_per_sample (int): Number of samples packed per record line (default=1000).
rank_size (int): Number of devices; not needed for a single device (default=None).
rank_id (int): ID of the device; not needed for a single device (default=None).
| Returns: | |||
| Dataset. | |||
| """ | |||
| if data_type == DataType.MINDRECORD: | |||
| return _get_mindrecord_dataset(directory, train_mode, epochs, | |||
| batch_size, line_per_sample, | |||
| rank_size, rank_id) | |||
| if data_type == DataType.TFRECORD: | |||
| return _get_tf_dataset(directory, train_mode, epochs, batch_size, | |||
| line_per_sample, rank_size=rank_size, rank_id=rank_id) | |||
| if rank_size is not None and rank_size > 1: | |||
| raise ValueError('Please use mindrecord dataset.') | |||
| return _get_h5_dataset(directory, train_mode, epochs, batch_size) | |||
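# A minimal usage sketch (mirrors eval.py in this change; the path is illustrative):
#   ds_eval = create_dataset("/opt/dataset/criteo", train_mode=False,
#                            epochs=1, batch_size=1000,
#                            data_type=DataType.MINDRECORD)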
| @@ -0,0 +1,370 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ test_training """ | |||
| import os | |||
| import numpy as np | |||
| from sklearn.metrics import roc_auc_score | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import composite as C | |||
| from mindspore.ops import operations as P | |||
| from mindspore.nn import Dropout | |||
| from mindspore.nn.optim import Adam | |||
| from mindspore.nn.metrics import Metric | |||
| from mindspore import nn, ParameterTuple, Parameter | |||
| from mindspore.common.initializer import Uniform, initializer, Normal | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig | |||
| from .callback import EvalCallBack, LossCallBack | |||
| np_type = np.float32 | |||
| ms_type = mstype.float32 | |||
| class AUCMetric(Metric): | |||
| """AUC metric for DeepFM model.""" | |||
| def __init__(self): | |||
| super(AUCMetric, self).__init__() | |||
| self.pred_probs = [] | |||
| self.true_labels = [] | |||
| def clear(self): | |||
| """Clear the internal evaluation result.""" | |||
| self.pred_probs = [] | |||
| self.true_labels = [] | |||
| def update(self, *inputs): | |||
| batch_predict = inputs[1].asnumpy() | |||
| batch_label = inputs[2].asnumpy() | |||
| self.pred_probs.extend(batch_predict.flatten().tolist()) | |||
| self.true_labels.extend(batch_label.flatten().tolist()) | |||
| def eval(self): | |||
| if len(self.true_labels) != len(self.pred_probs): | |||
| raise RuntimeError('true_labels.size() is not equal to pred_probs.size()') | |||
| auc = roc_auc_score(self.true_labels, self.pred_probs) | |||
| return auc | |||
| def init_method(method, shape, name, max_val=0.01): | |||
| """ | |||
| The method of init parameters. | |||
| Args: | |||
| method (str): The method uses to initialize parameter. | |||
| shape (list): The shape of parameter. | |||
| name (str): The name of parameter. | |||
| max_val (float): Max value in parameter when uses 'random' or 'uniform' to initialize parameter. | |||
| Returns: | |||
| Parameter. | |||
| """ | |||
| if method in ['random', 'uniform']: | |||
| params = Parameter(initializer(Uniform(max_val), shape, ms_type), name=name) | |||
| elif method == "one": | |||
| params = Parameter(initializer("ones", shape, ms_type), name=name) | |||
| elif method == 'zero': | |||
| params = Parameter(initializer("zeros", shape, ms_type), name=name) | |||
| elif method == "normal": | |||
| params = Parameter(initializer(Normal(max_val), shape, ms_type), name=name) | |||
| return params | |||
| def init_var_dict(init_args, values): | |||
| """ | |||
| Init parameter. | |||
| Args: | |||
| init_args (list): Define max and min value of parameters. | |||
| values (list): Define name, shape and init method of parameters. | |||
| Returns: | |||
dict, a dict of Parameter.
| """ | |||
| var_map = {} | |||
| _, _max_val = init_args | |||
| for key, shape, init_flag in values: | |||
| if key not in var_map.keys(): | |||
| if init_flag in ['random', 'uniform']: | |||
| var_map[key] = Parameter(initializer(Uniform(_max_val), shape, ms_type), name=key) | |||
| elif init_flag == "one": | |||
| var_map[key] = Parameter(initializer("ones", shape, ms_type), name=key) | |||
| elif init_flag == "zero": | |||
| var_map[key] = Parameter(initializer("zeros", shape, ms_type), name=key) | |||
| elif init_flag == 'normal': | |||
| var_map[key] = Parameter(initializer(Normal(_max_val), shape, ms_type), name=key) | |||
| return var_map | |||
| class DenseLayer(nn.Cell): | |||
| """ | |||
| Dense Layer for Deep Layer of DeepFM Model; | |||
| Containing: activation, matmul, bias_add; | |||
| Args: | |||
input_dim (int): Shape of the weight at axis 0.
output_dim (int): Shape of the weight at axis 1, and shape of the bias.
weight_bias_init (list): Weight and bias init methods: "random", "uniform", "one", "zero", "normal".
act_str (str): Activation function: "relu", "sigmoid" or "tanh".
keep_prob (float): Keep rate of the dropout layer.
scale_coef (float): Input scale coefficient.
| """ | |||
| def __init__(self, input_dim, output_dim, weight_bias_init, act_str, keep_prob=0.9, scale_coef=1.0): | |||
| super(DenseLayer, self).__init__() | |||
| weight_init, bias_init = weight_bias_init | |||
| self.weight = init_method(weight_init, [input_dim, output_dim], name="weight") | |||
| self.bias = init_method(bias_init, [output_dim], name="bias") | |||
| self.act_func = self._init_activation(act_str) | |||
| self.matmul = P.MatMul(transpose_b=False) | |||
| self.bias_add = P.BiasAdd() | |||
| self.cast = P.Cast() | |||
| self.dropout = Dropout(keep_prob=keep_prob) | |||
| self.mul = P.Mul() | |||
| self.realDiv = P.RealDiv() | |||
| self.scale_coef = scale_coef | |||
| def _init_activation(self, act_str): | |||
| act_str = act_str.lower() | |||
| if act_str == "relu": | |||
| act_func = P.ReLU() | |||
| elif act_str == "sigmoid": | |||
| act_func = P.Sigmoid() | |||
| elif act_str == "tanh": | |||
| act_func = P.Tanh() | |||
| return act_func | |||
| def construct(self, x): | |||
| x = self.act_func(x) | |||
| if self.training: | |||
| x = self.dropout(x) | |||
| x = self.mul(x, self.scale_coef) | |||
| x = self.cast(x, mstype.float16) | |||
| weight = self.cast(self.weight, mstype.float16) | |||
| wx = self.matmul(x, weight) | |||
| wx = self.cast(wx, mstype.float32) | |||
| wx = self.realDiv(wx, self.scale_coef) | |||
| output = self.bias_add(wx, self.bias) | |||
| return output | |||
| class DeepFMModel(nn.Cell): | |||
| """ | |||
| From paper: "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction" | |||
| Args: | |||
batch_size (int): Sample number per training step (e.g. batch_size=128).
field_size (int): Input field number, i.e. the number of id features (e.g. field_size=39).
vocab_size (int): Vocab size of the id features, i.e. the id dict size (e.g. vocab_size=200000).
emb_dim (int): Dimension of the id embedding vectors (e.g. emb_dim=100).
deep_layer_args (list): Deep layer args [layer_dim_list, layer_activator]
(e.g. deep_layer_args=[[100, 100, 100], "relu"]).
init_args (list): Min and max values for parameter init (e.g. init_args=[min, max]).
weight_bias_init (list): Weight and bias init methods for the deep layers
(e.g. weight_bias_init=['random', 'zero']).
keep_prob (float): Keep rate of the dropout layers (e.g. keep_prob=0.8).
| """ | |||
| def __init__(self, config): | |||
| super(DeepFMModel, self).__init__() | |||
| self.batch_size = config.batch_size | |||
| self.field_size = config.data_field_size | |||
| self.vocab_size = config.data_vocab_size | |||
| self.emb_dim = config.data_emb_dim | |||
| self.deep_layer_dims_list, self.deep_layer_act = config.deep_layer_args | |||
| self.init_args = config.init_args | |||
| self.weight_bias_init = config.weight_bias_init | |||
| self.keep_prob = config.keep_prob | |||
| init_acts = [('W_l2', [self.vocab_size, 1], 'normal'), | |||
| ('V_l2', [self.vocab_size, self.emb_dim], 'normal'), | |||
| ('b', [1], 'normal')] | |||
| var_map = init_var_dict(self.init_args, init_acts) | |||
| self.fm_w = var_map["W_l2"] | |||
| self.fm_b = var_map["b"] | |||
| self.embedding_table = var_map["V_l2"] | |||
| # Deep Layers | |||
| self.deep_input_dims = self.field_size * self.emb_dim + 1 | |||
| self.all_dim_list = [self.deep_input_dims] + self.deep_layer_dims_list + [1] | |||
| self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1], | |||
| self.weight_bias_init, self.deep_layer_act, self.keep_prob) | |||
| self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2], | |||
| self.weight_bias_init, self.deep_layer_act, self.keep_prob) | |||
| self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3], | |||
| self.weight_bias_init, self.deep_layer_act, self.keep_prob) | |||
| self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4], | |||
| self.weight_bias_init, self.deep_layer_act, self.keep_prob) | |||
| # FM, linear Layers | |||
| self.Gatherv2 = P.GatherV2() | |||
| self.Mul = P.Mul() | |||
| self.ReduceSum = P.ReduceSum(keep_dims=False) | |||
| self.Reshape = P.Reshape() | |||
| self.Square = P.Square() | |||
| self.Shape = P.Shape() | |||
| self.Tile = P.Tile() | |||
| self.Concat = P.Concat(axis=1) | |||
| self.Cast = P.Cast() | |||
| def construct(self, id_hldr, wt_hldr): | |||
| """ | |||
| Args: | |||
| id_hldr: batch ids; [bs, field_size] | |||
| wt_hldr: batch weights; [bs, field_size] | |||
| """ | |||
| mask = self.Reshape(wt_hldr, (self.batch_size, self.field_size, 1)) | |||
| # Linear layer | |||
| fm_id_weight = self.Gatherv2(self.fm_w, id_hldr, 0) | |||
| wx = self.Mul(fm_id_weight, mask) | |||
| linear_out = self.ReduceSum(wx, 1) | |||
| # FM layer | |||
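# The second-order FM term is computed with the standard identity
# (O(field_size * emb_dim) instead of enumerating all feature pairs):
# sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * sum_f [(sum_i v_{i,f} x_i)^2 - sum_i (v_{i,f} x_i)^2]
# v1 below is the square of the field-wise sum; v2 is the field-wise sum of squares.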
| fm_id_embs = self.Gatherv2(self.embedding_table, id_hldr, 0) | |||
| vx = self.Mul(fm_id_embs, mask) | |||
| v1 = self.ReduceSum(vx, 1) | |||
| v1 = self.Square(v1) | |||
| v2 = self.Square(vx) | |||
| v2 = self.ReduceSum(v2, 1) | |||
| fm_out = 0.5 * self.ReduceSum(v1 - v2, 1) | |||
| fm_out = self.Reshape(fm_out, (-1, 1)) | |||
| # Deep layer | |||
| b = self.Reshape(self.fm_b, (1, 1)) | |||
| b = self.Tile(b, (self.batch_size, 1)) | |||
| deep_in = self.Reshape(vx, (-1, self.field_size * self.emb_dim)) | |||
| deep_in = self.Concat((deep_in, b)) | |||
| deep_in = self.dense_layer_1(deep_in) | |||
| deep_in = self.dense_layer_2(deep_in) | |||
| deep_in = self.dense_layer_3(deep_in) | |||
| deep_out = self.dense_layer_4(deep_in) | |||
| out = linear_out + fm_out + deep_out | |||
| return out, fm_id_weight, fm_id_embs | |||
| class NetWithLossClass(nn.Cell): | |||
| """ | |||
| NetWithLossClass definition. | |||
| """ | |||
| def __init__(self, network, l2_coef=1e-6): | |||
| super(NetWithLossClass, self).__init__(auto_prefix=False) | |||
| self.loss = P.SigmoidCrossEntropyWithLogits() | |||
| self.network = network | |||
| self.l2_coef = l2_coef | |||
| self.Square = P.Square() | |||
| self.ReduceMean_false = P.ReduceMean(keep_dims=False) | |||
| self.ReduceSum_false = P.ReduceSum(keep_dims=False) | |||
| def construct(self, batch_ids, batch_wts, label): | |||
| predict, fm_id_weight, fm_id_embs = self.network(batch_ids, batch_wts) | |||
| log_loss = self.loss(predict, label) | |||
| mean_log_loss = self.ReduceMean_false(log_loss) | |||
| l2_loss_w = self.ReduceSum_false(self.Square(fm_id_weight)) | |||
| l2_loss_v = self.ReduceSum_false(self.Square(fm_id_embs)) | |||
| l2_loss_all = self.l2_coef * (l2_loss_v + l2_loss_w) * 0.5 | |||
| loss = mean_log_loss + l2_loss_all | |||
| return loss | |||
| class TrainStepWrap(nn.Cell): | |||
| """ | |||
| TrainStepWrap definition | |||
| """ | |||
| def __init__(self, network, lr=5e-8, eps=1e-8, loss_scale=1000.0): | |||
| super(TrainStepWrap, self).__init__(auto_prefix=False) | |||
| self.network = network | |||
| self.network.set_train() | |||
| self.weights = ParameterTuple(network.trainable_params()) | |||
| self.optimizer = Adam(self.weights, learning_rate=lr, eps=eps, loss_scale=loss_scale) | |||
| self.hyper_map = C.HyperMap() | |||
| self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True) | |||
| self.sens = loss_scale | |||
| def construct(self, batch_ids, batch_wts, label): | |||
| weights = self.weights | |||
| loss = self.network(batch_ids, batch_wts, label) | |||
# Seed backprop with the loss-scale constant; the matching loss_scale passed to Adam unscales the gradients.
sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
| grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens) | |||
| return F.depend(loss, self.optimizer(grads)) | |||
| class PredictWithSigmoid(nn.Cell): | |||
| """ | |||
| Eval model with sigmoid. | |||
| """ | |||
| def __init__(self, network): | |||
| super(PredictWithSigmoid, self).__init__(auto_prefix=False) | |||
| self.network = network | |||
| self.sigmoid = P.Sigmoid() | |||
| def construct(self, batch_ids, batch_wts, labels): | |||
logits, _, _ = self.network(batch_ids, batch_wts)
| pred_probs = self.sigmoid(logits) | |||
| return logits, pred_probs, labels | |||
| class ModelBuilder: | |||
| """ | |||
| Model builder for DeepFM. | |||
| Args: | |||
| model_config (ModelConfig): Model configuration. | |||
| train_config (TrainConfig): Train configuration. | |||
| """ | |||
| def __init__(self, model_config, train_config): | |||
| self.model_config = model_config | |||
| self.train_config = train_config | |||
| def get_callback_list(self, model=None, eval_dataset=None): | |||
| """ | |||
| Get callbacks which contains checkpoint callback, eval callback and loss callback. | |||
| Args: | |||
| model (Cell): The network is added callback (default=None). | |||
| eval_dataset (Dataset): Dataset for eval (default=None). | |||
| """ | |||
| callback_list = [] | |||
| if self.train_config.save_checkpoint: | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=self.train_config.save_checkpoint_steps, | |||
| keep_checkpoint_max=self.train_config.keep_checkpoint_max) | |||
| ckpt_cb = ModelCheckpoint(prefix=self.train_config.ckpt_file_name_prefix, | |||
| directory=self.train_config.output_path, | |||
| config=config_ck) | |||
| callback_list.append(ckpt_cb) | |||
| if self.train_config.eval_callback: | |||
| if model is None: | |||
| raise RuntimeError("train_config.eval_callback is {}; get_callback_list() args model is {}".format( | |||
| self.train_config.eval_callback, model)) | |||
| if eval_dataset is None: | |||
| raise RuntimeError("train_config.eval_callback is {}; get_callback_list() " | |||
| "args eval_dataset is {}".format(self.train_config.eval_callback, eval_dataset)) | |||
| auc_metric = AUCMetric() | |||
| eval_callback = EvalCallBack(model, eval_dataset, auc_metric, | |||
| eval_file_path=os.path.join(self.train_config.output_path, | |||
| self.train_config.eval_file_name)) | |||
| callback_list.append(eval_callback) | |||
| if self.train_config.loss_callback: | |||
| loss_callback = LossCallBack(loss_file_path=os.path.join(self.train_config.output_path, | |||
| self.train_config.loss_file_name)) | |||
| callback_list.append(loss_callback) | |||
| if callback_list: | |||
| return callback_list | |||
| return None | |||
| def get_train_eval_net(self): | |||
| deepfm_net = DeepFMModel(self.model_config) | |||
| loss_net = NetWithLossClass(deepfm_net, l2_coef=self.train_config.l2_coef) | |||
| train_net = TrainStepWrap(loss_net, lr=self.train_config.learning_rate, | |||
| eps=self.train_config.epsilon, | |||
| loss_scale=self.train_config.loss_scale) | |||
| eval_net = PredictWithSigmoid(deepfm_net) | |||
| return train_net, eval_net | |||
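# A minimal usage sketch (mirrors train.py below):
#   builder = ModelBuilder(ModelConfig, TrainConfig)
#   train_net, eval_net = builder.get_train_eval_net()
#   model = Model(train_net, eval_network=eval_net, metrics={"auc": AUCMetric()})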
| @@ -0,0 +1,91 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_criteo.""" | |||
import os
import sys
import argparse

# Make `src` importable when the script is launched from another directory.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mindspore import context, ParallelMode
from mindspore.communication.management import init
from mindspore.train.model import Model
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor
from src.deepfm import ModelBuilder, AUCMetric
from src.config import DataConfig, ModelConfig, TrainConfig
from src.dataset import create_dataset, DataType
from src.callback import EvalCallBack, LossCallBack
| parser = argparse.ArgumentParser(description='CTR Prediction') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| parser.add_argument('--ckpt_path', type=str, default=None, help='Checkpoint path') | |||
| parser.add_argument('--eval_file_name', type=str, default="./auc.log", help='eval file path') | |||
| parser.add_argument('--loss_file_name', type=str, default="./loss.log", help='loss file path') | |||
| parser.add_argument('--do_eval', type=bool, default=True, help='Do evaluation or not.') | |||
| args_opt, _ = parser.parse_known_args() | |||
# Default to device 0 when DEVICE_ID is not set in the environment.
device_id = int(os.getenv('DEVICE_ID', '0'))
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id) | |||
| if __name__ == '__main__': | |||
| data_config = DataConfig() | |||
| model_config = ModelConfig() | |||
| train_config = TrainConfig() | |||
| rank_size = int(os.environ.get("RANK_SIZE", 1)) | |||
| if rank_size > 1: | |||
| context.reset_auto_parallel_context() | |||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True) | |||
| init() | |||
| rank_id = int(os.environ.get('RANK_ID')) | |||
| else: | |||
| rank_size = None | |||
| rank_id = None | |||
| ds_train = create_dataset(args_opt.dataset_path, | |||
| train_mode=True, | |||
| epochs=train_config.train_epochs, | |||
| batch_size=train_config.batch_size, | |||
| data_type=DataType(data_config.data_format), | |||
| rank_size=rank_size, | |||
| rank_id=rank_id) | |||
| model_builder = ModelBuilder(ModelConfig, TrainConfig) | |||
| train_net, eval_net = model_builder.get_train_eval_net() | |||
| auc_metric = AUCMetric() | |||
| model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric}) | |||
| time_callback = TimeMonitor(data_size=ds_train.get_dataset_size()) | |||
| loss_callback = LossCallBack(loss_file_path=args_opt.loss_file_name) | |||
| callback_list = [time_callback, loss_callback] | |||
| if train_config.save_checkpoint: | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=train_config.save_checkpoint_steps, | |||
| keep_checkpoint_max=train_config.keep_checkpoint_max) | |||
| ckpt_cb = ModelCheckpoint(prefix=train_config.ckpt_file_name_prefix, | |||
| directory=args_opt.ckpt_path, | |||
| config=config_ck) | |||
| callback_list.append(ckpt_cb) | |||
| if args_opt.do_eval: | |||
| ds_eval = create_dataset(args_opt.dataset_path, train_mode=False, | |||
| epochs=train_config.train_epochs, | |||
| batch_size=train_config.batch_size, | |||
| data_type=DataType(data_config.data_format)) | |||
| eval_callback = EvalCallBack(model, ds_eval, auc_metric, | |||
| eval_file_path=args_opt.eval_file_name) | |||
| callback_list.append(eval_callback) | |||
| model.train(train_config.train_epochs, ds_train, callbacks=callback_list) | |||
| @@ -0,0 +1,66 @@ | |||
| # Deeplab-V3 Example | |||
| ## Description | |||
This is an example of training DeepLabv3 with the PASCAL VOC 2012 dataset in MindSpore.
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the VOC 2012 dataset for training. | |||
| > Notes: | |||
| If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file. | |||
| ## Running the Example | |||
| ### Training | |||
| - Set options in config.py. | |||
| - Run `run_standalone_train.sh` for non-distributed training. | |||
| ``` bash | |||
| sh scripts/run_standalone_train.sh DEVICE_ID EPOCH_SIZE DATA_DIR | |||
| ``` | |||
| - Run `run_distribute_train.sh` for distributed training. | |||
| ``` bash | |||
| sh scripts/run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH | |||
| ``` | |||
| ### Evaluation | |||
Set options in evaluation_config.py. Make sure the 'data_file' and 'finetune_ckpt' are set to your own paths.
| - Run run_eval.sh for evaluation. | |||
| ``` bash | |||
| sh scripts/run_eval.sh DEVICE_ID DATA_DIR | |||
| ``` | |||
| ## Options and Parameters | |||
It contains the parameters of the Deeplab-V3 model and the options for training, which are set in config.py.
| ### Options: | |||
| ``` | |||
| config.py: | |||
| learning_rate Learning rate, default is 0.0014. | |||
| weight_decay Weight decay, default is 5e-5. | |||
| momentum Momentum, default is 0.97. | |||
| crop_size Image crop size [height, width] during training, default is 513. | |||
| eval_scales The scales to resize images for evaluation, default is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]. | |||
| output_stride The ratio of input to output spatial resolution, default is 16. | |||
| ignore_label Ignore label value, default is 255. | |||
| seg_num_classes Number of semantic classes, including the background class (if exists). | |||
| foreground classes + 1 background class in the PASCAL VOC 2012 dataset, default is 21. | |||
| fine_tune_batch_norm Fine tune the batch norm parameters or not, default is False. | |||
| atrous_rates Atrous rates for atrous spatial pyramid pooling, default is None. | |||
| decoder_output_stride The ratio of input to output spatial resolution when employing decoder | |||
| to refine segmentation results, default is None. | |||
| image_pyramid Input scales for multi-scale feature extraction, default is None. | |||
| ``` | |||
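For reference, a minimal sketch (assumptions: batch size 2 and 3-channel NCHW input; it mirrors the call in `evaluation.py` in this change) of how the network-related options map onto the `deeplabv3_resnet50` constructor:
```python
from src.config import config
from src.deeplabv3 import deeplabv3_resnet50

# Input shape is [N, C, H, W]; batch size 2 matches the default in the parameters below.
net = deeplabv3_resnet50(config.seg_num_classes,
                         [2, 3, config.crop_size, config.crop_size],
                         infer_scale_sizes=config.eval_scales,
                         atrous_rates=config.atrous_rates,
                         decoder_output_stride=config.decoder_output_stride,
                         output_stride=config.output_stride,
                         fine_tune_batch_norm=config.fine_tune_batch_norm,
                         image_pyramid=config.image_pyramid)
```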
| ### Parameters: | |||
| ``` | |||
| Parameters for dataset and network: | |||
| distribute Run distribute, default is false. | |||
| epoch_size Epoch size, default is 6. | |||
| batch_size batch size of input dataset: N, default is 2. | |||
| data_url Train/Evaluation data url, required. | |||
| checkpoint_url Checkpoint path, default is None. | |||
| enable_save_ckpt Enable save checkpoint, default is true. | |||
| save_checkpoint_steps Save checkpoint steps, default is 1000. | |||
| save_checkpoint_num Save checkpoint numbers, default is 1. | |||
| ``` | |||
| @@ -0,0 +1,53 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """evaluation.""" | |||
| import argparse | |||
| from mindspore import context | |||
| from mindspore import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from src.md_dataset import create_dataset | |||
| from src.losses import OhemLoss | |||
| from src.miou_precision import MiouPrecision | |||
| from src.deeplabv3 import deeplabv3_resnet50 | |||
| from src.config import config | |||
| parser = argparse.ArgumentParser(description="Deeplabv3 evaluation") | |||
| parser.add_argument('--epoch_size', type=int, default=2, help='Epoch size.') | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| parser.add_argument('--batch_size', type=int, default=2, help='Batch size.') | |||
| parser.add_argument('--data_url', required=True, default=None, help='Evaluation data url') | |||
| parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path') | |||
| args_opt = parser.parse_args() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) | |||
| print(args_opt) | |||
| if __name__ == "__main__": | |||
| args_opt.crop_size = config.crop_size | |||
| args_opt.base_size = config.crop_size | |||
| eval_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="eval") | |||
| net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size], | |||
| infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates, | |||
| decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride, | |||
| fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid) | |||
| param_dict = load_checkpoint(args_opt.checkpoint_url) | |||
| load_param_into_net(net, param_dict) | |||
| mIou = MiouPrecision(config.seg_num_classes) | |||
| metrics = {'mIou': mIou} | |||
| loss = OhemLoss(config.seg_num_classes, config.ignore_label) | |||
| model = Model(net, loss, metrics=metrics) | |||
| model.eval(eval_dataset) | |||
| @@ -0,0 +1,66 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "==============================================================================================================" | |||
| echo "Please run the scipt as: " | |||
| echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH" | |||
| echo "for example: bash run_distribute_train.sh 8 40 /path/zh-wiki/ /path/hccl.json" | |||
| echo "It is better to use absolute path." | |||
| echo "==============================================================================================================" | |||
| EPOCH_SIZE=$2 | |||
| DATA_DIR=$3 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$4 | |||
| export RANK_TABLE_FILE=$4 | |||
| export RANK_SIZE=$1 | |||
| cores=`cat /proc/cpuinfo|grep "processor" |wc -l` | |||
| echo "the number of logical core" $cores | |||
| avg_core_per_rank=`expr $cores \/ $RANK_SIZE` | |||
| core_gap=`expr $avg_core_per_rank \- 1` | |||
| echo "avg_core_per_rank" $avg_core_per_rank | |||
| echo "core_gap" $core_gap | |||
| for((i=0;i<RANK_SIZE;i++)) | |||
| do | |||
| start=`expr $i \* $avg_core_per_rank` | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| export DEPLOY_MODE=0 | |||
| export GE_USE_STATIC_MEMORY=1 | |||
| end=`expr $start \+ $core_gap` | |||
| cmdopt=$start"-"$end | |||
| rm -rf LOG$i | |||
| mkdir ./LOG$i | |||
| cp *.py ./LOG$i | |||
| cd ./LOG$i || exit | |||
| echo "start training for rank $i, device $DEVICE_ID" | |||
| mkdir -p ms_log | |||
| CUR_DIR=`pwd` | |||
| export GLOG_log_dir=${CUR_DIR}/ms_log | |||
| export GLOG_logtostderr=0 | |||
| env > env.log | |||
| taskset -c $cmdopt python ../train.py \ | |||
| --distribute="true" \ | |||
| --epoch_size=$EPOCH_SIZE \ | |||
| --device_id=$DEVICE_ID \ | |||
| --enable_save_ckpt="true" \ | |||
| --checkpoint_url="" \ | |||
| --save_checkpoint_steps=10000 \ | |||
| --save_checkpoint_num=1 \ | |||
| --data_url=$DATA_DIR > log.txt 2>&1 & | |||
| cd ../ | |||
| done | |||
| @@ -0,0 +1,32 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "==============================================================================================================" | |||
| echo "Please run the scipt as: " | |||
| echo "bash run_eval.sh DEVICE_ID DATA_DIR" | |||
| echo "for example: bash run_eval.sh /path/zh-wiki/ " | |||
| echo "==============================================================================================================" | |||
| DEVICE_ID=$1 | |||
| DATA_DIR=$2 | |||
| mkdir -p ms_log | |||
| CUR_DIR=`pwd` | |||
| export GLOG_log_dir=${CUR_DIR}/ms_log | |||
| export GLOG_logtostderr=0 | |||
| python evaluation.py \ | |||
| --device_id=$DEVICE_ID \ | |||
| --checkpoint_url="" \ | |||
| --data_url=$DATA_DIR > log.txt 2>&1 & | |||
| @@ -0,0 +1,38 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| echo "==============================================================================================================" | |||
| echo "Please run the scipt as: " | |||
| echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR" | |||
| echo "for example: bash run_standalone_train.sh 0 40 /path/zh-wiki/ " | |||
| echo "==============================================================================================================" | |||
| DEVICE_ID=$1 | |||
| EPOCH_SIZE=$2 | |||
| DATA_DIR=$3 | |||
| mkdir -p ms_log | |||
| CUR_DIR=`pwd` | |||
| export GLOG_log_dir=${CUR_DIR}/ms_log | |||
| export GLOG_logtostderr=0 | |||
| python train.py \ | |||
| --distribute="false" \ | |||
| --epoch_size=$EPOCH_SIZE \ | |||
| --device_id=$DEVICE_ID \ | |||
| --enable_save_ckpt="true" \ | |||
| --checkpoint_url="" \ | |||
| --save_checkpoint_steps=10000 \ | |||
| --save_checkpoint_num=1 \ | |||
| --data_url=$DATA_DIR > log.txt 2>&1 & | |||
| @@ -0,0 +1,23 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Init DeepLabv3.""" | |||
| from .deeplabv3 import ASPP, DeepLabV3, deeplabv3_resnet50 | |||
| from .backbone import * | |||
| __all__ = [ | |||
| "ASPP", "DeepLabV3", "deeplabv3_resnet50" | |||
| ] | |||
| __all__.extend(backbone.__all__) | |||
| @@ -0,0 +1,21 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Init backbone.""" | |||
| from .resnet_deeplab import Subsample, DepthwiseConv2dNative, SpaceToBatch, BatchToSpace, ResNetV1, \ | |||
| RootBlockBeta, resnet50_dl | |||
| __all__ = [ | |||
| "Subsample", "DepthwiseConv2dNative", "SpaceToBatch", "BatchToSpace", "ResNetV1", "RootBlockBeta", "resnet50_dl" | |||
| ] | |||
| @@ -0,0 +1,577 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ResNet based DeepLab.""" | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore._checkparam import twice | |||
| from mindspore.common.parameter import Parameter | |||
| def _conv_bn_relu(in_channel, | |||
| out_channel, | |||
| ksize, | |||
| stride=1, | |||
| padding=0, | |||
| dilation=1, | |||
| pad_mode="pad", | |||
| use_batch_statistics=False): | |||
| """Get a conv2d -> batchnorm -> relu layer""" | |||
| return nn.SequentialCell( | |||
| [nn.Conv2d(in_channel, | |||
| out_channel, | |||
| kernel_size=ksize, | |||
| stride=stride, | |||
| padding=padding, | |||
| dilation=dilation, | |||
| pad_mode=pad_mode), | |||
| nn.BatchNorm2d(out_channel, use_batch_statistics=use_batch_statistics), | |||
| nn.ReLU()] | |||
| ) | |||
| def _deep_conv_bn_relu(in_channel, | |||
| channel_multiplier, | |||
| ksize, | |||
| stride=1, | |||
| padding=0, | |||
| dilation=1, | |||
| pad_mode="pad", | |||
| use_batch_statistics=False): | |||
| """Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer""" | |||
| return nn.SequentialCell( | |||
| [DepthwiseConv2dNative(in_channel, | |||
| channel_multiplier, | |||
| kernel_size=ksize, | |||
| stride=stride, | |||
| padding=padding, | |||
| dilation=dilation, | |||
| pad_mode=pad_mode), | |||
| nn.BatchNorm2d(channel_multiplier * in_channel, use_batch_statistics=use_batch_statistics), | |||
| nn.ReLU()] | |||
| ) | |||
| def _stob_deep_conv_btos_bn_relu(in_channel, | |||
| channel_multiplier, | |||
| ksize, | |||
| space_to_batch_block_shape, | |||
| batch_to_space_block_shape, | |||
| paddings, | |||
| crops, | |||
| stride=1, | |||
| padding=0, | |||
| dilation=1, | |||
| pad_mode="pad", | |||
| use_batch_statistics=False): | |||
| """Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer""" | |||
| return nn.SequentialCell( | |||
| [SpaceToBatch(space_to_batch_block_shape, paddings), | |||
| DepthwiseConv2dNative(in_channel, | |||
| channel_multiplier, | |||
| kernel_size=ksize, | |||
| stride=stride, | |||
| padding=padding, | |||
| dilation=dilation, | |||
| pad_mode=pad_mode), | |||
| BatchToSpace(batch_to_space_block_shape, crops), | |||
| nn.BatchNorm2d(channel_multiplier * in_channel, use_batch_statistics=use_batch_statistics), | |||
| nn.ReLU()] | |||
| ) | |||
| def _stob_conv_btos_bn_relu(in_channel, | |||
| out_channel, | |||
| ksize, | |||
| space_to_batch_block_shape, | |||
| batch_to_space_block_shape, | |||
| paddings, | |||
| crops, | |||
| stride=1, | |||
| padding=0, | |||
| dilation=1, | |||
| pad_mode="pad", | |||
| use_batch_statistics=False): | |||
| """Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer""" | |||
| return nn.SequentialCell([SpaceToBatch(space_to_batch_block_shape, paddings), | |||
| nn.Conv2d(in_channel, | |||
| out_channel, | |||
| kernel_size=ksize, | |||
| stride=stride, | |||
| padding=padding, | |||
| dilation=dilation, | |||
| pad_mode=pad_mode), | |||
| BatchToSpace(batch_to_space_block_shape, crops), | |||
| nn.BatchNorm2d(out_channel, use_batch_statistics=use_batch_statistics), | |||
| nn.ReLU()] | |||
| ) | |||
| def _make_layer(block, | |||
| in_channels, | |||
| out_channels, | |||
| num_blocks, | |||
| stride=1, | |||
| rate=1, | |||
| multi_grads=None, | |||
| output_stride=None, | |||
| g_current_stride=2, | |||
| g_rate=1): | |||
| """Make layer for DeepLab-ResNet network.""" | |||
| if multi_grads is None: | |||
| multi_grads = [1] * num_blocks | |||
| # (stride == 2, num_blocks == 4 --> strides == [1, 1, 1, 2]) | |||
| strides = [1] * (num_blocks - 1) + [stride] | |||
| blocks = [] | |||
| if output_stride is not None: | |||
| if output_stride % 4 != 0: | |||
| raise ValueError('The output_stride needs to be a multiple of 4.') | |||
| output_stride //= 4 | |||
| for i_stride, _ in enumerate(strides): | |||
| if output_stride is not None and g_current_stride > output_stride: | |||
| raise ValueError('The target output_stride cannot be reached.') | |||
| if output_stride is not None and g_current_stride == output_stride: | |||
| b_rate = g_rate | |||
| b_stride = 1 | |||
| g_rate *= strides[i_stride] | |||
| else: | |||
| b_rate = rate | |||
| b_stride = strides[i_stride] | |||
| g_current_stride *= strides[i_stride] | |||
| blocks.append(block(in_channels=in_channels, | |||
| out_channels=out_channels, | |||
| stride=b_stride, | |||
| rate=b_rate, | |||
| multi_grad=multi_grads[i_stride])) | |||
| in_channels = out_channels | |||
| layer = nn.SequentialCell(blocks) | |||
| return layer, g_current_stride, g_rate | |||
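To see how `_make_layer` trades stride for dilation once the target `output_stride` is reached, here is a standalone trace of the same control flow (a sketch, not the MindSpore code itself; `output_stride` is assumed already divided by 4, as above):

```python
def trace_strides(strides, output_stride, g_current_stride=2, g_rate=1, rate=1):
    """Print the (stride, rate) each block would receive."""
    for s in strides:
        if g_current_stride == output_stride:
            b_stride, b_rate = 1, g_rate  # stop striding, dilate instead
            g_rate *= s
        else:
            b_stride, b_rate = s, rate
            g_current_stride *= s
        print(f"block: stride={b_stride}, rate={b_rate}")
    return g_current_stride, g_rate

# Before the target is reached, blocks keep their strides:
trace_strides([1, 1, 1, 2], output_stride=4)           # last block strides by 2
# Once g_current_stride == output_stride, strides turn into dilation rates:
trace_strides([1, 1, 1, 2], output_stride=4, g_current_stride=4)
```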
| class Subsample(nn.Cell): | |||
| """ | |||
| Subsample for DeepLab-ResNet. | |||
| Args: | |||
| factor (int): Sample factor. | |||
| Returns: | |||
| Tensor, the sub sampled tensor. | |||
| Examples: | |||
| >>> Subsample(2) | |||
| """ | |||
| def __init__(self, factor): | |||
| super(Subsample, self).__init__() | |||
| self.factor = factor | |||
| self.pool = nn.MaxPool2d(kernel_size=1, | |||
| stride=factor) | |||
| def construct(self, x): | |||
| if self.factor == 1: | |||
| return x | |||
| return self.pool(x) | |||
| class SpaceToBatch(nn.Cell): | |||
| def __init__(self, block_shape, paddings): | |||
| super(SpaceToBatch, self).__init__() | |||
| self.space_to_batch = P.SpaceToBatch(block_shape, paddings) | |||
| self.bs = block_shape | |||
| self.pd = paddings | |||
| def construct(self, x): | |||
| return self.space_to_batch(x) | |||
| class BatchToSpace(nn.Cell): | |||
| def __init__(self, block_shape, crops): | |||
| super(BatchToSpace, self).__init__() | |||
| self.batch_to_space = P.BatchToSpace(block_shape, crops) | |||
| self.bs = block_shape | |||
| self.cr = crops | |||
| def construct(self, x): | |||
| return self.batch_to_space(x) | |||
| class _DepthwiseConv2dNative(nn.Cell): | |||
| """Depthwise Conv2D Cell.""" | |||
| def __init__(self, | |||
| in_channels, | |||
| channel_multiplier, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| weight_init): | |||
| super(_DepthwiseConv2dNative, self).__init__() | |||
| self.in_channels = in_channels | |||
| self.channel_multiplier = channel_multiplier | |||
| self.kernel_size = kernel_size | |||
| self.stride = stride | |||
| self.pad_mode = pad_mode | |||
| self.padding = padding | |||
| self.dilation = dilation | |||
| self.group = group | |||
| if not (isinstance(in_channels, int) and in_channels > 0): | |||
| raise ValueError('Attr \'in_channels\' of \'DepthwiseConv2D\' Op passed ' | |||
| + str(in_channels) + ', should be an int greater than 0.') | |||
| if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \ | |||
| (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \ | |||
| kernel_size[0] < 1 or kernel_size[1] < 1: | |||
| raise ValueError('Attr \'kernel_size\' of \'DepthwiseConv2D\' Op passed ' | |||
| + str(self.kernel_size) + ', should be a tuple of two ints, each at least 1.') | |||
| self.weight = Parameter(initializer(weight_init, [1, in_channels // group, *kernel_size]), | |||
| name='weight') | |||
| def construct(self, *inputs): | |||
| """Must be overridden by all subclasses.""" | |||
| raise NotImplementedError | |||
| class DepthwiseConv2dNative(_DepthwiseConv2dNative): | |||
| """Depthwise Conv2D Cell.""" | |||
| def __init__(self, | |||
| in_channels, | |||
| channel_multiplier, | |||
| kernel_size, | |||
| stride=1, | |||
| pad_mode='same', | |||
| padding=0, | |||
| dilation=1, | |||
| group=1, | |||
| weight_init='normal'): | |||
| kernel_size = twice(kernel_size) | |||
| super(DepthwiseConv2dNative, self).__init__( | |||
| in_channels, | |||
| channel_multiplier, | |||
| kernel_size, | |||
| stride, | |||
| pad_mode, | |||
| padding, | |||
| dilation, | |||
| group, | |||
| weight_init) | |||
| self.depthwise_conv2d_native = P.DepthwiseConv2dNative(channel_multiplier=self.channel_multiplier, | |||
| kernel_size=self.kernel_size, | |||
| mode=3, | |||
| pad_mode=self.pad_mode, | |||
| pad=self.padding, | |||
| stride=self.stride, | |||
| dilation=self.dilation, | |||
| group=self.group) | |||
| def set_strategy(self, strategy): | |||
| self.depthwise_conv2d_native.set_strategy(strategy) | |||
| return self | |||
| def construct(self, x): | |||
| return self.depthwise_conv2d_native(x, self.weight) | |||
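The cell above wraps `P.DepthwiseConv2dNative`, whose defining property is that every input channel is convolved with its own filters, so the output carries `in_channels * channel_multiplier` channels. A pure-NumPy sketch of those semantics (illustrative only; stride 1, no padding, not the Ascend kernel):

```python
import numpy as np

def depthwise_conv(x, w):
    """x: (C, H, W); w: (C, M, kh, kw) -> out: (C*M, H-kh+1, W-kw+1)."""
    c, h, wd = x.shape
    _, m, kh, kw = w.shape
    out = np.zeros((c * m, h - kh + 1, wd - kw + 1))
    for ci in range(c):            # each input channel filtered independently
        for mi in range(m):        # channel_multiplier filters per channel
            for i in range(out.shape[1]):
                for j in range(out.shape[2]):
                    out[ci * m + mi, i, j] = np.sum(x[ci, i:i + kh, j:j + kw] * w[ci, mi])
    return out

x = np.random.rand(3, 8, 8)
w = np.random.rand(3, 2, 3, 3)
print(depthwise_conv(x, w).shape)  # (6, 6, 6): 3 channels x multiplier 2
```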
| class BottleneckV1(nn.Cell): | |||
| """ | |||
| ResNet V1 BottleneckV1 block definition. | |||
| Args: | |||
| in_channels (int): Input channel. | |||
| out_channels (int): Output channel. | |||
| stride (int): Stride size for the initial convolutional layer. Default: 1. | |||
| use_batch_statistics (bool): Use moving statistics in batch norm. Default: False. | |||
| use_batch_to_stob_and_btos (bool): Wrap the 3x3 conv in space-to-batch/batch-to-space. Default: False. | |||
| Returns: | |||
| Tensor, the ResNet unit's output. | |||
| Examples: | |||
| >>> BottleneckV1(3,256,stride=2) | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| stride=1, | |||
| use_batch_statistics=False, | |||
| use_batch_to_stob_and_btos=False): | |||
| super(BottleneckV1, self).__init__() | |||
| expansion = 4 | |||
| mid_channels = out_channels // expansion | |||
| self.conv_bn1 = _conv_bn_relu(in_channels, | |||
| mid_channels, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv_bn2 = _conv_bn_relu(mid_channels, | |||
| mid_channels, | |||
| ksize=3, | |||
| stride=stride, | |||
| padding=1, | |||
| dilation=1, | |||
| use_batch_statistics=use_batch_statistics) | |||
| if use_batch_to_stob_and_btos: | |||
| self.conv_bn2 = _stob_conv_btos_bn_relu(mid_channels, | |||
| mid_channels, | |||
| ksize=3, | |||
| stride=stride, | |||
| padding=0, | |||
| dilation=1, | |||
| space_to_batch_block_shape=2, | |||
| batch_to_space_block_shape=2, | |||
| paddings=[[2, 3], [2, 3]], | |||
| crops=[[0, 1], [0, 1]], | |||
| pad_mode="valid", | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv3 = nn.Conv2d(mid_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=1) | |||
| self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| if in_channels != out_channels: | |||
| conv = nn.Conv2d(in_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=stride) | |||
| bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| self.downsample = nn.SequentialCell([conv, bn]) | |||
| else: | |||
| self.downsample = Subsample(stride) | |||
| self.add = P.TensorAdd() | |||
| self.relu = nn.ReLU() | |||
| self.Reshape = P.Reshape() | |||
| def construct(self, x): | |||
| out = self.conv_bn1(x) | |||
| out = self.conv_bn2(out) | |||
| out = self.bn3(self.conv3(out)) | |||
| out = self.add(out, self.downsample(x)) | |||
| out = self.relu(out) | |||
| return out | |||
| class BottleneckV2(nn.Cell): | |||
| """ | |||
| ResNet bottleneck variant V2 block definition (identity residual). | |||
| Args: | |||
| in_channels (int): Input channel. | |||
| out_channels (int): Output channel. | |||
| stride (int): Stride size for the initial convolutional layer. Default: 1. | |||
| Returns: | |||
| Tensor, the ResNet unit's output. | |||
| Examples: | |||
| >>> BottleneckV2(3,256,stride=2) | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| stride=1, | |||
| use_batch_statistics=False, | |||
| use_batch_to_stob_and_btos=False, | |||
| dilation=1): | |||
| super(BottleneckV2, self).__init__() | |||
| expansion = 4 | |||
| mid_channels = out_channels // expansion | |||
| self.conv_bn1 = _conv_bn_relu(in_channels, | |||
| mid_channels, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv_bn2 = _conv_bn_relu(mid_channels, | |||
| mid_channels, | |||
| ksize=3, | |||
| stride=stride, | |||
| padding=1, | |||
| dilation=dilation, | |||
| use_batch_statistics=use_batch_statistics) | |||
| if use_batch_to_stob_and_btos: | |||
| self.conv_bn2 = _stob_conv_btos_bn_relu(mid_channels, | |||
| mid_channels, | |||
| ksize=3, | |||
| stride=stride, | |||
| padding=0, | |||
| dilation=1, | |||
| space_to_batch_block_shape=2, | |||
| batch_to_space_block_shape=2, | |||
| paddings=[[2, 3], [2, 3]], | |||
| crops=[[0, 1], [0, 1]], | |||
| pad_mode="valid", | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv3 = nn.Conv2d(mid_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=1) | |||
| self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| if in_channels != out_channels: | |||
| conv = nn.Conv2d(in_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=stride) | |||
| bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| self.downsample = nn.SequentialCell([conv, bn]) | |||
| else: | |||
| self.downsample = Subsample(stride) | |||
| self.add = P.TensorAdd() | |||
| self.relu = nn.ReLU() | |||
| def construct(self, x): | |||
| out = self.conv_bn1(x) | |||
| out = self.conv_bn2(out) | |||
| out = self.bn3(self.conv3(out)) | |||
| out = self.add(out, x) | |||
| out = self.relu(out) | |||
| return out | |||
| class BottleneckV3(nn.Cell): | |||
| """ | |||
| ResNet bottleneck variant V3 block definition. | |||
| Args: | |||
| in_channels (int): Input channel. | |||
| out_channels (int): Output channel. | |||
| stride (int): Stride size for the initial convolutional layer. Default: 1. | |||
| Returns: | |||
| Tensor, the ResNet unit's output. | |||
| Examples: | |||
| >>> BottleneckV3(3,256,stride=2) | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| stride=1, | |||
| use_batch_statistics=False): | |||
| super(BottleneckV3, self).__init__() | |||
| expansion = 4 | |||
| mid_channels = out_channels // expansion | |||
| self.conv_bn1 = _conv_bn_relu(in_channels, | |||
| mid_channels, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv_bn2 = _conv_bn_relu(mid_channels, | |||
| mid_channels, | |||
| ksize=3, | |||
| stride=stride, | |||
| padding=1, | |||
| dilation=1, | |||
| use_batch_statistics=use_batch_statistics) | |||
| self.conv3 = nn.Conv2d(mid_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=1) | |||
| self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| if in_channels != out_channels: | |||
| conv = nn.Conv2d(in_channels, | |||
| out_channels, | |||
| kernel_size=1, | |||
| stride=stride) | |||
| bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) | |||
| self.downsample = nn.SequentialCell([conv, bn]) | |||
| else: | |||
| self.downsample = Subsample(stride) | |||
| self.add = P.TensorAdd() | |||
| self.relu = nn.ReLU() | |||
| def construct(self, x): | |||
| out = self.conv_bn1(x) | |||
| out = self.conv_bn2(out) | |||
| out = self.bn3(self.conv3(out)) | |||
| out = self.add(out, self.downsample(x)) | |||
| out = self.relu(out) | |||
| return out | |||
| class ResNetV1(nn.Cell): | |||
| """ | |||
| ResNet V1 for DeepLab. | |||
| Args: | |||
| fine_tune_batch_norm (bool): Whether to fine-tune the batch norm parameters. Default: False. | |||
| Returns: | |||
| Tuple, output tensor tuple, (c2,c5). | |||
| Examples: | |||
| >>> ResNetV1(False) | |||
| """ | |||
| def __init__(self, fine_tune_batch_norm=False): | |||
| super(ResNetV1, self).__init__() | |||
| self.layer_root = nn.SequentialCell( | |||
| [RootBlockBeta(fine_tune_batch_norm), | |||
| nn.MaxPool2d(kernel_size=(3, 3), | |||
| stride=(2, 2), | |||
| pad_mode='same')]) | |||
| self.layer1_1 = BottleneckV1(128, 256, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer1_2 = BottleneckV2(256, 256, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer1_3 = BottleneckV3(256, 256, stride=2, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer2_1 = BottleneckV1(256, 512, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer2_2 = BottleneckV2(512, 512, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer2_3 = BottleneckV2(512, 512, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer2_4 = BottleneckV3(512, 512, stride=2, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_1 = BottleneckV1(512, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_2 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_3 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_4 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_5 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer3_6 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer4_1 = BottleneckV1(1024, 2048, stride=1, use_batch_to_stob_and_btos=True, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer4_2 = BottleneckV2(2048, 2048, stride=1, use_batch_to_stob_and_btos=True, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.layer4_3 = BottleneckV2(2048, 2048, stride=1, use_batch_to_stob_and_btos=True, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| def construct(self, x): | |||
| x = self.layer_root(x) | |||
| x = self.layer1_1(x) | |||
| c2 = self.layer1_2(x) | |||
| x = self.layer1_3(c2) | |||
| x = self.layer2_1(x) | |||
| x = self.layer2_2(x) | |||
| x = self.layer2_3(x) | |||
| x = self.layer2_4(x) | |||
| x = self.layer3_1(x) | |||
| x = self.layer3_2(x) | |||
| x = self.layer3_3(x) | |||
| x = self.layer3_4(x) | |||
| x = self.layer3_5(x) | |||
| x = self.layer3_6(x) | |||
| x = self.layer4_1(x) | |||
| x = self.layer4_2(x) | |||
| c5 = self.layer4_3(x) | |||
| return c2, c5 | |||
| class RootBlockBeta(nn.Cell): | |||
| """ | |||
| ResNet V1 beta root block definition. | |||
| Returns: | |||
| Tensor, the block unit's output. | |||
| Examples: | |||
| >>> RootBlockBeta() | |||
| """ | |||
| def __init__(self, fine_tune_batch_norm=False): | |||
| super(RootBlockBeta, self).__init__() | |||
| self.conv1 = _conv_bn_relu(3, 64, ksize=3, stride=2, padding=0, pad_mode="valid", | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.conv2 = _conv_bn_relu(64, 64, ksize=3, stride=1, padding=0, pad_mode="same", | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.conv3 = _conv_bn_relu(64, 128, ksize=3, stride=1, padding=0, pad_mode="same", | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| def construct(self, x): | |||
| x = self.conv1(x) | |||
| x = self.conv2(x) | |||
| x = self.conv3(x) | |||
| return x | |||
| def resnet50_dl(fine_tune_batch_norm=False): | |||
| return ResNetV1(fine_tune_batch_norm) | |||
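As a reading aid, the channel and cumulative-stride progression of this backbone, derived from the block definitions above (a sketch, not executable MindSpore code):

```python
# Output channels and cumulative stride after each stage of ResNetV1.
# layer3/layer4 keep stride 16; layer4 emulates further downsampling
# with space-to-batch blocks (dilation) instead of striding.
stages = [
    ("root (conv x3 + maxpool)", 128, 4),
    ("layer1 (c2 taken at stride 4)", 256, 8),
    ("layer2", 512, 16),
    ("layer3", 1024, 16),
    ("layer4 (c5)", 2048, 16),
]
for name, channels, stride in stages:
    print(f"{name}: {channels} channels, output stride {stride}")
```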
| @@ -0,0 +1,33 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| Network config settings, used in train.py and evaluation.py. | |||
| """ | |||
| from easydict import EasyDict as ed | |||
| config = ed({ | |||
| "learning_rate": 0.0014, | |||
| "weight_decay": 0.00005, | |||
| "momentum": 0.97, | |||
| "crop_size": 513, | |||
| "eval_scales": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75], | |||
| "atrous_rates": None, | |||
| "image_pyramid": None, | |||
| "output_stride": 16, | |||
| "fine_tune_batch_norm": False, | |||
| "ignore_label": 255, | |||
| "decoder_output_stride": None, | |||
| "seg_num_classes": 21 | |||
| }) | |||
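`EasyDict` exposes the keys above as attributes, which is presumably how `train.py` and `evaluation.py` read them. A minimal sketch (field names taken from the dict above):

```python
from easydict import EasyDict as ed

cfg = ed({"learning_rate": 0.0014, "crop_size": 513, "output_stride": 16})
assert cfg.learning_rate == cfg["learning_rate"]  # attribute and key access agree
print(cfg.crop_size, cfg.output_stride)
```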
| @@ -0,0 +1,457 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """DeepLabv3.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from .backbone.resnet_deeplab import _conv_bn_relu, resnet50_dl, _deep_conv_bn_relu, \ | |||
| DepthwiseConv2dNative, SpaceToBatch, BatchToSpace | |||
| class ASPPSampleBlock(nn.Cell): | |||
| """ASPP sample block.""" | |||
| def __init__(self, feature_shape, scale_size, output_stride): | |||
| super(ASPPSampleBlock, self).__init__() | |||
| sample_h = (feature_shape[0] * scale_size + 1) / output_stride + 1 | |||
| sample_w = (feature_shape[1] * scale_size + 1) / output_stride + 1 | |||
| self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True) | |||
| def construct(self, x): | |||
| return self.sample(x) | |||
| class ASPP(nn.Cell): | |||
| """ | |||
| ASPP model for DeepLabv3. | |||
| Args: | |||
| channel (int): Input channel. | |||
| depth (int): Output channel. | |||
| feature_shape (list): The shape of the feature map, [h, w]. | |||
| scale_sizes (list): Input scales for multi-scale feature extraction. | |||
| atrous_rates (list): Atrous rates for atrous spatial pyramid pooling. | |||
| output_stride (int): The ratio of input to output spatial resolution. | |||
| fine_tune_batch_norm (bool): Whether to fine-tune the batch norm parameters. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> ASPP(2048, 256, [14, 14], [1.0], [6], 16) | |||
| """ | |||
| def __init__(self, channel, depth, feature_shape, scale_sizes, | |||
| atrous_rates, output_stride, fine_tune_batch_norm=False): | |||
| super(ASPP, self).__init__() | |||
| self.aspp0 = _conv_bn_relu(channel, | |||
| depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.atrous_rates = [] | |||
| if atrous_rates is not None: | |||
| self.atrous_rates = atrous_rates | |||
| self.aspp_pointwise = _conv_bn_relu(channel, | |||
| depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.aspp_depth_depthwiseconv = DepthwiseConv2dNative(channel, | |||
| channel_multiplier=1, | |||
| kernel_size=3, | |||
| stride=1, | |||
| dilation=1, | |||
| pad_mode="valid") | |||
| self.aspp_depth_bn = nn.BatchNorm2d(1 * channel, use_batch_statistics=fine_tune_batch_norm) | |||
| self.aspp_depth_relu = nn.ReLU() | |||
| self.aspp_depths = [] | |||
| self.aspp_depth_spacetobatchs = [] | |||
| self.aspp_depth_batchtospaces = [] | |||
| for scale_size in scale_sizes: | |||
| aspp_scale_depth_size = np.ceil((feature_shape[0]*scale_size)/16) | |||
| if atrous_rates is None: | |||
| break | |||
| for rate in atrous_rates: | |||
| padding = 0 | |||
| for j in range(100): | |||
| padded_size = rate * j | |||
| if padded_size >= aspp_scale_depth_size + 2 * rate: | |||
| padding = padded_size - aspp_scale_depth_size - 2 * rate | |||
| break | |||
| paddings = [[rate, rate + int(padding)], | |||
| [rate, rate + int(padding)]] | |||
| self.aspp_depth_spacetobatch = SpaceToBatch(rate, paddings) | |||
| self.aspp_depth_spacetobatchs.append(self.aspp_depth_spacetobatch) | |||
| crops = [[0, int(padding)], [0, int(padding)]] | |||
| self.aspp_depth_batchtospace = BatchToSpace(rate, crops) | |||
| self.aspp_depth_batchtospaces.append(self.aspp_depth_batchtospace) | |||
| self.aspp_depths = nn.CellList(self.aspp_depths) | |||
| self.aspp_depth_spacetobatchs = nn.CellList(self.aspp_depth_spacetobatchs) | |||
| self.aspp_depth_batchtospaces = nn.CellList(self.aspp_depth_batchtospaces) | |||
| self.global_pooling = nn.AvgPool2d(kernel_size=(int(feature_shape[0]), int(feature_shape[1]))) | |||
| self.global_poolings = [] | |||
| for scale_size in scale_sizes: | |||
| pooling_h = np.ceil((feature_shape[0]*scale_size)/output_stride) | |||
| pooling_w = np.ceil((feature_shape[1]*scale_size)/output_stride) | |||
| self.global_poolings.append(nn.AvgPool2d(kernel_size=(int(pooling_h), int(pooling_w)))) | |||
| self.global_poolings = nn.CellList(self.global_poolings) | |||
| self.conv_bn = _conv_bn_relu(channel, | |||
| depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.samples = [] | |||
| for scale_size in scale_sizes: | |||
| self.samples.append(ASPPSampleBlock(feature_shape, scale_size, output_stride)) | |||
| self.samples = nn.CellList(self.samples) | |||
| self.feature_shape = feature_shape | |||
| self.concat = P.Concat(axis=1) | |||
| def construct(self, x, scale_index=0): | |||
| aspp0 = self.aspp0(x) | |||
| aspp1 = self.global_poolings[scale_index](x) | |||
| aspp1 = self.conv_bn(aspp1) | |||
| aspp1 = self.samples[scale_index](aspp1) | |||
| output = self.concat((aspp1, aspp0)) | |||
| for i in range(len(self.atrous_rates)): | |||
| aspp_i = self.aspp_depth_spacetobatchs[i + scale_index * len(self.atrous_rates)](x) | |||
| aspp_i = self.aspp_depth_depthwiseconv(aspp_i) | |||
| aspp_i = self.aspp_depth_batchtospaces[i + scale_index * len(self.atrous_rates)](aspp_i) | |||
| aspp_i = self.aspp_depth_bn(aspp_i) | |||
| aspp_i = self.aspp_depth_relu(aspp_i) | |||
| aspp_i = self.aspp_pointwise(aspp_i) | |||
| output = self.concat((output, aspp_i)) | |||
| return output | |||
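The concat above fixes the ASPP output width: one 1x1 branch, one global-pooling branch, and one depthwise branch per atrous rate, each `depth` channels wide. This is exactly the `depth * (2 + len(atrous_rates))` input that `fc1` in `SingleDeepLabV3` is built for. A one-line check (illustrative values):

```python
# Channel count of the ASPP concat, matching fc1's input width below.
depth, atrous_rates = 256, [6]
aspp_out = depth * (2 + len(atrous_rates))  # 1x1 + pooling + one per rate
print(aspp_out)  # 768
```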
| class DecoderSampleBlock(nn.Cell): | |||
| """Decoder sample block.""" | |||
| def __init__(self, feature_shape, scale_size=1.0, decoder_output_stride=4): | |||
| super(DecoderSampleBlock, self).__init__() | |||
| sample_h = (feature_shape[0] * scale_size + 1) / decoder_output_stride + 1 | |||
| sample_w = (feature_shape[1] * scale_size + 1) / decoder_output_stride + 1 | |||
| self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True) | |||
| def construct(self, x): | |||
| return self.sample(x) | |||
| class Decoder(nn.Cell): | |||
| """ | |||
| Decode module for DeepLabv3. | |||
| Args: | |||
| low_level_channel (int): Low level input channel | |||
| channel (int): Input channel. | |||
| depth (int): Output channel. | |||
| feature_shape (list): Feature map shape, [H, W]. | |||
| scale_sizes (list): Input scales for multi-scale feature extraction. | |||
| decoder_output_stride (int): The ratio of input to output spatial resolution. | |||
| fine_tune_batch_norm (bool): Whether to fine-tune the batch norm parameters. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> Decoder(256, 100, [56,56]) | |||
| """ | |||
| def __init__(self, | |||
| low_level_channel, | |||
| channel, | |||
| depth, | |||
| feature_shape, | |||
| scale_sizes, | |||
| decoder_output_stride, | |||
| fine_tune_batch_norm): | |||
| super(Decoder, self).__init__() | |||
| self.feature_projection = _conv_bn_relu(low_level_channel, 48, ksize=1, stride=1, | |||
| pad_mode="same", use_batch_statistics=fine_tune_batch_norm) | |||
| self.decoder_depth0 = _deep_conv_bn_relu(channel + 48, | |||
| channel_multiplier=1, | |||
| ksize=3, | |||
| stride=1, | |||
| pad_mode="same", | |||
| dilation=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.decoder_pointwise0 = _conv_bn_relu(channel + 48, | |||
| depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.decoder_depth1 = _deep_conv_bn_relu(depth, | |||
| channel_multiplier=1, | |||
| ksize=3, | |||
| stride=1, | |||
| pad_mode="same", | |||
| dilation=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.decoder_pointwise1 = _conv_bn_relu(depth, | |||
| depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.depth = depth | |||
| self.concat = P.Concat(axis=1) | |||
| self.samples = [] | |||
| for scale_size in scale_sizes: | |||
| self.samples.append(DecoderSampleBlock(feature_shape, scale_size, decoder_output_stride)) | |||
| self.samples = nn.CellList(self.samples) | |||
| def construct(self, x, low_level_feature, scale_index): | |||
| low_level_feature = self.feature_projection(low_level_feature) | |||
| low_level_feature = self.samples[scale_index](low_level_feature) | |||
| x = self.samples[scale_index](x) | |||
| output = self.concat((x, low_level_feature)) | |||
| output = self.decoder_depth0(output) | |||
| output = self.decoder_pointwise0(output) | |||
| output = self.decoder_depth1(output) | |||
| output = self.decoder_pointwise1(output) | |||
| return output | |||
| class SingleDeepLabV3(nn.Cell): | |||
| """ | |||
| DeepLabv3 Network. | |||
| Args: | |||
| num_classes (int): Class number. | |||
| feature_shape (list): Input image shape, [N,C,H,W]. | |||
| backbone (Cell): Backbone Network. | |||
| channel (int): Resnet output channel. | |||
| depth (int): ASPP block depth. | |||
| scale_sizes (list): Input scales for multi-scale feature extraction. | |||
| atrous_rates (list): Atrous rates for atrous spatial pyramid pooling. | |||
| decoder_output_stride (int): The ratio of input to output spatial resolution in the decoder. | |||
| output_stride (int): The ratio of input to output spatial resolution. | |||
| fine_tune_batch_norm (bool): Whether to fine-tune the batch norm parameters. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> SingleDeepLabV3(num_classes=10, | |||
| ...                 feature_shape=[1, 3, 224, 224], | |||
| ...                 backbone=resnet50_dl(), | |||
| ...                 channel=2048, | |||
| ...                 depth=256, | |||
| ...                 scale_sizes=[1.0], | |||
| ...                 atrous_rates=[6], | |||
| ...                 decoder_output_stride=4, | |||
| ...                 output_stride=16) | |||
| """ | |||
| def __init__(self, | |||
| num_classes, | |||
| feature_shape, | |||
| backbone, | |||
| channel, | |||
| depth, | |||
| scale_sizes, | |||
| atrous_rates, | |||
| decoder_output_stride, | |||
| output_stride, | |||
| fine_tune_batch_norm=False): | |||
| super(SingleDeepLabV3, self).__init__() | |||
| self.num_classes = num_classes | |||
| self.channel = channel | |||
| self.depth = depth | |||
| self.scale_sizes = [] | |||
| for scale_size in np.sort(scale_sizes): | |||
| self.scale_sizes.append(scale_size) | |||
| self.net = backbone | |||
| self.aspp = ASPP(channel=self.channel, | |||
| depth=self.depth, | |||
| feature_shape=[feature_shape[2], | |||
| feature_shape[3]], | |||
| scale_sizes=self.scale_sizes, | |||
| atrous_rates=atrous_rates, | |||
| output_stride=output_stride, | |||
| fine_tune_batch_norm=fine_tune_batch_norm) | |||
| self.aspp.add_flags(loop_can_unroll=True) | |||
| atrous_rates_len = 0 | |||
| if atrous_rates is not None: | |||
| atrous_rates_len = len(atrous_rates) | |||
| self.fc1 = _conv_bn_relu(depth * (2 + atrous_rates_len), depth, | |||
| ksize=1, | |||
| stride=1, | |||
| use_batch_statistics=fine_tune_batch_norm) | |||
| self.fc2 = nn.Conv2d(depth, | |||
| num_classes, | |||
| kernel_size=1, | |||
| stride=1, | |||
| has_bias=True) | |||
| self.upsample = P.ResizeBilinear((int(feature_shape[2]), | |||
| int(feature_shape[3])), | |||
| align_corners=True) | |||
| self.samples = [] | |||
| for scale_size in self.scale_sizes: | |||
| self.samples.append(SampleBlock(feature_shape, scale_size)) | |||
| self.samples = nn.CellList(self.samples) | |||
| self.feature_shape = [float(feature_shape[0]), float(feature_shape[1]), float(feature_shape[2]), | |||
| float(feature_shape[3])] | |||
| self.pad = P.Pad(((0, 0), (0, 0), (1, 1), (1, 1))) | |||
| self.dropout = nn.Dropout(keep_prob=0.9) | |||
| self.shape = P.Shape() | |||
| self.decoder_output_stride = decoder_output_stride | |||
| if decoder_output_stride is not None: | |||
| self.decoder = Decoder(low_level_channel=depth, | |||
| channel=depth, | |||
| depth=depth, | |||
| feature_shape=[feature_shape[2], | |||
| feature_shape[3]], | |||
| scale_sizes=self.scale_sizes, | |||
| decoder_output_stride=decoder_output_stride, | |||
| fine_tune_batch_norm=fine_tune_batch_norm) | |||
| def construct(self, x, scale_index=0): | |||
| x = (2.0 / 255.0) * x - 1.0 | |||
| x = self.pad(x) | |||
| low_level_feature, feature_map = self.net(x) | |||
| for scale_size in self.scale_sizes: | |||
| if scale_size * self.feature_shape[2] + 1.0 >= self.shape(x)[2] - 2: | |||
| output = self.aspp(feature_map, scale_index) | |||
| output = self.fc1(output) | |||
| if self.decoder_output_stride is not None: | |||
| output = self.decoder(output, low_level_feature, scale_index) | |||
| output = self.fc2(output) | |||
| output = self.samples[scale_index](output) | |||
| return output | |||
| scale_index += 1 | |||
| return feature_map | |||
| class SampleBlock(nn.Cell): | |||
| """Sample block.""" | |||
| def __init__(self, | |||
| feature_shape, | |||
| scale_size=1.0): | |||
| super(SampleBlock, self).__init__() | |||
| sample_h = np.ceil(float(feature_shape[2]) * scale_size) | |||
| sample_w = np.ceil(float(feature_shape[3]) * scale_size) | |||
| self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True) | |||
| def construct(self, x): | |||
| return self.sample(x) | |||
| class DeepLabV3(nn.Cell): | |||
| """DeepLabV3 model.""" | |||
| def __init__(self, num_classes, feature_shape, backbone, channel, depth, infer_scale_sizes, atrous_rates, | |||
| decoder_output_stride, output_stride, fine_tune_batch_norm, image_pyramid): | |||
| super(DeepLabV3, self).__init__() | |||
| self.infer_scale_sizes = [] | |||
| if infer_scale_sizes is not None: | |||
| self.infer_scale_sizes = infer_scale_sizes | |||
| if image_pyramid is None: | |||
| image_pyramid = [1.0] | |||
| self.image_pyramid = image_pyramid | |||
| scale_sizes = [] | |||
| for pyramid in image_pyramid: | |||
| scale_sizes.append(pyramid) | |||
| for scale in self.infer_scale_sizes: | |||
| scale_sizes.append(scale) | |||
| self.samples = [] | |||
| for scale_size in scale_sizes: | |||
| self.samples.append(SampleBlock(feature_shape, scale_size)) | |||
| self.samples = nn.CellList(self.samples) | |||
| self.deeplabv3 = SingleDeepLabV3(num_classes=num_classes, | |||
| feature_shape=feature_shape, | |||
| backbone=resnet50_dl(fine_tune_batch_norm), | |||
| channel=channel, | |||
| depth=depth, | |||
| scale_sizes=scale_sizes, | |||
| atrous_rates=atrous_rates, | |||
| decoder_output_stride=decoder_output_stride, | |||
| output_stride=output_stride, | |||
| fine_tune_batch_norm=fine_tune_batch_norm) | |||
| self.softmax = P.Softmax(axis=1) | |||
| self.concat = P.Concat(axis=2) | |||
| self.expand_dims = P.ExpandDims() | |||
| self.reduce_mean = P.ReduceMean() | |||
| self.sample_common = P.ResizeBilinear((int(feature_shape[2]), | |||
| int(feature_shape[3])), | |||
| align_corners=True) | |||
| def construct(self, x): | |||
| logits = () | |||
| if self.training: | |||
| if len(self.image_pyramid) >= 1: | |||
| if self.image_pyramid[0] == 1: | |||
| logits = self.deeplabv3(x) | |||
| else: | |||
| x1 = self.samples[0](x) | |||
| logits = self.deeplabv3(x1) | |||
| logits = self.sample_common(logits) | |||
| logits = self.expand_dims(logits, 2) | |||
| for i in range(len(self.image_pyramid) - 1): | |||
| x_i = self.samples[i + 1](x) | |||
| logits_i = self.deeplabv3(x_i) | |||
| logits_i = self.sample_common(logits_i) | |||
| logits_i = self.expand_dims(logits_i, 2) | |||
| logits = self.concat((logits, logits_i)) | |||
| logits = self.reduce_mean(logits, 2) | |||
| return logits | |||
| if len(self.infer_scale_sizes) >= 1: | |||
| infer_index = len(self.image_pyramid) | |||
| x1 = self.samples[infer_index](x) | |||
| logits = self.deeplabv3(x1) | |||
| logits = self.sample_common(logits) | |||
| logits = self.softmax(logits) | |||
| logits = self.expand_dims(logits, 2) | |||
| for i in range(len(self.infer_scale_sizes) - 1): | |||
| x_i = self.samples[i + 1 + infer_index](x) | |||
| logits_i = self.deeplabv3(x_i) | |||
| logits_i = self.sample_common(logits_i) | |||
| logits_i = self.softmax(logits_i) | |||
| logits_i = self.expand_dims(logits_i, 2) | |||
| logits = self.concat((logits, logits_i)) | |||
| logits = self.reduce_mean(logits, 2) | |||
| return logits | |||
| def deeplabv3_resnet50(num_classes, feature_shape, image_pyramid, | |||
| infer_scale_sizes, atrous_rates=None, decoder_output_stride=None, | |||
| output_stride=16, fine_tune_batch_norm=False): | |||
| """ | |||
| ResNet50 based DeepLabv3 network. | |||
| Args: | |||
| num_classes (int): Class number. | |||
| feature_shape (list): Input image shape, [N,C,H,W]. | |||
| image_pyramid (list): Input scales for multi-scale feature extraction. | |||
| atrous_rates (list): Atrous rates for atrous spatial pyramid pooling. | |||
| infer_scale_sizes (list): The scales to resize images to for inference. | |||
| decoder_output_stride (int): The ratio of input to output spatial resolution in the decoder. | |||
| output_stride (int): The ratio of input to output spatial resolution. | |||
| fine_tune_batch_norm (bool): Whether to fine-tune the batch norm parameters. | |||
| Returns: | |||
| Cell, cell instance of ResNet50 based DeepLabv3 neural network. | |||
| Examples: | |||
| >>> deeplabv3_resnet50(100, [1,3,224,224],[1.0],[1.0]) | |||
| """ | |||
| return DeepLabV3(num_classes=num_classes, | |||
| feature_shape=feature_shape, | |||
| backbone=resnet50_dl(fine_tune_batch_norm), | |||
| channel=2048, | |||
| depth=256, | |||
| infer_scale_sizes=infer_scale_sizes, | |||
| atrous_rates=atrous_rates, | |||
| decoder_output_stride=decoder_output_stride, | |||
| output_stride=output_stride, | |||
| fine_tune_batch_norm=fine_tune_batch_norm, | |||
| image_pyramid=image_pyramid) | |||
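A hedged usage sketch of the factory above (the argument values mirror config.py; actually running a forward pass additionally requires a configured MindSpore device context):

```python
# Illustrative only: a 21-class VOC-style segmentation network.
net = deeplabv3_resnet50(num_classes=21,
                         feature_shape=[1, 3, 513, 513],
                         image_pyramid=None,            # defaults to [1.0] inside DeepLabV3
                         infer_scale_sizes=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
                         atrous_rates=None,
                         decoder_output_stride=None,
                         output_stride=16,
                         fine_tune_batch_norm=False)
```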
| @@ -0,0 +1,84 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Process Dataset.""" | |||
| import abc | |||
| import os | |||
| import time | |||
| from .utils.adapter import get_raw_samples, read_image | |||
| class BaseDataset: | |||
| """ | |||
| Create dataset. | |||
| Args: | |||
| data_url (str): The path of data. | |||
| usage (str): Dataset split to use, 'train' or 'eval' (default='train'). | |||
| Returns: | |||
| Dataset. | |||
| """ | |||
| def __init__(self, data_url, usage): | |||
| self.data_url = data_url | |||
| self.usage = usage | |||
| self.cur_index = 0 | |||
| self.samples = [] | |||
| _s_time = time.time() | |||
| self._load_samples() | |||
| _e_time = time.time() | |||
| print(f"load samples success~, time cost = {_e_time - _s_time}") | |||
| def __getitem__(self, item): | |||
| sample = self.samples[item] | |||
| return self._next_data(sample) | |||
| def __len__(self): | |||
| return len(self.samples) | |||
| @staticmethod | |||
| def _next_data(sample): | |||
| image_path = sample[0] | |||
| mask_image_path = sample[1] | |||
| image = read_image(image_path) | |||
| mask_image = read_image(mask_image_path) | |||
| return [image, mask_image] | |||
| @abc.abstractmethod | |||
| def _load_samples(self): | |||
| pass | |||
| class HwVocRawDataset(BaseDataset): | |||
| """ | |||
| Create dataset with raw data. | |||
| Args: | |||
| data_url (str): The path of data. | |||
| usage (str): Dataset split to use, 'train' or 'eval' (default='train'). | |||
| Returns: | |||
| Dataset. | |||
| """ | |||
| def __init__(self, data_url, usage="train"): | |||
| super().__init__(data_url, usage) | |||
| def _load_samples(self): | |||
| try: | |||
| self.samples = get_raw_samples(os.path.join(self.data_url, self.usage)) | |||
| except Exception as e: | |||
| print("load HwVocRawDataset failed!!!") | |||
| raise e | |||
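A minimal usage sketch (the directory path is a placeholder; it must contain the `train`/`eval` layout that `get_raw_samples` expects):

```python
# Illustrative only: index the raw dataset directly.
dataset = HwVocRawDataset("/path/to/voc", usage="train")
print(len(dataset))
image, mask = dataset[0]  # numpy arrays decoded by read_image
```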
| @@ -0,0 +1,63 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """OhemLoss.""" | |||
| import mindspore.nn as nn | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| class OhemLoss(nn.Cell): | |||
| """Ohem loss cell.""" | |||
| def __init__(self, num, ignore_label): | |||
| super(OhemLoss, self).__init__() | |||
| self.mul = P.Mul() | |||
| self.shape = P.Shape() | |||
| self.one_hot = nn.OneHot(-1, num, 1.0, 0.0) | |||
| self.squeeze = P.Squeeze() | |||
| self.num = num | |||
| self.cross_entropy = P.SoftmaxCrossEntropyWithLogits() | |||
| self.mean = P.ReduceMean() | |||
| self.select = P.Select() | |||
| self.reshape = P.Reshape() | |||
| self.cast = P.Cast() | |||
| self.not_equal = P.NotEqual() | |||
| self.equal = P.Equal() | |||
| self.reduce_sum = P.ReduceSum(keep_dims=False) | |||
| self.fill = P.Fill() | |||
| self.transpose = P.Transpose() | |||
| self.ignore_label = ignore_label | |||
| self.loss_weight = 1.0 | |||
| def construct(self, logits, labels): | |||
| logits = self.transpose(logits, (0, 2, 3, 1)) | |||
| logits = self.reshape(logits, (-1, self.num)) | |||
| labels = F.cast(labels, mstype.int32) | |||
| labels = self.reshape(labels, (-1,)) | |||
| one_hot_labels = self.one_hot(labels) | |||
| losses = self.cross_entropy(logits, one_hot_labels)[0] | |||
| weights = self.cast(self.not_equal(labels, self.ignore_label), mstype.float32) * self.loss_weight | |||
| weighted_losses = self.mul(losses, weights) | |||
| loss = self.reduce_sum(weighted_losses, (0,)) | |||
| zeros = self.fill(mstype.float32, self.shape(weights), 0.0) | |||
| ones = self.fill(mstype.float32, self.shape(weights), 1.0) | |||
| present = self.select(self.equal(weights, zeros), zeros, ones) | |||
| present = self.reduce_sum(present, (0,)) | |||
| zeros = self.fill(mstype.float32, self.shape(present), 0.0) | |||
| min_control = self.fill(mstype.float32, self.shape(present), 1.0) | |||
| present = self.select(self.equal(present, zeros), min_control, present) | |||
| loss = loss / present | |||
| return loss | |||
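The select/fill sequence above implements a guarded masked mean: pixels labelled `ignore_label` get weight 0, the remaining losses are summed, and the divisor is clamped to at least 1 so an all-ignored batch cannot divide by zero. A NumPy sketch of the same arithmetic (illustrative, not the MindSpore graph):

```python
import numpy as np

def masked_mean_loss(losses, labels, ignore_label=255):
    """losses: per-pixel cross entropy; labels: flattened integer labels."""
    weights = (labels != ignore_label).astype(np.float32)
    present = max(weights.sum(), 1.0)  # clamp divisor, like the select/fill pair
    return float((losses * weights).sum() / present)

losses = np.array([0.5, 2.0, 1.0], np.float32)
labels = np.array([3, 255, 7])
print(masked_mean_loss(losses, labels))  # (0.5 + 1.0) / 2 = 0.75
```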
| @@ -0,0 +1,115 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Dataset module.""" | |||
| from PIL import Image | |||
| import mindspore.dataset as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| from .ei_dataset import HwVocRawDataset | |||
| from .utils import custom_transforms as tr | |||
| class DataTransform: | |||
| """Transform dataset for DeepLabV3.""" | |||
| def __init__(self, args, usage): | |||
| self.args = args | |||
| self.usage = usage | |||
| def __call__(self, image, label): | |||
| if self.usage == "train": | |||
| return self._train(image, label) | |||
| if self.usage == "eval": | |||
| return self._eval(image, label) | |||
| return None | |||
| def _train(self, image, label): | |||
| """ | |||
| Process training data. | |||
| Args: | |||
| image (list): Image data. | |||
| label (list): Dataset label. | |||
| """ | |||
| image = Image.fromarray(image) | |||
| label = Image.fromarray(label) | |||
| rsc_tr = tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size) | |||
| image, label = rsc_tr(image, label) | |||
| rhf_tr = tr.RandomHorizontalFlip() | |||
| image, label = rhf_tr(image, label) | |||
| nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) | |||
| image, label = nor_tr(image, label) | |||
| return image, label | |||
| def _eval(self, image, label): | |||
| """ | |||
| Process eval data. | |||
| Args: | |||
| image (list): Image data. | |||
| label (list): Dataset label. | |||
| """ | |||
| image = Image.fromarray(image) | |||
| label = Image.fromarray(label) | |||
| fsc_tr = tr.FixScaleCrop(crop_size=self.args.crop_size) | |||
| image, label = fsc_tr(image, label) | |||
| nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)) | |||
| image, label = nor_tr(image, label) | |||
| return image, label | |||
| def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train"): | |||
| """ | |||
| Create Dataset for DeepLabV3. | |||
| Args: | |||
| args (dict): Train parameters. | |||
| data_url (str): Dataset path. | |||
| epoch_num (int): Epoch of dataset (default=1). | |||
| batch_size (int): Batch size of dataset (default=1). | |||
| usage (str): Dataset split to use, 'train' or 'eval' (default='train'). | |||
| Returns: | |||
| Dataset. | |||
| """ | |||
| # create iter dataset | |||
| dataset = HwVocRawDataset(data_url, usage=usage) | |||
| dataset_len = len(dataset) | |||
| # wrapped with GeneratorDataset | |||
| dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None) | |||
| dataset.set_dataset_size(dataset_len) | |||
| dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage)) | |||
| channelswap_op = C.HWC2CHW() | |||
| dataset = dataset.map(input_columns="image", operations=channelswap_op) | |||
| # 1464 samples / batch_size 8 = 183 batches | |||
| # epoch_num here is the number of steps, not epochs | |||
| # 3658 steps / 183 = 20 epochs | |||
| if usage == "train": | |||
| dataset = dataset.shuffle(1464) | |||
| dataset = dataset.batch(batch_size, drop_remainder=(usage == "train")) | |||
| dataset = dataset.repeat(count=epoch_num) | |||
| dataset.map_model = 4 | |||
| return dataset | |||
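A hedged usage sketch of `create_dataset` (the `args` fields are the ones the transforms above actually read; the data path is a placeholder):

```python
from easydict import EasyDict as ed

# base_size/crop_size feed RandomScaleCrop / FixScaleCrop above.
args = ed({"base_size": 513, "crop_size": 513})
ds = create_dataset(args, "/path/to/voc", epoch_num=1, batch_size=8, usage="train")
for batch in ds.create_dict_iterator():
    images, labels = batch["image"], batch["label"]
    break
```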
| @@ -0,0 +1,72 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """mIou.""" | |||
| import numpy as np | |||
| from mindspore.nn.metrics.metric import Metric | |||
| def confuse_matrix(target, pred, n): | |||
| k = (target >= 0) & (target < n) | |||
| return np.bincount(n * target[k].astype(int) + pred[k], minlength=n ** 2).reshape(n, n) | |||
| def iou(hist): | |||
| denominator = hist.sum(1) + hist.sum(0) - np.diag(hist) | |||
| res = np.diag(hist) / np.where(denominator > 0, denominator, 1) | |||
| res = np.sum(res) / np.count_nonzero(denominator) | |||
| return res | |||
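A tiny worked example of the two helpers above (runnable as-is in this module):

```python
import numpy as np

target = np.array([0, 0, 1, 1, 2, 2])
pred = np.array([0, 1, 1, 1, 2, 0])
hist = confuse_matrix(target, pred, 3)
# hist[i, j] counts pixels of class i predicted as class j:
# [[1, 1, 0],
#  [0, 2, 0],
#  [1, 0, 1]]
print(iou(hist))  # mean of per-class IoU [1/3, 2/3, 1/2] = 0.5
```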
| class MiouPrecision(Metric): | |||
| """Calculate miou precision.""" | |||
| def __init__(self, num_class=21): | |||
| super(MiouPrecision, self).__init__() | |||
| if not isinstance(num_class, int): | |||
| raise TypeError('num_class should be integer type, but got {}'.format(type(num_class))) | |||
| if num_class < 1: | |||
| raise ValueError('num_class must be at least 1, but got {}'.format(num_class)) | |||
| self._num_class = num_class | |||
| self._mIoU = [] | |||
| self.clear() | |||
| def clear(self): | |||
| self._hist = np.zeros((self._num_class, self._num_class)) | |||
| self._mIoU = [] | |||
| def update(self, *inputs): | |||
| if len(inputs) != 2: | |||
| raise ValueError('Need 2 inputs (y_pred, y), but got {}'.format(len(inputs))) | |||
| predict_in = self._convert_data(inputs[0]) | |||
| label_in = self._convert_data(inputs[1]) | |||
| if predict_in.shape[1] != self._num_class: | |||
| raise ValueError('Class number mismatch: the metric expects {} classes, but the prediction ' | |||
| 'contains {} classes'.format(self._num_class, predict_in.shape[1])) | |||
| pred = np.argmax(predict_in, axis=1) | |||
| label = label_in | |||
| if len(label.flatten()) != len(pred.flatten()): | |||
| print('Skipping: len(gt) = {:d}, len(pred) = {:d}'.format(len(label.flatten()), len(pred.flatten()))) | |||
| raise ValueError('Shape mismatch: prediction has {} pixels but label has {} ' | |||
| 'pixels'.format(len(pred.flatten()), len(label.flatten()))) | |||
| self._hist = confuse_matrix(label.flatten(), pred.flatten(), self._num_class) | |||
| mIoUs = iou(self._hist) | |||
| self._mIoU.append(mIoUs) | |||
| def eval(self): | |||
| """ | |||
| Computes the mIoU categorical accuracy. | |||
| """ | |||
| mIoU = np.nanmean(self._mIoU) | |||
| print('mIoU = {}'.format(mIoU)) | |||
| return mIoU | |||
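A hedged usage sketch (assumes the base `Metric._convert_data` accepts NumPy arrays as well as Tensors, as it does elsewhere in this codebase):

```python
import numpy as np

metric = MiouPrecision(num_class=3)
logits = np.random.rand(2, 3, 4, 4)          # (N, num_class, H, W)
labels = np.random.randint(0, 3, (2, 4, 4))  # ground-truth class ids
metric.update(logits, labels)
print(metric.eval())
```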
| @@ -0,0 +1,14 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| @@ -0,0 +1,67 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Adapter dataset.""" | |||
| import fnmatch | |||
| import io | |||
| import os | |||
| import numpy as np | |||
| from PIL import Image | |||
| from ..utils import file_io | |||
| def get_raw_samples(data_url): | |||
| """ | |||
| Get dataset from raw data. | |||
| Args: | |||
| data_url (str): Dataset path. | |||
| Returns: | |||
| list, a file list. | |||
| """ | |||
| def _list_files(dir_path, pattern): | |||
| full_files = [] | |||
| _, _, files = next(file_io.walk(dir_path)) | |||
| for f in files: | |||
| if fnmatch.fnmatch(f.lower(), pattern.lower()): | |||
| full_files.append(os.path.join(dir_path, f)) | |||
| return full_files | |||
| img_files = _list_files(os.path.join(data_url, "Images"), "*.jpg") | |||
| seg_files = _list_files(os.path.join(data_url, "SegmentationClassRaw"), "*.png") | |||
| files = [] | |||
| for img_file in img_files: | |||
| _, file_name = os.path.split(img_file) | |||
| name, _ = os.path.splitext(file_name) | |||
| seg_file = os.path.join(data_url, "SegmentationClassRaw", ".".join([name, "png"])) | |||
| if seg_file in seg_files: | |||
| files.append([img_file, seg_file]) | |||
| return files | |||
| def read_image(img_path): | |||
| """ | |||
| Read image from file. | |||
| Args: | |||
| img_path (str): image path. | |||
| """ | |||
| img = file_io.read(img_path.strip(), binary=True) | |||
| data = io.BytesIO(img) | |||
| img = Image.open(data) | |||
| return np.array(img) | |||
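| # Hedged usage sketch (the directory layout is an assumption, not part of the original file): | |||
| #   samples = get_raw_samples("/path/to/dataset")  # expects Images/ and SegmentationClassRaw/ | |||
| #   img_path, seg_path = samples[0] | |||
| #   img = read_image(img_path)   # HWC uint8 array | |||
| #   seg = read_image(seg_path)   # HW label array | |||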
| @@ -0,0 +1,148 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Random process dataset.""" | |||
| import random | |||
| import numpy as np | |||
| from PIL import Image, ImageOps, ImageFilter | |||
| class Normalize: | |||
| """Normalize a tensor image with mean and standard deviation. | |||
| Args: | |||
| mean (tuple): means for each channel. | |||
| std (tuple): standard deviations for each channel. | |||
| """ | |||
| def __init__(self, mean=(0., 0., 0.), std=(1., 1., 1.)): | |||
| self.mean = mean | |||
| self.std = std | |||
| def __call__(self, img, mask): | |||
| img = np.array(img).astype(np.float32) | |||
| # Apply the stored statistics (a no-op with the default mean 0 and std 1). | |||
| img = (img - self.mean) / self.std | |||
| mask = np.array(mask).astype(np.float32) | |||
| return img, mask | |||
| class RandomHorizontalFlip: | |||
| """Randomly decide whether to horizontal flip.""" | |||
| def __call__(self, img, mask): | |||
| if random.random() < 0.5: | |||
| img = img.transpose(Image.FLIP_LEFT_RIGHT) | |||
| mask = mask.transpose(Image.FLIP_LEFT_RIGHT) | |||
| return img, mask | |||
| class RandomRotate: | |||
| """ | |||
| Randomly decide whether to rotate. | |||
| Args: | |||
| degree (float): The degree of rotate. | |||
| """ | |||
| def __init__(self, degree): | |||
| self.degree = degree | |||
| def __call__(self, img, mask): | |||
| rotate_degree = random.uniform(-1 * self.degree, self.degree) | |||
| img = img.rotate(rotate_degree, Image.BILINEAR) | |||
| mask = mask.rotate(rotate_degree, Image.NEAREST) | |||
| return img, mask | |||
| class RandomGaussianBlur: | |||
| """Randomly decide whether to filter image with gaussian blur.""" | |||
| def __call__(self, img, mask): | |||
| if random.random() < 0.5: | |||
| img = img.filter(ImageFilter.GaussianBlur( | |||
| radius=random.random())) | |||
| return img, mask | |||
| class RandomScaleCrop: | |||
| """Randomly decide whether to scale and crop image.""" | |||
| def __init__(self, base_size, crop_size, fill=0): | |||
| self.base_size = base_size | |||
| self.crop_size = crop_size | |||
| self.fill = fill | |||
| def __call__(self, img, mask): | |||
| # random scale (short edge) | |||
| short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0)) | |||
| w, h = img.size | |||
| if h > w: | |||
| ow = short_size | |||
| oh = int(1.0 * h * ow / w) | |||
| else: | |||
| oh = short_size | |||
| ow = int(1.0 * w * oh / h) | |||
| img = img.resize((ow, oh), Image.BILINEAR) | |||
| mask = mask.resize((ow, oh), Image.NEAREST) | |||
| # pad crop | |||
| if short_size < self.crop_size: | |||
| padh = self.crop_size - oh if oh < self.crop_size else 0 | |||
| padw = self.crop_size - ow if ow < self.crop_size else 0 | |||
| img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0) | |||
| mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=self.fill) | |||
| # random crop crop_size | |||
| w, h = img.size | |||
| x1 = random.randint(0, w - self.crop_size) | |||
| y1 = random.randint(0, h - self.crop_size) | |||
| img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) | |||
| mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) | |||
| return img, mask | |||
| class FixScaleCrop: | |||
| """Scale and crop image with fixing size.""" | |||
| def __init__(self, crop_size): | |||
| self.crop_size = crop_size | |||
| def __call__(self, img, mask): | |||
| w, h = img.size | |||
| if w > h: | |||
| oh = self.crop_size | |||
| ow = int(1.0 * w * oh / h) | |||
| else: | |||
| ow = self.crop_size | |||
| oh = int(1.0 * h * ow / w) | |||
| img = img.resize((ow, oh), Image.BILINEAR) | |||
| mask = mask.resize((ow, oh), Image.NEAREST) | |||
| # center crop | |||
| w, h = img.size | |||
| x1 = int(round((w - self.crop_size) / 2.)) | |||
| y1 = int(round((h - self.crop_size) / 2.)) | |||
| img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) | |||
| mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size)) | |||
| return img, mask | |||
| class FixedResize: | |||
| """Resize image with fixing size.""" | |||
| def __init__(self, size): | |||
| self.size = (size, size) | |||
| def __call__(self, img, mask): | |||
| assert img.size == mask.size | |||
| img = img.resize(self.size, Image.BILINEAR) | |||
| mask = mask.resize(self.size, Image.NEAREST) | |||
| return img, mask | |||
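| # Hedged usage sketch (illustrative only; fill=255 is an assumed ignore label): every | |||
| # transform above maps a (PIL image, PIL mask) pair to a new pair, so they chain directly. | |||
| #   def train_transform(img, mask, base_size=513, crop_size=513): | |||
| #       for t in (RandomHorizontalFlip(), RandomRotate(10), RandomGaussianBlur(), | |||
| #                 RandomScaleCrop(base_size, crop_size, fill=255), Normalize()): | |||
| #           img, mask = t(img, mask) | |||
| #       return img, mask | |||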
| @@ -0,0 +1,36 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """File operation module.""" | |||
| import os | |||
| def _is_obs(url): | |||
| return url.startswith("obs://") or url.startswith("s3://") | |||
| def read(url, binary=False): | |||
| if _is_obs(url): | |||
| # TODO read cloud file. | |||
| return None | |||
| with open(url, "rb" if binary else "r") as f: | |||
| return f.read() | |||
| def walk(url): | |||
| if _is_obs(url): | |||
| # TODO read cloud file. | |||
| return None | |||
| return os.walk(url) | |||
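| # Hedged usage sketch (local paths only; the OBS branches above return None for now): | |||
| #   text = read("labels.txt")             # str | |||
| #   raw = read("image.jpg", binary=True)  # bytes | |||
| #   root, dirs, files = next(walk("./data")) | |||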
| @@ -0,0 +1,92 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train.""" | |||
| import argparse | |||
| from mindspore import context | |||
| from mindspore.communication.management import init | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore import Model, ParallelMode | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.train.callback import Callback, CheckpointConfig, ModelCheckpoint, TimeMonitor | |||
| from src.md_dataset import create_dataset | |||
| from src.losses import OhemLoss | |||
| from src.deeplabv3 import deeplabv3_resnet50 | |||
| from src.config import config | |||
| parser = argparse.ArgumentParser(description="Deeplabv3 training") | |||
| parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.") | |||
| parser.add_argument('--epoch_size', type=int, default=6, help='Epoch size.') | |||
| parser.add_argument('--batch_size', type=int, default=2, help='Batch size.') | |||
| parser.add_argument('--data_url', required=True, default=None, help='Train data url') | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path') | |||
| parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.") | |||
| parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, default is 1000.") | |||
| parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.") | |||
| args_opt = parser.parse_args() | |||
| print(args_opt) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) | |||
| class LossCallBack(Callback): | |||
| """ | |||
| Monitor the loss in training. | |||
| Note: | |||
| If per_print_times is 0, the loss is not printed. | |||
| Args: | |||
| per_print_times (int): Print the loss every per_print_times steps. Default: 1. | |||
| """ | |||
| def __init__(self, per_print_times=1): | |||
| super(LossCallBack, self).__init__() | |||
| if not isinstance(per_print_times, int) or per_print_times < 0: | |||
| raise ValueError("print_step must be int and >= 0") | |||
| self._per_print_times = per_print_times | |||
| def step_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| if self._per_print_times != 0 and cb_params.cur_step_num % self._per_print_times == 0: | |||
| print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num, | |||
| str(cb_params.net_outputs))) | |||
| def model_fine_tune(flags, train_net, fix_weight_layer): | |||
| checkpoint_path = flags.checkpoint_url | |||
| if checkpoint_path is None: | |||
| return | |||
| param_dict = load_checkpoint(checkpoint_path) | |||
| load_param_into_net(train_net, param_dict) | |||
| for para in train_net.trainable_params(): | |||
| if fix_weight_layer in para.name: | |||
| para.requires_grad = False | |||
| if __name__ == "__main__": | |||
| if args_opt.distribute == "true": | |||
| context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True) | |||
| init() | |||
| args_opt.base_size = config.crop_size | |||
| args_opt.crop_size = config.crop_size | |||
| train_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="train") | |||
| dataset_size = train_dataset.get_dataset_size() | |||
| time_cb = TimeMonitor(data_size=dataset_size) | |||
| callback = [time_cb, LossCallBack()] | |||
| if args_opt.enable_save_ckpt == "true": | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps, | |||
| keep_checkpoint_max=args_opt.save_checkpoint_num) | |||
| ckpoint_cb = ModelCheckpoint(prefix='checkpoint_deeplabv3', config=config_ck) | |||
| callback.append(ckpoint_cb) | |||
| net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size], | |||
| infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates, | |||
| decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride, | |||
| fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid) | |||
| net.set_train() | |||
| model_fine_tune(args_opt, net, 'layer') | |||
| loss = OhemLoss(config.seg_num_classes, config.ignore_label) | |||
| # Exclude BatchNorm (beta/gamma), depthwise and bias parameters from weight decay. | |||
| opt = Momentum(filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and | |||
| 'depth' not in x.name and 'bias' not in x.name, net.trainable_params()), | |||
| learning_rate=config.learning_rate, momentum=config.momentum, | |||
| weight_decay=config.weight_decay) | |||
| model = Model(net, loss, opt) | |||
| model.train(args_opt.epoch_size, train_dataset, callback) | |||
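| # Hedged launch examples (paths are placeholders, not part of the original file): | |||
| #   python train.py --data_url=/path/to/train_data --device_id=0 | |||
| #   python train.py --distribute=true --data_url=/path/to/train_data --epoch_size=6 | |||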
| @@ -0,0 +1,142 @@ | |||
| # FasterRcnn Example | |||
| ## Description | |||
| FasterRcnn is a two-stage object detection network. It uses a region proposal network (RPN), which shares the convolution features of the whole image with the detection network, so region proposals are computed at almost no extra cost. The whole network further combines RPN and FastRcnn into a single network by sharing their convolution features. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset COCO2017. | |||
| - We use COCO2017 as the training dataset in this example by default, and you can also use your own datasets. | |||
| 1. If the coco dataset is used, **set the dataset to coco when running the script.** | |||
| Install Cython and pycocotools; you can also install mmcv to process data. | |||
| ``` | |||
| pip install Cython | |||
| pip install pycocotools | |||
| pip install mmcv | |||
| ``` | |||
| And change the COCO_ROOT and other settings you need in `config.py`. The directory structure is as follows: | |||
| ``` | |||
| . | |||
| └─cocodataset | |||
| ├─annotations | |||
| ├─instances_train2017.json | |||
| └─instances_val2017.json | |||
| ├─val2017 | |||
| └─train2017 | |||
| ``` | |||
| 2. If your own dataset is used, **set the dataset to other when running the script.** | |||
| Organize the dataset information into a TXT file; each row in the file is as follows: | |||
| ``` | |||
| train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 | |||
| ``` | |||
| Each row is an image annotation split by spaces: the first column is a relative image path, and the others are boxes and class information in the format [xmin,ymin,xmax,ymax,class]. Images are read from the path obtained by joining `IMAGE_DIR` (the dataset directory) with the relative path listed in `ANNO_PATH` (the TXT file path); both `IMAGE_DIR` and `ANNO_PATH` are set in `config.py`. | |||
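| The hypothetical helper below (not part of the example code) shows one way to parse such a row: | |||
| ``` | |||
| import numpy as np | |||
| def parse_annotation_line(line): | |||
|     rel_path, *boxes = line.strip().split(" ") | |||
|     # each box is "xmin,ymin,xmax,ymax,class" | |||
|     annos = np.array([list(map(int, b.split(","))) for b in boxes], dtype=np.int32) | |||
|     return rel_path, annos | |||
| ``` | |||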
| ## Example structure | |||
| ```shell | |||
| . | |||
| └─FasterRcnn | |||
| ├─README.md | |||
| ├─scripts | |||
| ├─run_download_process_data.sh | |||
| ├─run_standalone_train.sh | |||
| ├─run_train.sh | |||
| └─run_eval.sh | |||
| ├─src | |||
| ├─FasterRcnn | |||
| ├─__init__.py | |||
| ├─anchor_generator.py | |||
| ├─bbox_assign_sample.py | |||
| ├─bbox_assign_sample_stage2.py | |||
| ├─faster_rcnn_r50.py | |||
| ├─fpn_neck.py | |||
| ├─proposal_generator.py | |||
| ├─rcnn.py | |||
| ├─resnet50.py | |||
| ├─roi_align.py | |||
| └─rpn.py | |||
| ├─config.py | |||
| ├─dataset.py | |||
| ├─lr_schedule.py | |||
| ├─network_define.py | |||
| └─util.py | |||
| ├─eval.py | |||
| └─train.py | |||
| ``` | |||
| ## Running the example | |||
| ### Train | |||
| #### Usage | |||
| ``` | |||
| # distributed training | |||
| sh run_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_MODEL] | |||
| # standalone training | |||
| sh run_standalone_train.sh [PRETRAINED_MODEL] | |||
| ``` | |||
| > About rank_table.json, you can refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). | |||
| #### Result | |||
| Training results will be stored in the example path, in folders whose names begin with "train" or "train_parallel". You can find checkpoint files together with results like the following in the log file. | |||
| ``` | |||
| # distribute training result(8p) | |||
| epoch: 1 step: 7393, rpn_loss: 0.12054, rcnn_loss: 0.40601, rpn_cls_loss: 0.04025, rpn_reg_loss: 0.08032, rcnn_cls_loss: 0.25854, rcnn_reg_loss: 0.14746, total_loss: 0.52655 | |||
| epoch: 2 step: 7393, rpn_loss: 0.06561, rcnn_loss: 0.50293, rpn_cls_loss: 0.02587, rpn_reg_loss: 0.03967, rcnn_cls_loss: 0.35669, rcnn_reg_loss: 0.14624, total_loss: 0.56854 | |||
| epoch: 3 step: 7393, rpn_loss: 0.06940, rcnn_loss: 0.49658, rpn_cls_loss: 0.03769, rpn_reg_loss: 0.03165, rcnn_cls_loss: 0.36353, rcnn_reg_loss: 0.13318, total_loss: 0.56598 | |||
| ... | |||
| epoch: 10 step: 7393, rpn_loss: 0.03555, rcnn_loss: 0.32666, rpn_cls_loss: 0.00697, rpn_reg_loss: 0.02859, rcnn_cls_loss: 0.16125, rcnn_reg_loss: 0.16541, total_loss: 0.36221 | |||
| epoch: 11 step: 7393, rpn_loss: 0.19849, rcnn_loss: 0.47827, rpn_cls_loss: 0.11639, rpn_reg_loss: 0.08209, rcnn_cls_loss: 0.29712, rcnn_reg_loss: 0.18115, total_loss: 0.67676 | |||
| epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.00529, rpn_reg_loss: 0.00162, rcnn_cls_loss: 0.05426, rcnn_reg_loss: 0.04745, total_loss: 0.10859 | |||
| ``` | |||
| ### Infer | |||
| #### Usage | |||
| ``` | |||
| # eval | |||
| sh run_eval.sh [ANN_FILE] [CHECKPOINT_PATH] | |||
| ``` | |||
| > The checkpoint file can be produced during training. | |||
| #### Result | |||
| Inference results will be stored in the example path, in a folder named "eval". Under this, you can find results like the following in the log file. | |||
| ``` | |||
| Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.360 | |||
| Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.586 | |||
| Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.385 | |||
| Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.229 | |||
| Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402 | |||
| Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.441 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.299 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.487 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.515 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.346 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562 | |||
| Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.631 | |||
| ``` | |||
| @@ -0,0 +1,130 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Evaluation for FasterRcnn""" | |||
| import os | |||
| import argparse | |||
| import time | |||
| import random | |||
| import numpy as np | |||
| from pycocotools.coco import COCO | |||
| from mindspore import context, Tensor | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| import mindspore.dataset.engine as de | |||
| from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50 | |||
| from src.config import config | |||
| from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset | |||
| from src.util import coco_eval, bbox2result_1image, results2json | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description="FasterRcnn evaluation") | |||
| parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.") | |||
| parser.add_argument("--ann_file", type=str, default="val.json", help="Ann file, default is val.json.") | |||
| parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoint file path.") | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| args_opt = parser.parse_args() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) | |||
| def FasterRcnn_eval(dataset_path, ckpt_path, ann_file): | |||
| """FasterRcnn evaluation.""" | |||
| ds = create_fasterrcnn_dataset(dataset_path, batch_size=config.test_batch_size, | |||
| repeat_num=1, is_training=False) | |||
| net = Faster_Rcnn_Resnet50(config) | |||
| param_dict = load_checkpoint(ckpt_path) | |||
| load_param_into_net(net, param_dict) | |||
| net.set_train(False) | |||
| eval_iter = 0 | |||
| total = ds.get_dataset_size() | |||
| outputs = [] | |||
| dataset_coco = COCO(ann_file) | |||
| print("\n========================================\n") | |||
| print("total images num: ", total) | |||
| print("Processing, please wait a moment.") | |||
| max_num = 128 | |||
| for data in ds.create_dict_iterator(): | |||
| eval_iter = eval_iter + 1 | |||
| img_data = data['image'] | |||
| img_metas = data['image_shape'] | |||
| gt_bboxes = data['box'] | |||
| gt_labels = data['label'] | |||
| gt_num = data['valid_num'] | |||
| start = time.time() | |||
| # run net | |||
| output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num)) | |||
| end = time.time() | |||
| print("Iter {} cost time {}".format(eval_iter, end - start)) | |||
| # output | |||
| all_bbox = output[0] | |||
| all_label = output[1] | |||
| all_mask = output[2] | |||
| for j in range(config.test_batch_size): | |||
| all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :]) | |||
| all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :]) | |||
| all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :]) | |||
| all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :] | |||
| all_labels_tmp_mask = all_label_squee[all_mask_squee] | |||
| if all_bboxes_tmp_mask.shape[0] > max_num: | |||
| inds = np.argsort(-all_bboxes_tmp_mask[:, -1]) | |||
| inds = inds[:max_num] | |||
| all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds] | |||
| all_labels_tmp_mask = all_labels_tmp_mask[inds] | |||
| outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask, all_labels_tmp_mask, config.num_classes) | |||
| outputs.append(outputs_tmp) | |||
| eval_types = ["bbox"] | |||
| result_files = results2json(dataset_coco, outputs, "./results.pkl") | |||
| coco_eval(result_files, eval_types, dataset_coco, single_result=True) | |||
| if __name__ == '__main__': | |||
| prefix = "FasterRcnn_eval.mindrecord" | |||
| mindrecord_dir = config.mindrecord_dir | |||
| mindrecord_file = os.path.join(mindrecord_dir, prefix) | |||
| if not os.path.exists(mindrecord_file): | |||
| if not os.path.isdir(mindrecord_dir): | |||
| os.makedirs(mindrecord_dir) | |||
| if args_opt.dataset == "coco": | |||
| if os.path.isdir(config.coco_root): | |||
| print("Create Mindrecord.") | |||
| data_to_mindrecord_byte_image("coco", False, prefix, file_num=1) | |||
| print("Create Mindrecord Done, at {}".format(mindrecord_dir)) | |||
| else: | |||
| print("coco_root not exits.") | |||
| else: | |||
| if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): | |||
| print("Create Mindrecord.") | |||
| data_to_mindrecord_byte_image("other", False, prefix, file_num=1) | |||
| print("Create Mindrecord Done, at {}".format(mindrecord_dir)) | |||
| else: | |||
| print("IMAGE_DIR or ANNO_PATH not exits.") | |||
| print("Start Eval!") | |||
| FasterRcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file) | |||
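| # Hedged launch example (paths are placeholders, not part of the original file): | |||
| #   python eval.py --checkpoint_path=/path/to/faster_rcnn.ckpt --ann_file=/path/to/instances_val2017.json | |||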
| @@ -0,0 +1,69 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $PATH2 ] | |||
| then | |||
| echo "error: PRETRAINED_PATH=$PATH2 is not a file" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=8 | |||
| export RANK_SIZE=8 | |||
| export MINDSPORE_HCCL_CONFIG_PATH=$PATH1 | |||
| export RANK_TABLE_FILE=$PATH1 | |||
| for((i=0; i<${DEVICE_NUM}; i++)) | |||
| do | |||
| export DEVICE_ID=$i | |||
| export RANK_ID=$i | |||
| rm -rf ./train_parallel$i | |||
| mkdir ./train_parallel$i | |||
| cp ../*.py ./train_parallel$i | |||
| cp *.sh ./train_parallel$i | |||
| cp -r ../src ./train_parallel$i | |||
| cd ./train_parallel$i || exit | |||
| echo "start training for rank $RANK_ID, device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \ | |||
| --pre_trained=$PATH2 &> log & | |||
| cd .. | |||
| done | |||
| @@ -0,0 +1,65 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 2 ] | |||
| then | |||
| echo "Usage: sh run_eval.sh [ANN_FILE] [CHECKPOINT_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| PATH2=$(get_real_path $2) | |||
| echo $PATH1 | |||
| echo $PATH2 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: ANN_FILE=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| if [ ! -f $PATH2 ] | |||
| then | |||
| echo "error: CHECKPOINT_PATH=$PATH2 is not a file" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export RANK_SIZE=$DEVICE_NUM | |||
| export DEVICE_ID=0 | |||
| export RANK_ID=0 | |||
| if [ -d "eval" ]; | |||
| then | |||
| rm -rf ./eval | |||
| fi | |||
| mkdir ./eval | |||
| cp ../*.py ./eval | |||
| cp *.sh ./eval | |||
| cp -r ../src ./eval | |||
| cd ./eval || exit | |||
| env > env.log | |||
| echo "start eval for device $DEVICE_ID" | |||
| python eval.py --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,57 @@ | |||
| #!/bin/bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 1 ] | |||
| then | |||
| echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH]" | |||
| exit 1 | |||
| fi | |||
| get_real_path(){ | |||
| if [ "${1:0:1}" == "/" ]; then | |||
| echo "$1" | |||
| else | |||
| echo "$(realpath -m $PWD/$1)" | |||
| fi | |||
| } | |||
| PATH1=$(get_real_path $1) | |||
| echo $PATH1 | |||
| if [ ! -f $PATH1 ] | |||
| then | |||
| echo "error: PRETRAINED_PATH=$PATH1 is not a file" | |||
| exit 1 | |||
| fi | |||
| ulimit -u unlimited | |||
| export DEVICE_NUM=1 | |||
| export DEVICE_ID=0 | |||
| export RANK_ID=0 | |||
| export RANK_SIZE=1 | |||
| if [ -d "train" ]; | |||
| then | |||
| rm -rf ./train | |||
| fi | |||
| mkdir ./train | |||
| cp ../*.py ./train | |||
| cp *.sh ./train | |||
| cp -r ../src ./train | |||
| cd ./train || exit | |||
| echo "start training for device $DEVICE_ID" | |||
| env > env.log | |||
| python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log & | |||
| cd .. | |||
| @@ -0,0 +1,31 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn Init.""" | |||
| from .resnet50 import ResNetFea, ResidualBlockUsing | |||
| from .bbox_assign_sample import BboxAssignSample | |||
| from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn | |||
| from .fpn_neck import FeatPyramidNeck | |||
| from .proposal_generator import Proposal | |||
| from .rcnn import Rcnn | |||
| from .rpn import RPN | |||
| from .roi_align import SingleRoIExtractor | |||
| from .anchor_generator import AnchorGenerator | |||
| __all__ = [ | |||
| "ResNetFea", "BboxAssignSample", "BboxAssignSampleForRcnn", | |||
| "FeatPyramidNeck", "Proposal", "Rcnn", | |||
| "RPN", "SingleRoIExtractor", "AnchorGenerator", "ResidualBlockUsing" | |||
| ] | |||
| @@ -0,0 +1,84 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn anchor generator.""" | |||
| import numpy as np | |||
| class AnchorGenerator(): | |||
| """Anchor generator for FasterRcnn.""" | |||
| def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): | |||
| """Anchor generator init method.""" | |||
| self.base_size = base_size | |||
| self.scales = np.array(scales) | |||
| self.ratios = np.array(ratios) | |||
| self.scale_major = scale_major | |||
| self.ctr = ctr | |||
| self.base_anchors = self.gen_base_anchors() | |||
| def gen_base_anchors(self): | |||
| """Generate a single anchor.""" | |||
| w = self.base_size | |||
| h = self.base_size | |||
| if self.ctr is None: | |||
| x_ctr = 0.5 * (w - 1) | |||
| y_ctr = 0.5 * (h - 1) | |||
| else: | |||
| x_ctr, y_ctr = self.ctr | |||
| h_ratios = np.sqrt(self.ratios) | |||
| w_ratios = 1 / h_ratios | |||
| if self.scale_major: | |||
| ws = (w * w_ratios[:, None] * self.scales[None, :]).reshape(-1) | |||
| hs = (h * h_ratios[:, None] * self.scales[None, :]).reshape(-1) | |||
| else: | |||
| ws = (w * self.scales[:, None] * w_ratios[None, :]).reshape(-1) | |||
| hs = (h * self.scales[:, None] * h_ratios[None, :]).reshape(-1) | |||
| base_anchors = np.stack( | |||
| [ | |||
| x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), | |||
| x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) | |||
| ], | |||
| axis=-1).round() | |||
| return base_anchors | |||
| def _meshgrid(self, x, y, row_major=True): | |||
| """Generate grid.""" | |||
| xx = np.repeat(x.reshape(1, len(x)), len(y), axis=0).reshape(-1) | |||
| yy = np.repeat(y, len(x)) | |||
| if row_major: | |||
| return xx, yy | |||
| return yy, xx | |||
| def grid_anchors(self, featmap_size, stride=16): | |||
| """Generate anchor list.""" | |||
| base_anchors = self.base_anchors | |||
| feat_h, feat_w = featmap_size | |||
| shift_x = np.arange(0, feat_w) * stride | |||
| shift_y = np.arange(0, feat_h) * stride | |||
| shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) | |||
| shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1) | |||
| shifts = shifts.astype(base_anchors.dtype) | |||
| # first feat_w elements correspond to the first row of shifts | |||
| # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get | |||
| # shifted anchors (K, A, 4), reshape to (K*A, 4) | |||
| all_anchors = base_anchors[None, :, :] + shifts[:, None, :] | |||
| all_anchors = all_anchors.reshape(-1, 4) | |||
| return all_anchors | |||
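| # Hedged usage sketch (illustrative values, not part of the original file): 3 scales x 3 ratios | |||
| # give 9 base anchors, so an 8x8 feature map at stride 16 yields 8 * 8 * 9 = 576 anchors. | |||
| #   gen = AnchorGenerator(base_size=16, scales=[8, 16, 32], ratios=[0.5, 1.0, 2.0]) | |||
| #   anchors = gen.grid_anchors((8, 8), stride=16) | |||
| #   print(anchors.shape)  # (576, 4) | |||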
| @@ -0,0 +1,164 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn positive and negative sample screening for RPN.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| import mindspore.common.dtype as mstype | |||
| class BboxAssignSample(nn.Cell): | |||
| """ | |||
| Bbox assigner and sampler definition. | |||
| Args: | |||
| config (dict): Config. | |||
| batch_size (int): Batchsize. | |||
| num_bboxes (int): The anchor nums. | |||
| add_gt_as_proposals (bool): add gt bboxes as proposals flag. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| bbox_targets: bbox location, (batch_size, num_bboxes, 4) | |||
| bbox_weights: bbox weights, (batch_size, num_bboxes, 1) | |||
| labels: label for every bboxes, (batch_size, num_bboxes, 1) | |||
| label_weights: label weight for every bboxes, (batch_size, num_bboxes, 1) | |||
| Examples: | |||
| BboxAssignSample(config, 2, 1024, True) | |||
| """ | |||
| def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals): | |||
| super(BboxAssignSample, self).__init__() | |||
| cfg = config | |||
| self.batch_size = batch_size | |||
| self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mstype.float16) | |||
| self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mstype.float16) | |||
| self.min_pos_iou = Tensor(cfg.min_pos_iou, mstype.float16) | |||
| self.zero_thr = Tensor(0.0, mstype.float16) | |||
| self.num_bboxes = num_bboxes | |||
| self.num_gts = cfg.num_gts | |||
| self.num_expected_pos = cfg.num_expected_pos | |||
| self.num_expected_neg = cfg.num_expected_neg | |||
| self.add_gt_as_proposals = add_gt_as_proposals | |||
| if self.add_gt_as_proposals: | |||
| self.label_inds = Tensor(np.arange(1, self.num_gts + 1)) | |||
| self.concat = P.Concat(axis=0) | |||
| self.max_gt = P.ArgMaxWithValue(axis=0) | |||
| self.max_anchor = P.ArgMaxWithValue(axis=1) | |||
| self.sum_inds = P.ReduceSum() | |||
| self.iou = P.IOU() | |||
| self.greaterequal = P.GreaterEqual() | |||
| self.greater = P.Greater() | |||
| self.select = P.Select() | |||
| self.gatherND = P.GatherNd() | |||
| self.squeeze = P.Squeeze() | |||
| self.cast = P.Cast() | |||
| self.logicaland = P.LogicalAnd() | |||
| self.less = P.Less() | |||
| self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos) | |||
| self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg) | |||
| self.reshape = P.Reshape() | |||
| self.equal = P.Equal() | |||
| self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)) | |||
| self.scatterNdUpdate = P.ScatterNdUpdate() | |||
| self.scatterNd = P.ScatterNd() | |||
| self.logicalnot = P.LogicalNot() | |||
| self.tile = P.Tile() | |||
| self.zeros_like = P.ZerosLike() | |||
| self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32)) | |||
| self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) | |||
| self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16)) | |||
| self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) | |||
| self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) | |||
| def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids): | |||
| gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \ | |||
| (self.num_gts, 1)), (1, 4)), mstype.bool_), gt_bboxes_i, self.check_gt_one) | |||
| bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \ | |||
| (self.num_bboxes, 1)), (1, 4)), mstype.bool_), bboxes, self.check_anchor_two) | |||
| overlaps = self.iou(bboxes, gt_bboxes_i) | |||
| max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps) | |||
| _, max_overlaps_w_ac = self.max_anchor(overlaps) | |||
| neg_sample_iou_mask = self.logicaland(self.greaterequal(max_overlaps_w_gt, self.zero_thr), \ | |||
| self.less(max_overlaps_w_gt, self.neg_iou_thr)) | |||
| assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds) | |||
| pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.pos_iou_thr) | |||
| assigned_gt_inds3 = self.select(pos_sample_iou_mask, \ | |||
| max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2) | |||
| assigned_gt_inds4 = assigned_gt_inds3 | |||
| for j in range(self.num_gts): | |||
| max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1] | |||
| overlaps_w_gt_j = self.squeeze(overlaps[j:j+1:1, ::]) | |||
| pos_mask_j = self.logicaland(self.greaterequal(max_overlaps_w_ac_j, self.min_pos_iou), \ | |||
| self.equal(overlaps_w_gt_j, max_overlaps_w_ac_j)) | |||
| assigned_gt_inds4 = self.select(pos_mask_j, self.assigned_gt_ones + j, assigned_gt_inds4) | |||
| assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds4, self.assigned_gt_ignores) | |||
| pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0)) | |||
| pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16) | |||
| pos_check_valid = self.sum_inds(pos_check_valid, -1) | |||
| valid_pos_index = self.less(self.range_pos_size, pos_check_valid) | |||
| pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1)) | |||
| pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones | |||
| pos_assigned_gt_index = pos_assigned_gt_index * self.cast(valid_pos_index, mstype.int32) | |||
| pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, (self.num_expected_pos, 1)) | |||
| neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0)) | |||
| num_pos = self.cast(self.logicalnot(valid_pos_index), mstype.float16) | |||
| num_pos = self.sum_inds(num_pos, -1) | |||
| unvalid_pos_index = self.less(self.range_pos_size, num_pos) | |||
| valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) | |||
| pos_bboxes_ = self.gatherND(bboxes, pos_index) | |||
| pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index) | |||
| pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index) | |||
| pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_) | |||
| valid_pos_index = self.cast(valid_pos_index, mstype.int32) | |||
| valid_neg_index = self.cast(valid_neg_index, mstype.int32) | |||
| bbox_targets_total = self.scatterNd(pos_index, pos_bbox_targets_, (self.num_bboxes, 4)) | |||
| bbox_weights_total = self.scatterNd(pos_index, valid_pos_index, (self.num_bboxes,)) | |||
| labels_total = self.scatterNd(pos_index, pos_gt_labels, (self.num_bboxes,)) | |||
| total_index = self.concat((pos_index, neg_index)) | |||
| total_valid_index = self.concat((valid_pos_index, valid_neg_index)) | |||
| label_weights_total = self.scatterNd(total_index, total_valid_index, (self.num_bboxes,)) | |||
| return bbox_targets_total, self.cast(bbox_weights_total, mstype.bool_), \ | |||
| labels_total, self.cast(label_weights_total, mstype.bool_) | |||
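| # Hedged numpy sketch of the assignment rule implemented in construct() above | |||
| # (illustrative thresholds, not the config defaults; not part of the original file): | |||
| #   max_iou = overlaps.max(axis=0)        # best gt IoU per candidate box | |||
| #   argmax_gt = overlaps.argmax(axis=0) | |||
| #   assigned = -np.ones(num_bboxes, np.int32)  # -1 means ignore | |||
| #   assigned[max_iou < 0.3] = 0                # negative sample | |||
| #   pos = max_iou >= 0.7 | |||
| #   assigned[pos] = argmax_gt[pos] + 1         # 1-based gt index | |||
| #   for j in range(num_gts):                   # low-quality matching: each gt keeps | |||
| #       best = overlaps[j].argmax()            # its best-overlapping box if the IoU | |||
| #       if overlaps[j, best] >= min_pos_iou:   # clears min_pos_iou | |||
| #           assigned[best] = j + 1 | |||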
| @@ -0,0 +1,195 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn tpositive and negative sample screening for Rcnn.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| class BboxAssignSampleForRcnn(nn.Cell): | |||
| """ | |||
| Bbox assigner and sampler definition. | |||
| Args: | |||
| config (dict): Config. | |||
| batch_size (int): Batchsize. | |||
| num_bboxes (int): The anchor nums. | |||
| add_gt_as_proposals (bool): add gt bboxes as proposals flag. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| bbox_targets: bbox location, (batch_size, num_bboxes, 4) | |||
| bbox_weights: bbox weights, (batch_size, num_bboxes, 1) | |||
| labels: label for every bboxes, (batch_size, num_bboxes, 1) | |||
| label_weights: label weight for every bboxes, (batch_size, num_bboxes, 1) | |||
| Examples: | |||
| BboxAssignSampleForRcnn(config, 2, 1024, True) | |||
| """ | |||
| def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals): | |||
| super(BboxAssignSampleForRcnn, self).__init__() | |||
| cfg = config | |||
| self.batch_size = batch_size | |||
| self.neg_iou_thr = cfg.neg_iou_thr_stage2 | |||
| self.pos_iou_thr = cfg.pos_iou_thr_stage2 | |||
| self.min_pos_iou = cfg.min_pos_iou_stage2 | |||
| self.num_gts = cfg.num_gts | |||
| self.num_bboxes = num_bboxes | |||
| self.num_expected_pos = cfg.num_expected_pos_stage2 | |||
| self.num_expected_neg = cfg.num_expected_neg_stage2 | |||
| self.num_expected_total = cfg.num_expected_total_stage2 | |||
| self.add_gt_as_proposals = add_gt_as_proposals | |||
| self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32)) | |||
| self.add_gt_as_proposals_valid = Tensor(np.array(self.add_gt_as_proposals * np.ones(self.num_gts), | |||
| dtype=np.int32)) | |||
| self.concat = P.Concat(axis=0) | |||
| self.max_gt = P.ArgMaxWithValue(axis=0) | |||
| self.max_anchor = P.ArgMaxWithValue(axis=1) | |||
| self.sum_inds = P.ReduceSum() | |||
| self.iou = P.IOU() | |||
| self.greaterequal = P.GreaterEqual() | |||
| self.greater = P.Greater() | |||
| self.select = P.Select() | |||
| self.gatherND = P.GatherNd() | |||
| self.squeeze = P.Squeeze() | |||
| self.cast = P.Cast() | |||
| self.logicaland = P.LogicalAnd() | |||
| self.less = P.Less() | |||
| self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos) | |||
| self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg) | |||
| self.reshape = P.Reshape() | |||
| self.equal = P.Equal() | |||
| self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(10.0, 10.0, 5.0, 5.0)) | |||
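| # Note: unlike the RPN-stage sampler (stds of 1.0), the stage-2 encode normalizes | |||
| # regression deltas with stds (10.0, 10.0, 5.0, 5.0) before they reach the Rcnn head. | |||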
| self.concat_axis1 = P.Concat(axis=1) | |||
| self.logicalnot = P.LogicalNot() | |||
| self.tile = P.Tile() | |||
| # Check | |||
| self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16)) | |||
| self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16)) | |||
| # Init tensor | |||
| self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32)) | |||
| self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32)) | |||
| self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32)) | |||
| self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16)) | |||
| self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) | |||
| self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float16)) | |||
| self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8)) | |||
| self.reshape_shape_pos = (self.num_expected_pos, 1) | |||
| self.reshape_shape_neg = (self.num_expected_neg, 1) | |||
| self.scalar_zero = Tensor(0.0, dtype=mstype.float16) | |||
| self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=mstype.float16) | |||
| self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=mstype.float16) | |||
| self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=mstype.float16) | |||
| def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids): | |||
| gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \ | |||
| (self.num_gts, 1)), (1, 4)), mstype.bool_), \ | |||
| gt_bboxes_i, self.check_gt_one) | |||
| bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \ | |||
| (self.num_bboxes, 1)), (1, 4)), mstype.bool_), \ | |||
| bboxes, self.check_anchor_two) | |||
| # overlaps: axis 0 indexes gt boxes, axis 1 indexes candidate bboxes | |||
| overlaps = self.iou(bboxes, gt_bboxes_i) | |||
| max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps) | |||
| _, max_overlaps_w_ac = self.max_anchor(overlaps) | |||
| neg_sample_iou_mask = self.logicaland(self.greaterequal(max_overlaps_w_gt, | |||
| self.scalar_zero), | |||
| self.less(max_overlaps_w_gt, | |||
| self.scalar_neg_iou_thr)) | |||
| assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds) | |||
| pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.scalar_pos_iou_thr) | |||
| assigned_gt_inds3 = self.select(pos_sample_iou_mask, \ | |||
| max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2) | |||
| for j in range(self.num_gts): | |||
| max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1] | |||
| overlaps_w_ac_j = overlaps[j:j+1:1, ::] | |||
| temp1 = self.greaterequal(max_overlaps_w_ac_j, self.scalar_min_pos_iou) | |||
| temp2 = self.squeeze(self.equal(overlaps_w_ac_j, max_overlaps_w_ac_j)) | |||
| pos_mask_j = self.logicaland(temp1, temp2) | |||
| assigned_gt_inds3 = self.select(pos_mask_j, (j+1)*self.assigned_gt_ones, assigned_gt_inds3) | |||
| assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds3, self.assigned_gt_ignores) | |||
| bboxes = self.concat((gt_bboxes_i, bboxes)) | |||
| label_inds_valid = self.select(gt_valids, self.label_inds, self.gt_ignores) | |||
| label_inds_valid = label_inds_valid * self.add_gt_as_proposals_valid | |||
| assigned_gt_inds5 = self.concat((label_inds_valid, assigned_gt_inds5)) | |||
| # Get pos index | |||
| pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0)) | |||
| pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16) | |||
| pos_check_valid = self.sum_inds(pos_check_valid, -1) | |||
| valid_pos_index = self.less(self.range_pos_size, pos_check_valid) | |||
| pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1)) | |||
| num_pos = self.sum_inds(self.cast(self.logicalnot(valid_pos_index), mstype.float16), -1) | |||
| valid_pos_index = self.cast(valid_pos_index, mstype.int32) | |||
| pos_index = self.reshape(pos_index, self.reshape_shape_pos) | |||
| valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos) | |||
| pos_index = pos_index * valid_pos_index | |||
| pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones | |||
| pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos) | |||
| pos_assigned_gt_index = pos_assigned_gt_index * valid_pos_index | |||
| pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index) | |||
| # Get neg index | |||
| neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0)) | |||
| unvalid_pos_index = self.less(self.range_pos_size, num_pos) | |||
| valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) | |||
| neg_index = self.reshape(neg_index, self.reshape_shape_neg) | |||
| valid_neg_index = self.cast(valid_neg_index, mstype.int32) | |||
| valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg) | |||
| neg_index = neg_index * valid_neg_index | |||
| pos_bboxes_ = self.gatherND(bboxes, pos_index) | |||
| neg_bboxes_ = self.gatherND(bboxes, neg_index) | |||
| pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos) | |||
| pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index) | |||
| pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_) | |||
| total_bboxes = self.concat((pos_bboxes_, neg_bboxes_)) | |||
| total_deltas = self.concat((pos_bbox_targets_, self.bboxs_neg_mask)) | |||
| total_labels = self.concat((pos_gt_labels, self.labels_neg_mask)) | |||
| valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos) | |||
| valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg) | |||
| total_mask = self.concat((valid_pos_index, valid_neg_index)) | |||
| return total_bboxes, total_deltas, total_labels, total_mask | |||
| @@ -0,0 +1,425 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn based on ResNet50.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import functional as F | |||
| from .resnet50 import ResNetFea, ResidualBlockUsing | |||
| from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn | |||
| from .fpn_neck import FeatPyramidNeck | |||
| from .proposal_generator import Proposal | |||
| from .rcnn import Rcnn | |||
| from .rpn import RPN | |||
| from .roi_align import SingleRoIExtractor | |||
| from .anchor_generator import AnchorGenerator | |||
| class Faster_Rcnn_Resnet50(nn.Cell): | |||
| """ | |||
| FasterRcnn Network. | |||
| Note: | |||
| backbone = resnet50 | |||
| Returns: | |||
| Tuple, tuple of output tensor. | |||
| rpn_loss: Scalar, Total loss of RPN subnet. | |||
| rcnn_loss: Scalar, Total loss of RCNN subnet. | |||
| rpn_cls_loss: Scalar, Classification loss of RPN subnet. | |||
| rpn_reg_loss: Scalar, Regression loss of RPN subnet. | |||
| rcnn_cls_loss: Scalar, Classification loss of RCNN subnet. | |||
| rcnn_reg_loss: Scalar, Regression loss of RCNN subnet. | |||
| Examples: | |||
| net = Faster_Rcnn_Resnet50() | |||
| """ | |||
| def __init__(self, config): | |||
| super(Faster_Rcnn_Resnet50, self).__init__() | |||
| self.train_batch_size = config.batch_size | |||
| self.num_classes = config.num_classes | |||
| self.anchor_scales = config.anchor_scales | |||
| self.anchor_ratios = config.anchor_ratios | |||
| self.anchor_strides = config.anchor_strides | |||
| self.target_means = tuple(config.rcnn_target_means) | |||
| self.target_stds = tuple(config.rcnn_target_stds) | |||
| # Anchor generator | |||
| anchor_base_sizes = None | |||
| self.anchor_base_sizes = list( | |||
| self.anchor_strides) if anchor_base_sizes is None else anchor_base_sizes | |||
| self.anchor_generators = [] | |||
| for anchor_base in self.anchor_base_sizes: | |||
| self.anchor_generators.append( | |||
| AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios)) | |||
| self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) | |||
| featmap_sizes = config.feature_shapes | |||
| assert len(featmap_sizes) == len(self.anchor_generators) | |||
| self.anchor_list = self.get_anchors(featmap_sizes) | |||
| # Backbone resnet50 | |||
| self.backbone = ResNetFea(ResidualBlockUsing, | |||
| config.resnet_block, | |||
| config.resnet_in_channels, | |||
| config.resnet_out_channels, | |||
| False) | |||
| # Fpn | |||
| self.fpn_neck = FeatPyramidNeck(config.fpn_in_channels, | |||
| config.fpn_out_channels, | |||
| config.fpn_num_outs) | |||
| # Rpn and rpn loss | |||
| self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8)) | |||
| self.rpn_with_loss = RPN(config, | |||
| self.train_batch_size, | |||
| config.rpn_in_channels, | |||
| config.rpn_feat_channels, | |||
| config.num_anchors, | |||
| config.rpn_cls_out_channels) | |||
| # Proposal | |||
| self.proposal_generator = Proposal(config, | |||
| self.train_batch_size, | |||
| config.activate_num_classes, | |||
| config.use_sigmoid_cls) | |||
| self.proposal_generator.set_train_local(config, True) | |||
| self.proposal_generator_test = Proposal(config, | |||
| config.test_batch_size, | |||
| config.activate_num_classes, | |||
| config.use_sigmoid_cls) | |||
| self.proposal_generator_test.set_train_local(config, False) | |||
| # Assign and sampler stage two | |||
| self.bbox_assigner_sampler_for_rcnn = BboxAssignSampleForRcnn(config, self.train_batch_size, | |||
| config.num_bboxes_stage2, True) | |||
| self.decode = P.BoundingBoxDecode(max_shape=(768, 1280), means=self.target_means, \ | |||
| stds=self.target_stds) | |||
| # Roi | |||
| self.roi_align = SingleRoIExtractor(config, | |||
| config.roi_layer, | |||
| config.roi_align_out_channels, | |||
| config.roi_align_featmap_strides, | |||
| self.train_batch_size, | |||
| config.roi_align_finest_scale) | |||
| self.roi_align.set_train_local(config, True) | |||
| self.roi_align_test = SingleRoIExtractor(config, | |||
| config.roi_layer, | |||
| config.roi_align_out_channels, | |||
| config.roi_align_featmap_strides, | |||
| 1, | |||
| config.roi_align_finest_scale) | |||
| self.roi_align_test.set_train_local(config, False) | |||
| # Rcnn | |||
| self.rcnn = Rcnn(config, config.rcnn_in_channels * config.roi_layer['out_size'] * config.roi_layer['out_size'], | |||
| self.train_batch_size, self.num_classes) | |||
| # Op declare | |||
| self.squeeze = P.Squeeze() | |||
| self.cast = P.Cast() | |||
| self.concat = P.Concat(axis=0) | |||
| self.concat_1 = P.Concat(axis=1) | |||
| self.concat_2 = P.Concat(axis=2) | |||
| self.reshape = P.Reshape() | |||
| self.select = P.Select() | |||
| self.greater = P.Greater() | |||
| self.transpose = P.Transpose() | |||
| # Test mode | |||
| self.test_batch_size = config.test_batch_size | |||
| self.split = P.Split(axis=0, output_num=self.test_batch_size) | |||
| self.split_shape = P.Split(axis=0, output_num=4) | |||
| self.split_scores = P.Split(axis=1, output_num=self.num_classes) | |||
| self.split_cls = P.Split(axis=0, output_num=self.num_classes-1) | |||
| self.tile = P.Tile() | |||
| self.gather = P.GatherNd() | |||
| self.rpn_max_num = config.rpn_max_num | |||
| self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(np.float16)) | |||
| self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool_) | |||
| self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool_) | |||
| self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask, | |||
| self.ones_mask, self.zeros_mask), axis=1)) | |||
| self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask, | |||
| self.ones_mask, self.ones_mask, self.zeros_mask), axis=1)) | |||
| self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_score_thr) | |||
| self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * 0) | |||
| self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(np.float16) * -1) | |||
| self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_iou_thr) | |||
| self.test_max_per_img = config.test_max_per_img | |||
| self.nms_test = P.NMSWithMask(config.test_iou_thr) | |||
| self.softmax = P.Softmax(axis=1) | |||
| self.logicand = P.LogicalAnd() | |||
| self.oneslike = P.OnesLike() | |||
| self.test_topk = P.TopK(sorted=True) | |||
| self.test_num_proposal = self.test_batch_size * self.rpn_max_num | |||
| # Improve speed | |||
| self.concat_start = min(self.num_classes - 2, 55) | |||
| self.concat_end = (self.num_classes - 1) | |||
| # Init tensor | |||
| roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i, | |||
| dtype=np.float16) for i in range(self.train_batch_size)] | |||
| roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=np.float16) \ | |||
| for i in range(self.test_batch_size)] | |||
| self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index)) | |||
| self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test)) | |||
| def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids): | |||
| x = self.backbone(img_data) | |||
| x = self.fpn_neck(x) | |||
| rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(x, | |||
| img_metas, | |||
| self.anchor_list, | |||
| gt_bboxes, | |||
| self.gt_labels_stage1, | |||
| gt_valids) | |||
| if self.training: | |||
| proposal, proposal_mask = self.proposal_generator(cls_score, bbox_pred, self.anchor_list) | |||
| else: | |||
| proposal, proposal_mask = self.proposal_generator_test(cls_score, bbox_pred, self.anchor_list) | |||
| gt_labels = self.cast(gt_labels, mstype.int32) | |||
| gt_valids = self.cast(gt_valids, mstype.int32) | |||
| bboxes_tuple = () | |||
| deltas_tuple = () | |||
| labels_tuple = () | |||
| mask_tuple = () | |||
| if self.training: | |||
| for i in range(self.train_batch_size): | |||
| gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::]) | |||
| gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::]) | |||
| gt_labels_i = self.cast(gt_labels_i, mstype.uint8) | |||
| gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::]) | |||
| gt_valids_i = self.cast(gt_valids_i, mstype.bool_) | |||
| bboxes, deltas, labels, mask = self.bbox_assigner_sampler_for_rcnn(gt_bboxes_i, | |||
| gt_labels_i, | |||
| proposal_mask[i], | |||
| proposal[i][::, 0:4:1], | |||
| gt_valids_i) | |||
| bboxes_tuple += (bboxes,) | |||
| deltas_tuple += (deltas,) | |||
| labels_tuple += (labels,) | |||
| mask_tuple += (mask,) | |||
| bbox_targets = self.concat(deltas_tuple) | |||
| rcnn_labels = self.concat(labels_tuple) | |||
| bbox_targets = F.stop_gradient(bbox_targets) | |||
| rcnn_labels = F.stop_gradient(rcnn_labels) | |||
| rcnn_labels = self.cast(rcnn_labels, mstype.int32) | |||
| else: | |||
| mask_tuple += proposal_mask | |||
| bbox_targets = proposal_mask | |||
| rcnn_labels = proposal_mask | |||
| for p_i in proposal: | |||
| bboxes_tuple += (p_i[::, 0:4:1],) | |||
| if self.training: | |||
| if self.train_batch_size > 1: | |||
| bboxes_all = self.concat(bboxes_tuple) | |||
| else: | |||
| bboxes_all = bboxes_tuple[0] | |||
| rois = self.concat_1((self.roi_align_index_tensor, bboxes_all)) | |||
| else: | |||
| if self.test_batch_size > 1: | |||
| bboxes_all = self.concat(bboxes_tuple) | |||
| else: | |||
| bboxes_all = bboxes_tuple[0] | |||
| rois = self.concat_1((self.roi_align_index_test_tensor, bboxes_all)) | |||
| rois = self.cast(rois, mstype.float32) | |||
| rois = F.stop_gradient(rois) | |||
| if self.training: | |||
| roi_feats = self.roi_align(rois, | |||
| self.cast(x[0], mstype.float32), | |||
| self.cast(x[1], mstype.float32), | |||
| self.cast(x[2], mstype.float32), | |||
| self.cast(x[3], mstype.float32)) | |||
| else: | |||
| roi_feats = self.roi_align_test(rois, | |||
| self.cast(x[0], mstype.float32), | |||
| self.cast(x[1], mstype.float32), | |||
| self.cast(x[2], mstype.float32), | |||
| self.cast(x[3], mstype.float32)) | |||
| roi_feats = self.cast(roi_feats, mstype.float16) | |||
| rcnn_masks = self.concat(mask_tuple) | |||
| rcnn_masks = F.stop_gradient(rcnn_masks) | |||
| rcnn_mask_squeeze = self.squeeze(self.cast(rcnn_masks, mstype.bool_)) | |||
| rcnn_loss, rcnn_cls_loss, rcnn_reg_loss, _ = self.rcnn(roi_feats, | |||
| bbox_targets, | |||
| rcnn_labels, | |||
| rcnn_mask_squeeze) | |||
| output = () | |||
| if self.training: | |||
| output += (rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, rcnn_cls_loss, rcnn_reg_loss) | |||
| else: | |||
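| # In inference mode self.rcnn returns logits rather than losses (see | |||
| # Rcnn.construct), so rcnn_cls_loss and rcnn_reg_loss here actually hold the | |||
| # class scores and box deltas consumed by get_det_bboxes. | |||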
| output = self.get_det_bboxes(rcnn_cls_loss, rcnn_reg_loss, rcnn_masks, bboxes_all, img_metas) | |||
| return output | |||
| def get_det_bboxes(self, cls_logits, reg_logits, mask_logits, rois, img_metas): | |||
| """Get the actual detection box.""" | |||
| scores = self.softmax(cls_logits) | |||
| boxes_all = () | |||
| for i in range(self.num_classes): | |||
| k = i * 4 | |||
| reg_logits_i = self.squeeze(reg_logits[::, k:k+4:1]) | |||
| out_boxes_i = self.decode(rois, reg_logits_i) | |||
| boxes_all += (out_boxes_i,) | |||
| img_metas_all = self.split(img_metas) | |||
| scores_all = self.split(scores) | |||
| mask_all = self.split(self.cast(mask_logits, mstype.int32)) | |||
| boxes_all_with_batchsize = () | |||
| for i in range(self.test_batch_size): | |||
| scale = self.split_shape(self.squeeze(img_metas_all[i])) | |||
| scale_h = scale[2] | |||
| scale_w = scale[3] | |||
| boxes_tuple = () | |||
| for j in range(self.num_classes): | |||
| boxes_tmp = self.split(boxes_all[j]) | |||
| out_boxes_h = boxes_tmp[i] / scale_h | |||
| out_boxes_w = boxes_tmp[i] / scale_w | |||
| boxes_tuple += (self.select(self.bbox_mask, out_boxes_w, out_boxes_h),) | |||
| boxes_all_with_batchsize += (boxes_tuple,) | |||
| output = self.multiclass_nms(boxes_all_with_batchsize, scores_all, mask_all) | |||
| return output | |||
| def multiclass_nms(self, boxes_all, scores_all, mask_all): | |||
| """Multiscale postprocessing.""" | |||
| all_bboxes = () | |||
| all_labels = () | |||
| all_masks = () | |||
| for i in range(self.test_batch_size): | |||
| bboxes = boxes_all[i] | |||
| scores = scores_all[i] | |||
| masks = self.cast(mask_all[i], mstype.bool_) | |||
| res_boxes_tuple = () | |||
| res_labels_tuple = () | |||
| res_masks_tuple = () | |||
| for j in range(self.num_classes - 1): | |||
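| # k = j + 1 skips index 0, which is the background class. | |||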
| k = j + 1 | |||
| _cls_scores = scores[::, k:k + 1:1] | |||
| _bboxes = self.squeeze(bboxes[k]) | |||
| _mask_o = self.reshape(masks, (self.rpn_max_num, 1)) | |||
| cls_mask = self.greater(_cls_scores, self.test_score_thresh) | |||
| _mask = self.logicand(_mask_o, cls_mask) | |||
| _reg_mask = self.cast(self.tile(self.cast(_mask, mstype.int32), (1, 4)), mstype.bool_) | |||
| _bboxes = self.select(_reg_mask, _bboxes, self.test_box_zeros) | |||
| _cls_scores = self.select(_mask, _cls_scores, self.test_score_zeros) | |||
| __cls_scores = self.squeeze(_cls_scores) | |||
| scores_sorted, topk_inds = self.test_topk(__cls_scores, self.rpn_max_num) | |||
| topk_inds = self.reshape(topk_inds, (self.rpn_max_num, 1)) | |||
| scores_sorted = self.reshape(scores_sorted, (self.rpn_max_num, 1)) | |||
| _bboxes_sorted = self.gather(_bboxes, topk_inds) | |||
| _mask_sorted = self.gather(_mask, topk_inds) | |||
| scores_sorted = self.tile(scores_sorted, (1, 4)) | |||
| cls_dets = self.concat_1((_bboxes_sorted, scores_sorted)) | |||
| cls_dets = P.Slice()(cls_dets, (0, 0), (self.rpn_max_num, 5)) | |||
| cls_dets, _index, _mask_nms = self.nms_test(cls_dets) | |||
| _index = self.reshape(_index, (self.rpn_max_num, 1)) | |||
| _mask_nms = self.reshape(_mask_nms, (self.rpn_max_num, 1)) | |||
| _mask_n = self.gather(_mask_sorted, _index) | |||
| _mask_n = self.logicand(_mask_n, _mask_nms) | |||
| cls_labels = self.oneslike(_index) * j | |||
| res_boxes_tuple += (cls_dets,) | |||
| res_labels_tuple += (cls_labels,) | |||
| res_masks_tuple += (_mask_n,) | |||
| res_boxes_start = self.concat(res_boxes_tuple[:self.concat_start]) | |||
| res_labels_start = self.concat(res_labels_tuple[:self.concat_start]) | |||
| res_masks_start = self.concat(res_masks_tuple[:self.concat_start]) | |||
| res_boxes_end = self.concat(res_boxes_tuple[self.concat_start:self.concat_end]) | |||
| res_labels_end = self.concat(res_labels_tuple[self.concat_start:self.concat_end]) | |||
| res_masks_end = self.concat(res_masks_tuple[self.concat_start:self.concat_end]) | |||
| res_boxes = self.concat((res_boxes_start, res_boxes_end)) | |||
| res_labels = self.concat((res_labels_start, res_labels_end)) | |||
| res_masks = self.concat((res_masks_start, res_masks_end)) | |||
| reshape_size = (self.num_classes - 1) * self.rpn_max_num | |||
| res_boxes = self.reshape(res_boxes, (1, reshape_size, 5)) | |||
| res_labels = self.reshape(res_labels, (1, reshape_size, 1)) | |||
| res_masks = self.reshape(res_masks, (1, reshape_size, 1)) | |||
| all_bboxes += (res_boxes,) | |||
| all_labels += (res_labels,) | |||
| all_masks += (res_masks,) | |||
| all_bboxes = self.concat(all_bboxes) | |||
| all_labels = self.concat(all_labels) | |||
| all_masks = self.concat(all_masks) | |||
| return all_bboxes, all_labels, all_masks | |||
| def get_anchors(self, featmap_sizes): | |||
| """Get anchors according to feature map sizes. | |||
| Args: | |||
| featmap_sizes (list[tuple]): Multi-level feature map sizes. | |||
| img_metas (list[dict]): Image meta info. | |||
| Returns: | |||
| tuple: anchors of each image, valid flags of each image | |||
| """ | |||
| num_levels = len(featmap_sizes) | |||
| # since the feature map sizes of all images are the same, we only compute | |||
| # anchors once | |||
| multi_level_anchors = () | |||
| for i in range(num_levels): | |||
| anchors = self.anchor_generators[i].grid_anchors( | |||
| featmap_sizes[i], self.anchor_strides[i]) | |||
| multi_level_anchors += (Tensor(anchors.astype(np.float16)),) | |||
| return multi_level_anchors | |||
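| # A minimal standalone sketch (illustrative only; the shapes and anchor count | |||
| # below are assumptions, not taken from config) of the anchor bookkeeping | |||
| # above: every feature-map cell carries len(anchor_ratios) * len(anchor_scales) | |||
| # anchors, so a level of shape (h, w) contributes h * w * num_anchors boxes. | |||
| def _demo_anchor_counts(): | |||
| feature_shapes = [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)] | |||
| num_anchors = 3 | |||
| counts = [h * w * num_anchors for h, w in feature_shapes] | |||
| print(counts, sum(counts))  # per-level anchor counts and their total | |||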
| @@ -0,0 +1,112 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn feature pyramid network.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore import context | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.common.initializer import initializer | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||
| def bias_init_zeros(shape): | |||
| """Bias init method.""" | |||
| return Tensor(np.array(np.zeros(shape).astype(np.float32)).astype(np.float16)) | |||
| def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'): | |||
| """Conv2D wrapper.""" | |||
| shape = (out_channels, in_channels, kernel_size, kernel_size) | |||
| weights = initializer("XavierUniform", shape=shape, dtype=mstype.float16).to_tensor() | |||
| shape_bias = (out_channels,) | |||
| biass = bias_init_zeros(shape_bias) | |||
| return nn.Conv2d(in_channels, out_channels, | |||
| kernel_size=kernel_size, stride=stride, padding=padding, | |||
| pad_mode=pad_mode, weight_init=weights, has_bias=True, bias_init=biass) | |||
| class FeatPyramidNeck(nn.Cell): | |||
| """ | |||
| Feature pyramid network cell, usually used as a network neck. | |||
| Applies convolutions to multiple input feature maps and outputs | |||
| feature maps with the same channel size. If the required number of | |||
| outputs is larger than the number of inputs, extra max pooling layers | |||
| are appended for further downsampling. | |||
| Args: | |||
| in_channels (tuple) - Channel size of input feature maps. | |||
| out_channels (int) - Channel size of output feature maps. | |||
| num_outs (int) - Number of output feature maps. | |||
| Returns: | |||
| Tuple, with tensors of same channel size. | |||
| Examples: | |||
| neck = FeatPyramidNeck([100,200,300], 50, 4) | |||
| input_data = (normal(0,0.1,(1,c,1280//(4*2**i), 768//(4*2**i)), | |||
| dtype=np.float32) \ | |||
| for i, c in enumerate(config.fpn_in_channels)) | |||
| x = neck(input_data) | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| num_outs): | |||
| super(FeatPyramidNeck, self).__init__() | |||
| self.num_outs = num_outs | |||
| self.in_channels = in_channels | |||
| self.fpn_layer = len(self.in_channels) | |||
| assert self.num_outs >= len(in_channels) | |||
| self.lateral_convs_list_ = [] | |||
| self.fpn_convs_ = [] | |||
| for _, channel in enumerate(in_channels): | |||
| l_conv = _conv(channel, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='valid') | |||
| fpn_conv = _conv(out_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='same') | |||
| self.lateral_convs_list_.append(l_conv) | |||
| self.fpn_convs_.append(fpn_conv) | |||
| self.lateral_convs_list = nn.layer.CellList(self.lateral_convs_list_) | |||
| self.fpn_convs_list = nn.layer.CellList(self.fpn_convs_) | |||
| self.interpolate1 = P.ResizeNearestNeighbor((48, 80)) | |||
| self.interpolate2 = P.ResizeNearestNeighbor((96, 160)) | |||
| self.interpolate3 = P.ResizeNearestNeighbor((192, 320)) | |||
| self.maxpool = P.MaxPool(ksize=1, strides=2, padding="same") | |||
| def construct(self, inputs): | |||
| x = () | |||
| for i in range(self.fpn_layer): | |||
| x += (self.lateral_convs_list[i](inputs[i]),) | |||
| y = (x[3],) | |||
| y = y + (x[2] + self.interpolate1(y[self.fpn_layer - 4]),) | |||
| y = y + (x[1] + self.interpolate2(y[self.fpn_layer - 3]),) | |||
| y = y + (x[0] + self.interpolate3(y[self.fpn_layer - 2]),) | |||
| z = () | |||
| for i in range(self.fpn_layer - 1, -1, -1): | |||
| z = z + (y[i],) | |||
| outs = () | |||
| for i in range(self.fpn_layer): | |||
| outs = outs + (self.fpn_convs_list[i](z[i]),) | |||
| for i in range(self.num_outs - self.fpn_layer): | |||
| outs = outs + (self.maxpool(outs[3]),) | |||
| return outs | |||
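| # A small numpy sketch (illustrative only; square shapes assumed for brevity) | |||
| # of the top-down pathway in construct() above: the coarsest lateral map is | |||
| # upsampled (nearest neighbour, standing in for P.ResizeNearestNeighbor) and | |||
| # added to the next finer lateral map, level by level. | |||
| def _demo_top_down_merge(): | |||
| import numpy as np | |||
| def upsample2x(t): | |||
| return t.repeat(2, axis=-2).repeat(2, axis=-1) | |||
| laterals = [np.ones((1, 256, s, s)) for s in (96, 48, 24, 12)]  # finest -> coarsest | |||
| y = laterals[-1] | |||
| merged = [y] | |||
| for lat in reversed(laterals[:-1]): | |||
| y = lat + upsample2x(y) | |||
| merged.insert(0, y) | |||
| print([m.shape for m in merged]) | |||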
| @@ -0,0 +1,199 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn proposal generator.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import operations as P | |||
| from mindspore import Tensor | |||
| from mindspore import context | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||
| class Proposal(nn.Cell): | |||
| """ | |||
| Proposal subnet. | |||
| Args: | |||
| config (dict): Config. | |||
| batch_size (int): Batchsize. | |||
| num_classes (int) - Class number. | |||
| use_sigmoid_cls (bool) - Select sigmoid or softmax function. | |||
| target_means (tuple) - Means for encode function. Default: (.0, .0, .0, .0). | |||
| target_stds (tuple) - Stds for encode function. Default: (1.0, 1.0, 1.0, 1.0). | |||
| Returns: | |||
| Tuple, tuple of output tensor,(proposal, mask). | |||
| Examples: | |||
| Proposal(config = config, batch_size = 1, num_classes = 81, use_sigmoid_cls = True, \ | |||
| target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0)) | |||
| """ | |||
| def __init__(self, | |||
| config, | |||
| batch_size, | |||
| num_classes, | |||
| use_sigmoid_cls, | |||
| target_means=(.0, .0, .0, .0), | |||
| target_stds=(1.0, 1.0, 1.0, 1.0) | |||
| ): | |||
| super(Proposal, self).__init__() | |||
| cfg = config | |||
| self.batch_size = batch_size | |||
| self.num_classes = num_classes | |||
| self.target_means = target_means | |||
| self.target_stds = target_stds | |||
| self.use_sigmoid_cls = use_sigmoid_cls | |||
| if self.use_sigmoid_cls: | |||
| self.cls_out_channels = num_classes - 1 | |||
| self.activation = P.Sigmoid() | |||
| self.reshape_shape = (-1, 1) | |||
| else: | |||
| self.cls_out_channels = num_classes | |||
| self.activation = P.Softmax(axis=1) | |||
| self.reshape_shape = (-1, 2) | |||
| if self.cls_out_channels <= 0: | |||
| raise ValueError('num_classes={} is too small'.format(num_classes)) | |||
| self.num_pre = cfg.rpn_proposal_nms_pre | |||
| self.min_box_size = cfg.rpn_proposal_min_bbox_size | |||
| self.nms_thr = cfg.rpn_proposal_nms_thr | |||
| self.nms_post = cfg.rpn_proposal_nms_post | |||
| self.nms_across_levels = cfg.rpn_proposal_nms_across_levels | |||
| self.max_num = cfg.rpn_proposal_max_num | |||
| self.num_levels = cfg.fpn_num_outs | |||
| # Op Define | |||
| self.squeeze = P.Squeeze() | |||
| self.reshape = P.Reshape() | |||
| self.cast = P.Cast() | |||
| self.feature_shapes = cfg.feature_shapes | |||
| self.transpose_shape = (1, 2, 0) | |||
| self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width), \ | |||
| means=self.target_means, \ | |||
| stds=self.target_stds) | |||
| self.nms = P.NMSWithMask(self.nms_thr) | |||
| self.concat_axis0 = P.Concat(axis=0) | |||
| self.concat_axis1 = P.Concat(axis=1) | |||
| self.split = P.Split(axis=1, output_num=5) | |||
| self.min = P.Minimum() | |||
| self.gatherND = P.GatherNd() | |||
| self.slice = P.Slice() | |||
| self.select = P.Select() | |||
| self.greater = P.Greater() | |||
| self.transpose = P.Transpose() | |||
| self.tile = P.Tile() | |||
| self.set_train_local(config, training=True) | |||
| self.multi_10 = Tensor(10.0, mstype.float16) | |||
| def set_train_local(self, config, training=True): | |||
| """Set training flag.""" | |||
| self.training_local = training | |||
| cfg = config | |||
| self.topK_stage1 = () | |||
| self.topK_shape = () | |||
| total_max_topk_input = 0 | |||
| if not self.training_local: | |||
| self.num_pre = cfg.rpn_nms_pre | |||
| self.min_box_size = cfg.rpn_min_bbox_min_size | |||
| self.nms_thr = cfg.rpn_nms_thr | |||
| self.nms_post = cfg.rpn_nms_post | |||
| self.nms_across_levels = cfg.rpn_nms_across_levels | |||
| self.max_num = cfg.rpn_max_num | |||
| for shp in self.feature_shapes: | |||
| k_num = min(self.num_pre, (shp[0] * shp[1] * 3)) | |||
| total_max_topk_input += k_num | |||
| self.topK_stage1 += (k_num,) | |||
| self.topK_shape += ((k_num, 1),) | |||
| self.topKv2 = P.TopK(sorted=True) | |||
| self.topK_shape_stage2 = (self.max_num, 1) | |||
| self.min_float_num = -65536.0 | |||
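| # -65536.0 exceeds the float16 range (max finite magnitude is 65504), so it | |||
| # becomes -inf once cast to float16 and masked-out proposals can never win TopK. | |||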
| self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float16)) | |||
| def construct(self, rpn_cls_score_total, rpn_bbox_pred_total, anchor_list): | |||
| proposals_tuple = () | |||
| masks_tuple = () | |||
| for img_id in range(self.batch_size): | |||
| cls_score_list = () | |||
| bbox_pred_list = () | |||
| for i in range(self.num_levels): | |||
| rpn_cls_score_i = self.squeeze(rpn_cls_score_total[i][img_id:img_id+1:1, ::, ::, ::]) | |||
| rpn_bbox_pred_i = self.squeeze(rpn_bbox_pred_total[i][img_id:img_id+1:1, ::, ::, ::]) | |||
| cls_score_list = cls_score_list + (rpn_cls_score_i,) | |||
| bbox_pred_list = bbox_pred_list + (rpn_bbox_pred_i,) | |||
| proposals, masks = self.get_bboxes_single(cls_score_list, bbox_pred_list, anchor_list) | |||
| proposals_tuple += (proposals,) | |||
| masks_tuple += (masks,) | |||
| return proposals_tuple, masks_tuple | |||
| def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors): | |||
| """Get proposal boundingbox.""" | |||
| mlvl_proposals = () | |||
| mlvl_mask = () | |||
| for idx in range(self.num_levels): | |||
| rpn_cls_score = self.transpose(cls_scores[idx], self.transpose_shape) | |||
| rpn_bbox_pred = self.transpose(bbox_preds[idx], self.transpose_shape) | |||
| anchors = mlvl_anchors[idx] | |||
| rpn_cls_score = self.reshape(rpn_cls_score, self.reshape_shape) | |||
| rpn_cls_score = self.activation(rpn_cls_score) | |||
| rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 0::]), mstype.float16) | |||
| rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mstype.float16) | |||
| scores_sorted, topk_inds = self.topKv2(rpn_cls_score_process, self.topK_stage1[idx]) | |||
| topk_inds = self.reshape(topk_inds, self.topK_shape[idx]) | |||
| bboxes_sorted = self.gatherND(rpn_bbox_pred_process, topk_inds) | |||
| anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mstype.float16) | |||
| proposals_decode = self.decode(anchors_sorted, bboxes_sorted) | |||
| proposals_decode = self.concat_axis1((proposals_decode, self.reshape(scores_sorted, self.topK_shape[idx]))) | |||
| proposals, _, mask_valid = self.nms(proposals_decode) | |||
| mlvl_proposals = mlvl_proposals + (proposals,) | |||
| mlvl_mask = mlvl_mask + (mask_valid,) | |||
| proposals = self.concat_axis0(mlvl_proposals) | |||
| masks = self.concat_axis0(mlvl_mask) | |||
| _, _, _, _, scores = self.split(proposals) | |||
| scores = self.squeeze(scores) | |||
| topk_mask = self.cast(self.topK_mask, mstype.float16) | |||
| scores_using = self.select(masks, scores, topk_mask) | |||
| _, topk_inds = self.topKv2(scores_using, self.max_num) | |||
| topk_inds = self.reshape(topk_inds, self.topK_shape_stage2) | |||
| proposals = self.gatherND(proposals, topk_inds) | |||
| masks = self.gatherND(masks, topk_inds) | |||
| return proposals, masks | |||
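| # A standalone sketch (hypothetical numbers; rpn_proposal_nms_pre and the | |||
| # feature shapes below are assumptions) of how set_train_local() sizes the | |||
| # per-level TopK above: each level keeps at most num_pre candidates, capped by | |||
| # the number of anchors that level actually has. | |||
| def _demo_topk_sizing(): | |||
| feature_shapes = [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)] | |||
| num_pre = 2000 | |||
| topk = [min(num_pre, h * w * 3) for h, w in feature_shapes]  # 3 anchors per cell | |||
| print(topk, sum(topk))  # per-level k and the total TopK-mask length | |||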
| @@ -0,0 +1,171 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn Rcnn network.""" | |||
| import numpy as np | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.common.initializer import initializer | |||
| from mindspore.common.parameter import Parameter | |||
| class DenseNoTranspose(nn.Cell): | |||
| """Dense layer whose weight is stored as (input_channels, output_channels), so the matmul needs no transpose.""" | |||
| def __init__(self, input_channels, output_channels, weight_init): | |||
| super(DenseNoTranspose, self).__init__() | |||
| self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16), | |||
| name="weight") | |||
| self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias") | |||
| self.matmul = P.MatMul(transpose_b=False) | |||
| self.bias_add = P.BiasAdd() | |||
| def construct(self, x): | |||
| output = self.bias_add(self.matmul(x, self.weight), self.bias) | |||
| return output | |||
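| # A minimal numpy sketch (illustrative only) of what DenseNoTranspose computes: | |||
| # the weight is stored as (input_channels, output_channels), so the forward | |||
| # pass is a plain x @ W + b without the transpose a standard dense layer applies. | |||
| def _demo_dense_no_transpose(): | |||
| import numpy as np | |||
| x = np.ones((2, 4), np.float16)  # (batch, in_channels) | |||
| w = np.full((4, 3), 0.5, np.float16)  # (in_channels, out_channels) | |||
| b = np.zeros((3,), np.float16) | |||
| print(x @ w + b)  # shape (2, 3) | |||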
| class Rcnn(nn.Cell): | |||
| """ | |||
| Rcnn subnet. | |||
| Args: | |||
| config (dict) - Config. | |||
| representation_size (int) - Channels of shared dense. | |||
| batch_size (int) - Batchsize. | |||
| num_classes (int) - Class number. | |||
| target_means (list) - Means for encode function. Default: (.0, .0, .0, .0). | |||
| target_stds (list) - Stds for encode function. Default: (0.1, 0.1, 0.2, 0.2). | |||
| Returns: | |||
| Tuple, tuple of output tensor. | |||
| Examples: | |||
| Rcnn(config=config, representation_size = 1024, batch_size=2, num_classes = 81, \ | |||
| target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2)) | |||
| """ | |||
| def __init__(self, | |||
| config, | |||
| representation_size, | |||
| batch_size, | |||
| num_classes, | |||
| target_means=(0., 0., 0., 0.), | |||
| target_stds=(0.1, 0.1, 0.2, 0.2) | |||
| ): | |||
| super(Rcnn, self).__init__() | |||
| cfg = config | |||
| self.rcnn_loss_cls_weight = Tensor(np.array(cfg.rcnn_loss_cls_weight).astype(np.float16)) | |||
| self.rcnn_loss_reg_weight = Tensor(np.array(cfg.rcnn_loss_reg_weight).astype(np.float16)) | |||
| self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels | |||
| self.target_means = target_means | |||
| self.target_stds = target_stds | |||
| self.num_classes = num_classes | |||
| self.in_channels = cfg.rcnn_in_channels | |||
| self.train_batch_size = batch_size | |||
| self.test_batch_size = cfg.test_batch_size | |||
| shape_0 = (self.rcnn_fc_out_channels, representation_size) | |||
| weights_0 = initializer("XavierUniform", shape=shape_0[::-1], dtype=mstype.float16).to_tensor() | |||
| shape_1 = (self.rcnn_fc_out_channels, self.rcnn_fc_out_channels) | |||
| weights_1 = initializer("XavierUniform", shape=shape_1[::-1], dtype=mstype.float16).to_tensor() | |||
| self.shared_fc_0 = DenseNoTranspose(representation_size, self.rcnn_fc_out_channels, weights_0) | |||
| self.shared_fc_1 = DenseNoTranspose(self.rcnn_fc_out_channels, self.rcnn_fc_out_channels, weights_1) | |||
| cls_weight = initializer('Normal', shape=[num_classes, self.rcnn_fc_out_channels][::-1], | |||
| dtype=mstype.float16).to_tensor() | |||
| reg_weight = initializer('Normal', shape=[num_classes * 4, self.rcnn_fc_out_channels][::-1], | |||
| dtype=mstype.float16).to_tensor() | |||
| self.cls_scores = DenseNoTranspose(self.rcnn_fc_out_channels, num_classes, cls_weight) | |||
| self.reg_scores = DenseNoTranspose(self.rcnn_fc_out_channels, num_classes * 4, reg_weight) | |||
| self.flatten = P.Flatten() | |||
| self.relu = P.ReLU() | |||
| self.logicaland = P.LogicalAnd() | |||
| self.loss_cls = P.SoftmaxCrossEntropyWithLogits() | |||
| self.loss_bbox = P.SmoothL1Loss(sigma=1.0) | |||
| self.reshape = P.Reshape() | |||
| self.onehot = P.OneHot() | |||
| self.greater = P.Greater() | |||
| self.cast = P.Cast() | |||
| self.sum_loss = P.ReduceSum() | |||
| self.tile = P.Tile() | |||
| self.expand_dims = P.ExpandDims() | |||
| self.gather = P.GatherNd() | |||
| self.argmax = P.ArgMaxWithValue(axis=1) | |||
| self.on_value = Tensor(1.0, mstype.float32) | |||
| self.off_value = Tensor(0.0, mstype.float32) | |||
| self.value = Tensor(1.0, mstype.float16) | |||
| self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2) * batch_size | |||
| rmv_first = np.ones((self.num_bboxes, self.num_classes)) | |||
| rmv_first[:, 0] = np.zeros((self.num_bboxes,)) | |||
| self.rmv_first_tensor = Tensor(rmv_first.astype(np.float16)) | |||
| self.num_bboxes_test = cfg.rpn_max_num * cfg.test_batch_size | |||
| range_max = np.arange(self.num_bboxes_test).astype(np.int32) | |||
| self.range_max = Tensor(range_max) | |||
| def construct(self, featuremap, bbox_targets, labels, mask): | |||
| x = self.flatten(featuremap) | |||
| x = self.relu(self.shared_fc_0(x)) | |||
| x = self.relu(self.shared_fc_1(x)) | |||
| x_cls = self.cls_scores(x) | |||
| x_reg = self.reg_scores(x) | |||
| if self.training: | |||
| bbox_weights = self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32) * labels | |||
| labels = self.cast(self.onehot(labels, self.num_classes, self.on_value, self.off_value), mstype.float16) | |||
| bbox_targets = self.tile(self.expand_dims(bbox_targets, 1), (1, self.num_classes, 1)) | |||
| loss, loss_cls, loss_reg, loss_print = self.loss(x_cls, x_reg, bbox_targets, bbox_weights, labels, mask) | |||
| out = (loss, loss_cls, loss_reg, loss_print) | |||
| else: | |||
| out = (x_cls, (x_cls / self.value), x_reg, x_cls) | |||
| return out | |||
| def loss(self, cls_score, bbox_pred, bbox_targets, bbox_weights, labels, weights): | |||
| """Loss method.""" | |||
| loss_print = () | |||
| loss_cls, _ = self.loss_cls(cls_score, labels) | |||
| weights = self.cast(weights, mstype.float16) | |||
| loss_cls = loss_cls * weights | |||
| loss_cls = self.sum_loss(loss_cls, (0,)) / self.sum_loss(weights, (0,)) | |||
| bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value), | |||
| mstype.float16) | |||
| bbox_weights = bbox_weights * self.rmv_first_tensor | |||
| pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4)) | |||
| loss_reg = self.loss_bbox(pos_bbox_pred, bbox_targets) | |||
| loss_reg = self.sum_loss(loss_reg, (2,)) | |||
| loss_reg = loss_reg * bbox_weights | |||
| loss_reg = loss_reg / self.sum_loss(weights, (0,)) | |||
| loss_reg = self.sum_loss(loss_reg, (0, 1)) | |||
| loss = self.rcnn_loss_cls_weight * loss_cls + self.rcnn_loss_reg_weight * loss_reg | |||
| loss_print += (loss_cls, loss_reg) | |||
| return loss, loss_cls, loss_reg, loss_print | |||
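| # A standalone numpy sketch (assumed values) of the classification-loss | |||
| # normalization in loss() above: per-RoI losses are zeroed for invalid samples | |||
| # and averaged over the number of valid samples only. | |||
| def _demo_masked_cls_loss(): | |||
| import numpy as np | |||
| per_roi_loss = np.array([0.2, 1.5, 0.7, 0.1], np.float16) | |||
| valid = np.array([1, 1, 0, 1], np.float16)  # mask of sampled RoIs | |||
| print((per_roi_loss * valid).sum() / valid.sum())  # mean over valid RoIs only | |||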
| @@ -0,0 +1,248 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """Resnet50 backbone.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.ops import functional as F | |||
| from mindspore import context | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True) | |||
| def weight_init_ones(shape): | |||
| """Weight init.""" | |||
| return Tensor(np.array(np.ones(shape).astype(np.float32) * 0.01).astype(np.float16)) | |||
| def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'): | |||
| """Conv2D wrapper.""" | |||
| shape = (out_channels, in_channels, kernel_size, kernel_size) | |||
| weights = weight_init_ones(shape) | |||
| return nn.Conv2d(in_channels, out_channels, | |||
| kernel_size=kernel_size, stride=stride, padding=padding, | |||
| pad_mode=pad_mode, weight_init=weights, has_bias=False) | |||
| def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True): | |||
| """Batchnorm2D wrapper.""" | |||
| gamma_init = Tensor(np.array(np.ones(out_chls)).astype(np.float16)) | |||
| beta_init = Tensor(np.array(np.ones(out_chls) * 0).astype(np.float16)) | |||
| moving_mean_init = Tensor(np.array(np.ones(out_chls) * 0).astype(np.float16)) | |||
| moving_var_init = Tensor(np.array(np.ones(out_chls)).astype(np.float16)) | |||
| return nn.BatchNorm2d(out_chls, momentum=momentum, affine=affine, gamma_init=gamma_init, | |||
| beta_init=beta_init, moving_mean_init=moving_mean_init, | |||
| moving_var_init=moving_var_init, use_batch_statistics=use_batch_statistics) | |||
| class ResNetFea(nn.Cell): | |||
| """ | |||
| ResNet architecture. | |||
| Args: | |||
| block (Cell): Block for network. | |||
| layer_nums (list): Numbers of block in different layers. | |||
| in_channels (list): Input channel in each layer. | |||
| out_channels (list): Output channel in each layer. | |||
| weights_update (bool): Weight update flag. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> ResNet(ResidualBlock, | |||
| >>> [3, 4, 6, 3], | |||
| >>> [64, 256, 512, 1024], | |||
| >>> [256, 512, 1024, 2048], | |||
| >>> False) | |||
| """ | |||
| def __init__(self, | |||
| block, | |||
| layer_nums, | |||
| in_channels, | |||
| out_channels, | |||
| weights_update=False): | |||
| super(ResNetFea, self).__init__() | |||
| if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: | |||
| raise ValueError("the length of " | |||
| "layer_num, inchannel, outchannel list must be 4!") | |||
| bn_training = False | |||
| self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad') | |||
| self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training) | |||
| self.relu = P.ReLU() | |||
| self.maxpool = P.MaxPool(ksize=3, strides=2, padding="SAME") | |||
| self.weights_update = weights_update | |||
| if not self.weights_update: | |||
| self.conv1.weight.requires_grad = False | |||
| self.layer1 = self._make_layer(block, | |||
| layer_nums[0], | |||
| in_channel=in_channels[0], | |||
| out_channel=out_channels[0], | |||
| stride=1, | |||
| training=bn_training, | |||
| weights_update=self.weights_update) | |||
| self.layer2 = self._make_layer(block, | |||
| layer_nums[1], | |||
| in_channel=in_channels[1], | |||
| out_channel=out_channels[1], | |||
| stride=2, | |||
| training=bn_training, | |||
| weights_update=True) | |||
| self.layer3 = self._make_layer(block, | |||
| layer_nums[2], | |||
| in_channel=in_channels[2], | |||
| out_channel=out_channels[2], | |||
| stride=2, | |||
| training=bn_training, | |||
| weights_update=True) | |||
| self.layer4 = self._make_layer(block, | |||
| layer_nums[3], | |||
| in_channel=in_channels[3], | |||
| out_channel=out_channels[3], | |||
| stride=2, | |||
| training=bn_training, | |||
| weights_update=True) | |||
| def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False): | |||
| """Make block layer.""" | |||
| layers = [] | |||
| down_sample = False | |||
| if stride != 1 or in_channel != out_channel: | |||
| down_sample = True | |||
| resblk = block(in_channel, | |||
| out_channel, | |||
| stride=stride, | |||
| down_sample=down_sample, | |||
| training=training, | |||
| weights_update=weights_update) | |||
| layers.append(resblk) | |||
| for _ in range(1, layer_num): | |||
| resblk = block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update) | |||
| layers.append(resblk) | |||
| return nn.SequentialCell(layers) | |||
| def construct(self, x): | |||
| x = self.conv1(x) | |||
| x = self.bn1(x) | |||
| x = self.relu(x) | |||
| c1 = self.maxpool(x) | |||
| c2 = self.layer1(c1) | |||
| identity = c2 | |||
| if not self.weights_update: | |||
| identity = F.stop_gradient(c2) | |||
| c3 = self.layer2(identity) | |||
| c4 = self.layer3(c3) | |||
| c5 = self.layer4(c4) | |||
| return identity, c3, c4, c5 | |||
| class ResidualBlockUsing(nn.Cell): | |||
| """ | |||
| ResNet V1 residual block definition. | |||
| Args: | |||
| in_channels (int) - Input channel. | |||
| out_channels (int) - Output channel. | |||
| stride (int) - Stride size for the initial convolutional layer. Default: 1. | |||
| down_sample (bool) - Whether to downsample in the block. Default: False. | |||
| momentum (float) - Momentum for batchnorm layer. Default: 0.1. | |||
| training (bool) - Training flag. Default: False. | |||
| weights_update (bool) - Weights update flag. Default: False. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| ResidualBlock(3,256,stride=2,down_sample=True) | |||
| """ | |||
| expansion = 4 | |||
| def __init__(self, | |||
| in_channels, | |||
| out_channels, | |||
| stride=1, | |||
| down_sample=False, | |||
| momentum=0.1, | |||
| training=False, | |||
| weights_update=False): | |||
| super(ResidualBlockUsing, self).__init__() | |||
| self.affine = weights_update | |||
| out_chls = out_channels // self.expansion | |||
| self.conv1 = _conv(in_channels, out_chls, kernel_size=1, stride=1, padding=0) | |||
| self.bn1 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training) | |||
| self.conv2 = _conv(out_chls, out_chls, kernel_size=3, stride=stride, padding=1) | |||
| self.bn2 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training) | |||
| self.conv3 = _conv(out_chls, out_channels, kernel_size=1, stride=1, padding=0) | |||
| self.bn3 = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, use_batch_statistics=training) | |||
| if training: | |||
| self.bn1 = self.bn1.set_train() | |||
| self.bn2 = self.bn2.set_train() | |||
| self.bn3 = self.bn3.set_train() | |||
| if not weights_update: | |||
| self.conv1.weight.requires_grad = False | |||
| self.conv2.weight.requires_grad = False | |||
| self.conv3.weight.requires_grad = False | |||
| self.relu = P.ReLU() | |||
| self.downsample = down_sample | |||
| if self.downsample: | |||
| self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0) | |||
| self.bn_down_sample = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, | |||
| use_batch_statistics=training) | |||
| if training: | |||
| self.bn_down_sample = self.bn_down_sample.set_train() | |||
| if not weights_update: | |||
| self.conv_down_sample.weight.requires_grad = False | |||
| self.add = P.TensorAdd() | |||
| def construct(self, x): | |||
| identity = x | |||
| out = self.conv1(x) | |||
| out = self.bn1(out) | |||
| out = self.relu(out) | |||
| out = self.conv2(out) | |||
| out = self.bn2(out) | |||
| out = self.relu(out) | |||
| out = self.conv3(out) | |||
| out = self.bn3(out) | |||
| if self.downsample: | |||
| identity = self.conv_down_sample(identity) | |||
| identity = self.bn_down_sample(identity) | |||
| out = self.add(out, identity) | |||
| out = self.relu(out) | |||
| return out | |||
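| # A short sketch (illustrative only) of the bottleneck channel math used by | |||
| # ResidualBlockUsing: both inner convs run at out_channels // expansion | |||
| # channels and the final 1x1 conv expands back to out_channels. | |||
| def _demo_bottleneck_channels(): | |||
| expansion = 4 | |||
| for out_channels in (256, 512, 1024, 2048): | |||
| out_chls = out_channels // expansion | |||
| print(out_channels, '-> 1x1:', out_chls, '-> 3x3:', out_chls, '-> 1x1:', out_channels) | |||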
| @@ -0,0 +1,178 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn ROIAlign module.""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import composite as C | |||
| from mindspore.nn import layer as L | |||
| from mindspore.common.tensor import Tensor | |||
| class ROIAlign(nn.Cell): | |||
| """ | |||
| Extract RoI features from a single feature map. | |||
| Args: | |||
| out_size_h (int) - RoI height. | |||
| out_size_w (int) - RoI width. | |||
| spatial_scale (float) - RoI spatial scale. | |||
| sample_num (int) - RoI sample number. | |||
| """ | |||
| def __init__(self, | |||
| out_size_h, | |||
| out_size_w, | |||
| spatial_scale, | |||
| sample_num=0): | |||
| super(ROIAlign, self).__init__() | |||
| self.out_size = (out_size_h, out_size_w) | |||
| self.spatial_scale = float(spatial_scale) | |||
| self.sample_num = int(sample_num) | |||
| self.align_op = P.ROIAlign(self.out_size[0], self.out_size[1], | |||
| self.spatial_scale, self.sample_num) | |||
| def construct(self, features, rois): | |||
| return self.align_op(features, rois) | |||
| def __repr__(self): | |||
| format_str = self.__class__.__name__ | |||
| format_str += '(out_size={}, spatial_scale={}, sample_num={})'.format( | |||
| self.out_size, self.spatial_scale, self.sample_num) | |||
| return format_str | |||
| class SingleRoIExtractor(nn.Cell): | |||
| """ | |||
| Extract RoI features from a single level feature map. | |||
| If there are multiple input feature levels, each RoI is mapped to a level | |||
| according to its scale. | |||
| Args: | |||
| config (dict): Config | |||
| roi_layer (dict): Specify RoI layer type and arguments. | |||
| out_channels (int): Output channels of RoI layers. | |||
| featmap_strides (int): Strides of input feature maps. | |||
| batch_size (int): Batchsize. | |||
| finest_scale (int): Scale threshold of mapping to level 0. | |||
| """ | |||
| def __init__(self, | |||
| config, | |||
| roi_layer, | |||
| out_channels, | |||
| featmap_strides, | |||
| batch_size=1, | |||
| finest_scale=56): | |||
| super(SingleRoIExtractor, self).__init__() | |||
| cfg = config | |||
| self.train_batch_size = batch_size | |||
| self.out_channels = out_channels | |||
| self.featmap_strides = featmap_strides | |||
| self.num_levels = len(self.featmap_strides) | |||
| self.out_size = roi_layer['out_size'] | |||
| self.sample_num = roi_layer['sample_num'] | |||
| self.roi_layers = self.build_roi_layers(self.featmap_strides) | |||
| self.roi_layers = L.CellList(self.roi_layers) | |||
| self.sqrt = P.Sqrt() | |||
| self.log = P.Log() | |||
| self.finest_scale_ = finest_scale | |||
| self.clamp = C.clip_by_value | |||
| self.cast = P.Cast() | |||
| self.equal = P.Equal() | |||
| self.select = P.Select() | |||
| _mode_16 = False | |||
| self.dtype = np.float16 if _mode_16 else np.float32 | |||
| self.ms_dtype = mstype.float16 if _mode_16 else mstype.float32 | |||
| self.set_train_local(cfg, training=True) | |||
| def set_train_local(self, config, training=True): | |||
| """Set training flag.""" | |||
| self.training_local = training | |||
| cfg = config | |||
| # Init tensor | |||
| self.batch_size = cfg.roi_sample_num if self.training_local else cfg.rpn_max_num | |||
| self.batch_size = self.train_batch_size*self.batch_size \ | |||
| if self.training_local else cfg.test_batch_size*self.batch_size | |||
| self.ones = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype)) | |||
| finest_scale = np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * self.finest_scale_ | |||
| self.finest_scale = Tensor(finest_scale) | |||
| self.epsilon = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype)*self.dtype(1e-6)) | |||
| self.zeros = Tensor(np.array(np.zeros((self.batch_size, 1)), dtype=np.int32)) | |||
| self.max_levels = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=np.int32)*(self.num_levels-1)) | |||
| self.twos = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * 2) | |||
| self.res_ = Tensor(np.array(np.zeros((self.batch_size, self.out_channels, | |||
| self.out_size, self.out_size)), dtype=self.dtype)) | |||
| def num_inputs(self): | |||
| return len(self.featmap_strides) | |||
| def init_weights(self): | |||
| pass | |||
| def log2(self, value): | |||
| return self.log(value) / self.log(self.twos) | |||
| def build_roi_layers(self, featmap_strides): | |||
| roi_layers = [] | |||
| for s in featmap_strides: | |||
| layer_cls = ROIAlign(self.out_size, self.out_size, | |||
| spatial_scale=1 / s, | |||
| sample_num=self.sample_num) | |||
| roi_layers.append(layer_cls) | |||
| return roi_layers | |||
| def _c_map_roi_levels(self, rois): | |||
| """Map rois to corresponding feature levels by scales. | |||
| - scale < finest_scale * 2: level 0 | |||
| - finest_scale * 2 <= scale < finest_scale * 4: level 1 | |||
| - finest_scale * 4 <= scale < finest_scale * 8: level 2 | |||
| - scale >= finest_scale * 8: level 3 | |||
| Args: | |||
| rois (Tensor): Input RoIs, shape (k, 5). | |||
| num_levels (int): Total level number. | |||
| Returns: | |||
| Tensor: Level index (0-based) of each RoI, shape (k, ) | |||
| """ | |||
| scale = self.sqrt(rois[::, 3:4:1] - rois[::, 1:2:1] + self.ones) * \ | |||
| self.sqrt(rois[::, 4:5:1] - rois[::, 2:3:1] + self.ones) | |||
| target_lvls = self.log2(scale / self.finest_scale + self.epsilon) | |||
| target_lvls = P.Floor()(target_lvls) | |||
| target_lvls = self.cast(target_lvls, mstype.int32) | |||
| target_lvls = self.clamp(target_lvls, self.zeros, self.max_levels) | |||
| return target_lvls | |||
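| # Worked example (hypothetical RoI, finest_scale = 56): a 224x224 box has | |||
| # scale = sqrt(224) * sqrt(224) = 224 and floor(log2(224 / 56)) = 2, so it is | |||
| # pooled from FPN level 2; the clamp keeps the result within [0, num_levels - 1]. | |||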
| def construct(self, rois, feat1, feat2, feat3, feat4): | |||
| feats = (feat1, feat2, feat3, feat4) | |||
| res = self.res_ | |||
| target_lvls = self._c_map_roi_levels(rois) | |||
| for i in range(self.num_levels): | |||
| mask = self.equal(target_lvls, P.ScalarToArray()(i)) | |||
| mask = P.Reshape()(mask, (-1, 1, 1, 1)) | |||
| roi_feats_t = self.roi_layers[i](feats[i], rois) | |||
| mask = self.cast(P.Tile()(self.cast(mask, mstype.int32), (1, 256, 7, 7)), mstype.bool_) | |||
| res = self.select(mask, roi_feats_t, res) | |||
| return res | |||
| @@ -0,0 +1,311 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """RPN for fasterRCNN""" | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore.ops import operations as P | |||
| from mindspore import Tensor | |||
| from mindspore.ops import functional as F | |||
| from mindspore.common.initializer import initializer | |||
| from .bbox_assign_sample import BboxAssignSample | |||
| class RpnRegClsBlock(nn.Cell): | |||
| """ | |||
| Rpn reg cls block for rpn layer | |||
| Args: | |||
| in_channels (int) - Input channels of shared convolution. | |||
| feat_channels (int) - Output channels of shared convolution. | |||
| num_anchors (int) - The anchor number. | |||
| cls_out_channels (int) - Output channels of classification convolution. | |||
| weight_conv (Tensor) - weight init for rpn conv. | |||
| bias_conv (Tensor) - bias init for rpn conv. | |||
| weight_cls (Tensor) - weight init for rpn cls conv. | |||
| bias_cls (Tensor) - bias init for rpn cls conv. | |||
| weight_reg (Tensor) - weight init for rpn reg conv. | |||
| bias_reg (Tensor) - bias init for rpn reg conv. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| """ | |||
| def __init__(self, | |||
| in_channels, | |||
| feat_channels, | |||
| num_anchors, | |||
| cls_out_channels, | |||
| weight_conv, | |||
| bias_conv, | |||
| weight_cls, | |||
| bias_cls, | |||
| weight_reg, | |||
| bias_reg): | |||
| super(RpnRegClsBlock, self).__init__() | |||
| self.rpn_conv = nn.Conv2d(in_channels, feat_channels, kernel_size=3, stride=1, pad_mode='same', | |||
| has_bias=True, weight_init=weight_conv, bias_init=bias_conv) | |||
| self.relu = nn.ReLU() | |||
| self.rpn_cls = nn.Conv2d(feat_channels, num_anchors * cls_out_channels, kernel_size=1, pad_mode='valid', | |||
| has_bias=True, weight_init=weight_cls, bias_init=bias_cls) | |||
| self.rpn_reg = nn.Conv2d(feat_channels, num_anchors * 4, kernel_size=1, pad_mode='valid', | |||
| has_bias=True, weight_init=weight_reg, bias_init=bias_reg) | |||
| def construct(self, x): | |||
| x = self.relu(self.rpn_conv(x)) | |||
| x1 = self.rpn_cls(x) | |||
| x2 = self.rpn_reg(x) | |||
| return x1, x2 | |||
| class RPN(nn.Cell): | |||
| """ | |||
| ROI proposal network. | |||
| Args: | |||
| config (dict) - Config. | |||
| batch_size (int) - Batchsize. | |||
| in_channels (int) - Input channels of shared convolution. | |||
| feat_channels (int) - Output channels of shared convolution. | |||
| num_anchors (int) - The anchor number. | |||
| cls_out_channels (int) - Output channels of classification convolution. | |||
| Returns: | |||
| Tuple, tuple of output tensor. | |||
| Examples: | |||
| RPN(config=config, batch_size=2, in_channels=256, feat_channels=1024, | |||
| num_anchors=3, cls_out_channels=512) | |||
| """ | |||
| def __init__(self, | |||
| config, | |||
| batch_size, | |||
| in_channels, | |||
| feat_channels, | |||
| num_anchors, | |||
| cls_out_channels): | |||
| super(RPN, self).__init__() | |||
| cfg_rpn = config | |||
| self.num_bboxes = cfg_rpn.num_bboxes | |||
| self.slice_index = () | |||
| self.feature_anchor_shape = () | |||
| self.slice_index += (0,) | |||
| index = 0 | |||
| for shape in cfg_rpn.feature_shapes: | |||
| self.slice_index += (self.slice_index[index] + shape[0] * shape[1] * num_anchors,) | |||
| self.feature_anchor_shape += (shape[0] * shape[1] * num_anchors * batch_size,) | |||
| index += 1 | |||
| self.num_anchors = num_anchors | |||
| self.batch_size = batch_size | |||
| self.test_batch_size = cfg_rpn.test_batch_size | |||
| self.num_layers = 5 | |||
| self.real_ratio = Tensor(np.ones((1, 1)).astype(np.float16)) | |||
| self.rpn_convs_list = nn.layer.CellList(self._make_rpn_layer(self.num_layers, in_channels, feat_channels, | |||
| num_anchors, cls_out_channels)) | |||
| self.transpose = P.Transpose() | |||
| self.reshape = P.Reshape() | |||
| self.concat = P.Concat(axis=0) | |||
| self.fill = P.Fill() | |||
| self.placeh1 = Tensor(np.ones((1,)).astype(np.float16)) | |||
| self.trans_shape = (0, 2, 3, 1) | |||
| self.reshape_shape_reg = (-1, 4) | |||
| self.reshape_shape_cls = (-1,) | |||
| self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(np.float16)) | |||
| self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(np.float16)) | |||
| self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(np.float16)) | |||
| self.num_bboxes = cfg_rpn.num_bboxes | |||
| self.get_targets = BboxAssignSample(cfg_rpn, self.batch_size, self.num_bboxes, False) | |||
| self.CheckValid = P.CheckValid() | |||
| self.sum_loss = P.ReduceSum() | |||
| self.loss_cls = P.SigmoidCrossEntropyWithLogits() | |||
| self.loss_bbox = P.SmoothL1Loss(sigma=1.0/9.0) | |||
| self.squeeze = P.Squeeze() | |||
| self.cast = P.Cast() | |||
| self.tile = P.Tile() | |||
| self.zeros_like = P.ZerosLike() | |||
| self.loss = Tensor(np.zeros((1,)).astype(np.float16)) | |||
| self.clsloss = Tensor(np.zeros((1,)).astype(np.float16)) | |||
| self.regloss = Tensor(np.zeros((1,)).astype(np.float16)) | |||
| def _make_rpn_layer(self, num_layers, in_channels, feat_channels, num_anchors, cls_out_channels): | |||
| """ | |||
| make rpn layer for rpn proposal network | |||
| Args: | |||
| num_layers (int) - layer num. | |||
| in_channels (int) - Input channels of shared convolution. | |||
| feat_channels (int) - Output channels of shared convolution. | |||
| num_anchors (int) - The anchor number. | |||
| cls_out_channels (int) - Output channels of classification convolution. | |||
| Returns: | |||
| List, list of RpnRegClsBlock cells. | |||
| """ | |||
| rpn_layer = [] | |||
| shp_weight_conv = (feat_channels, in_channels, 3, 3) | |||
| shp_bias_conv = (feat_channels,) | |||
| weight_conv = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float16).to_tensor() | |||
| bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float16).to_tensor() | |||
| shp_weight_cls = (num_anchors * cls_out_channels, feat_channels, 1, 1) | |||
| shp_bias_cls = (num_anchors * cls_out_channels,) | |||
| weight_cls = initializer('Normal', shape=shp_weight_cls, dtype=mstype.float16).to_tensor() | |||
| bias_cls = initializer(0, shape=shp_bias_cls, dtype=mstype.float16).to_tensor() | |||
| shp_weight_reg = (num_anchors * 4, feat_channels, 1, 1) | |||
| shp_bias_reg = (num_anchors * 4,) | |||
| weight_reg = initializer('Normal', shape=shp_weight_reg, dtype=mstype.float16).to_tensor() | |||
| bias_reg = initializer(0, shape=shp_bias_reg, dtype=mstype.float16).to_tensor() | |||
| for i in range(num_layers): | |||
| rpn_layer.append(RpnRegClsBlock(in_channels, feat_channels, num_anchors, cls_out_channels, \ | |||
| weight_conv, bias_conv, weight_cls, \ | |||
| bias_cls, weight_reg, bias_reg)) | |||
| for i in range(1, num_layers): | |||
| rpn_layer[i].rpn_conv.weight = rpn_layer[0].rpn_conv.weight | |||
| rpn_layer[i].rpn_cls.weight = rpn_layer[0].rpn_cls.weight | |||
| rpn_layer[i].rpn_reg.weight = rpn_layer[0].rpn_reg.weight | |||
| rpn_layer[i].rpn_conv.bias = rpn_layer[0].rpn_conv.bias | |||
| rpn_layer[i].rpn_cls.bias = rpn_layer[0].rpn_cls.bias | |||
| rpn_layer[i].rpn_reg.bias = rpn_layer[0].rpn_reg.bias | |||
| return rpn_layer | |||
| def construct(self, inputs, img_metas, anchor_list, gt_bboxes, gt_labels, gt_valids): | |||
| loss_print = () | |||
| rpn_cls_score = () | |||
| rpn_bbox_pred = () | |||
| rpn_cls_score_total = () | |||
| rpn_bbox_pred_total = () | |||
| for i in range(self.num_layers): | |||
| x1, x2 = self.rpn_convs_list[i](inputs[i]) | |||
| rpn_cls_score_total = rpn_cls_score_total + (x1,) | |||
| rpn_bbox_pred_total = rpn_bbox_pred_total + (x2,) | |||
| x1 = self.transpose(x1, self.trans_shape) | |||
| x1 = self.reshape(x1, self.reshape_shape_cls) | |||
| x2 = self.transpose(x2, self.trans_shape) | |||
| x2 = self.reshape(x2, self.reshape_shape_reg) | |||
| rpn_cls_score = rpn_cls_score + (x1,) | |||
| rpn_bbox_pred = rpn_bbox_pred + (x2,) | |||
| loss = self.loss | |||
| clsloss = self.clsloss | |||
| regloss = self.regloss | |||
| bbox_targets = () | |||
| bbox_weights = () | |||
| labels = () | |||
| label_weights = () | |||
| output = () | |||
| if self.training: | |||
| for i in range(self.batch_size): | |||
| multi_level_flags = () | |||
| anchor_list_tuple = () | |||
| for j in range(self.num_layers): | |||
| res = self.cast(self.CheckValid(anchor_list[j], self.squeeze(img_metas[i:i + 1:1, ::])), | |||
| mstype.int32) | |||
| multi_level_flags = multi_level_flags + (res,) | |||
| anchor_list_tuple = anchor_list_tuple + (anchor_list[j],) | |||
| valid_flag_list = self.concat(multi_level_flags) | |||
| anchor_using_list = self.concat(anchor_list_tuple) | |||
| gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::]) | |||
| gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::]) | |||
| gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::]) | |||
| bbox_target, bbox_weight, label, label_weight = self.get_targets(gt_bboxes_i, | |||
| gt_labels_i, | |||
| self.cast(valid_flag_list, | |||
| mstype.bool_), | |||
| anchor_using_list, gt_valids_i) | |||
| bbox_weight = self.cast(bbox_weight, mstype.float16) | |||
| label = self.cast(label, mstype.float16) | |||
| label_weight = self.cast(label_weight, mstype.float16) | |||
| for j in range(self.num_layers): | |||
| begin = self.slice_index[j] | |||
| end = self.slice_index[j + 1] | |||
| stride = 1 | |||
| bbox_targets += (bbox_target[begin:end:stride, ::],) | |||
| bbox_weights += (bbox_weight[begin:end:stride],) | |||
| labels += (label[begin:end:stride],) | |||
| label_weights += (label_weight[begin:end:stride],) | |||
| for i in range(self.num_layers): | |||
| bbox_target_using = () | |||
| bbox_weight_using = () | |||
| label_using = () | |||
| label_weight_using = () | |||
| for j in range(self.batch_size): | |||
| bbox_target_using += (bbox_targets[i + (self.num_layers * j)],) | |||
| bbox_weight_using += (bbox_weights[i + (self.num_layers * j)],) | |||
| label_using += (labels[i + (self.num_layers * j)],) | |||
| label_weight_using += (label_weights[i + (self.num_layers * j)],) | |||
| bbox_target_with_batchsize = self.concat(bbox_target_using) | |||
| bbox_weight_with_batchsize = self.concat(bbox_weight_using) | |||
| label_with_batchsize = self.concat(label_using) | |||
| label_weight_with_batchsize = self.concat(label_weight_using) | |||
| # stop | |||
| bbox_target_ = F.stop_gradient(bbox_target_with_batchsize) | |||
| bbox_weight_ = F.stop_gradient(bbox_weight_with_batchsize) | |||
| label_ = F.stop_gradient(label_with_batchsize) | |||
| label_weight_ = F.stop_gradient(label_weight_with_batchsize) | |||
| cls_score_i = rpn_cls_score[i] | |||
| reg_score_i = rpn_bbox_pred[i] | |||
| loss_cls = self.loss_cls(cls_score_i, label_) | |||
| loss_cls_item = loss_cls * label_weight_ | |||
| loss_cls_item = self.sum_loss(loss_cls_item, (0,)) / self.num_expected_total | |||
| loss_reg = self.loss_bbox(reg_score_i, bbox_target_) | |||
| bbox_weight_ = self.tile(self.reshape(bbox_weight_, (self.feature_anchor_shape[i], 1)), (1, 4)) | |||
| loss_reg = loss_reg * bbox_weight_ | |||
| loss_reg_item = self.sum_loss(loss_reg, (1,)) | |||
| loss_reg_item = self.sum_loss(loss_reg_item, (0,)) / self.num_expected_total | |||
| loss_total = self.rpn_loss_cls_weight * loss_cls_item + self.rpn_loss_reg_weight * loss_reg_item | |||
| loss += loss_total | |||
| loss_print += (loss_total, loss_cls_item, loss_reg_item) | |||
| clsloss += loss_cls_item | |||
| regloss += loss_reg_item | |||
| output = (loss, rpn_cls_score_total, rpn_bbox_pred_total, clsloss, regloss, loss_print) | |||
| else: | |||
| output = (self.placeh1, rpn_cls_score_total, rpn_bbox_pred_total, self.placeh1, self.placeh1, self.placeh1) | |||
| return output | |||
| @@ -0,0 +1,158 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| #" :=========================================================================== | |||
| """ | |||
| network config setting, will be used in train.py and eval.py | |||
| """ | |||
| from easydict import EasyDict as ed | |||
| config = ed({ | |||
| "img_width": 1280, | |||
| "img_height": 768, | |||
| "keep_ratio": False, | |||
| "flip_ratio": 0.5, | |||
| "photo_ratio": 0.5, | |||
| "expand_ratio": 1.0, | |||
| # anchor | |||
| "feature_shapes": [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)], | |||
| "anchor_scales": [8], | |||
| "anchor_ratios": [0.5, 1.0, 2.0], | |||
| "anchor_strides": [4, 8, 16, 32, 64], | |||
| "num_anchors": 3, | |||
| # resnet | |||
| "resnet_block": [3, 4, 6, 3], | |||
| "resnet_in_channels": [64, 256, 512, 1024], | |||
| "resnet_out_channels": [256, 512, 1024, 2048], | |||
| # fpn | |||
| "fpn_in_channels": [256, 512, 1024, 2048], | |||
| "fpn_out_channels": 256, | |||
| "fpn_num_outs": 5, | |||
| # rpn | |||
| "rpn_in_channels": 256, | |||
| "rpn_feat_channels": 256, | |||
| "rpn_loss_cls_weight": 1.0, | |||
| "rpn_loss_reg_weight": 1.0, | |||
| "rpn_cls_out_channels": 1, | |||
| "rpn_target_means": [0., 0., 0., 0.], | |||
| "rpn_target_stds": [1.0, 1.0, 1.0, 1.0], | |||
| # bbox_assign_sampler | |||
| "neg_iou_thr": 0.3, | |||
| "pos_iou_thr": 0.7, | |||
| "min_pos_iou": 0.3, | |||
| "num_bboxes": 245520, | |||
| "num_gts": 128, | |||
| "num_expected_neg": 256, | |||
| "num_expected_pos": 128, | |||
| # proposal | |||
| "activate_num_classes": 2, | |||
| "use_sigmoid_cls": True, | |||
| # roi_align | |||
| "roi_layer": dict(type='RoIAlign', out_size=7, sample_num=2), | |||
| "roi_align_out_channels": 256, | |||
| "roi_align_featmap_strides": [4, 8, 16, 32], | |||
| "roi_align_finest_scale": 56, | |||
| "roi_sample_num": 640, | |||
| # bbox_assign_sampler_stage2 | |||
| "neg_iou_thr_stage2": 0.5, | |||
| "pos_iou_thr_stage2": 0.5, | |||
| "min_pos_iou_stage2": 0.5, | |||
| "num_bboxes_stage2": 2000, | |||
| "num_expected_pos_stage2": 128, | |||
| "num_expected_neg_stage2": 512, | |||
| "num_expected_total_stage2": 512, | |||
| # rcnn | |||
| "rcnn_num_layers": 2, | |||
| "rcnn_in_channels": 256, | |||
| "rcnn_fc_out_channels": 1024, | |||
| "rcnn_loss_cls_weight": 1, | |||
| "rcnn_loss_reg_weight": 1, | |||
| "rcnn_target_means": [0., 0., 0., 0.], | |||
| "rcnn_target_stds": [0.1, 0.1, 0.2, 0.2], | |||
| # train proposal | |||
| "rpn_proposal_nms_across_levels": False, | |||
| "rpn_proposal_nms_pre": 2000, | |||
| "rpn_proposal_nms_post": 2000, | |||
| "rpn_proposal_max_num": 2000, | |||
| "rpn_proposal_nms_thr": 0.7, | |||
| "rpn_proposal_min_bbox_size": 0, | |||
| # test proposal | |||
| "rpn_nms_across_levels": False, | |||
| "rpn_nms_pre": 1000, | |||
| "rpn_nms_post": 1000, | |||
| "rpn_max_num": 1000, | |||
| "rpn_nms_thr": 0.7, | |||
| "rpn_min_bbox_min_size": 0, | |||
| "test_score_thr": 0.05, | |||
| "test_iou_thr": 0.5, | |||
| "test_max_per_img": 100, | |||
| "test_batch_size": 2, | |||
| "rpn_head_loss_type": "CrossEntropyLoss", | |||
| "rpn_head_use_sigmoid": True, | |||
| "rpn_head_weight": 1.0, | |||
| # LR | |||
| "base_lr": 0.02, | |||
| "base_step": 58633, | |||
| "total_epoch": 13, | |||
| "warmup_step": 500, | |||
| "warmup_mode": "linear", | |||
| "warmup_ratio": 1/3.0, | |||
| "sgd_step": [8, 11], | |||
| "sgd_momentum": 0.9, | |||
| # train | |||
| "batch_size": 2, | |||
| "loss_scale": 1, | |||
| "momentum": 0.91, | |||
| "weight_decay": 1e-4, | |||
| "epoch_size": 12, | |||
| "save_checkpoint": True, | |||
| "save_checkpoint_epochs": 1, | |||
| "keep_checkpoint_max": 10, | |||
| "save_checkpoint_path": "./checkpoint", | |||
| "mindrecord_dir": "../MindRecord_COCO_TRAIN", | |||
| "coco_root": "./cocodataset/", | |||
| "train_data_type": "train2017", | |||
| "val_data_type": "val2017", | |||
| "instance_set": "annotations/instances_{}.json", | |||
| "coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', | |||
| 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', | |||
| 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', | |||
| 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', | |||
| 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', | |||
| 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', | |||
| 'kite', 'baseball bat', 'baseball glove', 'skateboard', | |||
| 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', | |||
| 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', | |||
| 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', | |||
| 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', | |||
| 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', | |||
| 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', | |||
| 'refrigerator', 'book', 'clock', 'vase', 'scissors', | |||
| 'teddy bear', 'hair drier', 'toothbrush'), | |||
| "num_classes": 81 | |||
| }) | |||
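| # Sanity-check sketch (illustrative, not part of the original file): the anchor-related | |||
| # entries above are coupled. Each feature map level i has shape | |||
| # (img_height // anchor_strides[i], img_width // anchor_strides[i]), and num_bboxes is | |||
| # the total anchor count over all levels. | |||
| if __name__ == "__main__": | |||
|     for stride, shape in zip(config.anchor_strides, config.feature_shapes): | |||
|         assert shape == (config.img_height // stride, config.img_width // stride) | |||
|     total = sum(h * w * config.num_anchors for (h, w) in config.feature_shapes) | |||
|     assert total == config.num_bboxes  # 245520 for a 1280x768 input with 3 anchors per cell | |||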
| @@ -0,0 +1,441 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn dataset""" | |||
| from __future__ import division | |||
| import os | |||
| import numpy as np | |||
| from numpy import random | |||
| import mmcv | |||
| import mindspore.dataset as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| from mindspore.mindrecord import FileWriter | |||
| from src.config import config | |||
| def bbox_overlaps(bboxes1, bboxes2, mode='iou'): | |||
| """Calculate the ious between each bbox of bboxes1 and bboxes2. | |||
| Args: | |||
| bboxes1(ndarray): shape (n, 4) | |||
| bboxes2(ndarray): shape (k, 4) | |||
| mode(str): iou (intersection over union) or iof (intersection | |||
| over foreground) | |||
| Returns: | |||
| ious(ndarray): shape (n, k) | |||
| """ | |||
| assert mode in ['iou', 'iof'] | |||
| bboxes1 = bboxes1.astype(np.float32) | |||
| bboxes2 = bboxes2.astype(np.float32) | |||
| rows = bboxes1.shape[0] | |||
| cols = bboxes2.shape[0] | |||
| ious = np.zeros((rows, cols), dtype=np.float32) | |||
| if rows * cols == 0: | |||
| return ious | |||
| exchange = False | |||
| if bboxes1.shape[0] > bboxes2.shape[0]: | |||
| bboxes1, bboxes2 = bboxes2, bboxes1 | |||
| ious = np.zeros((cols, rows), dtype=np.float32) | |||
| exchange = True | |||
| area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) | |||
| area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) | |||
| for i in range(bboxes1.shape[0]): | |||
| x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) | |||
| y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) | |||
| x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) | |||
| y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) | |||
| overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( | |||
| y_end - y_start + 1, 0) | |||
| if mode == 'iou': | |||
| union = area1[i] + area2 - overlap | |||
| else: | |||
| union = area1[i] if not exchange else area2 | |||
| ious[i, :] = overlap / union | |||
| if exchange: | |||
| ious = ious.T | |||
| return ious | |||
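| # Quick illustration of bbox_overlaps (hypothetical values, using the inclusive +1 pixel | |||
| # convention above): a box against itself gives IoU 1.0; a half-overlapping box gives | |||
| # 25 / (100 + 100 - 25) ~= 0.143. | |||
| # b1 = np.array([[0, 0, 9, 9]]) | |||
| # b2 = np.array([[0, 0, 9, 9], [5, 5, 14, 14]]) | |||
| # bbox_overlaps(b1, b2)  # -> array([[1.0, 0.14285714]], dtype=float32) | |||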
| class PhotoMetricDistortion: | |||
| """Photo Metric Distortion""" | |||
| def __init__(self, | |||
| brightness_delta=32, | |||
| contrast_range=(0.5, 1.5), | |||
| saturation_range=(0.5, 1.5), | |||
| hue_delta=18): | |||
| self.brightness_delta = brightness_delta | |||
| self.contrast_lower, self.contrast_upper = contrast_range | |||
| self.saturation_lower, self.saturation_upper = saturation_range | |||
| self.hue_delta = hue_delta | |||
| def __call__(self, img, boxes, labels): | |||
| # random brightness | |||
| img = img.astype('float32') | |||
| if random.randint(2): | |||
| delta = random.uniform(-self.brightness_delta, | |||
| self.brightness_delta) | |||
| img += delta | |||
| # mode == 0 --> do random contrast first | |||
| # mode == 1 --> do random contrast last | |||
| mode = random.randint(2) | |||
| if mode == 1: | |||
| if random.randint(2): | |||
| alpha = random.uniform(self.contrast_lower, | |||
| self.contrast_upper) | |||
| img *= alpha | |||
| # convert color from BGR to HSV | |||
| img = mmcv.bgr2hsv(img) | |||
| # random saturation | |||
| if random.randint(2): | |||
| img[..., 1] *= random.uniform(self.saturation_lower, | |||
| self.saturation_upper) | |||
| # random hue | |||
| if random.randint(2): | |||
| img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) | |||
| img[..., 0][img[..., 0] > 360] -= 360 | |||
| img[..., 0][img[..., 0] < 0] += 360 | |||
| # convert color from HSV to BGR | |||
| img = mmcv.hsv2bgr(img) | |||
| # random contrast | |||
| if mode == 0: | |||
| if random.randint(2): | |||
| alpha = random.uniform(self.contrast_lower, | |||
| self.contrast_upper) | |||
| img *= alpha | |||
| # randomly swap channels | |||
| if random.randint(2): | |||
| img = img[..., random.permutation(3)] | |||
| return img, boxes, labels | |||
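| # Usage sketch (hypothetical, not in the original file): the distortion expects a float | |||
| # BGR image and passes boxes/labels through unchanged. | |||
| # distort = PhotoMetricDistortion() | |||
| # img_aug, boxes, labels = distort(img_bgr, boxes, labels)  # img_bgr: HxWx3 ndarray | |||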
| class Expand: | |||
| """expand image""" | |||
| def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): | |||
| if to_rgb: | |||
| self.mean = mean[::-1] | |||
| else: | |||
| self.mean = mean | |||
| self.min_ratio, self.max_ratio = ratio_range | |||
| def __call__(self, img, boxes, labels): | |||
| if random.randint(2): | |||
| return img, boxes, labels | |||
| h, w, c = img.shape | |||
| ratio = random.uniform(self.min_ratio, self.max_ratio) | |||
| expand_img = np.full((int(h * ratio), int(w * ratio), c), | |||
| self.mean).astype(img.dtype) | |||
| left = int(random.uniform(0, w * ratio - w)) | |||
| top = int(random.uniform(0, h * ratio - h)) | |||
| expand_img[top:top + h, left:left + w] = img | |||
| img = expand_img | |||
| boxes += np.tile((left, top), 2) | |||
| return img, boxes, labels | |||
| def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """rescale operation for image""" | |||
| img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True) | |||
| if img_data.shape[0] > config.img_height: | |||
| img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True) | |||
| scale_factor = scale_factor*scale_factor2 | |||
| img_shape = np.append(img_shape, scale_factor) | |||
| img_shape = np.asarray(img_shape, dtype=np.float32) | |||
| gt_bboxes = gt_bboxes * scale_factor | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """resize operation for image""" | |||
| img_data = img | |||
| img_data, w_scale, h_scale = mmcv.imresize( | |||
| img_data, (config.img_width, config.img_height), return_scale=True) | |||
| scale_factor = np.array( | |||
| [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) | |||
| img_shape = (config.img_height, config.img_width, 1.0) | |||
| img_shape = np.asarray(img_shape, dtype=np.float32) | |||
| gt_bboxes = gt_bboxes * scale_factor | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """resize operation for image of eval""" | |||
| img_data = img | |||
| img_data, w_scale, h_scale = mmcv.imresize( | |||
| img_data, (config.img_width, config.img_height), return_scale=True) | |||
| scale_factor = np.array( | |||
| [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) | |||
| img_shape = np.append(img_shape, (h_scale, w_scale)) | |||
| img_shape = np.asarray(img_shape, dtype=np.float32) | |||
| gt_bboxes = gt_bboxes * scale_factor | |||
| gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) | |||
| gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """impad operation for image""" | |||
| img_data = mmcv.impad(img, (config.img_height, config.img_width)) | |||
| img_data = img_data.astype(np.float32) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """imnormalize operation for image""" | |||
| img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True) | |||
| img_data = img_data.astype(np.float32) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """flip operation for image""" | |||
| img_data = img | |||
| img_data = mmcv.imflip(img_data) | |||
| flipped = gt_bboxes.copy() | |||
| _, w, _ = img_data.shape | |||
| flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 | |||
| flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 | |||
| return (img_data, img_shape, flipped, gt_label, gt_num) | |||
| def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """transpose operation for image""" | |||
| img_data = img.transpose(2, 0, 1).copy() | |||
| img_data = img_data.astype(np.float16) | |||
| img_shape = img_shape.astype(np.float16) | |||
| gt_bboxes = gt_bboxes.astype(np.float16) | |||
| gt_label = gt_label.astype(np.int32) | |||
| gt_num = gt_num.astype(np.bool) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """photo crop operation for image""" | |||
| random_photo = PhotoMetricDistortion() | |||
| img_data, gt_bboxes, gt_label = random_photo(img, gt_bboxes, gt_label) | |||
| return (img_data, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num): | |||
| """expand operation for image""" | |||
| expand = Expand() | |||
| img, gt_bboxes, gt_label = expand(img, gt_bboxes, gt_label) | |||
| return (img, img_shape, gt_bboxes, gt_label, gt_num) | |||
| def preprocess_fn(image, box, is_training): | |||
| """Preprocess function for dataset.""" | |||
| def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert): | |||
| image_shape = image_shape[:2] | |||
| input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert | |||
| if config.keep_ratio: | |||
| input_data = rescale_column(*input_data) | |||
| else: | |||
| input_data = resize_column_test(*input_data) | |||
| input_data = imnormalize_column(*input_data) | |||
| output_data = transpose_column(*input_data) | |||
| return output_data | |||
| def _data_aug(image, box, is_training): | |||
| """Data augmentation function.""" | |||
| image_bgr = image.copy() | |||
| image_bgr[:, :, 0] = image[:, :, 2] | |||
| image_bgr[:, :, 1] = image[:, :, 1] | |||
| image_bgr[:, :, 2] = image[:, :, 0] | |||
| image_shape = image_bgr.shape[:2] | |||
| gt_box = box[:, :4] | |||
| gt_label = box[:, 4] | |||
| gt_iscrowd = box[:, 5] | |||
| pad_max_number = 128 | |||
| gt_box_new = np.pad(gt_box, ((0, pad_max_number - box.shape[0]), (0, 0)), mode="constant", constant_values=0) | |||
| gt_label_new = np.pad(gt_label, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=-1) | |||
| gt_iscrowd_new = np.pad(gt_iscrowd, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=1) | |||
| gt_iscrowd_new_revert = (~(gt_iscrowd_new.astype(np.bool))).astype(np.int32) | |||
| if not is_training: | |||
| return _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert) | |||
| flip = (np.random.rand() < config.flip_ratio) | |||
| photo = (np.random.rand() < config.photo_ratio) | |||
| expand = (np.random.rand() < config.expand_ratio) | |||
| input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert | |||
| if expand: | |||
| input_data = expand_column(*input_data) | |||
| if config.keep_ratio: | |||
| input_data = rescale_column(*input_data) | |||
| else: | |||
| input_data = resize_column(*input_data) | |||
| if photo: | |||
| input_data = photo_crop_column(*input_data) | |||
| input_data = imnormalize_column(*input_data) | |||
| if flip: | |||
| input_data = flip_column(*input_data) | |||
| output_data = transpose_column(*input_data) | |||
| return output_data | |||
| return _data_aug(image, box, is_training) | |||
| def create_coco_label(is_training): | |||
| """Get image path and annotation from COCO.""" | |||
| from pycocotools.coco import COCO | |||
| coco_root = config.coco_root | |||
| data_type = config.val_data_type | |||
| if is_training: | |||
| data_type = config.train_data_type | |||
| # Classes needed for training or testing. | |||
| train_cls = config.coco_classes | |||
| train_cls_dict = {} | |||
| for i, cls in enumerate(train_cls): | |||
| train_cls_dict[cls] = i | |||
| anno_json = os.path.join(coco_root, config.instance_set.format(data_type)) | |||
| coco = COCO(anno_json) | |||
| classes_dict = {} | |||
| cat_ids = coco.loadCats(coco.getCatIds()) | |||
| for cat in cat_ids: | |||
| classes_dict[cat["id"]] = cat["name"] | |||
| image_ids = coco.getImgIds() | |||
| image_files = [] | |||
| image_anno_dict = {} | |||
| for img_id in image_ids: | |||
| image_info = coco.loadImgs(img_id) | |||
| file_name = image_info[0]["file_name"] | |||
| anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None) | |||
| anno = coco.loadAnns(anno_ids) | |||
| image_path = os.path.join(coco_root, data_type, file_name) | |||
| annos = [] | |||
| for label in anno: | |||
| bbox = label["bbox"] | |||
| class_name = classes_dict[label["category_id"]] | |||
| if class_name in train_cls: | |||
| x1, x2 = bbox[0], bbox[0] + bbox[2] | |||
| y1, y2 = bbox[1], bbox[1] + bbox[3] | |||
| annos.append([x1, y1, x2, y2] + [train_cls_dict[class_name]] + [int(label["iscrowd"])]) | |||
| image_files.append(image_path) | |||
| if annos: | |||
| image_anno_dict[image_path] = np.array(annos) | |||
| else: | |||
| image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1]) | |||
| return image_files, image_anno_dict | |||
| def anno_parser(annos_str): | |||
| """Parse annotation from string to list.""" | |||
| annos = [] | |||
| for anno_str in annos_str: | |||
| anno = list(map(int, anno_str.strip().split(','))) | |||
| annos.append(anno) | |||
| return annos | |||
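| # Example (hypothetical input): each annotation string is "x1,y1,x2,y2,label,iscrowd", so | |||
| # anno_parser(["10,20,110,220,1,0"]) returns [[10, 20, 110, 220, 1, 0]]. | |||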
| def filter_valid_data(image_dir, anno_path): | |||
| """Filter valid image file, which both in image_dir and anno_path.""" | |||
| image_files = [] | |||
| image_anno_dict = {} | |||
| if not os.path.isdir(image_dir): | |||
| raise RuntimeError("Path given is not valid.") | |||
| if not os.path.isfile(anno_path): | |||
| raise RuntimeError("Annotation file is not valid.") | |||
| with open(anno_path, "rb") as f: | |||
| lines = f.readlines() | |||
| for line in lines: | |||
| line_str = line.decode("utf-8").strip() | |||
| line_split = str(line_str).split(' ') | |||
| file_name = line_split[0] | |||
| image_path = os.path.join(image_dir, file_name) | |||
| if os.path.isfile(image_path): | |||
| image_anno_dict[image_path] = anno_parser(line_split[1:]) | |||
| image_files.append(image_path) | |||
| return image_files, image_anno_dict | |||
| def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8): | |||
| """Create MindRecord file.""" | |||
| mindrecord_dir = config.mindrecord_dir | |||
| mindrecord_path = os.path.join(mindrecord_dir, prefix) | |||
| writer = FileWriter(mindrecord_path, file_num) | |||
| if dataset == "coco": | |||
| image_files, image_anno_dict = create_coco_label(is_training) | |||
| else: | |||
| image_files, image_anno_dict = filter_valid_data(config.IMAGE_DIR, config.ANNO_PATH) | |||
| fasterrcnn_json = { | |||
| "image": {"type": "bytes"}, | |||
| "annotation": {"type": "int32", "shape": [-1, 6]}, | |||
| } | |||
| writer.add_schema(fasterrcnn_json, "fasterrcnn_json") | |||
| for image_name in image_files: | |||
| with open(image_name, 'rb') as f: | |||
| img = f.read() | |||
| annos = np.array(image_anno_dict[image_name], dtype=np.int32) | |||
| row = {"image": img, "annotation": annos} | |||
| writer.write_raw_data([row]) | |||
| writer.commit() | |||
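| # Read-back sketch (illustrative): the written records can be loaded with MindDataset | |||
| # using the same column names as the schema above. | |||
| # ds_check = de.MindDataset(os.path.join(config.mindrecord_dir, "fasterrcnn.mindrecord0"), | |||
| #                           columns_list=["image", "annotation"]) | |||
| # print("record count:", ds_check.get_dataset_size()) | |||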
| def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, device_num=1, rank_id=0, | |||
| is_training=True, num_parallel_workers=8): | |||
| """Creatr FasterRcnn dataset with MindDataset.""" | |||
| ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id, | |||
| num_parallel_workers=num_parallel_workers, shuffle=is_training) | |||
| decode = C.Decode() | |||
| ds = ds.map(input_columns=["image"], operations=decode) | |||
| compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) | |||
| if is_training: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| columns_order=["image", "image_shape", "box", "label", "valid_num"], | |||
| operations=compose_map_func, python_multiprocessing=True, num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
| else: | |||
| ds = ds.map(input_columns=["image", "annotation"], | |||
| output_columns=["image", "image_shape", "box", "label", "valid_num"], | |||
| columns_order=["image", "image_shape", "box", "label", "valid_num"], | |||
| operations=compose_map_func, | |||
| num_parallel_workers=num_parallel_workers) | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| @@ -0,0 +1,42 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """lr generator for fasterrcnn""" | |||
| import math | |||
| def linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr): | |||
| lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) | |||
| learning_rate = float(init_lr) + lr_inc * current_step | |||
| return learning_rate | |||
| def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps): | |||
| base = float(current_step - warmup_steps) / float(decay_steps) | |||
| learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr | |||
| return learning_rate | |||
| def dynamic_lr(config, rank_size=1): | |||
| """dynamic learning rate generator""" | |||
| base_lr = config.base_lr | |||
| base_step = (config.base_step // rank_size) + rank_size | |||
| total_steps = int(base_step * config.total_epoch) | |||
| warmup_steps = int(config.warmup_step) | |||
| lr = [] | |||
| for i in range(total_steps): | |||
| if i < warmup_steps: | |||
| lr.append(linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * config.warmup_ratio)) | |||
| else: | |||
| lr.append(a_cosine_learning_rate(i, base_lr, warmup_steps, total_steps)) | |||
| return lr | |||
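| # Sketch of the resulting schedule (illustrative): the rate ramps linearly from | |||
| # base_lr * warmup_ratio to base_lr over warmup_step steps, then decays along a cosine | |||
| # toward 0 at the final step. | |||
| # lr = dynamic_lr(config, rank_size=8) | |||
| # print(lr[0], lr[config.warmup_step - 1], lr[-1]) | |||
| # # ~0.00667 (0.02 / 3) at step 0, ~0.02 at the end of warmup, ~0 at the last step | |||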
| @@ -0,0 +1,182 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """FasterRcnn training network wrapper.""" | |||
| import time | |||
| import numpy as np | |||
| import mindspore.nn as nn | |||
| from mindspore.common.tensor import Tensor | |||
| from mindspore.ops import functional as F | |||
| from mindspore.ops import composite as C | |||
| from mindspore import ParameterTuple | |||
| from mindspore.train.callback import Callback | |||
| from mindspore.nn.wrap.grad_reducer import DistributedGradReducer | |||
| time_stamp_init = False | |||
| time_stamp_first = 0 | |||
| class LossCallBack(Callback): | |||
| """ | |||
| Monitor the loss in training. | |||
| If the loss is NAN or INF, training is terminated. | |||
| Note: | |||
| If per_print_times is 0, the loss is not printed. | |||
| Args: | |||
| per_print_times (int): How often (in steps) to print the loss. Default: 1. | |||
| """ | |||
| def __init__(self, per_print_times=1): | |||
| super(LossCallBack, self).__init__() | |||
| if not isinstance(per_print_times, int) or per_print_times < 0: | |||
| raise ValueError("print_step must be int and >= 0.") | |||
| self._per_print_times = per_print_times | |||
| self.count = 0 | |||
| self.rpn_loss_sum = 0 | |||
| self.rcnn_loss_sum = 0 | |||
| self.rpn_cls_loss_sum = 0 | |||
| self.rpn_reg_loss_sum = 0 | |||
| self.rcnn_cls_loss_sum = 0 | |||
| self.rcnn_reg_loss_sum = 0 | |||
| global time_stamp_init, time_stamp_first | |||
| if not time_stamp_init: | |||
| time_stamp_first = time.time() | |||
| time_stamp_init = True | |||
| def step_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| rpn_loss = cb_params.net_outputs[0].asnumpy() | |||
| rcnn_loss = cb_params.net_outputs[1].asnumpy() | |||
| rpn_cls_loss = cb_params.net_outputs[2].asnumpy() | |||
| rpn_reg_loss = cb_params.net_outputs[3].asnumpy() | |||
| rcnn_cls_loss = cb_params.net_outputs[4].asnumpy() | |||
| rcnn_reg_loss = cb_params.net_outputs[5].asnumpy() | |||
| self.count += 1 | |||
| self.rpn_loss_sum += float(rpn_loss) | |||
| self.rcnn_loss_sum += float(rcnn_loss) | |||
| self.rpn_cls_loss_sum += float(rpn_cls_loss) | |||
| self.rpn_reg_loss_sum += float(rpn_reg_loss) | |||
| self.rcnn_cls_loss_sum += float(rcnn_cls_loss) | |||
| self.rcnn_reg_loss_sum += float(rcnn_reg_loss) | |||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 | |||
| if self.count >= 1: | |||
| global time_stamp_first | |||
| time_stamp_current = time.time() | |||
| rpn_loss = self.rpn_loss_sum/self.count | |||
| rcnn_loss = self.rcnn_loss_sum/self.count | |||
| rpn_cls_loss = self.rpn_cls_loss_sum/self.count | |||
| rpn_reg_loss = self.rpn_reg_loss_sum/self.count | |||
| rcnn_cls_loss = self.rcnn_cls_loss_sum/self.count | |||
| rcnn_reg_loss = self.rcnn_reg_loss_sum/self.count | |||
| total_loss = rpn_loss + rcnn_loss | |||
| with open("./loss.log", "a+") as loss_file: | |||
| loss_file.write("%lu epoch: %s step: %s, rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, " | |||
| "rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, total_loss: %.5f\n" % | |||
| (time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch, | |||
| rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, | |||
| rcnn_cls_loss, rcnn_reg_loss, total_loss)) | |||
| self.count = 0 | |||
| self.rpn_loss_sum = 0 | |||
| self.rcnn_loss_sum = 0 | |||
| self.rpn_cls_loss_sum = 0 | |||
| self.rpn_reg_loss_sum = 0 | |||
| self.rcnn_cls_loss_sum = 0 | |||
| self.rcnn_reg_loss_sum = 0 | |||
| class LossNet(nn.Cell): | |||
| """FasterRcnn loss method""" | |||
| def __init__(self): | |||
| super(LossNet, self).__init__() | |||
| def construct(self, x1, x2, x3, x4, x5, x6): | |||
| # x1 is the rpn loss and x2 is the rcnn loss; the remaining outputs are kept for logging only | |||
| return x1 + x2 | |||
| class WithLossCell(nn.Cell): | |||
| """ | |||
| Wrap the network with loss function to compute loss. | |||
| Args: | |||
| backbone (Cell): The target network to wrap. | |||
| loss_fn (Cell): The loss function used to compute loss. | |||
| """ | |||
| def __init__(self, backbone, loss_fn): | |||
| super(WithLossCell, self).__init__(auto_prefix=False) | |||
| self._backbone = backbone | |||
| self._loss_fn = loss_fn | |||
| def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): | |||
| loss1, loss2, loss3, loss4, loss5, loss6 = self._backbone(x, img_shape, gt_bboxe, gt_label, gt_num) | |||
| return self._loss_fn(loss1, loss2, loss3, loss4, loss5, loss6) | |||
| @property | |||
| def backbone_network(self): | |||
| """ | |||
| Get the backbone network. | |||
| Returns: | |||
| Cell, return backbone network. | |||
| """ | |||
| return self._backbone | |||
| class TrainOneStepCell(nn.Cell): | |||
| """ | |||
| Network training wrapper class. | |||
| Append an optimizer to the training network. After that, the construct function | |||
| can be called to create the backward graph. | |||
| Args: | |||
| network (Cell): The training network. | |||
| network_backbone (Cell): The forward network. | |||
| optimizer (Cell): Optimizer for updating the weights. | |||
| sens (Number): The scaling factor applied to the gradient sensitivity (loss scale). Default value is 1.0. | |||
| reduce_flag (bool): The reduce flag. Default value is False. | |||
| mean (bool): Whether to average gradients during allreduce. Default value is True. | |||
| degree (int): Device number. Default value is None. | |||
| """ | |||
| def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None): | |||
| super(TrainOneStepCell, self).__init__(auto_prefix=False) | |||
| self.network = network | |||
| self.backbone = network_backbone | |||
| self.weights = ParameterTuple(network.trainable_params()) | |||
| self.optimizer = optimizer | |||
| self.grad = C.GradOperation('grad', | |||
| get_by_list=True, | |||
| sens_param=True) | |||
| self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16)) | |||
| self.reduce_flag = reduce_flag | |||
| if reduce_flag: | |||
| self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) | |||
| def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): | |||
| weights = self.weights | |||
| loss1, loss2, loss3, loss4, loss5, loss6 = self.backbone(x, img_shape, gt_bboxe, gt_label, gt_num) | |||
| grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) | |||
| if self.reduce_flag: | |||
| grads = self.grad_reducer(grads) | |||
| return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6 | |||
| @@ -0,0 +1,225 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """coco eval for fasterrcnn""" | |||
| import json | |||
| import numpy as np | |||
| from pycocotools.coco import COCO | |||
| from pycocotools.cocoeval import COCOeval | |||
| import mmcv | |||
| _init_value = np.array(0.0) | |||
| summary_init = { | |||
| 'Precision/mAP': _init_value, | |||
| 'Precision/mAP@.50IOU': _init_value, | |||
| 'Precision/mAP@.75IOU': _init_value, | |||
| 'Precision/mAP (small)': _init_value, | |||
| 'Precision/mAP (medium)': _init_value, | |||
| 'Precision/mAP (large)': _init_value, | |||
| 'Recall/AR@1': _init_value, | |||
| 'Recall/AR@10': _init_value, | |||
| 'Recall/AR@100': _init_value, | |||
| 'Recall/AR@100 (small)': _init_value, | |||
| 'Recall/AR@100 (medium)': _init_value, | |||
| 'Recall/AR@100 (large)': _init_value, | |||
| } | |||
| def coco_eval(result_files, result_types, coco, max_dets=(100, 300, 1000), single_result=False): | |||
| """coco eval for fasterrcnn""" | |||
| with open(result_files['bbox']) as f: | |||
| anns = json.load(f) | |||
| if not anns: | |||
| return summary_init | |||
| if mmcv.is_str(coco): | |||
| coco = COCO(coco) | |||
| assert isinstance(coco, COCO) | |||
| for res_type in result_types: | |||
| result_file = result_files[res_type] | |||
| assert result_file.endswith('.json') | |||
| coco_dets = coco.loadRes(result_file) | |||
| gt_img_ids = coco.getImgIds() | |||
| det_img_ids = coco_dets.getImgIds() | |||
| iou_type = 'bbox' if res_type == 'proposal' else res_type | |||
| cocoEval = COCOeval(coco, coco_dets, iou_type) | |||
| if res_type == 'proposal': | |||
| cocoEval.params.useCats = 0 | |||
| cocoEval.params.maxDets = list(max_dets) | |||
| tgt_ids = gt_img_ids if not single_result else det_img_ids | |||
| if single_result: | |||
| res_dict = dict() | |||
| for id_i in tgt_ids: | |||
| cocoEval = COCOeval(coco, coco_dets, iou_type) | |||
| if res_type == 'proposal': | |||
| cocoEval.params.useCats = 0 | |||
| cocoEval.params.maxDets = list(max_dets) | |||
| cocoEval.params.imgIds = [id_i] | |||
| cocoEval.evaluate() | |||
| cocoEval.accumulate() | |||
| cocoEval.summarize() | |||
| res_dict.update({coco.imgs[id_i]['file_name']: cocoEval.stats[1]}) | |||
| cocoEval = COCOeval(coco, coco_dets, iou_type) | |||
| if res_type == 'proposal': | |||
| cocoEval.params.useCats = 0 | |||
| cocoEval.params.maxDets = list(max_dets) | |||
| cocoEval.params.imgIds = tgt_ids | |||
| cocoEval.evaluate() | |||
| cocoEval.accumulate() | |||
| cocoEval.summarize() | |||
| summary_metrics = { | |||
| 'Precision/mAP': cocoEval.stats[0], | |||
| 'Precision/mAP@.50IOU': cocoEval.stats[1], | |||
| 'Precision/mAP@.75IOU': cocoEval.stats[2], | |||
| 'Precision/mAP (small)': cocoEval.stats[3], | |||
| 'Precision/mAP (medium)': cocoEval.stats[4], | |||
| 'Precision/mAP (large)': cocoEval.stats[5], | |||
| 'Recall/AR@1': cocoEval.stats[6], | |||
| 'Recall/AR@10': cocoEval.stats[7], | |||
| 'Recall/AR@100': cocoEval.stats[8], | |||
| 'Recall/AR@100 (small)': cocoEval.stats[9], | |||
| 'Recall/AR@100 (medium)': cocoEval.stats[10], | |||
| 'Recall/AR@100 (large)': cocoEval.stats[11], | |||
| } | |||
| return summary_metrics | |||
| def xyxy2xywh(bbox): | |||
| """Convert a [x1, y1, x2, y2, ...] box to COCO [x, y, w, h] format.""" | |||
| _bbox = bbox.tolist() | |||
| return [ | |||
| _bbox[0], | |||
| _bbox[1], | |||
| _bbox[2] - _bbox[0] + 1, | |||
| _bbox[3] - _bbox[1] + 1, | |||
| ] | |||
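| # e.g. xyxy2xywh(np.array([10., 20., 110., 220., 0.9])) -> [10.0, 20.0, 101.0, 201.0] | |||
| # (width/height use the inclusive +1 convention; a trailing score column is ignored) | |||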
| def bbox2result_1image(bboxes, labels, num_classes): | |||
| """Convert detection results to a list of numpy arrays. | |||
| Args: | |||
| bboxes (Tensor): shape (n, 5) | |||
| labels (Tensor): shape (n, ) | |||
| num_classes (int): class number, including background class | |||
| Returns: | |||
| list(ndarray): bbox results of each class | |||
| """ | |||
| if bboxes.shape[0] == 0: | |||
| result = [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)] | |||
| else: | |||
| result = [bboxes[labels == i, :] for i in range(num_classes - 1)] | |||
| return result | |||
| def proposal2json(dataset, results): | |||
| """convert proposal to json mode""" | |||
| img_ids = dataset.getImgIds() | |||
| json_results = [] | |||
| dataset_len = dataset.get_dataset_size()*2  # each eval batch holds test_batch_size (2) images | |||
| for idx in range(dataset_len): | |||
| img_id = img_ids[idx] | |||
| bboxes = results[idx] | |||
| for i in range(bboxes.shape[0]): | |||
| data = dict() | |||
| data['image_id'] = img_id | |||
| data['bbox'] = xyxy2xywh(bboxes[i]) | |||
| data['score'] = float(bboxes[i][4]) | |||
| data['category_id'] = 1 | |||
| json_results.append(data) | |||
| return json_results | |||
| def det2json(dataset, results): | |||
| """convert det to json mode""" | |||
| cat_ids = dataset.getCatIds() | |||
| img_ids = dataset.getImgIds() | |||
| json_results = [] | |||
| dataset_len = len(img_ids) | |||
| for idx in range(dataset_len): | |||
| img_id = img_ids[idx] | |||
| if idx == len(results): | |||
| break | |||
| result = results[idx] | |||
| for label, result_label in enumerate(result): | |||
| bboxes = result_label | |||
| for i in range(bboxes.shape[0]): | |||
| data = dict() | |||
| data['image_id'] = img_id | |||
| data['bbox'] = xyxy2xywh(bboxes[i]) | |||
| data['score'] = float(bboxes[i][4]) | |||
| data['category_id'] = cat_ids[label] | |||
| json_results.append(data) | |||
| return json_results | |||
| def segm2json(dataset, results): | |||
| """convert segm to json mode""" | |||
| bbox_json_results = [] | |||
| segm_json_results = [] | |||
| for idx in range(len(dataset)): | |||
| img_id = dataset.img_ids[idx] | |||
| det, seg = results[idx] | |||
| for label, det_label in enumerate(det): | |||
| # bbox results | |||
| bboxes = det_label | |||
| for i in range(bboxes.shape[0]): | |||
| data = dict() | |||
| data['image_id'] = img_id | |||
| data['bbox'] = xyxy2xywh(bboxes[i]) | |||
| data['score'] = float(bboxes[i][4]) | |||
| data['category_id'] = dataset.cat_ids[label] | |||
| bbox_json_results.append(data) | |||
| if len(seg) == 2: | |||
| segms = seg[0][label] | |||
| mask_score = seg[1][label] | |||
| else: | |||
| segms = seg[label] | |||
| mask_score = [bbox[4] for bbox in bboxes] | |||
| for i in range(bboxes.shape[0]): | |||
| data = dict() | |||
| data['image_id'] = img_id | |||
| data['score'] = float(mask_score[i]) | |||
| data['category_id'] = dataset.cat_ids[label] | |||
| segms[i]['counts'] = segms[i]['counts'].decode() | |||
| data['segmentation'] = segms[i] | |||
| segm_json_results.append(data) | |||
| return bbox_json_results, segm_json_results | |||
| def results2json(dataset, results, out_file): | |||
| """convert result convert to json mode""" | |||
| result_files = dict() | |||
| if isinstance(results[0], list): | |||
| json_results = det2json(dataset, results) | |||
| result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox') | |||
| result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox') | |||
| mmcv.dump(json_results, result_files['bbox']) | |||
| elif isinstance(results[0], tuple): | |||
| json_results = segm2json(dataset, results) | |||
| result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox') | |||
| result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox') | |||
| result_files['segm'] = '{}.{}.json'.format(out_file, 'segm') | |||
| mmcv.dump(json_results[0], result_files['bbox']) | |||
| mmcv.dump(json_results[1], result_files['segm']) | |||
| elif isinstance(results[0], np.ndarray): | |||
| json_results = proposal2json(dataset, results) | |||
| result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal') | |||
| mmcv.dump(json_results, result_files['proposal']) | |||
| else: | |||
| raise TypeError('invalid type of results') | |||
| return result_files | |||
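| # End-to-end sketch of how these helpers fit together (illustrative; `outputs` is a | |||
| # hypothetical list of per-image (bboxes, labels) network outputs and `dataset_coco` a | |||
| # pycocotools COCO object): | |||
| # results = [bbox2result_1image(bboxes, labels, 81) for bboxes, labels in outputs] | |||
| # result_files = results2json(dataset_coco, results, "./results.pkl") | |||
| # metrics = coco_eval(result_files, ["bbox"], dataset_coco) | |||
| # print(metrics['Precision/mAP']) | |||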
| @@ -0,0 +1,136 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train FasterRcnn and get checkpoint files.""" | |||
| import os | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| import mindspore.common.dtype as mstype | |||
| from mindspore import context, Tensor | |||
| from mindspore.communication.management import init | |||
| from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor | |||
| from mindspore.train import Model, ParallelMode | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.nn import SGD | |||
| import mindspore.dataset.engine as de | |||
| from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50 | |||
| from src.network_define import LossCallBack, WithLossCell, TrainOneStepCell, LossNet | |||
| from src.config import config | |||
| from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset | |||
| from src.lr_schedule import dynamic_lr | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description="FasterRcnn training") | |||
| parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create " | |||
| "Mindrecord, default is false.") | |||
| parser.add_argument("--run_distribute", type=bool, default=False, help="Run distribute, default is false.") | |||
| parser.add_argument("--do_train", type=bool, default=True, help="Do train or not, default is true.") | |||
| parser.add_argument("--do_eval", type=bool, default=False, help="Do eval or not, default is false.") | |||
| parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.") | |||
| parser.add_argument("--pre_trained", type=str, default="", help="Pretrain file path.") | |||
| parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") | |||
| parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") | |||
| parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default is 0.") | |||
| args_opt = parser.parse_args() | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id) | |||
| if __name__ == '__main__': | |||
| if not args_opt.do_eval and args_opt.run_distribute: | |||
| rank = args_opt.rank_id | |||
| device_num = args_opt.device_num | |||
| context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| mirror_mean=True, parameter_broadcast=True) | |||
| init() | |||
| else: | |||
| rank = 0 | |||
| device_num = 1 | |||
| print("Start create dataset!") | |||
| # MindRecord files will be generated in config.mindrecord_dir, | |||
| # named FasterRcnn.mindrecord0, 1, ..., up to file_num. | |||
| prefix = "FasterRcnn.mindrecord" | |||
| mindrecord_dir = config.mindrecord_dir | |||
| mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") | |||
| if not os.path.exists(mindrecord_file): | |||
| if not os.path.isdir(mindrecord_dir): | |||
| os.makedirs(mindrecord_dir) | |||
| if args_opt.dataset == "coco": | |||
| if os.path.isdir(config.coco_root): | |||
| print("Create Mindrecord.") | |||
| data_to_mindrecord_byte_image("coco", True, prefix) | |||
| print("Create Mindrecord Done, at {}".format(mindrecord_dir)) | |||
| else: | |||
| print("coco_root not exits.") | |||
| else: | |||
| if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): | |||
| print("Create Mindrecord.") | |||
| data_to_mindrecord_byte_image("other", True, prefix) | |||
| print("Create Mindrecord Done, at {}".format(mindrecord_dir)) | |||
| else: | |||
| print("IMAGE_DIR or ANNO_PATH not exits.") | |||
| if not args_opt.only_create_dataset: | |||
| loss_scale = float(config.loss_scale) | |||
| # When creating MindDataset, use the first MindRecord file, such as FasterRcnn.mindrecord0. | |||
| dataset = create_fasterrcnn_dataset(mindrecord_file, repeat_num=config.epoch_size, | |||
| batch_size=config.batch_size, device_num=device_num, rank_id=rank) | |||
| dataset_size = dataset.get_dataset_size() | |||
| print("Create dataset done!") | |||
| net = Faster_Rcnn_Resnet50(config=config) | |||
| net = net.set_train() | |||
| load_path = args_opt.pre_trained | |||
| if load_path != "": | |||
| param_dict = load_checkpoint(load_path) | |||
| for item in list(param_dict.keys()): | |||
| if not item.startswith('backbone'): | |||
| param_dict.pop(item) | |||
| load_param_into_net(net, param_dict) | |||
| loss = LossNet() | |||
| lr = Tensor(dynamic_lr(config, rank_size=device_num), mstype.float32) | |||
| opt = SGD(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum, | |||
| weight_decay=config.weight_decay, loss_scale=config.loss_scale) | |||
| net_with_loss = WithLossCell(net, loss) | |||
| if args_opt.run_distribute: | |||
| net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True, | |||
| mean=True, degree=device_num) | |||
| else: | |||
| net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale) | |||
| time_cb = TimeMonitor(data_size=dataset_size) | |||
| loss_cb = LossCallBack() | |||
| cb = [time_cb, loss_cb] | |||
| if config.save_checkpoint: | |||
| ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size, | |||
| keep_checkpoint_max=config.keep_checkpoint_max) | |||
| ckpoint_cb = ModelCheckpoint(prefix='faster_rcnn', directory=config.save_checkpoint_path, config=ckptconfig) | |||
| cb += [ckpoint_cb] | |||
| model = Model(net) | |||
| model.train(config.epoch_size, dataset, callbacks=cb) | |||
| @@ -1,9 +1,12 @@ | |||
| #!/bin/bash | |||
| rm /tmp/citeseer/mindrecord/* | |||
| SRC_PATH=/tmp/citeseer/dataset | |||
| MINDRECORD_PATH=/tmp/citeseer/mindrecord | |||
| rm -f $MINDRECORD_PATH/* | |||
| python writer.py --mindrecord_script citeseer \ | |||
| --mindrecord_file "/tmp/citeseer/mindrecord/citeseer_mr" \ | |||
| --mindrecord_file "$MINDRECORD_PATH/citeseer_mr" \ | |||
| --mindrecord_partitions 1 \ | |||
| --mindrecord_header_size_by_bit 18 \ | |||
| --mindrecord_page_size_by_bit 20 \ | |||
| --graph_api_args "/tmp/citeseer/dataset/citeseer.content:/tmp/citeseer/dataset/citeseer.cites" | |||
| --graph_api_args "$SRC_PATH/citeseer.content:$SRC_PATH/citeseer.cites" | |||
| @@ -1,9 +1,12 @@ | |||
| #!/bin/bash | |||
| rm /tmp/cora/mindrecord/* | |||
| SRC_PATH=/tmp/cora/dataset | |||
| MINDRECORD_PATH=/tmp/cora/mindrecord | |||
| rm -f $MINDRECORD_PATH/* | |||
| python writer.py --mindrecord_script cora \ | |||
| --mindrecord_file "/tmp/cora/mindrecord/cora_mr" \ | |||
| --mindrecord_file "$MINDRECORD_PATH/cora_mr" \ | |||
| --mindrecord_partitions 1 \ | |||
| --mindrecord_header_size_by_bit 18 \ | |||
| --mindrecord_page_size_by_bit 20 \ | |||
| --graph_api_args "/tmp/cora/dataset/cora_content.csv:/tmp/cora/dataset/cora_cites.csv" | |||
| --graph_api_args "$SRC_PATH/cora_content.csv:$SRC_PATH/cora_cites.csv" | |||
| @@ -24,7 +24,7 @@ from importlib import import_module | |||
| from multiprocessing import Pool | |||
| from mindspore.mindrecord import FileWriter | |||
| from mindspore.mindrecord import GraphMapSchema | |||
| from graph_map_schema import GraphMapSchema | |||
| def exec_task(task_id, parallel_writer=True): | |||
| @@ -0,0 +1,100 @@ | |||
| # LSTM Example | |||
| ## Description | |||
| This example is for LSTM model training and evaluation. | |||
| ## Requirements | |||
| - Install [MindSpore](https://www.mindspore.cn/install/en). | |||
| - Download the dataset aclImdb_v1. | |||
| > Unzip the aclImdb_v1 dataset to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── train # train dataset | |||
| > └── test # infer dataset | |||
| > ``` | |||
| - Download the GloVe file. | |||
| > Unzip the glove.6B.zip to any path you want and the folder structure should be as follows: | |||
| > ``` | |||
| > . | |||
| > ├── glove.6B.100d.txt | |||
| > ├── glove.6B.200d.txt | |||
| > ├── glove.6B.300d.txt # we will use this one later. | |||
| > └── glove.6B.50d.txt | |||
| > ``` | |||
| > Add a new line at the beginning of the file named `glove.6B.300d.txt`. | |||
| > It tells the loader that there are 400,000 words in total, each represented by a 300-dimensional word vector. | |||
| > ``` | |||
| > 400000 300 | |||
| > ``` | |||
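| > A one-off way to prepend that header line (a minimal sketch; adjust the path to wherever you unzipped GloVe): | |||
| > ``` | |||
| > glove = "your_glove_path/glove.6B.300d.txt" | |||
| > with open(glove, "r", encoding="utf-8") as f: | |||
| >     body = f.read() | |||
| > with open(glove, "w", encoding="utf-8") as f: | |||
| >     f.write("400000 300\n" + body) | |||
| > ``` | |||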
| ## Running the Example | |||
| ### Training | |||
| ``` | |||
| python train.py --preprocess=true --aclimdb_path=your_imdb_path --glove_path=your_glove_path > out.train.log 2>&1 & | |||
| ``` | |||
| The python command above will run in the background; you can view the results in the file `out.train.log`. | |||
| After training, you'll get some checkpoint files under the script folder by default. | |||
| You will get loss values like the following: | |||
| ``` | |||
| # grep "loss is " out.train.log | |||
| epoch: 1 step: 390, loss is 0.6003723 | |||
| epoch: 2 step: 390, loss is 0.35312173 | |||
| ... | |||
| ``` | |||
| ### Evaluation | |||
| ``` | |||
| python eval.py --ckpt_path=./lstm-20-390.ckpt > out.eval.log 2>&1 & | |||
| ``` | |||
| The above python command will run in the background; you can view the results in the file `out.eval.log`. | |||
| You will get the accuracy as follows: | |||
| ``` | |||
| # grep "acc" out.eval.log | |||
| result: {'acc': 0.83} | |||
| ``` | |||
| ## Usage | |||
| ### Training | |||
| ``` | |||
| usage: train.py [--preprocess {true,false}] [--aclimdb_path ACLIMDB_PATH] | |||
| [--glove_path GLOVE_PATH] [--preprocess_path PREPROCESS_PATH] | |||
| [--ckpt_path CKPT_PATH] [--device_target {GPU,CPU}] | |||
| parameters/options: | |||
| --preprocess whether to preprocess data. | |||
| --aclimdb_path path where the dataset is stored. | |||
| --glove_path path where the GloVe is stored. | |||
| --preprocess_path path where the pre-process data is stored. | |||
| --ckpt_path the path to save the checkpoint file. | |||
| --device_target the target device to run, support "GPU", "CPU". | |||
| ``` | |||
| ### Evaluation | |||
| ``` | |||
| usage: eval.py [--preprocess {true,false}] [--aclimdb_path ACLIMDB_PATH] | |||
| [--glove_path GLOVE_PATH] [--preprocess_path PREPROCESS_PATH] | |||
| [--ckpt_path CKPT_PATH] [--device_target {GPU,CPU}] | |||
| parameters/options: | |||
| --preprocess whether to preprocess data. | |||
| --aclimdb_path path where the dataset is stored. | |||
| --glove_path path where the GloVe is stored. | |||
| --preprocess_path path where the pre-process data is stored. | |||
| --ckpt_path the checkpoint file path used to evaluate model. | |||
| --device_target the target device to run, support "GPU", "CPU". | |||
| ``` | |||
| @@ -0,0 +1,33 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| network config setting | |||
| """ | |||
| from easydict import EasyDict as edict | |||
| # LSTM CONFIG | |||
| lstm_cfg = edict({ | |||
| 'num_classes': 2, | |||
| 'learning_rate': 0.1, | |||
| 'momentum': 0.9, | |||
| 'num_epochs': 20, | |||
| 'batch_size': 64, | |||
| 'embed_size': 300, | |||
| 'num_hiddens': 100, | |||
| 'num_layers': 2, | |||
| 'bidirectional': True, | |||
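# note: 390 steps is roughly one epoch: 25,000 aclImdb training reviews / batch_size 64 (drop_remainder)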
| 'save_checkpoint_steps': 390, | |||
| 'keep_checkpoint_max': 10 | |||
| }) | |||
| @@ -0,0 +1,92 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
Data operations, to be used in train.py and eval.py
| """ | |||
| import os | |||
| import numpy as np | |||
| from imdb import ImdbParser | |||
| import mindspore.dataset as ds | |||
| from mindspore.mindrecord import FileWriter | |||
| def create_dataset(data_home, batch_size, repeat_num=1, training=True): | |||
| """Data operations.""" | |||
| ds.config.set_seed(1) | |||
| data_dir = os.path.join(data_home, "aclImdb_train.mindrecord0") | |||
| if not training: | |||
| data_dir = os.path.join(data_home, "aclImdb_test.mindrecord0") | |||
| data_set = ds.MindDataset(data_dir, columns_list=["feature", "label"], num_parallel_workers=4) | |||
# apply shuffle, batch and repeat operations
| data_set = data_set.shuffle(buffer_size=data_set.get_dataset_size()) | |||
| data_set = data_set.batch(batch_size=batch_size, drop_remainder=True) | |||
| data_set = data_set.repeat(count=repeat_num) | |||
| return data_set | |||
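# A quick smoke test for create_dataset (a sketch; assumes the MindRecord files already exist under ./preprocess):
#   ds = create_dataset("./preprocess", batch_size=64)
#   for batch in ds.create_dict_iterator():
#       print(batch["feature"].shape, batch["label"].shape)  # expected: (64, 500) (64,)
#       break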
| def _convert_to_mindrecord(data_home, features, labels, weight_np=None, training=True): | |||
| """ | |||
convert imdb dataset to mindrecord format
| """ | |||
| if weight_np is not None: | |||
| np.savetxt(os.path.join(data_home, 'weight.txt'), weight_np) | |||
| # write mindrecord | |||
| schema_json = {"id": {"type": "int32"}, | |||
| "label": {"type": "int32"}, | |||
| "feature": {"type": "int32", "shape": [-1]}} | |||
| data_dir = os.path.join(data_home, "aclImdb_train.mindrecord") | |||
| if not training: | |||
| data_dir = os.path.join(data_home, "aclImdb_test.mindrecord") | |||
| def get_imdb_data(features, labels): | |||
| data_list = [] | |||
| for i, (label, feature) in enumerate(zip(labels, features)): | |||
| data_json = {"id": i, | |||
| "label": int(label), | |||
| "feature": feature.reshape(-1)} | |||
| data_list.append(data_json) | |||
| return data_list | |||
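# FileWriter with shard_num=4 writes four part files (aclImdb_train.mindrecord0..3);
# create_dataset() above only names the first shard and MindDataset picks up the rest.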
| writer = FileWriter(data_dir, shard_num=4) | |||
| data = get_imdb_data(features, labels) | |||
| writer.add_schema(schema_json, "nlp_schema") | |||
| writer.add_index(["id", "label"]) | |||
| writer.write_raw_data(data) | |||
| writer.commit() | |||
| def convert_to_mindrecord(embed_size, aclimdb_path, preprocess_path, glove_path): | |||
| """ | |||
convert imdb dataset to mindrecord format
| """ | |||
| parser = ImdbParser(aclimdb_path, glove_path, embed_size) | |||
| parser.parse() | |||
| if not os.path.exists(preprocess_path): | |||
| print(f"preprocess path {preprocess_path} is not exist") | |||
| os.makedirs(preprocess_path) | |||
| train_features, train_labels, train_weight_np = parser.get_datas('train') | |||
| _convert_to_mindrecord(preprocess_path, train_features, train_labels, train_weight_np) | |||
| test_features, test_labels, _ = parser.get_datas('test') | |||
| _convert_to_mindrecord(preprocess_path, test_features, test_labels, training=False) | |||
| @@ -0,0 +1,81 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
#################eval lstm example on aclImdb########################
| python eval.py --ckpt_path=./lstm-20-390.ckpt | |||
| """ | |||
| import argparse | |||
| import os | |||
| import numpy as np | |||
| from config import lstm_cfg as cfg | |||
| from dataset import create_dataset, convert_to_mindrecord | |||
| from mindspore import Tensor, nn, Model, context | |||
| from mindspore.model_zoo.lstm import SentimentNet | |||
| from mindspore.nn import Accuracy | |||
| from mindspore.train.callback import LossMonitor | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| if __name__ == '__main__': | |||
| parser = argparse.ArgumentParser(description='MindSpore LSTM Example') | |||
| parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'], | |||
| help='whether to preprocess data.') | |||
| parser.add_argument('--aclimdb_path', type=str, default="./aclImdb", | |||
| help='path where the dataset is stored.') | |||
| parser.add_argument('--glove_path', type=str, default="./glove", | |||
| help='path where the GloVe is stored.') | |||
| parser.add_argument('--preprocess_path', type=str, default="./preprocess", | |||
| help='path where the pre-process data is stored.') | |||
| parser.add_argument('--ckpt_path', type=str, default=None, | |||
| help='the checkpoint file path used to evaluate model.') | |||
| parser.add_argument('--device_target', type=str, default="GPU", choices=['GPU', 'CPU'], | |||
| help='the target device to run, support "GPU", "CPU". Default: "GPU".') | |||
| args = parser.parse_args() | |||
| context.set_context( | |||
| mode=context.GRAPH_MODE, | |||
| save_graphs=False, | |||
| device_target=args.device_target) | |||
| if args.preprocess == "true": | |||
| print("============== Starting Data Pre-processing ==============") | |||
| convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path) | |||
| embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32) | |||
| network = SentimentNet(vocab_size=embedding_table.shape[0], | |||
| embed_size=cfg.embed_size, | |||
| num_hiddens=cfg.num_hiddens, | |||
| num_layers=cfg.num_layers, | |||
| bidirectional=cfg.bidirectional, | |||
| num_classes=cfg.num_classes, | |||
| weight=Tensor(embedding_table), | |||
| batch_size=cfg.batch_size) | |||
| loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) | |||
| opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) | |||
| loss_cb = LossMonitor() | |||
| model = Model(network, loss, opt, {'acc': Accuracy()}) | |||
| print("============== Starting Testing ==============") | |||
| ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False) | |||
| param_dict = load_checkpoint(args.ckpt_path) | |||
| load_param_into_net(network, param_dict) | |||
| if args.device_target == "CPU": | |||
| acc = model.eval(ds_eval, dataset_sink_mode=False) | |||
| else: | |||
| acc = model.eval(ds_eval) | |||
| print("============== Accuracy:{} ==============".format(acc)) | |||
| @@ -0,0 +1,155 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| imdb dataset parser. | |||
| """ | |||
| import os | |||
| from itertools import chain | |||
| import gensim | |||
| import numpy as np | |||
| class ImdbParser(): | |||
| """ | |||
| parse aclImdb data to features and labels. | |||
| sentence->tokenized->encoded->padding->features | |||
| """ | |||
| def __init__(self, imdb_path, glove_path, embed_size=300): | |||
| self.__segs = ['train', 'test'] | |||
| self.__label_dic = {'pos': 1, 'neg': 0} | |||
| self.__imdb_path = imdb_path | |||
| self.__glove_dim = embed_size | |||
| self.__glove_file = os.path.join(glove_path, 'glove.6B.' + str(self.__glove_dim) + 'd.txt') | |||
| # properties | |||
| self.__imdb_datas = {} | |||
| self.__features = {} | |||
| self.__labels = {} | |||
self.__vocab = {}
| self.__word2idx = {} | |||
| self.__weight_np = {} | |||
| self.__wvmodel = None | |||
| def parse(self): | |||
| """ | |||
| parse imdb data to memory | |||
| """ | |||
| self.__wvmodel = gensim.models.KeyedVectors.load_word2vec_format(self.__glove_file) | |||
| for seg in self.__segs: | |||
| self.__parse_imdb_datas(seg) | |||
| self.__parse_features_and_labels(seg) | |||
| self.__gen_weight_np(seg) | |||
| def __parse_imdb_datas(self, seg): | |||
| """ | |||
| load data from txt | |||
| """ | |||
| data_lists = [] | |||
| for label_name, label_id in self.__label_dic.items(): | |||
| sentence_dir = os.path.join(self.__imdb_path, seg, label_name) | |||
| for file in os.listdir(sentence_dir): | |||
| with open(os.path.join(sentence_dir, file), mode='r', encoding='utf8') as f: | |||
| sentence = f.read().replace('\n', '') | |||
| data_lists.append([sentence, label_id]) | |||
| self.__imdb_datas[seg] = data_lists | |||
| def __parse_features_and_labels(self, seg): | |||
| """ | |||
| parse features and labels | |||
| """ | |||
| features = [] | |||
| labels = [] | |||
| for sentence, label in self.__imdb_datas[seg]: | |||
| features.append(sentence) | |||
| labels.append(label) | |||
| self.__features[seg] = features | |||
| self.__labels[seg] = labels | |||
# update features to tokenized
self.__update_features_to_tokenized(seg)
# parse vocab
self.__parse_vocab(seg)
| # encode feature | |||
| self.__encode_features(seg) | |||
| # padding feature | |||
| self.__padding_features(seg) | |||
def __update_features_to_tokenized(self, seg):
| tokenized_features = [] | |||
| for sentence in self.__features[seg]: | |||
| tokenized_sentence = [word.lower() for word in sentence.split(" ")] | |||
| tokenized_features.append(tokenized_sentence) | |||
| self.__features[seg] = tokenized_features | |||
def __parse_vocab(self, seg):
| # vocab | |||
| tokenized_features = self.__features[seg] | |||
| vocab = set(chain(*tokenized_features)) | |||
self.__vocab[seg] = vocab
| # word_to_idx: {'hello': 1, 'world':111, ... '<unk>': 0} | |||
| word_to_idx = {word: i + 1 for i, word in enumerate(vocab)} | |||
| word_to_idx['<unk>'] = 0 | |||
| self.__word2idx[seg] = word_to_idx | |||
| def __encode_features(self, seg): | |||
| """ encode word to index """ | |||
| word_to_idx = self.__word2idx['train'] | |||
| encoded_features = [] | |||
| for tokenized_sentence in self.__features[seg]: | |||
| encoded_sentence = [] | |||
| for word in tokenized_sentence: | |||
| encoded_sentence.append(word_to_idx.get(word, 0)) | |||
| encoded_features.append(encoded_sentence) | |||
| self.__features[seg] = encoded_features | |||
| def __padding_features(self, seg, maxlen=500, pad=0): | |||
| """ pad all features to the same length """ | |||
| padded_features = [] | |||
| for feature in self.__features[seg]: | |||
| if len(feature) >= maxlen: | |||
| padded_feature = feature[:maxlen] | |||
| else: | |||
| padded_feature = feature | |||
| while len(padded_feature) < maxlen: | |||
| padded_feature.append(pad) | |||
| padded_features.append(padded_feature) | |||
| self.__features[seg] = padded_features | |||
| def __gen_weight_np(self, seg): | |||
| """ | |||
| generate weight by gensim | |||
| """ | |||
| weight_np = np.zeros((len(self.__word2idx[seg]), self.__glove_dim), dtype=np.float32) | |||
| for word, idx in self.__word2idx[seg].items(): | |||
| if word not in self.__wvmodel: | |||
| continue | |||
| word_vector = self.__wvmodel.get_vector(word) | |||
| weight_np[idx, :] = word_vector | |||
| self.__weight_np[seg] = weight_np | |||
| def get_datas(self, seg): | |||
| """ | |||
| return features, labels, and weight | |||
| """ | |||
| features = np.array(self.__features[seg]).astype(np.int32) | |||
| labels = np.array(self.__labels[seg]).astype(np.int32) | |||
| weight = np.array(self.__weight_np[seg]) | |||
| return features, labels, weight | |||
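# Typical use (mirrors convert_to_mindrecord in dataset.py):
#   parser = ImdbParser(aclimdb_path, glove_path, embed_size=300)
#   parser.parse()
#   features, labels, weight = parser.get_datas('train')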
| @@ -0,0 +1,83 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| #################train lstm example on aclImdb######################## | |||
| python train.py --preprocess=true --aclimdb_path=your_imdb_path --glove_path=your_glove_path | |||
| """ | |||
| import argparse | |||
| import os | |||
| import numpy as np | |||
| from config import lstm_cfg as cfg | |||
| from dataset import convert_to_mindrecord | |||
| from dataset import create_dataset | |||
| from mindspore import Tensor, nn, Model, context | |||
| from mindspore.model_zoo.lstm import SentimentNet | |||
| from mindspore.nn import Accuracy | |||
| from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor | |||
| if __name__ == '__main__': | |||
| parser = argparse.ArgumentParser(description='MindSpore LSTM Example') | |||
| parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'], | |||
| help='whether to preprocess data.') | |||
| parser.add_argument('--aclimdb_path', type=str, default="./aclImdb", | |||
| help='path where the dataset is stored.') | |||
| parser.add_argument('--glove_path', type=str, default="./glove", | |||
| help='path where the GloVe is stored.') | |||
| parser.add_argument('--preprocess_path', type=str, default="./preprocess", | |||
| help='path where the pre-process data is stored.') | |||
| parser.add_argument('--ckpt_path', type=str, default="./", | |||
| help='the path to save the checkpoint file.') | |||
| parser.add_argument('--device_target', type=str, default="GPU", choices=['GPU', 'CPU'], | |||
| help='the target device to run, support "GPU", "CPU". Default: "GPU".') | |||
| args = parser.parse_args() | |||
| context.set_context( | |||
| mode=context.GRAPH_MODE, | |||
| save_graphs=False, | |||
| device_target=args.device_target) | |||
| if args.preprocess == "true": | |||
| print("============== Starting Data Pre-processing ==============") | |||
| convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path) | |||
| embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32) | |||
| network = SentimentNet(vocab_size=embedding_table.shape[0], | |||
| embed_size=cfg.embed_size, | |||
| num_hiddens=cfg.num_hiddens, | |||
| num_layers=cfg.num_layers, | |||
| bidirectional=cfg.bidirectional, | |||
| num_classes=cfg.num_classes, | |||
| weight=Tensor(embedding_table), | |||
| batch_size=cfg.batch_size) | |||
| loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True) | |||
| opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) | |||
| loss_cb = LossMonitor() | |||
| model = Model(network, loss, opt, {'acc': Accuracy()}) | |||
| print("============== Starting Training ==============") | |||
| ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs) | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps, | |||
| keep_checkpoint_max=cfg.keep_checkpoint_max) | |||
| ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck) | |||
| time_cb = TimeMonitor(data_size=ds_train.get_dataset_size()) | |||
| if args.device_target == "CPU": | |||
| model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb], dataset_sink_mode=False) | |||
| else: | |||
| model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb]) | |||
| print("============== Training Success ==============") | |||
| @@ -1,30 +1,21 @@ | |||
| # MobileNetV2 Description | |||
MobileNetV2 is a significant improvement over MobileNetV1 and pushes the state of the art for mobile visual recognition, including classification, object detection and semantic segmentation.
MobileNetV2 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm, and then subsequently improved through novel architecture advances.
MobileNetV2 builds upon the ideas from MobileNetV1, using depthwise separable convolution as efficient building blocks. However, V2 introduces two new features to the architecture: 1) linear bottlenecks between the layers, and 2) shortcut connections between the bottlenecks.
[Paper](https://arxiv.org/pdf/1905.02244) Howard, Andrew, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang et al. "Searching for MobileNetV3." In Proceedings of the IEEE International Conference on Computer Vision, pp. 1314-1324. 2019.
| # Model architecture | |||
The overall network architecture of MobileNetV2 is shown below:
| [Link](https://arxiv.org/pdf/1905.02244) | |||
| [Paper](https://arxiv.org/pdf/1801.04381) Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. | |||
| # Dataset | |||
| Dataset used: [imagenet](http://www.image-net.org/) | |||
| Dataset used: imagenet2012 | |||
- Dataset size: ~125G, 1.28M color images in 1000 classes
- Train: 120G, 1.28M images
| - Test: 5G, 50000 images | |||
| - Dataset size: ~125G | |||
- Train: 120G, 1281167 images in 1000 class directories
- Test: 5G, 50000 images: images should first be sorted into 1000 class directories, just like the train images
| - Data format: RGB images. | |||
| - Note: Data will be processed in src/dataset.py | |||
| # Features | |||
| # Environment Requirements | |||
| - Hardware(Ascend/GPU) | |||
| @@ -60,8 +51,8 @@ Dataset used: [imagenet](http://www.image-net.org/) | |||
| ### Usage | |||
- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
- GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]
- GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]
| ### Launch | |||
| @@ -133,7 +124,7 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625. | |||
| #### Inference Performance | |||
| | Parameters | GoogLeNet | | | | |||
| | Parameters | | | | | |||
| | -------------------------- | ----------------------------- | ------------------------- | -------------------- | | |||
| | Model Version | V1 | | | | |||
| | Resource | Huawei 910 | NV SMX2 V100-32G | Huawei 310 | | |||
| @@ -148,4 +139,4 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625. | |||
| | Model for inference | | | | | |||
| # ModelZoo Homepage | |||
| [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo) | |||
| [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo) | |||
| @@ -22,9 +22,10 @@ from mindspore import nn | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.common import dtype as mstype | |||
| from src.dataset import create_dataset | |||
| from mindspore.model_zoo.mobilenetV2 import mobilenet_v2 | |||
| from src.dataset import create_dataset_py | |||
| from src.config import config_ascend, config_gpu | |||
| from src.mobilenetV2 import mobilenet_v2 | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||
| @@ -35,21 +36,23 @@ args_opt = parser.parse_args() | |||
| if __name__ == '__main__': | |||
| config_platform = None | |||
| net = None | |||
| if args_opt.platform == "Ascend": | |||
| config_platform = config_ascend | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", | |||
| device_id=device_id, save_graphs=False) | |||
| net = mobilenet_v2(num_classes=config_platform.num_classes, platform="Ascend") | |||
| elif args_opt.platform == "GPU": | |||
| config_platform = config_gpu | |||
| context.set_context(mode=context.GRAPH_MODE, | |||
| device_target="GPU", save_graphs=False) | |||
| net = mobilenet_v2(num_classes=config_platform.num_classes, platform="GPU") | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| loss = nn.SoftmaxCrossEntropyWithLogits( | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| net = mobilenet_v2(num_classes=config_platform.num_classes) | |||
| if args_opt.platform == "Ascend": | |||
| net.to_float(mstype.float16) | |||
| @@ -57,11 +60,11 @@ if __name__ == '__main__': | |||
| if isinstance(cell, nn.Dense): | |||
| cell.to_float(mstype.float32) | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, | |||
| do_train=False, | |||
| config=config_platform, | |||
| platform=args_opt.platform, | |||
| batch_size=config_platform.batch_size) | |||
| dataset = create_dataset_py(dataset_path=args_opt.dataset_path, | |||
| do_train=False, | |||
| config=config_platform, | |||
| platform=args_opt.platform, | |||
| batch_size=config_platform.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| if args_opt.checkpoint_path: | |||
| @@ -52,4 +52,4 @@ python ${BASEPATH}/../eval.py \ | |||
| --platform=$1 \ | |||
| --dataset_path=$2 \ | |||
| --checkpoint_path=$3 \ | |||
| &> infer.log & # dataset val folder path | |||
| &> ../infer.log & # dataset val folder path | |||
| @@ -36,13 +36,14 @@ run_ascend() | |||
| fi | |||
| mkdir ../train | |||
| cd ../train || exit | |||
| python ${BASEPATH}/../launch.py \ | |||
| python ${BASEPATH}/../src/launch.py \ | |||
| --nproc_per_node=$2 \ | |||
| --visible_devices=$4 \ | |||
| --server_id=$3 \ | |||
| --training_script=${BASEPATH}/train.py \ | |||
| --training_script=${BASEPATH}/../train.py \ | |||
| --dataset_path=$5 \ | |||
| --platform=$1 &> train.log & # dataset train folder | |||
| --pre_trained=$6 \ | |||
| --platform=$1 &> ../train.log & # dataset train folder | |||
| } | |||
| run_gpu() | |||
| @@ -73,14 +74,15 @@ run_gpu() | |||
| python ${BASEPATH}/../train.py \ | |||
| --dataset_path=$4 \ | |||
| --platform=$1 \ | |||
| &> train.log & # dataset train folder | |||
| --pre_trained=$5 \ | |||
| &> ../train.log & # dataset train folder | |||
| } | |||
| if [ $# -gt 5 ] || [ $# -lt 4 ] | |||
| if [ $# -gt 6 ] || [ $# -lt 4 ] | |||
| then | |||
| echo "Usage:\n \ | |||
| Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ | |||
| GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ | |||
| Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ | |||
| GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ | |||
| " | |||
| exit 1 | |||
| fi | |||
| @@ -0,0 +1,160 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| import mindspore.dataset.transforms.vision.py_transforms as P | |||
| def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32): | |||
| """ | |||
| create a train or eval dataset | |||
| Args: | |||
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
config(struct): the config of train and eval in different platforms.
platform(string): the run platform, "Ascend" or "GPU".
| repeat_num(int): the repeat times of dataset. Default: 1. | |||
| batch_size(int): the batch size of dataset. Default: 32. | |||
| Returns: | |||
| dataset | |||
| """ | |||
| if platform == "Ascend": | |||
| rank_size = int(os.getenv("RANK_SIZE")) | |||
| rank_id = int(os.getenv("RANK_ID")) | |||
| if do_train: | |||
| if rank_size == 1: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=rank_size, shard_id=rank_id) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) | |||
| elif platform == "GPU": | |||
| if do_train: | |||
| from mindspore.communication.management import get_rank, get_group_size | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=get_group_size(), shard_id=get_rank()) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| resize_height = config.image_height | |||
| if do_train: | |||
| buffer_size = 20480 | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| # define map operations | |||
| decode_op = C.Decode() | |||
| resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) | |||
| horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5) | |||
| resize_op = C.Resize(256) | |||
| center_crop = C.CenterCrop(resize_height) | |||
| normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255]) | |||
| change_swap_op = C.HWC2CHW() | |||
| if do_train: | |||
| trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op] | |||
| else: | |||
| trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32): | |||
| """ | |||
| create a train or eval dataset | |||
| Args: | |||
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
config(struct): the config of train and eval in different platforms.
platform(string): the run platform, "Ascend" or "GPU".
| repeat_num(int): the repeat times of dataset. Default: 1. | |||
| batch_size(int): the batch size of dataset. Default: 32. | |||
| Returns: | |||
| dataset | |||
| """ | |||
| if platform == "Ascend": | |||
| rank_size = int(os.getenv("RANK_SIZE")) | |||
| rank_id = int(os.getenv("RANK_ID")) | |||
| if do_train: | |||
| if rank_size == 1: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=rank_size, shard_id=rank_id) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) | |||
| elif platform == "GPU": | |||
| if do_train: | |||
| from mindspore.communication.management import get_rank, get_group_size | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=get_group_size(), shard_id=get_rank()) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| resize_height = config.image_height | |||
| if do_train: | |||
| buffer_size = 20480 | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| # define map operations | |||
| decode_op = P.Decode() | |||
| resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) | |||
| horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5) | |||
| resize_op = P.Resize(256) | |||
| center_crop = P.CenterCrop(resize_height) | |||
| to_tensor = P.ToTensor() | |||
| normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | |||
| if do_train: | |||
| trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op] | |||
| else: | |||
| trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op] | |||
| compose = P.ComposeOp(trans) | |||
| ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| @@ -18,6 +18,7 @@ import sys | |||
| import json | |||
| import subprocess | |||
| import shutil | |||
| import platform | |||
| from argparse import ArgumentParser | |||
| def parse_args(): | |||
| @@ -79,7 +80,8 @@ def main(): | |||
| device_ips[device_id] = device_ip | |||
| print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) | |||
| hccn_table = {} | |||
| hccn_table['board_id'] = '0x0000' | |||
| arch = platform.processor() | |||
| hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch] | |||
| hccn_table['chip_info'] = '910' | |||
| hccn_table['deploy_mode'] = 'lab' | |||
| hccn_table['group_count'] = '1' | |||
| @@ -32,12 +32,12 @@ from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback | |||
| from mindspore.train.loss_scale_manager import FixedLossScaleManager | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.communication.management import init | |||
| from mindspore.communication.management import init, get_group_size | |||
| from mindspore.model_zoo.mobilenetV2 import mobilenet_v2 | |||
| import mindspore.dataset.engine as de | |||
| from src.dataset import create_dataset | |||
| from src.dataset import create_dataset_py | |||
| from src.lr_generator import get_lr | |||
| from src.config import config_gpu, config_ascend | |||
| from src.mobilenetV2 import mobilenet_v2 | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| @@ -146,7 +146,7 @@ class Monitor(Callback): | |||
| self.losses.append(step_loss) | |||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num | |||
| print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format( | |||
| print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format( | |||
| cb_params.cur_epoch_num - | |||
| 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss, | |||
| np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) | |||
| @@ -157,6 +157,11 @@ if __name__ == '__main__': | |||
| # train on gpu | |||
| print("train args: ", args_opt, "\ncfg: ", config_gpu) | |||
| init('nccl') | |||
| context.set_auto_parallel_context(parallel_mode="data_parallel", | |||
| mirror_mean=True, | |||
| device_num=get_group_size()) | |||
| # define net | |||
| net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU") | |||
| # define loss | |||
| @@ -168,12 +173,12 @@ if __name__ == '__main__': | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| # define dataset | |||
| epoch_size = config_gpu.epoch_size | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, | |||
| do_train=True, | |||
| config=config_gpu, | |||
| platform=args_opt.platform, | |||
| repeat_num=epoch_size, | |||
| batch_size=config_gpu.batch_size) | |||
| dataset = create_dataset_py(dataset_path=args_opt.dataset_path, | |||
| do_train=True, | |||
| config=config_gpu, | |||
| platform=args_opt.platform, | |||
| repeat_num=epoch_size, | |||
| batch_size=config_gpu.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| # resume | |||
| if args_opt.pre_trained: | |||
| @@ -216,23 +221,23 @@ if __name__ == '__main__': | |||
| init() | |||
| epoch_size = config_ascend.epoch_size | |||
| net = mobilenet_v2(num_classes=config_ascend.num_classes) | |||
| net = mobilenet_v2(num_classes=config_ascend.num_classes, platform="Ascend") | |||
| net.to_float(mstype.float16) | |||
| for _, cell in net.cells_and_names(): | |||
| if isinstance(cell, nn.Dense): | |||
| cell.to_float(mstype.float32) | |||
| if config_ascend.label_smooth > 0: | |||
| loss = CrossEntropyWithLabelSmooth( | |||
| smooth_factor=config_ascend.label_smooth, num_classes=config.num_classes) | |||
| smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes) | |||
| else: | |||
| loss = SoftmaxCrossEntropyWithLogits( | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, | |||
| do_train=True, | |||
| config=config_ascend, | |||
| platform=args_opt.platform, | |||
| repeat_num=epoch_size, | |||
| batch_size=config_ascend.batch_size) | |||
| dataset = create_dataset_py(dataset_path=args_opt.dataset_path, | |||
| do_train=True, | |||
| config=config_ascend, | |||
| platform=args_opt.platform, | |||
| repeat_num=epoch_size, | |||
| batch_size=config_ascend.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| if args_opt.pre_trained: | |||
| param_dict = load_checkpoint(args_opt.pre_trained) | |||
| @@ -0,0 +1,101 @@ | |||
| # MobileNetV2 Description | |||
MobileNetV2 is a significant improvement over MobileNetV1 and pushes the state of the art for mobile visual recognition, including classification, object detection and semantic segmentation.
MobileNetV2 builds upon the ideas from MobileNetV1, using depthwise separable convolution as efficient building blocks. However, V2 introduces two new features to the architecture: 1) linear bottlenecks between the layers, and 2) shortcut connections between the bottlenecks.
| [Paper](https://arxiv.org/pdf/1801.04381) Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." Proceedings of the IEEE conference on computer vision and pattern recognition. 2018. | |||
| # Dataset | |||
| Dataset used: imagenet | |||
| - Dataset size: ~125G | |||
- Train: 120G, 1281167 images in 1000 class directories
- Test: 5G, 50000 images: images should first be sorted into 1000 class directories, just like the train images
| - Data format: RGB images. | |||
| - Note: Data will be processed in src/dataset.py | |||
| # Environment Requirements | |||
| - Hardware(Ascend) | |||
| - Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. | |||
| - Framework | |||
- [MindSpore](https://www.mindspore.cn/install/en)
| - For more information, please check the resources below: | |||
| - [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html) | |||
| - [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html) | |||
| # Script description | |||
| ## Script and sample code | |||
| ```python | |||
| ├── mobilenetv2_quant | |||
| ├── Readme.md | |||
| ├── scripts | |||
| │ ├──run_train.sh | |||
| │ ├──run_eval.sh | |||
| ├── src | |||
| │ ├──config.py | |||
| │ ├──dataset.py | |||
| │ ├──luanch.py | |||
| │ ├──lr_generator.py | |||
| │ ├──mobilenetV2_quant.py | |||
| ├── train.py | |||
| ├── eval.py | |||
| ``` | |||
| ## Training process | |||
| ### Usage | |||
| - Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH] | |||
| ### Launch | |||
| ``` | |||
| # training example | |||
| Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt | |||
| ``` | |||
| ### Result | |||
Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log` as follows.
| ``` | |||
| epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100] | |||
| epoch time: 140522.500, per step time: 224.836, avg loss: 5.258 | |||
| epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200] | |||
| epoch time: 138331.250, per step time: 221.330, avg loss: 3.917 | |||
| ``` | |||
| ## Eval process | |||
| ### Usage | |||
| - Ascend: sh run_infer.sh Ascend [DATASET_PATH] [CHECKPOINT_PATH] | |||
| ### Launch | |||
| ``` | |||
| # infer example | |||
| Ascend: sh run_infer.sh Ascend ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt | |||
| ``` | |||
> The checkpoint can be produced during the training process.
| ### Result | |||
Inference results will be stored in the example path; you can find results like the following in `val.log`.
| ``` | |||
| result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt | |||
| ``` | |||
| # ModelZoo Homepage | |||
| [Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo) | |||
| @@ -0,0 +1,63 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| eval. | |||
| """ | |||
| import os | |||
| import argparse | |||
| from mindspore import context | |||
| from mindspore import nn | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from src.mobilenetV2_quant import mobilenet_v2_quant | |||
| from src.dataset import create_dataset | |||
| from src.config import config_ascend | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| parser.add_argument('--platform', type=str, default=None, help='run platform') | |||
| args_opt = parser.parse_args() | |||
| if __name__ == '__main__': | |||
| config_platform = None | |||
| net = None | |||
| if args_opt.platform == "Ascend": | |||
| config_platform = config_ascend | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", | |||
| device_id=device_id, save_graphs=False) | |||
| net = mobilenet_v2_quant(num_classes=config_platform.num_classes) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| loss = nn.SoftmaxCrossEntropyWithLogits( | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, | |||
| do_train=False, | |||
| config=config_platform, | |||
| platform=args_opt.platform, | |||
| batch_size=config_platform.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| if args_opt.checkpoint_path: | |||
| param_dict = load_checkpoint(args_opt.checkpoint_path) | |||
| load_param_into_net(net, param_dict) | |||
| net.set_train(False) | |||
| model = Model(net, loss_fn=loss, metrics={'acc'}) | |||
| res = model.eval(dataset) | |||
| print("result:", res, "ckpt=", args_opt.checkpoint_path) | |||
| @@ -0,0 +1,53 @@ | |||
| #!/usr/bin/env bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| if [ $# != 3 ] | |||
| then | |||
| echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]" | |||
| exit 1 | |||
| fi | |||
| # check dataset path | |||
| if [ ! -d $2 ] | |||
| then | |||
| echo "error: DATASET_PATH=$2 is not a directory" | |||
| exit 1 | |||
| fi | |||
| # check checkpoint file | |||
| if [ ! -f $3 ] | |||
| then | |||
| echo "error: CHECKPOINT_PATH=$3 is not a file" | |||
| exit 1 | |||
| fi | |||
| # set environment | |||
| BASEPATH=$(cd "`dirname $0`" || exit; pwd) | |||
| export DEVICE_ID=0 | |||
| export RANK_ID=0 | |||
| export RANK_SIZE=1 | |||
| if [ -d "../eval" ]; | |||
| then | |||
| rm -rf ../eval | |||
| fi | |||
| mkdir ../eval | |||
| cd ../eval || exit | |||
| # launch | |||
| python ${BASEPATH}/../eval.py \ | |||
| --platform=$1 \ | |||
| --dataset_path=$2 \ | |||
| --checkpoint_path=$3 \ | |||
| &> infer.log & # dataset val folder path | |||
| @@ -0,0 +1,62 @@ | |||
| #!/usr/bin/env bash | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| run_ascend() | |||
| { | |||
if [ $2 -lt 1 ] || [ $2 -gt 8 ]
| then | |||
| echo "error: DEVICE_NUM=$2 is not in (1-8)" | |||
| exit 1 | |||
| fi | |||
| if [ ! -d $5 ] && [ ! -f $5 ] | |||
| then | |||
| echo "error: DATASET_PATH=$5 is not a directory or file" | |||
| exit 1 | |||
| fi | |||
| BASEPATH=$(cd "`dirname $0`" || exit; pwd) | |||
| export PYTHONPATH=${BASEPATH}:$PYTHONPATH | |||
| if [ -d "../train" ]; | |||
| then | |||
| rm -rf ../train | |||
| fi | |||
| mkdir ../train | |||
| cd ../train || exit | |||
| python ${BASEPATH}/../src/launch.py \ | |||
| --nproc_per_node=$2 \ | |||
| --visible_devices=$4 \ | |||
| --server_id=$3 \ | |||
| --training_script=${BASEPATH}/../train.py \ | |||
| --dataset_path=$5 \ | |||
| --pre_trained=$6 \ | |||
| --platform=$1 &> train.log & # dataset train folder | |||
| } | |||
| if [ $# -gt 6 ] || [ $# -lt 4 ] | |||
| then | |||
| echo "Usage:\n \ | |||
| Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ | |||
| " | |||
| exit 1 | |||
| fi | |||
| if [ $1 = "Ascend" ] ; then | |||
| run_ascend "$@" | |||
| else | |||
| echo "not support platform" | |||
| fi; | |||
| @@ -0,0 +1,38 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
network config setting, to be used in train.py and eval.py
| """ | |||
| from easydict import EasyDict as ed | |||
| config_ascend = ed({ | |||
| "num_classes": 1000, | |||
| "image_height": 224, | |||
| "image_width": 224, | |||
| "batch_size": 192, | |||
| "data_load_mode": "mindrecord", | |||
| "epoch_size": 60, | |||
| "start_epoch": 200, | |||
| "warmup_epochs": 1, | |||
| "lr": 0.3, | |||
| "momentum": 0.9, | |||
| "weight_decay": 4e-5, | |||
| "label_smooth": 0.1, | |||
| "loss_scale": 1024, | |||
| "save_checkpoint": True, | |||
| "save_checkpoint_epochs": 1, | |||
| "keep_checkpoint_max": 200, | |||
| "save_checkpoint_path": "./checkpoint", | |||
| }) | |||
| @@ -0,0 +1,156 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """ | |||
| create train or eval dataset. | |||
| """ | |||
| import os | |||
| from functools import partial | |||
| import mindspore.common.dtype as mstype | |||
| import mindspore.dataset.engine as de | |||
| import mindspore.dataset.transforms.vision.c_transforms as C | |||
| import mindspore.dataset.transforms.c_transforms as C2 | |||
| import mindspore.dataset.transforms.vision.py_transforms as P | |||
| from src.config import config_ascend | |||
| def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32): | |||
| """ | |||
| create a train or eval dataset | |||
| Args: | |||
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
config(struct): the config of train and eval in different platforms.
platform(string): the run platform, only "Ascend" is supported.
| repeat_num(int): the repeat times of dataset. Default: 1. | |||
| batch_size(int): the batch size of dataset. Default: 32. | |||
| Returns: | |||
| dataset | |||
| """ | |||
| if platform == "Ascend": | |||
| rank_size = int(os.getenv("RANK_SIZE")) | |||
| rank_id = int(os.getenv("RANK_ID")) | |||
| columns_list = ['image', 'label'] | |||
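# bind the chosen loader and its fixed arguments now; shard-related kwargs are supplied per branch below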
| if config_ascend.data_load_mode == "mindrecord": | |||
| load_func = partial(de.MindDataset, dataset_path, columns_list) | |||
| else: | |||
| load_func = partial(de.ImageFolderDatasetV2, dataset_path) | |||
| if do_train: | |||
| if rank_size == 1: | |||
| ds = load_func(num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| ds = load_func(num_parallel_workers=8, shuffle=True, | |||
| num_shards=rank_size, shard_id=rank_id) | |||
| else: | |||
| ds = load_func(num_parallel_workers=8, shuffle=False) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| resize_height = config.image_height | |||
| if do_train: | |||
| buffer_size = 20480 | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| # define map operations | |||
| decode_op = C.Decode() | |||
| resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) | |||
| horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5) | |||
| resize_op = C.Resize(256) | |||
| center_crop = C.CenterCrop(resize_height) | |||
| normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], | |||
| std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) | |||
| change_swap_op = C.HWC2CHW() | |||
| if do_train: | |||
| trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op] | |||
| else: | |||
| trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op] | |||
| type_cast_op = C2.TypeCast(mstype.int32) | |||
| ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16) | |||
| ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32): | |||
| """ | |||
| create a train or eval dataset | |||
| Args: | |||
dataset_path(string): the path of dataset.
do_train(bool): whether dataset is used for train or eval.
config(struct): the config of train and eval in different platforms.
platform(string): the run platform, only "Ascend" is supported.
| repeat_num(int): the repeat times of dataset. Default: 1. | |||
| batch_size(int): the batch size of dataset. Default: 32. | |||
| Returns: | |||
| dataset | |||
| """ | |||
| if platform == "Ascend": | |||
| rank_size = int(os.getenv("RANK_SIZE")) | |||
| rank_id = int(os.getenv("RANK_ID")) | |||
| if do_train: | |||
| if rank_size == 1: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=rank_size, shard_id=rank_id) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| resize_height = config.image_height | |||
| if do_train: | |||
| buffer_size = 20480 | |||
| # apply shuffle operations | |||
| ds = ds.shuffle(buffer_size=buffer_size) | |||
| # define map operations | |||
| decode_op = P.Decode() | |||
| resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) | |||
| horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5) | |||
| resize_op = P.Resize(256) | |||
| center_crop = P.CenterCrop(resize_height) | |||
| to_tensor = P.ToTensor() | |||
| normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) | |||
| if do_train: | |||
| trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op] | |||
| else: | |||
| trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op] | |||
| compose = P.ComposeOp(trans) | |||
| ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True) | |||
| # apply batch operations | |||
| ds = ds.batch(batch_size, drop_remainder=True) | |||
| # apply dataset repeat operation | |||
| ds = ds.repeat(repeat_num) | |||
| return ds | |||
| @@ -0,0 +1,166 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """launch train script""" | |||
| import os | |||
| import sys | |||
| import json | |||
| import subprocess | |||
| import shutil | |||
| import platform | |||
| from argparse import ArgumentParser | |||
| def parse_args(): | |||
| """ | |||
| parse command line arguments. | |||
| Args: | |||
| Returns: | |||
| args. | |||
| Examples: | |||
| >>> parse_args() | |||
| """ | |||
| parser = ArgumentParser(description="mindspore distributed training launch " | |||
| "helper utility that will spawn up " | |||
| "multiple distributed processes") | |||
| parser.add_argument("--nproc_per_node", type=int, default=1, | |||
| help="The number of processes to launch on each node; " | |||
| "for distributed training, this is recommended to be set " | |||
| "to the number of devices in your system so that " | |||
| "each process can be bound to a single device.") | |||
| parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7", | |||
| help="will use the visible devices sequentially") | |||
| parser.add_argument("--server_id", type=str, default="", | |||
| help="server ip") | |||
| parser.add_argument("--training_script", type=str, | |||
| help="The full path to the single D training " | |||
| "program/script to be launched in parallel, " | |||
| "followed by all the arguments for the " | |||
| "training script") | |||
| # rest from the training program | |||
| args, unknown = parser.parse_known_args() | |||
| args.training_script_args = unknown | |||
| return args | |||
| def main(): | |||
| print("start", __file__) | |||
| args = parse_args() | |||
| print(args) | |||
| visible_devices = args.visible_devices.split(',') | |||
| assert os.path.isfile(args.training_script) | |||
| assert len(visible_devices) >= args.nproc_per_node | |||
| print('visible_devices:{}'.format(visible_devices)) | |||
| if not args.server_id: | |||
| print('please input server ip!') | |||
| sys.exit(1) | |||
| print('server_id:{}'.format(args.server_id)) | |||
| # construct hccn_table | |||
| with open('/etc/hccn.conf', 'r') as hccn_file: | |||
| hccn_configs = hccn_file.readlines() | |||
| device_ips = {} | |||
| for hccn_item in hccn_configs: | |||
| hccn_item = hccn_item.strip() | |||
| if hccn_item.startswith('address_'): | |||
| device_id, device_ip = hccn_item.split('=') | |||
| device_id = device_id.split('_')[1] | |||
| device_ips[device_id] = device_ip | |||
| print('device_id:{}, device_ip:{}'.format(device_id, device_ip)) | |||
| hccn_table = {} | |||
| arch = platform.processor() | |||
| hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch] | |||
| hccn_table['chip_info'] = '910' | |||
| hccn_table['deploy_mode'] = 'lab' | |||
| hccn_table['group_count'] = '1' | |||
| hccn_table['group_list'] = [] | |||
| instance_list = [] | |||
| usable_dev = '' | |||
| for instance_id in range(args.nproc_per_node): | |||
| instance = {} | |||
| instance['devices'] = [] | |||
| device_id = visible_devices[instance_id] | |||
| device_ip = device_ips[device_id] | |||
| usable_dev += str(device_id) | |||
| instance['devices'].append({ | |||
| 'device_id': device_id, | |||
| 'device_ip': device_ip, | |||
| }) | |||
| instance['rank_id'] = str(instance_id) | |||
| instance['server_id'] = args.server_id | |||
| instance_list.append(instance) | |||
| hccn_table['group_list'].append({ | |||
| 'device_num': str(args.nproc_per_node), | |||
| 'server_num': '1', | |||
| 'group_name': '', | |||
| 'instance_count': str(args.nproc_per_node), | |||
| 'instance_list': instance_list, | |||
| }) | |||
| hccn_table['para_plane_nic_location'] = 'device' | |||
| hccn_table['para_plane_nic_name'] = [] | |||
| for instance_id in range(args.nproc_per_node): | |||
| eth_id = visible_devices[instance_id] | |||
| hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id)) | |||
| hccn_table['para_plane_nic_num'] = str(args.nproc_per_node) | |||
| hccn_table['status'] = 'completed' | |||
| # save hccn_table to file | |||
| table_path = os.getcwd() | |||
| if not os.path.exists(table_path): | |||
| os.mkdir(table_path) | |||
| table_fn = os.path.join(table_path, | |||
| 'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id)) | |||
| with open(table_fn, 'w') as table_fp: | |||
| json.dump(hccn_table, table_fp, indent=4) | |||
| sys.stdout.flush() | |||
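| # For reference, a sketch of the JSON the code above writes for a 2-process | |||
| # run on devices 0 and 1 (IP addresses are illustrative placeholders): | |||
| #   { | |||
| #     "board_id": "0x0000", "chip_info": "910", "deploy_mode": "lab", | |||
| #     "group_count": "1", | |||
| #     "group_list": [{"device_num": "2", "server_num": "1", "group_name": "", | |||
| #                     "instance_count": "2", | |||
| #                     "instance_list": [{"devices": [{"device_id": "0", | |||
| #                                                     "device_ip": "192.168.100.101"}], | |||
| #                                        "rank_id": "0", "server_id": "10.0.0.1"}, ...]}], | |||
| #     "para_plane_nic_location": "device", | |||
| #     "para_plane_nic_name": ["eth0", "eth1"], | |||
| #     "para_plane_nic_num": "2", "status": "completed" | |||
| #   } | |||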
| # spawn the processes | |||
| processes = [] | |||
| cmds = [] | |||
| log_files = [] | |||
| env = os.environ.copy() | |||
| env['RANK_SIZE'] = str(args.nproc_per_node) | |||
| cur_path = os.getcwd() | |||
| for rank_id in range(0, args.nproc_per_node): | |||
| os.chdir(cur_path) | |||
| device_id = visible_devices[rank_id] | |||
| device_dir = os.path.join(cur_path, 'device{}'.format(rank_id)) | |||
| env['RANK_ID'] = str(rank_id) | |||
| env['DEVICE_ID'] = str(device_id) | |||
| if args.nproc_per_node > 1: | |||
| env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn | |||
| env['RANK_TABLE_FILE'] = table_fn | |||
| if os.path.exists(device_dir): | |||
| shutil.rmtree(device_dir) | |||
| os.mkdir(device_dir) | |||
| os.chdir(device_dir) | |||
| cmd = [sys.executable, '-u'] | |||
| cmd.append(args.training_script) | |||
| cmd.extend(args.training_script_args) | |||
| log_file = open('{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w') | |||
| process = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, env=env) | |||
| processes.append(process) | |||
| cmds.append(cmd) | |||
| log_files.append(log_file) | |||
| for process, cmd, log_file in zip(processes, cmds, log_files): | |||
| process.wait() | |||
| if process.returncode != 0: | |||
| raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) | |||
| log_file.close() | |||
| if __name__ == "__main__": | |||
| main() | |||
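| # Invocation sketch, mirroring how run_train.sh drives this script later in | |||
| # this diff (the IP and paths are placeholders): | |||
| #   python launch.py --nproc_per_node=8 --visible_devices=0,1,2,3,4,5,6,7 \ | |||
| #       --server_id=192.168.0.1 --training_script=train.py \ | |||
| #       --dataset_path=/data/imagenet/train --platform=Ascend | |||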
| @@ -0,0 +1,215 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """MobileNetV2 Quant model define""" | |||
| import mindspore.nn as nn | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops.operations import TensorAdd | |||
| __all__ = ['mobilenet_v2_quant'] | |||
| _ema_decay = 0.999 | |||
| _symmetric = True | |||
| _per_channel = True | |||
| def _make_divisible(v, divisor, min_value=None): | |||
| if min_value is None: | |||
| min_value = divisor | |||
| new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) | |||
| # Make sure that round down does not go down by more than 10%. | |||
| if new_v < 0.9 * v: | |||
| new_v += divisor | |||
| return new_v | |||
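| # Worked examples of the rounding rule above (divisor-aligned channel counts): | |||
| #   _make_divisible(32, 8)  -> 32   # already a multiple of 8 | |||
| #   _make_divisible(33, 8)  -> 32   # rounds down, still within 10% of 33 | |||
| #   _make_divisible(23, 16) -> 32   # 16 would drop >10% below 23, so bump up | |||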
| class GlobalAvgPooling(nn.Cell): | |||
| """ | |||
| Global avg pooling definition. | |||
| Args: | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> GlobalAvgPooling() | |||
| """ | |||
| def __init__(self): | |||
| super(GlobalAvgPooling, self).__init__() | |||
| self.mean = P.ReduceMean(keep_dims=False) | |||
| def construct(self, x): | |||
| x = self.mean(x, (2, 3)) | |||
| return x | |||
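| # e.g. in the head of this network, a (N, 1280, 7, 7) feature map is averaged | |||
| # over the spatial axes (2, 3) into a (N, 1280) tensor (shapes assume 224x224 inputs). | |||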
| class ConvBNReLU(nn.Cell): | |||
| """ | |||
| Convolution/Depthwise fused with Batchnorm and ReLU block definition. | |||
| Args: | |||
| in_planes (int): Input channel. | |||
| out_planes (int): Output channel. | |||
| kernel_size (int): Input kernel size. | |||
| stride (int): Stride size for the first convolutional layer. Default: 1. | |||
| groups (int): channel group; 1 for ordinary convolution, the input channel count for depthwise convolution. Default: 1. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1) | |||
| """ | |||
| def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): | |||
| super(ConvBNReLU, self).__init__() | |||
| padding = (kernel_size - 1) // 2 | |||
| conv = nn.Conv2dBatchNormQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding, | |||
| group=groups, per_channel=_per_channel, symmetric=_symmetric) | |||
| layers = [conv, nn.ReLU()] | |||
| self.features = nn.SequentialCell(layers) | |||
| self.fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay, min_init=0) | |||
| def construct(self, x): | |||
| output = self.features(x) | |||
| output = self.fake(output) | |||
| return output | |||
| class InvertedResidual(nn.Cell): | |||
| """ | |||
| Mobilenetv2 residual block definition. | |||
| Args: | |||
| inp (int): Input channel. | |||
| oup (int): Output channel. | |||
| stride (int): Stride size for the first convolutional layer. Default: 1. | |||
| expand_ratio (int): expand ratio of input channel. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> InvertedResidual(3, 256, 1, 1) | |||
| """ | |||
| def __init__(self, inp, oup, stride, expand_ratio): | |||
| super(InvertedResidual, self).__init__() | |||
| assert stride in [1, 2] | |||
| hidden_dim = int(round(inp * expand_ratio)) | |||
| self.use_res_connect = stride == 1 and inp == oup | |||
| layers = [] | |||
| if expand_ratio != 1: | |||
| layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) | |||
| layers.extend([ | |||
| # dw | |||
| ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), | |||
| # pw-linear | |||
| nn.Conv2dBatchNormQuant(hidden_dim, oup, kernel_size=1, stride=1, pad_mode='pad', padding=0, group=1, | |||
| per_channel=_per_channel, symmetric=_symmetric), | |||
| nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay) | |||
| ]) | |||
| self.conv = nn.SequentialCell(layers) | |||
| self.add = TensorAdd() | |||
| self.add_fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay) | |||
| def construct(self, x): | |||
| identity = x | |||
| x = self.conv(x) | |||
| if self.use_res_connect: | |||
| x = self.add(identity, x) | |||
| x = self.add_fake(x) | |||
| return x | |||
| class MobileNetV2Quant(nn.Cell): | |||
| """ | |||
| MobileNetV2Quant architecture. | |||
| Args: | |||
| num_classes (int): number of classes. Default is 1000. | |||
| width_mult (float): channel-width multiplier; results are rounded via round_nearest. Default is 1. | |||
| has_dropout (bool): whether dropout is used. Default is False. | |||
| inverted_residual_setting (list): inverted residual settings. Default is None. | |||
| round_nearest (int): round channel counts to a multiple of this value. Default is 8. | |||
| Returns: | |||
| Tensor, output tensor. | |||
| Examples: | |||
| >>> MobileNetV2Quant(num_classes=1000) | |||
| """ | |||
| def __init__(self, num_classes=1000, width_mult=1., | |||
| has_dropout=False, inverted_residual_setting=None, round_nearest=8): | |||
| super(MobileNetV2Quant, self).__init__() | |||
| block = InvertedResidual | |||
| input_channel = 32 | |||
| last_channel = 1280 | |||
| # setting of inverted residual blocks | |||
| self.cfgs = inverted_residual_setting | |||
| if inverted_residual_setting is None: | |||
| self.cfgs = [ | |||
| # t, c, n, s | |||
| [1, 16, 1, 1], | |||
| [6, 24, 2, 2], | |||
| [6, 32, 3, 2], | |||
| [6, 64, 4, 2], | |||
| [6, 96, 3, 1], | |||
| [6, 160, 3, 2], | |||
| [6, 320, 1, 1], | |||
| ] | |||
| # building first layer | |||
| input_channel = _make_divisible(input_channel * width_mult, round_nearest) | |||
| self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) | |||
| self.input_fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay) | |||
| features = [ConvBNReLU(3, input_channel, stride=2)] | |||
| # building inverted residual blocks | |||
| for t, c, n, s in self.cfgs: | |||
| output_channel = _make_divisible(c * width_mult, round_nearest) | |||
| for i in range(n): | |||
| stride = s if i == 0 else 1 | |||
| features.append(block(input_channel, output_channel, stride, expand_ratio=t)) | |||
| input_channel = output_channel | |||
| # building last several layers | |||
| features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1)) | |||
| # wrap the feature layers in nn.SequentialCell | |||
| self.features = nn.SequentialCell(features) | |||
| # mobilenet head | |||
| head = ([GlobalAvgPooling(), | |||
| nn.DenseQuant(self.out_channels, num_classes, has_bias=True, per_channel=_per_channel, | |||
| symmetric=_symmetric), | |||
| nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)] if not has_dropout else | |||
| [GlobalAvgPooling(), nn.Dropout(0.2), | |||
| nn.DenseQuant(self.out_channels, num_classes, has_bias=True, per_channel=_per_channel, | |||
| symmetric=_symmetric), | |||
| nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)]) | |||
| self.head = nn.SequentialCell(head) | |||
| def construct(self, x): | |||
| x = self.input_fake(x) | |||
| x = self.features(x) | |||
| x = self.head(x) | |||
| return x | |||
| def mobilenet_v2_quant(**kwargs): | |||
| """ | |||
| Constructs a MobileNet V2 model | |||
| """ | |||
| return MobileNetV2Quant(**kwargs) | |||
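| # Minimal forward-pass sketch, assuming a standard MindSpore r0.3 environment: | |||
| #   import numpy as np | |||
| #   from mindspore import Tensor | |||
| #   net = mobilenet_v2_quant(num_classes=1000) | |||
| #   x = Tensor(np.ones((1, 3, 224, 224), np.float32)) | |||
| #   logits = net(x)   # expected shape: (1, 1000) | |||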
| @@ -0,0 +1,232 @@ | |||
| # Copyright 2020 Huawei Technologies Co., Ltd | |||
| # | |||
| # Licensed under the Apache License, Version 2.0 (the "License"); | |||
| # you may not use this file except in compliance with the License. | |||
| # You may obtain a copy of the License at | |||
| # | |||
| # http://www.apache.org/licenses/LICENSE-2.0 | |||
| # | |||
| # Unless required by applicable law or agreed to in writing, software | |||
| # distributed under the License is distributed on an "AS IS" BASIS, | |||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
| # See the License for the specific language governing permissions and | |||
| # limitations under the License. | |||
| # ============================================================================ | |||
| """train_imagenet.""" | |||
| import os | |||
| import time | |||
| import argparse | |||
| import random | |||
| import numpy as np | |||
| from mindspore import context | |||
| from mindspore import Tensor | |||
| from mindspore import nn | |||
| from mindspore.nn.optim.momentum import Momentum | |||
| from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits | |||
| from mindspore.nn.loss.loss import _Loss | |||
| from mindspore.ops import operations as P | |||
| from mindspore.ops import functional as F | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.train.model import Model, ParallelMode | |||
| from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback | |||
| from mindspore.train.serialization import load_checkpoint | |||
| from mindspore.communication.management import init | |||
| import mindspore.dataset.engine as de | |||
| from src.dataset import create_dataset | |||
| from src.lr_generator import get_lr | |||
| from src.config import config_ascend | |||
| from src.mobilenetV2_quant import mobilenet_v2_quant | |||
| random.seed(1) | |||
| np.random.seed(1) | |||
| de.config.set_seed(1) | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') | |||
| parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path') | |||
| parser.add_argument('--platform', type=str, default=None, help='run platform') | |||
| args_opt = parser.parse_args() | |||
| if args_opt.platform == "Ascend": | |||
| device_id = int(os.getenv('DEVICE_ID')) | |||
| rank_id = int(os.getenv('RANK_ID')) | |||
| rank_size = int(os.getenv('RANK_SIZE')) | |||
| run_distribute = rank_size > 1 | |||
| context.set_context(mode=context.GRAPH_MODE, | |||
| device_target="Ascend", | |||
| device_id=device_id, save_graphs=False) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||
| class CrossEntropyWithLabelSmooth(_Loss): | |||
| """ | |||
| CrossEntropyWithLabelSmooth. | |||
| Args: | |||
| smooth_factor (float): label smoothing factor. Default: 0. | |||
| num_classes (int): number of classes. Default: 1000. | |||
| Returns: | |||
| None. | |||
| Examples: | |||
| >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000) | |||
| """ | |||
| def __init__(self, smooth_factor=0., num_classes=1000): | |||
| super(CrossEntropyWithLabelSmooth, self).__init__() | |||
| self.onehot = P.OneHot() | |||
| self.on_value = Tensor(1.0 - smooth_factor, mstype.float32) | |||
| self.off_value = Tensor(1.0 * smooth_factor / | |||
| (num_classes - 1), mstype.float32) | |||
| self.ce = nn.SoftmaxCrossEntropyWithLogits() | |||
| self.mean = P.ReduceMean(False) | |||
| self.cast = P.Cast() | |||
| def construct(self, logit, label): | |||
| one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1], | |||
| self.on_value, self.off_value) | |||
| out_loss = self.ce(logit, one_hot_label) | |||
| out_loss = self.mean(out_loss, 0) | |||
| return out_loss | |||
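| # Worked numbers for the smoothing above: with smooth_factor=0.1 and | |||
| # num_classes=1000 (values assumed for illustration), the one-hot target uses | |||
| # on_value = 0.9 for the true class and off_value = 0.1 / 999 ≈ 0.0001001 | |||
| # for each of the other 999 classes. | |||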
| class Monitor(Callback): | |||
| """ | |||
| Monitor loss and time. | |||
| Args: | |||
| lr_init (numpy.ndarray): per-step learning rate, used for logging. | |||
| Returns: | |||
| None | |||
| Examples: | |||
| >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy()) | |||
| """ | |||
| def __init__(self, lr_init=None): | |||
| super(Monitor, self).__init__() | |||
| self.lr_init = lr_init | |||
| self.lr_init_len = len(lr_init) | |||
| def epoch_begin(self, run_context): | |||
| self.losses = [] | |||
| self.epoch_time = time.time() | |||
| def epoch_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| epoch_mseconds = (time.time() - self.epoch_time) * 1000 | |||
| per_step_mseconds = epoch_mseconds / cb_params.batch_num | |||
| print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds, | |||
| per_step_mseconds, | |||
| np.mean(self.losses))) | |||
| def step_begin(self, run_context): | |||
| self.step_time = time.time() | |||
| def step_end(self, run_context): | |||
| cb_params = run_context.original_args() | |||
| step_mseconds = (time.time() - self.step_time) * 1000 | |||
| step_loss = cb_params.net_outputs | |||
| if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor): | |||
| step_loss = step_loss[0] | |||
| if isinstance(step_loss, Tensor): | |||
| step_loss = np.mean(step_loss.asnumpy()) | |||
| self.losses.append(step_loss) | |||
| cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num | |||
| print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format( | |||
| cb_params.cur_epoch_num - | |||
| 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss, | |||
| np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1])) | |||
| def _load_param_into_net(ori_model, ckpt_param_dict): | |||
| """ | |||
| Load fp32 checkpoint parameters into the quantization model. | |||
| Args: | |||
| ori_model: quantization model to be initialized. | |||
| ckpt_param_dict: parameter dict loaded from the fp32 checkpoint. | |||
| Returns: | |||
| None | |||
| """ | |||
| iterable_dict = { | |||
| 'weight': iter([item for item in ckpt_param_dict.items() if item[0].endswith('weight')]), | |||
| 'bias': iter([item for item in ckpt_param_dict.items() if item[0].endswith('bias')]), | |||
| 'gamma': iter([item for item in ckpt_param_dict.items() if item[0].endswith('gamma')]), | |||
| 'beta': iter([item for item in ckpt_param_dict.items() if item[0].endswith('beta')]), | |||
| 'moving_mean': iter([item for item in ckpt_param_dict.items() if item[0].endswith('moving_mean')]), | |||
| 'moving_variance': iter( | |||
| [item for item in ckpt_param_dict.items() if item[0].endswith('moving_variance')]), | |||
| 'minq': iter([item for item in ckpt_param_dict.items() if item[0].endswith('minq')]), | |||
| 'maxq': iter([item for item in ckpt_param_dict.items() if item[0].endswith('maxq')]) | |||
| } | |||
| for name, param in ori_model.parameters_and_names(): | |||
| key_name = name.split(".")[-1] | |||
| if key_name not in iterable_dict.keys(): | |||
| continue | |||
| value_param = next(iterable_dict[key_name], None) | |||
| if value_param is not None: | |||
| param.set_parameter_data(value_param[1].data) | |||
| print(f'init model param {name} with checkpoint param {value_param[0]}') | |||
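| # Usage sketch, mirroring the pre-trained branch in the __main__ block below | |||
| # (the checkpoint path is illustrative): | |||
| #   param_dict = load_checkpoint("/path/to/mobilenetv2_fp32.ckpt") | |||
| #   _load_param_into_net(net, param_dict) | |||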
| if __name__ == '__main__': | |||
| # train on ascend | |||
| print("train args: ", args_opt, "\ncfg: ", config_ascend, | |||
| "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size)) | |||
| if run_distribute: | |||
| context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL, | |||
| parameter_broadcast=True, mirror_mean=True) | |||
| init() | |||
| epoch_size = config_ascend.epoch_size | |||
| net = mobilenet_v2_quant(num_classes=config_ascend.num_classes) | |||
| if config_ascend.label_smooth > 0: | |||
| loss = CrossEntropyWithLabelSmooth( | |||
| smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes) | |||
| else: | |||
| loss = SoftmaxCrossEntropyWithLogits( | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| dataset = create_dataset(dataset_path=args_opt.dataset_path, | |||
| do_train=True, | |||
| config=config_ascend, | |||
| platform=args_opt.platform, | |||
| repeat_num=epoch_size, | |||
| batch_size=config_ascend.batch_size) | |||
| step_size = dataset.get_dataset_size() | |||
| if args_opt.pre_trained: | |||
| param_dict = load_checkpoint(args_opt.pre_trained) | |||
| _load_param_into_net(net, param_dict) | |||
| lr = Tensor(get_lr(global_step=config_ascend.start_epoch * step_size, | |||
| lr_init=0, | |||
| lr_end=0, | |||
| lr_max=config_ascend.lr, | |||
| warmup_epochs=config_ascend.warmup_epochs, | |||
| total_epochs=epoch_size + config_ascend.start_epoch, | |||
| steps_per_epoch=step_size)) | |||
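| # Note: get_lr is expected to return one learning-rate value per training step | |||
| # (length ~ steps_per_epoch * total_epochs), warming up over warmup_epochs and | |||
| # decaying from lr_max toward lr_end; Monitor indexes it per step above. | |||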
| opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_ascend.momentum, | |||
| config_ascend.weight_decay) | |||
| model = Model(net, loss_fn=loss, optimizer=opt) | |||
| cb = None | |||
| if rank_id == 0: | |||
| cb = [Monitor(lr_init=lr.asnumpy())] | |||
| if config_ascend.save_checkpoint: | |||
| config_ck = CheckpointConfig(save_checkpoint_steps=config_ascend.save_checkpoint_epochs * step_size, | |||
| keep_checkpoint_max=config_ascend.keep_checkpoint_max) | |||
| ckpt_cb = ModelCheckpoint( | |||
| prefix="mobilenet", directory=config_ascend.save_checkpoint_path, config=config_ck) | |||
| cb += [ckpt_cb] | |||
| model.train(epoch_size, dataset, callbacks=cb) | |||
| @@ -13,7 +13,7 @@ The overall network architecture of MobileNetV3 is show below: | |||
| # Dataset | |||
| Dataset used: [imagenet](http://www.image-net.org/) | |||
| Dataset used: imagenet | |||
| - Dataset size: ~125G, 1.2 million colorful images in 1000 classes | |||
| - Train: 120G, 1.2 million images | |||
| @@ -67,8 +67,8 @@ Dataset used: [imagenet](http://www.image-net.org/) | |||
| ``` | |||
| # training example | |||
| Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/ | |||
| GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ | |||
| Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt | |||
| GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt | |||
| ``` | |||
| ### Result | |||
| @@ -133,7 +133,7 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625. | |||
| #### Inference Performance | |||
| | Parameters | GoogLeNet | | | | |||
| | Parameters | | | | | |||
| | -------------------------- | ----------------------------- | ------------------------- | -------------------- | | |||
| | Model Version | V1 | | | | |||
| | Resource | Huawei 910 | NV SMX2 V100-32G | Huawei 310 | | |||
| @@ -22,9 +22,9 @@ from mindspore import nn | |||
| from mindspore.train.model import Model | |||
| from mindspore.train.serialization import load_checkpoint, load_param_into_net | |||
| from mindspore.common import dtype as mstype | |||
| from mindspore.model_zoo.mobilenetV3 import mobilenet_v3_large | |||
| from src.dataset import create_dataset | |||
| from src.config import config_ascend, config_gpu | |||
| from src.mobilenetV2 import mobilenet_v2 | |||
| parser = argparse.ArgumentParser(description='Image classification') | |||
| @@ -50,7 +50,7 @@ if __name__ == '__main__': | |||
| loss = nn.SoftmaxCrossEntropyWithLogits( | |||
| is_grad=False, sparse=True, reduction='mean') | |||
| net = mobilenet_v2(num_classes=config_platform.num_classes) | |||
| net = mobilenet_v3_large(num_classes=config_platform.num_classes) | |||
| if args_opt.platform == "Ascend": | |||
| net.to_float(mstype.float16) | |||
| @@ -42,14 +42,14 @@ export RANK_ID=0 | |||
| export RANK_SIZE=1 | |||
| if [ -d "eval" ]; | |||
| then | |||
| rm -rf ./eval | |||
| rm -rf ../eval | |||
| fi | |||
| mkdir ./eval | |||
| cd ./eval || exit | |||
| mkdir ../eval | |||
| cd ../eval || exit | |||
| # launch | |||
| python ${BASEPATH}/eval.py \ | |||
| python ${BASEPATH}/../eval.py \ | |||
| --platform=$1 \ | |||
| --dataset_path=$2 \ | |||
| --checkpoint_path=$3 \ | |||
| &> infer.log & # dataset val folder path | |||
| &> ../infer.log & # dataset val folder path | |||
| @@ -31,17 +31,18 @@ run_ascend() | |||
| export PYTHONPATH=${BASEPATH}:$PYTHONPATH | |||
| if [ -d "train" ]; | |||
| then | |||
| rm -rf ./train | |||
| rm -rf ../train | |||
| fi | |||
| mkdir ./train | |||
| cd ./train || exit | |||
| python ${BASEPATH}/launch.py \ | |||
| mkdir ../train | |||
| cd ../train || exit | |||
| python ${BASEPATH}/../src/launch.py \ | |||
| --nproc_per_node=$2 \ | |||
| --visible_devices=$4 \ | |||
| --server_id=$3 \ | |||
| --training_script=${BASEPATH}/train.py \ | |||
| --training_script=${BASEPATH}/../train.py \ | |||
| --dataset_path=$5 \ | |||
| --platform=$1 &> train.log & # dataset train folder | |||
| --pre_trained=$6 \ | |||
| --platform=$1 &> ../train.log & # dataset train folder | |||
| } | |||
| run_gpu() | |||
| @@ -62,24 +63,25 @@ run_gpu() | |||
| export PYTHONPATH=${BASEPATH}:$PYTHONPATH | |||
| if [ -d "train" ]; | |||
| then | |||
| rm -rf ./train | |||
| rm -rf ../train | |||
| fi | |||
| mkdir ./train | |||
| cd ./train || exit | |||
| mkdir ../train | |||
| cd ../train || exit | |||
| export CUDA_VISIBLE_DEVICES="$3" | |||
| mpirun -n $2 --allow-run-as-root \ | |||
| python ${BASEPATH}/train.py \ | |||
| python ${BASEPATH}/../train.py \ | |||
| --dataset_path=$4 \ | |||
| --platform=$1 \ | |||
| &> train.log & # dataset train folder | |||
| --pre_trained=$5 \ | |||
| &> ../train.log & # dataset train folder | |||
| } | |||
| if [ $# -gt 5 ] || [ $# -lt 4 ] | |||
| if [ $# -gt 6 ] || [ $# -lt 4 ] | |||
| then | |||
| echo "Usage:\n \ | |||
| Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ | |||
| GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \ | |||
| Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ | |||
| GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \ | |||
| " | |||
| exit 1 | |||
| fi | |||
| @@ -44,7 +44,12 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=rank_size, shard_id=rank_id) | |||
| elif platform == "GPU": | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| if do_train: | |||
| from mindspore.communication.management import get_rank, get_group_size | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True, | |||
| num_shards=get_group_size(), shard_id=get_rank()) | |||
| else: | |||
| ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True) | |||
| else: | |||
| raise ValueError("Unsupport platform.") | |||