Compare commits


272 Commits
master ... r0.3
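The range above is the standard triple-dot compare: commits reachable from `r0.3` but not from `master`. Assuming a local clone with both branches fetched, the same view can be reproduced with plain git:

```
git log --oneline master..r0.3   # the 272 commits unique to r0.3
git diff --stat master...r0.3    # the 100-file change summary further down
```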

Author SHA1 Message Date
  mindspore-ci-bot ad0a705489 !2952 update release notes for r0.3.1 5 years ago
  jonyguo d3fbc1523b update r0.3.1 release notes 5 years ago
  mindspore-ci-bot 1c9ed09fd0 !2812 Remove submodule akg. 5 years ago
  mindspore-ci-bot 1ccedcde65 !2826 add libtiff notice info to r0.3 5 years ago
  xulei2020 921e7de987 add libtiff notice to r0.3 5 years ago
  Tron Zhang d5d9e92336 remove submodule akg 5 years ago
  mindspore-ci-bot 9343746ef7 !2607 Modify long description format of whl package 5 years ago
  zhoufeng e2593466fc Modify long description format of whl package 5 years ago
  mindspore-ci-bot 2e5a76e0df !2606 Update version to 0.3.1 5 years ago
  zhoufeng 067b619034 Update version to 0.3.1 5 years ago
  mindspore-ci-bot 5e5c66e300 !2540 Move LayerNormGrad split pass ahead of kernel select 5 years ago
  mindspore-ci-bot d8969d243e !2568 update run_train.sh of mobilenetv2_quant && resnet50_quant 5 years ago
  wandongdong f03e88c26f update run_train.sh 5 years ago
  mindspore-ci-bot 8c30045178 !2553 add mindrecord to mobilenetv2_quant && resnet50_quant 5 years ago
  mindspore-ci-bot 095d86e16f !2556 fix: change field name from 'data' to 'image' - sync 5 years ago
  jonyguo 862bc22b38 fix: change field name from data to image in mindrecord for imagenet 5 years ago
  wandongdong a6590d1866 add mindrecord 5 years ago
  mindspore-ci-bot 3f0a350d68 !2510 THOR ops modified 5 years ago
  huanghui e936d5cd4b place layernormgrad split pass before kernel select 5 years ago
  mindspore-ci-bot 46700bec69 !2500 add output activation quant in mobilenetv2 and resnet50 5 years ago
  zongha 0920094f81 refine img2col_impl 5 years ago
  chenzupeng bf0673003b add dense quant activation fake 5 years ago
  mindspore-ci-bot 3a40ac6521 !2435 fix perchannel num_channels not set bug and adjust quant.py params order 5 years ago
  wangdongxu f110c7616b fix perchannel num_channels not set bug and adjust quant.py params order 5 years ago
  mindspore-ci-bot 3e3cbbba0f !2447 async save checkpoint to file 5 years ago
  mindspore-ci-bot e368d0524b !2455 add perchannel quant train 5 years ago
  changzherui 966f05231d async save checkpoint to file, merged to r0.3 5 years ago
  chenzupeng e9ee59c7ad add perchannel quant train 5 years ago
  mindspore-ci-bot 4bbd4414c4 !1734 fix resnet50_THOR training printing many ERROR&WARNING logs and producing many ir files 5 years ago
  mindspore-ci-bot cf7c60a5ed !2431 update README 5 years ago
  panfengfeng 87cc57d3aa update readme 5 years ago
  mindspore-ci-bot 2d35511d7c !2423 Adapt module akg's change 5 years ago
  mindspore-ci-bot fded8732ea !2422 reshuffle all data and shard again when using MindDataset distributed 5 years ago
  tronzhang 109a21d520 Adapt change of module akg 5 years ago
  jonyguo 0f380b559e enhance: add full reshuffle per epoch and fix: random_device failed 5 years ago
  mindspore-ci-bot e519317622 !2407 change Q.BNTrainingReduce() to P.BNTrainingReduce() 5 years ago
  chenzomi 2fab2492bc change Q.BNTrainingReduce() to P.BNTrainingReduce() 5 years ago
  mindspore-ci-bot 11caa3aec8 !2340 fix random_crop_resize_2 5 years ago
  panfengfeng 25827a8619 fix random_crop_and_resize 5 years ago
  mindspore-ci-bot 91c856e5ee !2334 remove dataset send from data exec for r0.3 5 years ago
  wangnan39@huawei.com 20049bbea6 send data after model init 5 years ago
  mindspore-ci-bot cb6211f25d !2291 remove _quant_op.py from __init__.py 5 years ago
  mindspore-ci-bot 24d61337c0 !2302 improve summary performance 5 years ago
  Margaret_wangrui 69b32e4dca improve summary performance 5 years ago
  chenzomi 9be52e0a1b remove _quant_op.py from __init__.py 5 years ago
  mindspore-ci-bot 53d7e622f9 !2261 modify log level from warning to info 5 years ago
  mindspore-ci-bot 1127ace7ec !2228 cache get_dataset_size value 5 years ago
  mindspore-ci-bot ab39708929 !2099 fix summary nodes memory reuse refcount 5 years ago
  mindspore-ci-bot 147d0cde07 !2277 fix arithmetic simplify 5 years ago
  Xian Weizhao 9c70861343 fix arithmetic simplify 5 years ago
  jjfeing e78e819b7c modify log level from warning to info 5 years ago
  mindspore-ci-bot f3bb991ce9 !2232 split correction_mul ops 5 years ago
  mindspore-ci-bot dcb90588b0 !2248 bind summary nodes to KernelGraph in order to memory reuse 5 years ago
  wandongdong c742384a39 split correction_mul op 5 years ago
  yanghaitao1 038040750d store get dataset size 5 years ago
  mindspore-ci-bot 476671b1cf !2196 fix log level too high: conversion of const tensor is normal 5 years ago
  mindspore-ci-bot c749f513ac !2195 [r0.3 branch] fix FakeQuantPerLayer/FakeQuantPerLayerGrad symmetric=True calculation error bug 5 years ago
  王东旭 7995189c72 fix FakeQuantPerLayer/FakeQuantPerLayerGrad symmetric bug and remove BNTrainingReduceGrad/BNTrainingUpdateGrad 5 years ago
  Margaret_wangrui 6f5303f0d9 bind summary nodes to KernelGraph in order to memory reuse 5 years ago
  tronzhang ac7197d33e fix log level: const tensor conversion is normal 5 years ago
  laiyongqiang 8d0691aaf9 fix summary nodes memory reuse refcount 5 years ago
  mindspore-ci-bot 1e90e7be05 !2172 fix some info 5 years ago
  jonyguo 5e2953247f fix: verify info 5 years ago
  mindspore-ci-bot ff500c678e !2122 add set_dataset_size for MindDataset 5 years ago
  jonyguo 488b74e92f 1. add set_dataset_size for MindDataset 2. modify parameter dupe_factor from 5 to 10 5 years ago
  mindspore-ci-bot fba21459a7 !2115 change readme.md 5 years ago
  mindspore-ci-bot 6d04e1a8e5 !2115 change readme.md 5 years ago
  chenzomi d6bd690d34 change readme.md 5 years ago
  mindspore-ci-bot 9fc00ca521 !2031 add sync between hcom 5 years ago
  mindspore-ci-bot 7c77bb8782 !2104 change mobilenet V2 readme.md 5 years ago
  chenzomi 077d21f055 change mobilenet V2 readme. 5 years ago
  mindspore-ci-bot da9530f7f7 !2090 resnet quant dataset aug change 5 years ago
  panfengfeng 690db9a515 resnet_quant data aug change 5 years ago
  mindspore-ci-bot 653519630a !2079 Feat(GraphKernel): Init GraphKernel. 5 years ago
  mindspore-ci-bot 3c2f4df87c !2087 data aug changes from c to py 5 years ago
  mindspore-ci-bot 62fae9befa !2082 MindDataset with padded mode print reshuffle error info 5 years ago
  panfengfeng e20d687e7a using py_transform for data aug. 5 years ago
  mindspore-ci-bot 23d103a122 !2085 remove unused code in quant train 5 years ago
  chenzupeng 52a90f2587 remove unused code in quant train 5 years ago
  mindspore-ci-bot e21a0aad69 !2073 add resnet50 quant model 5 years ago
  gong chen 13a2d6d49e Init GraphKernel. 5 years ago
  jonyguo f3ebc7319c fix: MindDataset padded log error 5 years ago
  wandongdong df65f16812 add resnet50_quant 5 years ago
  mindspore-ci-bot dc9a51aad5 !2070 adapt quantization aware train for r0.3 5 years ago
  chenzupeng cc497424fc adapt for mobilenetV2 quantization aware training in r0.3 5 years ago
  mindspore-ci-bot b3f09b1d45 !1995 remove the useless transdata and cast connected with control depend 5 years ago
  mindspore-ci-bot f05da3aae9 !1948 fix resnet50 distribute bug 5 years ago
  WilliamLian 0ac5911910 remove the useless transdata and cast connected with control depend 5 years ago
  mindspore-ci-bot fb65a1a929 !2049 update mobilenetv2 scripts 5 years ago
  mindspore-ci-bot 7d965477a1 !2041 add mobilenetV2 quant 5 years ago
  panfengfeng 68c3c73fab update mobilenetV2 dataset codes 5 years ago
  mindspore-ci-bot 7ffcc606c9 !2035 add example for zhwiki, CLUERNER2020 and enwiki to mindrecord 5 years ago
  mindspore-ci-bot aa4c4f51ac !2025 fix remove reshape pair pass 5 years ago
  mindspore-ci-bot 854e16f0f8 !2033 fix mindrecord seekg failed 5 years ago
  chenzomi 60dc921186 add mobilenetV2 quant 5 years ago
  jonyguo 16e9da5ae5 enhance: add example for zhwiki, CLUERNER2020 and enwiki to mindrecord 5 years ago
  jonyguo a48a97208b fix: mindrecord seekg failed when shift raw page 5 years ago
  liubuyu e3145f18b0 fix remove reshape pair pass 5 years ago
  gukecai c4abebafcc add sync between hcom 5 years ago
  mindspore-ci-bot 0e4fab2368 !2011 fake quant debug 5 years ago
  chenzomi 5a26546b56 fake quant debug 5 years ago
  mindspore-ci-bot a40e9e6fae !2001 fix MindDataset distribute shuffle error 5 years ago
  jonyguo 07f7d1ae62 fix: MindDataset distribute shuffle bug 5 years ago
  mindspore-ci-bot 9944abe99d !1963 bug fix in fake quant training in r0.3 5 years ago
  chenzomi bb58ea35b9 bug fix in fake quant training in r0.3 5 years ago
  mindspore-ci-bot eaaacfea4c !1941 Add order function in group params in r0.3 5 years ago
  mindspore-ci-bot 676e717edf !1952 use VisitKernelWithReturnType instead of VisitKernel to get node's input in mem_reuse 5 years ago
  laiyongqiang 9bdf017379 use VisitKernelWithReturnType instead of VisitKernel to get node's input 5 years ago
  zhaoting b37184050f fix resnet50 distribute bug 5 years ago
  zhaoting 4d92e2b579 Revert "Revert "add pattern AdjustAllReduceMulAdduse the old opadd test case for bugtemp fix try"" 5 years ago
  mindspore-ci-bot ba125f9673 !1925 bug fix in fake quant 5 years ago
  chenzomi e0fa277a05 fix bug in fake quant grad 5 years ago
  mindspore-ci-bot eac1f93ee4 !1889 add dropout special kernel selected rules 5 years ago
  mindspore-ci-bot 40e1e3843f !1894 fix lars weight decay computation error 5 years ago
  Ziyan fdb2a915b9 fix weight decay in lars 5 years ago
  WilliamLian 159119cb2a add dropout special kernel selected rules 5 years ago
  guohongzilong f213c3a6ad add order function in group params 5 years ago
  mindspore-ci-bot 1f34378b9c !1837 [MD] support padding samples in minddataset 5 years ago
  liyong d915d46d79 pad samples in mindrecord 5 years ago
  mindspore-ci-bot 6ce8a4ab20 !1836 update register info of BiasAddGrad and modify adam optimizer&softmax_grad to match fusion rules 5 years ago
  shibeiji 188c9feca4 update register info of BiasAddGrad and modify adam optimizer&softmax_grad to match fusion rules 5 years ago
  mindspore-ci-bot 29deeca343 !1818 Add SoftmaxGradExt fusion pass from master to r0.3 5 years ago
  huanghui cc582f5e30 add SoftmaxGradExt fusion pass 5 years ago
  mindspore-ci-bot bbdc44a0cc !1646 reorder independent nodes for stream parallel 5 years ago
  mindspore-ci-bot c8d31b0889 !1754 Add 5 patterns for AdamApplyOneWithDecay fusion pass 5 years ago
  mindspore-ci-bot 3baa52717f !1795 fix compile bugs in mobilenetv2 quant aware training for r0.3 5 years ago
  wandongdong 5485976f61 fix compile bugs for quant 5 years ago
  mindspore-ci-bot 2109bb68b3 !1756 modify widedeep 5 years ago
  mindspore-ci-bot 07166d11af !1751 fixed SoftmaxGradExt 5 years ago
  huanghui 05afc22ffa add newly 5 patterns for AdamApplyOneWithDecayRule fusion pass 5 years ago
  wukesong bb4b06946f modify widedeep 5 years ago
  jiangjinsheng 022d391e3c fixed SoftmaxGradExt 5 years ago
  mindspore-ci-bot 4cff81ee2d !1733 change some settings in SSD 5 years ago
  zhaoting ac12df82d2 change some settings in SSD 5 years ago
  mindspore-ci-bot 9cb129ac99 !1720 add reducemean's special kernel filter rule 5 years ago
  mindspore-ci-bot 5adcbf6e23 !1727 move add graph manager to gpu session 5 years ago
  z00478463 491ba51b8b set save graphs False and add bprop for op cholesky trsm 5 years ago
  mindspore-ci-bot db6bb720df !1716 fix bug introduced by gpu support 5 years ago
  lizhenyu c0aa7602e0 move add graph manager to gpu session 5 years ago
  WilliamLian ba48964f2a add reduce mean kernel filter function 5 years ago
  mindspore-ci-bot 0a4a449e8f !1711 fix log1p 5 years ago
  gengdongjie 4f50cb3a9b fix bug introduced by gpu support 5 years ago
  mindspore-ci-bot 5d0cc35792 !1567 lstm&transpose_r0.3 5 years ago
  baihuawei b85c310ea1 add lstm & transpose 5 years ago
  jiangjinsheng a9de8012df fixed log1p 5 years ago
  mindspore-ci-bot d85262e03c !1686 update r0.3 release notes 5 years ago
  jonyguo 22158fc703 update r0.3 release notes and install path 5 years ago
  mindspore-ci-bot e3a7f8f21c !1698 bugfix:get nullptr from graph manager 5 years ago
  lizhenyu df04230e13 fix get nullptr when use graph manager 5 years ago
  mindspore-ci-bot 20d26b17f8 !1684 dataset: repair get_sampler_size problem 5 years ago
  mindspore-ci-bot 85012ceedd !1677 TopK fusion pass bug fix 5 years ago
  mindspore-ci-bot 527f1d70ce !1680 fix resource release bug of memory swap 5 years ago
  mindspore-ci-bot cab2612c23 !1662 fix get_dataset_size error for GeneratorDataset 5 years ago
  linqingke a339fac777 topk bug fix 5 years ago
  mindspore-ci-bot 723c66bb66 !1683 modify dataset.py and add auto parallel split 5 years ago
  wanghua 298ff4adc1 modify dataset.py and add auto parallel split 5 years ago
  ms_yan 09fd47a256 repair get_sampler_size problem 5 years ago
  mindspore-ci-bot e0510928f1 !1676 GPU fix resnet script 5 years ago
  mindspore-ci-bot 9205271347 !1671 Add DeepLabV3 network 5 years ago
  lizhenyu 13bda4caf1 fix resource release bug of memory swap 5 years ago
  gukecai 8d68bd874e reorder independent nodes 5 years ago
  VectorSL 60dadd6d21 gpu fix resnet script 5 years ago
  mindspore-ci-bot 0e4574af6b !1656 fix bug for mobilenet in model_zoo 5 years ago
  mindspore-ci-bot 3ae2f8d12c !1664 revert parameter set kernel build info 5 years ago
  mindspore-ci-bot a74e238e21 !1664 revert parameter set kernel build info 5 years ago
  yangyongjie 0a97cb8acd add deeplabv3 to model zoo 5 years ago
  WilliamLian 344f2ef4df revert: don't set parameter's format when it has been set before 5 years ago
  mindspore-ci-bot 6f3758f313 !1657 add readme 5 years ago
  yanghaitao 1187411af1 a 5 years ago
  mindspore-ci-bot f7acf0ed6f !1633 modify ssd script for merging backbone 5 years ago
  chenzomi e658eb7f24 bug fix 5 years ago
  mindspore-ci-bot 5cb99aadf5 !1645 ModelZoo WideDeep r0.3 5 years ago
  mindspore-ci-bot dc5b04846f !1566 sync lstm ops code from master to r0.3 5 years ago
  mindspore-ci-bot 72a166ff8c !1624 Remove WARNING log in pynative mode 5 years ago
  z00478463 894e329218 add the readme 5 years ago
  yao_yf 4ac88b6bcc modelzoo_widedeep_r0.3 5 years ago
  mindspore-ci-bot 67fecef6a8 !1651 GPU fix example scripts resnet r0.3 5 years ago
  mindspore-ci-bot c32c17bbad !1644 dataset: re-fix some format problem in take and split 5 years ago
  VectorSL a9db68db3a fix gpu resnet script 5 years ago
  chengxianbin 4fb3ab7882 modify ssd script for merging backbone 5 years ago
  mindspore-ci-bot b2f0135224 !1629 add cpu stridedslice 5 years ago
  ms_yan 5a1fba5103 repair api format problem 5 years ago
  mindspore-ci-bot 69dd996278 !1620 Add protection in cross entropy kernel. 5 years ago
  mindspore-ci-bot 9dd3c1f77d !1621 upload fasterrcnn scripts 5 years ago
  sunsuodong ba39d53c22 sync lstm ops code from master to r0.3 5 years ago
  mindspore-ci-bot b47847167d !1630 Add DeepFM scripts 5 years ago
  mindspore-ci-bot 41e179cc51 !1640 Fix lenet hang problem on windows 5 years ago
  mindspore-ci-bot 01d9ce3e5d !1622 change mobilenet file struct. 5 years ago
  xiefangqi 2c42665e90 fix lenet hang problem on windows 5 years ago
  mindspore-ci-bot 803a91596a !1614 LSTM network adapt to cpu target. 5 years ago
  yangyongjie 8f79f0cce8 add DeepFM 5 years ago
  kswang 808d5947d5 add cpu strided slice 5 years ago
  mindspore-ci-bot 9274daec9c !1610 fix subset random sampler error 5 years ago
  mindspore-ci-bot ce57e02db3 !1562 don't set parameter's format when it has been set before 5 years ago
  mindspore-ci-bot 07724c7080 !1608 add get_dataset_size for CelebADataset 5 years ago
  chenzomi 9853294aaa change mobilenet struct 5 years ago
  caifubi 6c491b8d3e Only release runtime resource in GRAPH_MODE 5 years ago
  meixiaowei 24fb17895a upload fasterrcnn scripts 5 years ago
  ZPaC 42641f17ab Add protection in cross entropy kernel. 5 years ago
  mindspore-ci-bot a8efea5c81 !1588 GPU update resnet50 script in example 5 years ago
  caojian05 600d052ac1 LSTM network adapt to cpu target. 5 years ago
  mindspore-ci-bot 6599cc1aca !1579 rectify pretrained path and revert AdjustAllReduceMulAdduse 5 years ago
  mindspore-ci-bot a14be2254b !1594 refine data copy in multi-graph 5 years ago
  mindspore-ci-bot 1289c3e4db !1592 bug fix while evaluation 5 years ago
  mindspore-ci-bot b70b2da675 !1582 add topk and randomchoicewithmask op data type for aicpu 5 years ago
  mindspore-ci-bot 02914ba0b9 !1581 fix flatten grad error with reshape 5 years ago
  WilliamLian 085d8f1233 don't set parameter's format when it has been set before 5 years ago
  yanghaitao a379c668f5 fix subsetrandomsampler 5 years ago
  mindspore-ci-bot 00a4e188b7 !1590 dataset: fix some format problem in take and split 5 years ago
  yanghaitao 415afe09f5 add get_dataset_size to celebadataset 5 years ago
  mindspore-ci-bot 94872b7678 !1570 Check the size of topk input names before converting input to attr 5 years ago
  mindspore-ci-bot 2f936166c9 !1575 VocDataset support split ops 5 years ago
  mindspore-ci-bot 76befd5703 !1577 fix reshape reshape case in auto parallel for r0.3 5 years ago
  mindspore-ci-bot 78909200ed !1589 fix bert performance 5 years ago
  lizhenyu 32dbbc1de2 refine data copy in multi-graph 5 years ago
  chenzomi 97610885d0 bug fix while evaluation 5 years ago
  ms_yan 27712eafaf repair some format problem in API 5 years ago
  chenhaozhe 04bc2a938e fix performance of bert 5 years ago
  VectorSL b5ce6c55a5 gpu update example resnet 5 years ago
  mindspore-ci-bot 63479f8e7c !1574 fix tfreadop hang 5 years ago
  zhaozhenlong 6cd15ea553 use reshape as flatten grad 5 years ago
  yanzhenxiang2020 d5af2f23b2 add topk and randomchoicewithmask data type for aicpu 5 years ago
  gengdongjie 217d801c12 bugfix for resnet50_imagenet pretrained_ckpt 5 years ago
  gengdongjie 135e90b135 Revert "add pattern AdjustAllReduceMulAdduse the old opadd test case for bugtemp fix try" 5 years ago
  mindspore-ci-bot 431bc8bf4b !1553 change hook function grad input to tuple 5 years ago
  mindspore-ci-bot 771a88d490 !1569 fix multi-graph run out of device resource 5 years ago
  mindspore-ci-bot b298c515a6 !1559 Voc dataset support split ops 5 years ago
  yao_yf dcb91b0ef6 fix reshape reshape case in auto parallel 5 years ago
  yanghaitao f30928f084 fix tfreaderop hang 5 years ago
  yujianfeng 1fb2cce274 Check the size of topk input names before converting input to attr 5 years ago
  caifubi f6ad679ef9 fix multi-graph device resource run out bug 5 years ago
  mindspore-ci-bot 0f22140331 !1548 [session]make manager for every graph 5 years ago
  kingfo a5e66e159e change hook grad input to tuple 5 years ago
  chenfei fce296eb38 make manager for every graph 5 years ago
  mindspore-ci-bot e5c45bd339 !1538 add custom tbe ops for quant aware training 5 years ago
  mindspore-ci-bot af1fde399b !1509 dataset: PR1457 fix 3 bug reports for split 5 years ago
  mindspore-ci-bot 1deb091c0f !1529 support tensor setitem where the number value type is similar to the tensor dtype 5 years ago
  wandongdong 0a52fd052b add custom tbe ops for quant aware training 5 years ago
  mindspore-ci-bot cf20b3443c !1514 fix ssd run failed problem 5 years ago
  mindspore-ci-bot a0e552e75c !1524 fix compilation order 5 years ago
  mindspore-ci-bot ffdb11f548 !1526 Move graph_map_schema.py to example directory 5 years ago
  mindspore-ci-bot fac36e6a1a !1527 THOR ops master -> r0.3 5 years ago
  jjfeing 642761c2b1 adapt second-order optimization ops 5 years ago
  mindspore-ci-bot cfe87d9563 !1519 [Data]Updated UA, RandSharp and RandColor parameter check, Updated UA code and description. 5 years ago
  buxue 5ab32c33e4 support tensor setitem where the number value type is similar to, but not the same as, the tensor dtype 5 years ago
  mindspore-ci-bot e056799467 !1517 Add check for empty group parameters 5 years ago
  heleiwang 2be75b0c74 mv graph_map_schema.py to example 5 years ago
  panfengfeng d0d7864ccc fix compilation order 5 years ago
  Peilin Wang 9228384304 fixed bug for split, RandomSampler and some other cleanup 5 years ago
  mindspore-ci-bot bdd9aec368 !1463 Updated UA, RandSharp and RandColor parameter check, Updated UA code and description. 5 years ago
  guohongzilong 75c1e7f6af add check for group parameters 5 years ago
  mindspore-ci-bot c6f309e125 !1507 remove print 5 years ago
  chengxianbin c77ac8aa0b add mobilenet file for ssd net 5 years ago
  buxue c821cc1ebb remove print 5 years ago
  mindspore-ci-bot 6be8929f62 !1496 revert decoupled of 1313 5 years ago
  mindspore-ci-bot f51a745931 !1486 add train and eval script for LSTM 5 years ago
  c00499814 d3c848fc09 Revert "!1313 decoupled of insert transdata and deal ref and split transdata" 5 years ago
  mindspore-ci-bot 702fcbbe99 !1467 Pynative can not add cell hook 5 years ago
  mindspore-ci-bot 7013a9918a !1485 Fix fusion condition of transpose and reshape 5 years ago
  yujianfeng f95052fd65 Fix fusion condition of transpose and reshape 5 years ago
  caojian05 1fae83d746 add train and eval script for LSTM 5 years ago
  lvliang 11303142b1 pynative-cell-hook-grad-abnormal 5 years ago
  mindspore-ci-bot 5157063cbb !1470 Fix log and comment errors in graphdata 5 years ago
  mindspore-ci-bot d210fbb7e9 !1471 Fix input check in graphdata 5 years ago
  heleiwang b0a354830b fix input check 5 years ago
  heleiwang 0ca8daa1a2 fix log error 5 years ago
  mindspore-ci-bot 47039a6d98 !1449 fix kernel select 5 years ago
  mindspore-ci-bot 74cdb91151 !1458 remove old buffer fusion pass 5 years ago
  liubuyu 6f6fc75ba5 bug fix 5 years ago
  etone-chan 24e5387973 remove old buffer fusion pass 5 years ago
  mindspore-ci-bot 51a50e17b7 !1429 update version from 0.2 to 0.3 5 years ago
  jonyguo 3d6802007a update version from 0.2 to 0.3 5 years ago
100 changed files with 9294 additions and 113 deletions
  1. .gitmodules (+3, -0)
  2. CMakeLists.txt (+1, -1)
  3. README.md (+7, -7)
  4. RELEASE.md (+85, -0)
  5. Third_Party_Open_Source_Software_Notice (+55, -0)
  6. build.sh (+3, -12)
  7. cmake/options.cmake (+0, -1)
  8. docker/mindspore-cpu/0.3.0-alpha/Dockerfile (+67, -0)
  9. docker/mindspore-gpu/0.3.0-alpha/Dockerfile (+83, -0)
  10. example/bert_clue/dataset.py (+1, -1)
  11. example/bert_clue/run_pretrain.py (+5, -0)
  12. example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py (+3, -3)
  13. example/deepfm_criteo/README.md (+132, -0)
  14. example/deepfm_criteo/__init__.py (+14, -0)
  15. example/deepfm_criteo/eval.py (+66, -0)
  16. example/deepfm_criteo/scripts/run_distribute_train.sh (+44, -0)
  17. example/deepfm_criteo/scripts/run_eval.sh (+32, -0)
  18. example/deepfm_criteo/scripts/run_standalone_train.sh (+34, -0)
  19. example/deepfm_criteo/src/__init__.py (+14, -0)
  20. example/deepfm_criteo/src/callback.py (+107, -0)
  21. example/deepfm_criteo/src/config.py (+62, -0)
  22. example/deepfm_criteo/src/dataset.py (+299, -0)
  23. example/deepfm_criteo/src/deepfm.py (+370, -0)
  24. example/deepfm_criteo/train.py (+91, -0)
  25. example/deeplabv3_voc2012/README.md (+66, -0)
  26. example/deeplabv3_voc2012/evaluation.py (+53, -0)
  27. example/deeplabv3_voc2012/scripts/run_distribute_train.sh (+66, -0)
  28. example/deeplabv3_voc2012/scripts/run_eval.sh (+32, -0)
  29. example/deeplabv3_voc2012/scripts/run_standalone_train.sh (+38, -0)
  30. example/deeplabv3_voc2012/src/__init__.py (+23, -0)
  31. example/deeplabv3_voc2012/src/backbone/__init__.py (+21, -0)
  32. example/deeplabv3_voc2012/src/backbone/resnet_deeplab.py (+577, -0)
  33. example/deeplabv3_voc2012/src/config.py (+33, -0)
  34. example/deeplabv3_voc2012/src/deeplabv3.py (+457, -0)
  35. example/deeplabv3_voc2012/src/ei_dataset.py (+84, -0)
  36. example/deeplabv3_voc2012/src/losses.py (+63, -0)
  37. example/deeplabv3_voc2012/src/md_dataset.py (+115, -0)
  38. example/deeplabv3_voc2012/src/miou_precision.py (+72, -0)
  39. example/deeplabv3_voc2012/src/utils/__init__.py (+14, -0)
  40. example/deeplabv3_voc2012/src/utils/adapter.py (+67, -0)
  41. example/deeplabv3_voc2012/src/utils/custom_transforms.py (+148, -0)
  42. example/deeplabv3_voc2012/src/utils/file_io.py (+36, -0)
  43. example/deeplabv3_voc2012/train.py (+92, -0)
  44. example/fasterrcnn_coco2017/README.md (+142, -0)
  45. example/fasterrcnn_coco2017/eval.py (+130, -0)
  46. example/fasterrcnn_coco2017/scripts/run_distribute_train.sh (+69, -0)
  47. example/fasterrcnn_coco2017/scripts/run_eval.sh (+65, -0)
  48. example/fasterrcnn_coco2017/scripts/run_standalone_train.sh (+57, -0)
  49. example/fasterrcnn_coco2017/src/FasterRcnn/__init__.py (+31, -0)
  50. example/fasterrcnn_coco2017/src/FasterRcnn/anchor_generator.py (+84, -0)
  51. example/fasterrcnn_coco2017/src/FasterRcnn/bbox_assign_sample.py (+164, -0)
  52. example/fasterrcnn_coco2017/src/FasterRcnn/bbox_assign_sample_stage2.py (+195, -0)
  53. example/fasterrcnn_coco2017/src/FasterRcnn/faster_rcnn_r50.py (+425, -0)
  54. example/fasterrcnn_coco2017/src/FasterRcnn/fpn_neck.py (+112, -0)
  55. example/fasterrcnn_coco2017/src/FasterRcnn/proposal_generator.py (+199, -0)
  56. example/fasterrcnn_coco2017/src/FasterRcnn/rcnn.py (+171, -0)
  57. example/fasterrcnn_coco2017/src/FasterRcnn/resnet50.py (+248, -0)
  58. example/fasterrcnn_coco2017/src/FasterRcnn/roi_align.py (+178, -0)
  59. example/fasterrcnn_coco2017/src/FasterRcnn/rpn.py (+311, -0)
  60. example/fasterrcnn_coco2017/src/config.py (+158, -0)
  61. example/fasterrcnn_coco2017/src/dataset.py (+441, -0)
  62. example/fasterrcnn_coco2017/src/lr_schedule.py (+42, -0)
  63. example/fasterrcnn_coco2017/src/network_define.py (+182, -0)
  64. example/fasterrcnn_coco2017/src/util.py (+225, -0)
  65. example/fasterrcnn_coco2017/train.py (+136, -0)
  66. example/graph_to_mindrecord/graph_map_schema.py (+0, -0)
  67. example/graph_to_mindrecord/write_citeseer.sh (+6, -3)
  68. example/graph_to_mindrecord/write_cora.sh (+6, -3)
  69. example/graph_to_mindrecord/writer.py (+1, -1)
  70. example/lstm_aclImdb/README.md (+100, -0)
  71. example/lstm_aclImdb/config.py (+33, -0)
  72. example/lstm_aclImdb/dataset.py (+92, -0)
  73. example/lstm_aclImdb/eval.py (+81, -0)
  74. example/lstm_aclImdb/imdb.py (+155, -0)
  75. example/lstm_aclImdb/train.py (+83, -0)
  76. example/mobilenetv2/Readme.md (+11, -20)
  77. example/mobilenetv2/eval.py (+11, -8)
  78. example/mobilenetv2/scripts/run_infer.sh (+1, -1)
  79. example/mobilenetv2/scripts/run_train.sh (+9, -7)
  80. example/mobilenetv2/src/config.py (+0, -0)
  81. example/mobilenetv2/src/dataset.py (+160, -0)
  82. example/mobilenetv2/src/launch.py (+3, -1)
  83. example/mobilenetv2/src/lr_generator.py (+0, -0)
  84. example/mobilenetv2/train.py (+23, -18)
  85. example/mobilenetv2_quant/Readme.md (+101, -0)
  86. example/mobilenetv2_quant/eval.py (+63, -0)
  87. example/mobilenetv2_quant/scripts/run_infer.sh (+53, -0)
  88. example/mobilenetv2_quant/scripts/run_train.sh (+62, -0)
  89. example/mobilenetv2_quant/src/config.py (+38, -0)
  90. example/mobilenetv2_quant/src/dataset.py (+156, -0)
  91. example/mobilenetv2_quant/src/launch.py (+166, -0)
  92. example/mobilenetv2_quant/src/lr_generator.py (+0, -0)
  93. example/mobilenetv2_quant/src/mobilenetV2_quant.py (+215, -0)
  94. example/mobilenetv2_quant/train.py (+232, -0)
  95. example/mobilenetv3_imagenet/Readme.md (+4, -4)
  96. example/mobilenetv3_imagenet/eval.py (+2, -2)
  97. example/mobilenetv3_imagenet/scripts/run_infer.sh (+5, -5)
  98. example/mobilenetv3_imagenet/scripts/run_train.sh (+16, -14)
  99. example/mobilenetv3_imagenet/src/config.py (+0, -0)
  100. example/mobilenetv3_imagenet/src/dataset.py (+6, -1)

.gitmodules (+3, -0)

@@ -13,3 +13,6 @@
[submodule "graphengine"]
path = graphengine
url = https://gitee.com/mindspore/graphengine.git
[submodule "akg"]
path = akg
url = https://gitee.com/mindspore/akg.git
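This hunk registers `akg` as a third submodule alongside graphengine. Submodule content is not fetched by a plain checkout; the usual follow-up (standard git commands, not part of this diff) is:

```
git clone --recurse-submodules https://gitee.com/mindspore/mindspore.git
# or, in an existing clone after pulling this change:
git submodule update --init akg
```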

CMakeLists.txt (+1, -1)

@@ -89,4 +89,4 @@ if (ENABLE_TESTCASES)
add_subdirectory(tests)
endif()

include(cmake/package.cmake)
include(cmake/package.cmake)

README.md (+7, -7)

@@ -29,7 +29,7 @@ enrichment of the AI software/hardware application ecosystem.

<img src="docs/MindSpore-architecture.png" alt="MindSpore Architecture" width="600"/>

For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.2.0-alpha/architecture.html).
For more details please check out our [Architecture Guide](https://www.mindspore.cn/docs/en/0.3.0-alpha/architecture.html).

### Automatic Differentiation

@@ -76,7 +76,7 @@ For installation using `pip`, take `CPU` and `Ubuntu-x86` build version as an ex
1. Download whl from [MindSpore download page](https://www.mindspore.cn/versions/en), and install the package.

```
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.2.0-alpha/MindSpore/cpu/x86_ubuntu/mindspore-0.2.0-cp37-cp37m-linux_x86_64.whl
pip install https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
```

2. Run the following command to verify the install.
@@ -133,8 +133,8 @@ currently the containerized build options are supported as follows:

For `CPU` backend, you can directly pull and run the latest stable image using the below command:
```
docker pull mindspore/mindspore-cpu:0.2.0-alpha
docker run -it mindspore/mindspore-cpu:0.2.0-alpha /bin/bash
docker pull mindspore/mindspore-cpu:0.3.0-alpha
docker run -it mindspore/mindspore-cpu:0.3.0-alpha /bin/bash
```

* GPU
@@ -151,8 +151,8 @@ currently the containerized build options are supported as follows:

Then you can pull and run the latest stable image using the below command:
```
docker pull mindspore/mindspore-gpu:0.2.0-alpha
docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.2.0-alpha /bin/bash
docker pull mindspore/mindspore-gpu:0.3.0-alpha
docker run -it --runtime=nvidia --privileged=true mindspore/mindspore-gpu:0.3.0-alpha /bin/bash
```

To test if the docker image works, please execute the python code below and check the output:
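The snippet referenced here lies outside the changed hunks, so it is not shown in this compare view. For orientation, a minimal check in the 0.x API would look roughly like the sketch below (a reconstruction, not quoted from the diff):

```python
import numpy as np
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import functional as F

context.set_context(device_target="GPU")
x = Tensor(np.ones([1, 3, 3, 4]).astype(np.float32))
y = Tensor(np.ones([1, 3, 3, 4]).astype(np.float32))
print(F.tensor_add(x, y))  # expect a 1x3x3x4 tensor filled with 2.0
```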
@@ -187,7 +187,7 @@ please check out [docker](docker/README.md) repo for the details.

## Quickstart

See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.2.0-alpha/quick_start/quick_start.html)
See the [Quick Start](https://www.mindspore.cn/tutorial/en/0.3.0-alpha/quick_start/quick_start.html)
to implement the image classification.

## Docs


RELEASE.md (+85, -0)

@@ -1,3 +1,88 @@
# Release 0.3.1-alpha

## Major Features and Improvements

### Ascend 910 Training and Inference Framework
* Frontend and User Interface
* Independent model init interface.
* Data processing, augmentation, and save format
* Support sample padding for minddataset.
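The sample-padding feature comes from !1837 in the commit list above ("pad samples in mindrecord"). A hedged sketch of its use, assuming the `padded_sample`/`num_padded` keywords introduced by that change (file name illustrative):

```python
import mindspore.dataset as ds

# The padded sample mirrors the dataset schema; the values are placeholders.
padded_sample = {"image": bytes(), "label": -1}
data_set = ds.MindDataset("imagenet.mindrecord",
                          columns_list=["image", "label"],
                          padded_sample=padded_sample,
                          num_padded=2,  # append two filler samples so shards divide evenly
                          num_shards=8, shard_id=0)
```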

## Bugfixes
* Python API
* Fix bugs in the lars optimizer([!1894](https://gitee.com/mindspore/mindspore/pulls/1894))
* Data processing
* Fix accuracy problem of RandomCropDecodeResize ([!2340](https://gitee.com/mindspore/mindspore/pulls/2340))

# Release 0.3.0-alpha

## Major Features and Improvements

### Ascend 910 Training and Inference Framework
* New models
* DeepFM: a factorization-machine based neural network for CTR prediction on Criteo dataset.
* DeepLabV3: significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-the-art models on the PASCAL VOC 2007 semantic image segmentation benchmark.
* Faster-RCNN: towards real-time object detection with region proposal networks on COCO 2017 dataset.
* SSD: a single-stage object detection method on COCO 2017 dataset.
* GoogLeNet: a deep convolutional neural network architecture codenamed Inception V1 for classification and detection on CIFAR-10 dataset.
* Wide&Deep: jointly trained wide linear models and deep neural networks for recommender systems on Criteo dataset.
* Frontend and User Interface
* Complete numpy advanced indexing method. Supports value and assignment through tensor index.
* Some optimizers support separating parameter groups. Different parameter groups can set different `learning_rate` and `weight_decay` (a short sketch follows this feature list).
* Support setting submodule's logging level independently, e.g. you can set logging level of module `A` to warning and set logging level of module `B` to info.
* Support weights to be compiled according to shape to solve the problem of large memory overhead.
* Add implementations of some operators and grammar support in pynative mode, to be consistent with graph mode.
* User interfaces change log
* Learning rate and weight decay making group params([!637](https://gitee.com/mindspore/mindspore/pulls/637))
* Support weights to be compiled according to shape([!1015](https://gitee.com/mindspore/mindspore/pulls/1015))
* delete some context param([!1100](https://gitee.com/mindspore/mindspore/pulls/1100))
* ImageSummary/ScalarSummary/TensorSummary/HistogramSummary([!1329](https://gitee.com/mindspore/mindspore/pulls/1329))([!1425](https://gitee.com/mindspore/mindspore/pulls/1425))
* Executor and Performance Optimization
* Support evaluation during the training process, so that training accuracy can be easily obtained.
* Enable second-order optimization for resnet50, which can achieve 75.9% accuracy in 45 epochs (Resnet50 @ImageNet).
* Optimize pynative implementation and improve its execution performance.
* Optimize summary record implementation and improve its performance.
* Data processing, augmentation, and save format
* Support simple text processing, such as tokenizer/buildvocab/lookup.
* Support padding batch.
* Support split or concat dataset.
* Support MindDataset reading from file list.
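A sketch of the parameter-group feature mentioned above, in the MindSpore API of this era (`net` stands for any `nn.Cell`; the grouping criterion is illustrative):

```python
from mindspore import nn

# Split trainable parameters into two groups with different hyper-parameters.
conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
other_params = [p for p in net.trainable_params() if 'conv' not in p.name]
group_params = [{'params': conv_params, 'weight_decay': 0.01},
                {'params': other_params, 'lr': 0.02}]
opt = nn.Momentum(group_params, learning_rate=0.1, momentum=0.9)
```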

### Other Hardware Support
* GPU platform
* New models supported: MobileNetV2, MobileNetV3.
* Support mixed precision training.
* Support device memory swapping.

## Bugfixes
* Python API
* An exception to the broadcast input data type check([!712](https://gitee.com/mindspore/mindspore/pulls/712))
* Fix issues assignsub return value 0([!1036](https://gitee.com/mindspore/mindspore/pulls/1036))
* Fix issue Conv2dBackpropInput bprop should return 3 instead of 2 items([!1001](https://gitee.com/mindspore/mindspore/pulls/1001))
* Fix sens shape error of TrainOneStepWithLossScaleCell([!1050](https://gitee.com/mindspore/mindspore/pulls/1050))
* Fix BatchNormGrad operator([!1344](https://gitee.com/mindspore/mindspore/pulls/1344))
* Executor
* Fix dropout,topK and addn errors in PyNative mode ([!1285](https://gitee.com/mindspore/mindspore/pulls/1285), [!1138](https://gitee.com/mindspore/mindspore/pulls/1138), [!1033](https://gitee.com/mindspore/mindspore/pulls/1033)).
* Fix memory leaks after execution in PyNative mode ([!1201](https://gitee.com/mindspore/mindspore/pulls/1201)).
* Fix HCCL failure in some special scenes ([!1204](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1204), [!1252](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1252)).
* Fix SSD network when Select failed, can't find kernel info([!1449](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1449)).
* Fix Topk operator selection strategy bug between aicore and aicpu([!1367](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/1367)).
* Fix input memory size of 'assign' op unequal in control sink mode when assigning data from one child graph to another([!802](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/802)).
* Fix allreduce ir inconsistency([!989](https://gitee.com/mindspore/dashboard/projects/mindspore/mindspore/pulls/989)).
* GPU platform
* Fix summary for gradient collection ([!1364](https://gitee.com/mindspore/mindspore/pulls/1364))
* Fix the slice operator ([!1489](https://gitee.com/mindspore/mindspore/pulls/1489))
* Data processing
* Fix memory problems of GeneratorDataset of sub-process ([!907](https://gitee.com/mindspore/mindspore/pulls/907))
* Fix getting data timeout when training the cifar10 dataset under the lenet([!1391](https://gitee.com/mindspore/mindspore/pulls/1391))

## Contributors
Thanks goes to these wonderful people:

Alexey Shevlyakov, Amir Lashkari, anthony, baihuawei, biffex, buxue, caifubi, candanzg, caojian05, Cathy Wong, changzherui, chenfei, chengxianbin, chenhaozhe, chenzomi, chujinjin, cristoval, dengwentao, eric, etone-chan, fary86, gaojing, gengdongjie, gongchen, guohongzilong, guozhijian, heleiwang, hesham, He Wei, Hoai Linh Tran, hongxing, huangdongrun, huanghui, Jamie Nisbet, Jesse Lee, jiangjinsheng, jiangzhiwen, jinyaohui, jjfeing, jonwe, jonyguo, Junhan Hu, Kang, kingfo, kswang, laiyongqiang, leopz, lichenever, lihongkang, limingqi107, liubuyu, liuliyan2, liuwenhao4, liuxiao, liuxiao, liyong, lizhenyu, lvliang, Margaret_wangrui, meixiaowei, ms_yan, Nat Sutyanyong, ougongchang, panfengfeng, panyifeng, Peilin Wang, peixu_ren, qianlong, rick_sanchez, seatea, sheng, shijianning, simson, sunsuodong, Tinazhang, VectorSL, wandongdong, wangcong, wanghua, wangnan39, Wei Luning, wenchunjiang, wilfChen, WilliamLian, wsc, wukesong, wuxuejian, Xiaoda Zhang, xiefangqi, xulei2020, Yang, yangjie159, yangruoqi713, yangyongjie, yangzhenzhang, Yanjun Peng, yanzhenxiang2020, yao_yf, Yi Huaijie, yoonlee666, yujianfeng, YuJianfeng, yvetteliu, zhangdengcheng, Zhang Qinghua, zhangz0911gm, zhaojichen, zhaoting, zhaozhenlong, zhoufeng, zhouneng, zhousiyi, zhouyuanshen, Zirui Wu, Ziyan, zjun, ZPaC, lihongzhang

Contributions of any kind are welcome!

# Release 0.2.0-alpha

## Major Features and Improvements


Third_Party_Open_Source_Software_Notice (+55, -0)

@@ -3053,6 +3053,61 @@ Copyright 2003 Google Inc.
Copyright 2009 Google Inc.
Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All

Software: libtiff 4.1.0
Copyright notice:
Copyright © 2015 Open Microscopy Environment / University of Dundee
Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 1990-1997 Sam Leffler
Copyright (c) 1991-1997 Silicon Graphics, Inc.
Copyright (c) 1988-1997 Sam Leffler
Copyright (c) 1991-1997 Sam Leffler
Use and Copyright
Copyright (C) 1990, 1995 Frank D. Cringle.
Copyright (c) 1994-1997 Sam Leffler
Copyright (c) 1994-1997 Silicon Graphics, Inc.
Copyright (c) 1997 Greg Ward Larson
Copyright (c) 1997 Silicon Graphics, Inc.
Copyright (c) 2010, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) Joris Van Damme <info@awaresystems.be>
Copyright (c) AWare Systems <http://www.awaresystems.be/>
Copyright (c) 1996-1997 Sam Leffler
Copyright (c) 1996 Pixar
Copyright (c) 1995-1997 Sam Leffler
Copyright (c) 1995-1997 Silicon Graphics, Inc.
Copyright (c) 1988-1996 Sam Leffler
Copyright (c) 1991-1996 Silicon Graphics, Inc.
Copyright (c) 1992-1997 Sam Leffler
Copyright (c) 1992-1997 Silicon Graphics, Inc.
Copyright (c) 2018, Mapbox
Copyright (c) 2017, Planet Labs
Copyright (c) 1990 by Sun Microsystems, Inc.
Copyright 1990 by Digital Equipment Corporation, Maynard, Massachusetts.
Copyright 1991 by Digital Equipment Corporation, Maynard, Massachusetts.
Copyright (c) 2002, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2003 Ross Finlayson
Additions (c) Richard Nolde 2006-2010
Copyright (c) 2003, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2000, Frank Warmerdam
Copyright (c) 1987, 1993, 1994
Copyright (c) 1989, 1993
Copyright (c) 2009 Frank Warmerdam
Copyright (c) 1987, 1993
Copyright (c) 2005 The DragonFly Project. All rights reserved.
Copyright (c) 2003 Citrus Project,
All rights reserved.
Copyright (c) 1990, 1993
Copyright (c) 1996 Mike Johnson
Copyright (c) 1996 BancTec AB
Copyright (c) 2004, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 2012, Frank Warmerdam <warmerdam@pobox.com>
Copyright (c) 2019, Even Rouault <even.rouault at spatialys.com>
Copyright (c) 2007, Frank Warmerdam <warmerdam@pobox.com>
Copyright (c) 2019, Thomas Bernard <miniupnp@free.fr>
Copyright (c) 2008, Andrey Kiselev <dron@ak4719.spb.edu>
Copyright (c) 1999, Frank Warmerdam
Copyright (c) 1991-1996 Sam Leffler
Copyright (c) 1996 USAF Phillips Laboratory

Software: opencv 4.2.0
Copyright notice:
Copyright (C) 2016, NVIDIA Corporation, all rights reserved.


build.sh (+3, -12)

@@ -25,7 +25,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K]"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I]"
echo ""
echo "Options:"
echo " -d Debug mode"
@@ -52,7 +52,6 @@ usage()
echo " -M Enable MPI and NCCL for GPU training, default on"
echo " -V Specify the minimum required cuda version, default CUDA 9.2"
echo " -I Compile predict, default off"
echo " -K Compile with AKG, default off"
}

# check value of input is 'on' or 'off'
@@ -91,7 +90,6 @@ checkopts()
COMPILE_PREDICT="off"
USE_GLOG="on"
PREDICT_PLATFORM=""
ENABLE_AKG="off"

# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K' opt
@@ -230,10 +228,6 @@ checkopts()
exit 1
fi
;;
K)
ENABLE_AKG="on"
echo "enable compile with akg"
;;
*)
echo "Unknown option ${opt}!"
usage
@@ -307,9 +301,6 @@ build_mindspore()
if [[ "X$USE_GLOG" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DUSE_GLOG=ON"
fi
if [[ "X$ENABLE_AKG" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON"
fi
echo "${CMAKE_ARGS}"
if [[ "X$INC_BUILD" = "Xoff" ]]; then
cmake ${CMAKE_ARGS} ../..
@@ -433,9 +424,9 @@ build_predict()

cd "${BASEPATH}/predict/output/"
if [[ "$PREDICT_PLATFORM" == "x86_64" ]]; then
tar -cf MSPredict-0.2.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.3.0-linux_x86_64.tar.gz include/ lib/ --warning=no-file-changed
elif [[ "$PREDICT_PLATFORM" == "arm64" ]]; then
tar -cf MSPredict-0.2.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
tar -cf MSPredict-0.3.0-linux_aarch64.tar.gz include/ lib/ --warning=no-file-changed
fi
echo "success to build predict project!"
}
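With the `-K` flag gone, AKG can no longer be toggled from the build script. Typical invocations of the remaining options, based on the usage text above (illustrative, not quoted from the repo docs):

```
bash build.sh -e d -j8          # Ascend (d) backend, 8 parallel jobs
bash build.sh -e gpu -V 10.1    # GPU backend against CUDA 10.1
```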


cmake/options.cmake (+0, -1)

@@ -16,7 +16,6 @@ option(ENABLE_DUMP_PROTO "Enable dump anf graph to file in ProtoBuffer format, d
option(ENABLE_DUMP_E2E "Enable dump e2e file, default on" OFF)
option(ENABLE_DUMP_IR "Enable dump function graph ir, default on" ON)
option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (WIN32)


docker/mindspore-cpu/0.3.0-alpha/Dockerfile (+67, -0)

@@ -0,0 +1,67 @@
FROM ubuntu:18.04

MAINTAINER leonwanghui <leon.wanghui@huawei.com>

# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV PATH /usr/local/bin:$PATH

# Install base tools
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion

# Install compile tools
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex

# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Install python (v3.7.5)
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz

# Set pip source
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install MindSpore cpu whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/cpu/ubuntu_x86/mindspore-0.3.0-cp37-cp37m-linux_x86_64.whl
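Building and running this image locally is standard Docker usage; the tag below merely mirrors the published one:

```
docker build -t mindspore/mindspore-cpu:0.3.0-alpha docker/mindspore-cpu/0.3.0-alpha
docker run -it mindspore/mindspore-cpu:0.3.0-alpha /bin/bash
```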

docker/mindspore-gpu/0.3.0-alpha/Dockerfile (+83, -0)

@@ -0,0 +1,83 @@
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04

MAINTAINER leonwanghui <leon.wanghui@huawei.com>

# Set env
ENV PYTHON_ROOT_PATH /usr/local/python-3.7.5
ENV OMPI_ROOT_PATH /usr/local/openmpi-3.1.5
ENV PATH ${OMPI_ROOT_PATH}/bin:/usr/local/bin:$PATH
ENV LD_LIBRARY_PATH ${OMPI_ROOT_PATH}/lib:$LD_LIBRARY_PATH

# Install base tools
RUN apt update \
&& DEBIAN_FRONTEND=noninteractive apt install -y \
vim \
wget \
curl \
xz-utils \
net-tools \
openssh-client \
git \
ntpdate \
tzdata \
tcl \
sudo \
bash-completion

# Install compile tools
RUN DEBIAN_FRONTEND=noninteractive apt install -y \
gcc \
g++ \
zlibc \
make \
libgmp-dev \
patch \
autoconf \
libtool \
automake \
flex \
libnccl2=2.4.8-1+cuda10.1 \
libnccl-dev=2.4.8-1+cuda10.1

# Set bash
RUN echo "dash dash/sh boolean false" | debconf-set-selections
RUN DEBIAN_FRONTEND=noninteractive dpkg-reconfigure dash

# Install python (v3.7.5)
RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
libgdbm-dev libgdbm-compat-dev liblzma-dev libreadline-dev libsqlite3-dev \
&& cd /tmp \
&& wget https://github.com/python/cpython/archive/v3.7.5.tar.gz \
&& tar -xvf v3.7.5.tar.gz \
&& cd /tmp/cpython-3.7.5 \
&& mkdir -p ${PYTHON_ROOT_PATH} \
&& ./configure --prefix=${PYTHON_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -f /usr/local/bin/python \
&& rm -f /usr/local/bin/pip \
&& ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
&& ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
&& rm -rf /tmp/cpython-3.7.5 \
&& rm -f /tmp/v3.7.5.tar.gz

# Set pip source
RUN mkdir -pv /root/.pip \
&& echo "[global]" > /root/.pip/pip.conf \
&& echo "trusted-host=mirrors.aliyun.com" >> /root/.pip/pip.conf \
&& echo "index-url=http://mirrors.aliyun.com/pypi/simple/" >> /root/.pip/pip.conf

# Install openmpi (v3.1.5)
RUN cd /tmp \
&& wget https://download.open-mpi.org/release/open-mpi/v3.1/openmpi-3.1.5.tar.gz \
&& tar -xvf openmpi-3.1.5.tar.gz \
&& cd /tmp/openmpi-3.1.5 \
&& mkdir -p ${OMPI_ROOT_PATH} \
&& ./configure --prefix=${OMPI_ROOT_PATH} \
&& make -j4 \
&& make install -j4 \
&& rm -rf /tmp/openmpi-3.1.5 \
&& rm -f /tmp/openmpi-3.1.5.tar.gz

# Install MindSpore cuda-10.1 whl package
RUN pip install --no-cache-dir https://ms-release.obs.cn-north-4.myhuaweicloud.com/0.3.0-alpha/MindSpore/gpu/ubuntu_x86/cuda-10.1/mindspore_gpu-0.3.0-cp37-cp37m-linux_x86_64.whl

example/bert_clue/dataset.py (+1, -1)

@@ -52,7 +52,7 @@ def create_bert_dataset(epoch_size=1, device_num=1, rank=0, do_shuffle="true", e
ds = ds.map(input_columns="input_ids", operations=type_cast_op)
# apply batch operations
ds = ds.batch(bert_net_cfg.batch_size, drop_remainder=True)
ds = ds.repeat(repeat_count)
ds = ds.repeat(new_repeat_count)
logger.info("data size: {}".format(ds.get_dataset_size()))
logger.info("repeatcount: {}".format(ds.get_repeat_count()))
return ds, new_repeat_count

example/bert_clue/run_pretrain.py (+5, -0)

@@ -81,6 +81,11 @@ def run_pretrain():
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True,
device_num=device_num)
from mindspore.parallel._auto_parallel_context import auto_parallel_context
if bert_net_cfg.num_hidden_layers == 12:
auto_parallel_context().set_all_reduce_fusion_split_indices([28, 55, 82, 109, 136, 163, 190, 205])
elif bert_net_cfg.num_hidden_layers == 24:
auto_parallel_context().set_all_reduce_fusion_split_indices([38, 93, 148, 203, 258, 313, 368, 397])
D.init()
rank = args_opt.device_id % device_num
else:


example/cv_to_mindrecord/ImageNet_Similar_Perf/imagenet/mr_api.py (+3, -3)

@@ -26,8 +26,8 @@ import os
import pickle

######## mindrecord_schema begin ##########
mindrecord_schema = {"label": {"type": "int64"},
"data": {"type": "bytes"},
mindrecord_schema = {"label": {"type": "int32"},
"image": {"type": "bytes"},
"file_name": {"type": "string"}}
######## mindrecord_schema end ##########

@@ -121,5 +121,5 @@ def mindrecord_dict_data(task_id):
if not image_bytes:
print("The image file: {} is invalid.".format(file_name))
continue
data["data"] = image_bytes
data["image"] = image_bytes
yield data
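After this rename, consumers must read the record through the new column name. A minimal read-back sketch (file name illustrative):

```python
import mindspore.dataset as ds

data_set = ds.MindDataset("imagenet.mindrecord", columns_list=["image", "label"])
for item in data_set.create_dict_iterator():
    image_bytes, label = item["image"], item["label"]  # column is now 'image', not 'data'
    break
```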

example/deepfm_criteo/README.md (+132, -0)

@@ -0,0 +1,132 @@
# DeepFM Description

This is an example of training DeepFM with Criteo dataset in MindSpore.

[Paper](https://arxiv.org/pdf/1703.04247.pdf) Huifeng Guo, Ruiming Tang, Yunming Ye, Zhenguo Li, Xiuqiang He


# Model architecture

The overall network architecture of DeepFM is shown below:

[Link](https://arxiv.org/pdf/1703.04247.pdf)
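In brief, DeepFM sums a factorization-machine term and a deep component over shared embeddings (equation from the linked paper):

$$\hat{y} = \mathrm{sigmoid}(y_{FM} + y_{DNN})$$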


# Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the Criteo dataset for training. Convert the dataset to TFRecord format and move the files to a specified path.
- For more information, please check the resources below:
- [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html)
- [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html)

# Script description

## Script and sample code

```python
├── deepfm
├── README.md
├── scripts
│ ├──run_train.sh
│ ├──run_eval.sh
├── src
│ ├──config.py
│ ├──dataset.py
│ ├──callback.py
│ ├──deepfm.py
├── train.py
├── eval.py
```

## Training process

### Usage

- sh run_train.sh [DEVICE_NUM] [DATASET_PATH] [MINDSPORE_HCCL_CONFIG_PAHT]
- python train.py --dataset_path [DATASET_PATH]

### Launch

```
# distribute training example
sh scripts/run_distribute_train.sh 8 /opt/dataset/criteo /opt/mindspore_hccl_file.json
# standalone training example
sh scripts/run_standalone_train.sh 0 /opt/dataset/criteo
or
python train.py --dataset_path /opt/dataset/criteo > output.log 2>&1 &
```

### Result

Training results will be stored in the example path. By default, checkpoints are saved under `./checkpoint`, the training log is redirected to `./output.log`, the loss log to `./loss.log`, and the eval log to `./auc.log`.


## Eval process

### Usage

- sh run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH]

### Launch

```
# infer example
sh scripts/run_eval.sh 0 ~/criteo/eval/ ~/train/deepfm-15_41257.ckpt
```

> Checkpoints are produced during the training process.

### Result

Inference results will be stored in the example path; you can find results like the following in `auc.log`.

```
2020-05-27 20:51:35 AUC: 0.80577889065281, eval time: 35.55999s.
```

# Model description

## Performance

### Training Performance

| Parameters | DeepFM |
| -------------------------- | ------------------------------------------------------|
| Model Version | |
| Resource | Ascend 910, cpu:2.60GHz 96cores, memory:1.5T |
| uploaded Date | 05/27/2020 |
| MindSpore Version | 0.2.0 |
| Dataset | Criteo |
| Training Parameters | src/config.py |
| Optimizer | Adam |
| Loss Function | SoftmaxCrossEntropyWithLogits |
| outputs | |
| Loss | 0.4234 |
| Accuracy | AUC[0.8055] |
| Total time | 91 min |
| Params (M) | |
| Checkpoint for Fine tuning | |
| Model for inference | |

### Inference Performance

| Parameters | | |
| -------------------------- | ----------------------------- | ------------------------- |
| Model Version | | |
| Resource | Ascend 910 | Ascend 310 |
| uploaded Date | 05/27/2020 | 05/27/2020 |
| MindSpore Version | 0.2.0 | 0.2.0 |
| Dataset | Criteo | |
| batch_size | 1000 | |
| outputs | | |
| Accuracy | AUC[0.8055] | |
| Speed | | |
| Total time | 35.559s | |
| Model for inference | | |

# ModelZoo Homepage
[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)

example/deepfm_criteo/__init__.py (+14, -0)

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

example/deepfm_criteo/eval.py (+66, -0)

@@ -0,0 +1,66 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_criteo."""
import os
import sys
import time
import argparse

from mindspore import context
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net

from src.deepfm import ModelBuilder, AUCMetric
from src.config import DataConfig, ModelConfig, TrainConfig
from src.dataset import create_dataset

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
parser = argparse.ArgumentParser(description='CTR Prediction')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')

args_opt, _ = parser.parse_known_args()
device_id = int(os.getenv('DEVICE_ID'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)


def add_write(file_path, print_str):
with open(file_path, 'a+', encoding='utf-8') as file_out:
file_out.write(print_str + '\n')


if __name__ == '__main__':
data_config = DataConfig()
model_config = ModelConfig()
train_config = TrainConfig()

ds_eval = create_dataset(args_opt.dataset_path, train_mode=False,
epochs=1, batch_size=train_config.batch_size)
model_builder = ModelBuilder(ModelConfig, TrainConfig)
train_net, eval_net = model_builder.get_train_eval_net()
train_net.set_train()
eval_net.set_train(False)
auc_metric = AUCMetric()
model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
param_dict = load_checkpoint(args_opt.checkpoint_path)
load_param_into_net(eval_net, param_dict)

start = time.time()
res = model.eval(ds_eval)
eval_time = time.time() - start
time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
out_str = f'{time_str} AUC: {list(res.values())[0]}, eval time: {eval_time}s.'
print(out_str)
add_write('./auc.log', str(out_str))

example/deepfm_criteo/scripts/run_distribute_train.sh (+44, -0)

@@ -0,0 +1,44 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "Please run the script as: "
echo "sh scripts/run_distribute_train.sh DEVICE_NUM DATASET_PATH MINDSPORE_HCCL_CONFIG_PAHT"
echo "for example: sh scripts/run_distribute_train.sh 8 /dataset_path /rank_table_8p.json"
echo "After running the script, the network runs in the background, The log will be generated in logx/output.log"


export RANK_SIZE=$1
DATA_URL=$2
export MINDSPORE_HCCL_CONFIG_PATH=$3

for ((i=0; i<RANK_SIZE;i++))
do
export DEVICE_ID=$i
export RANK_ID=$i
rm -rf log$i
mkdir ./log$i
cp *.py ./log$i
cp -r src ./log$i
cd ./log$i || exit
echo "start training for rank $i, device $DEVICE_ID"
env > env.log
python -u train.py \
--dataset_path=$DATA_URL \
--ckpt_path="checkpoint" \
--eval_file_name='auc.log' \
--loss_file_name='loss.log' \
--do_eval=True > output.log 2>&1 &
cd ../
done

+ 32
- 0
example/deepfm_criteo/scripts/run_eval.sh View File

@@ -0,0 +1,32 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "Please run the script as: "
echo "sh scripts/run_eval.sh DEVICE_ID DATASET_PATH CHECKPOINT_PATH"
echo "for example: sh scripts/run_eval.sh 0 /dataset_path /checkpoint_path"
echo "After running the script, the network runs in the background, The log will be generated in ms_log/eval_output.log"

export DEVICE_ID=$1
DATA_URL=$2
CHECKPOINT_PATH=$3

mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0

python -u eval.py \
--dataset_path=$DATA_URL \
--checkpoint_path=$CHECKPOINT_PATH > ms_log/eval_output.log 2>&1 &

+ 34
- 0
example/deepfm_criteo/scripts/run_standalone_train.sh View File

@@ -0,0 +1,34 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "Please run the script as: "
echo "sh scripts/run_standalone_train.sh DEVICE_ID DATASET_PATH"
echo "for example: sh scripts/run_standalone_train.sh 0 /dataset_path"
echo "After running the script, the network runs in the background, The log will be generated in ms_log/output.log"

export DEVICE_ID=$1
DATA_URL=$2

mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0

python -u train.py \
--dataset_path=$DATA_URL \
--ckpt_path="checkpoint" \
--eval_file_name='auc.log' \
--loss_file_name='loss.log' \
--do_eval=True > ms_log/output.log 2>&1 &

+ 14
- 0
example/deepfm_criteo/src/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 107
- 0
example/deepfm_criteo/src/callback.py View File

@@ -0,0 +1,107 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Defined callback for DeepFM.
"""
import time
from mindspore.train.callback import Callback


def add_write(file_path, out_str):
with open(file_path, 'a+', encoding='utf-8') as file_out:
file_out.write(out_str + '\n')


class EvalCallBack(Callback):
"""
Monitor the loss in training.
If the loss is NAN or INF terminating training.
Note
If per_print_times is 0 do not print loss.
"""
def __init__(self, model, eval_dataset, auc_metric, eval_file_path):
super(EvalCallBack, self).__init__()
self.model = model
self.eval_dataset = eval_dataset
self.aucMetric = auc_metric
self.aucMetric.clear()
self.eval_file_path = eval_file_path

def epoch_end(self, run_context):
start_time = time.time()
out = self.model.eval(self.eval_dataset)
eval_time = int(time.time() - start_time)
time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
out_str = "{} EvalCallBack metric{}; eval_time{}s".format(
time_str, out.values(), eval_time)
print(out_str)
add_write(self.eval_file_path, out_str)


class LossCallBack(Callback):
"""
Monitor the loss in training.
If the loss is NAN or INF terminating training.
Note
If per_print_times is 0 do not print loss.
Args
loss_file_path (str) The file absolute path, to save as loss_file;
per_print_times (int) Print loss every times. Default 1.
"""
def __init__(self, loss_file_path, per_print_times=1):
super(LossCallBack, self).__init__()
if not isinstance(per_print_times, int) or per_print_times < 0:
raise ValueError("print_step must be int and >= 0.")
self.loss_file_path = loss_file_path
self._per_print_times = per_print_times

def step_end(self, run_context):
cb_params = run_context.original_args()
loss = cb_params.net_outputs.asnumpy()
cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1
cur_num = cb_params.cur_step_num
if self._per_print_times != 0 and cur_num % self._per_print_times == 0:
with open(self.loss_file_path, "a+") as loss_file:
time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
loss_file.write("{} epoch: {} step: {}, loss is {}\n".format(
time_str, cb_params.cur_epoch_num, cur_step_in_epoch, loss))
print("epoch: {} step: {}, loss is {}\n".format(
cb_params.cur_epoch_num, cur_step_in_epoch, loss))


class TimeMonitor(Callback):
"""
Time monitor for calculating cost of each epoch.
Args
data_size (int) step size of an epoch.
"""
def __init__(self, data_size):
super(TimeMonitor, self).__init__()
self.data_size = data_size

def epoch_begin(self, run_context):
self.epoch_time = time.time()

def epoch_end(self, run_context):
epoch_mseconds = (time.time() - self.epoch_time) * 1000
per_step_mseconds = epoch_mseconds / self.data_size
print("epoch time: {0}, per step time: {1}".format(epoch_mseconds, per_step_mseconds), flush=True)

def step_begin(self, run_context):
self.step_time = time.time()

def step_end(self, run_context):
step_mseconds = (time.time() - self.step_time) * 1000
print(f"step time {step_mseconds}", flush=True)

+ 62
- 0
example/deepfm_criteo/src/config.py View File

@@ -0,0 +1,62 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Network config settings, used in train.py and eval.py.
"""


class DataConfig:
"""
Define parameters of dataset.
"""
data_vocab_size = 184965
train_num_of_parts = 21
test_num_of_parts = 3
batch_size = 1000
data_field_size = 39
# dataset format, 1: mindrecord, 2: tfrecord, 3: h5
data_format = 2


class ModelConfig:
"""
Define parameters of model.
"""
batch_size = DataConfig.batch_size
data_field_size = DataConfig.data_field_size
data_vocab_size = DataConfig.data_vocab_size
data_emb_dim = 80
deep_layer_args = [[400, 400, 512], "relu"]
init_args = [-0.01, 0.01]
weight_bias_init = ['normal', 'normal']
keep_prob = 0.9


class TrainConfig:
"""
Define parameters of training.
"""
batch_size = DataConfig.batch_size
l2_coef = 1e-6
learning_rate = 1e-5
epsilon = 1e-8
loss_scale = 1024.0
train_epochs = 15
save_checkpoint = True
ckpt_file_name_prefix = "deepfm"
save_checkpoint_steps = 1
keep_checkpoint_max = 15
    eval_callback = True
    loss_callback = True
    # The following fields are referenced by ModelBuilder.get_callback_list;
    # the defaults below are assumed from train.py's argument defaults.
    output_path = "./"
    eval_file_name = "auc.log"
    loss_file_name = "loss.log"

+ 299
- 0
example/deepfm_criteo/src/dataset.py View File

@@ -0,0 +1,299 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Create train or eval dataset.
"""
import os
import math
from enum import Enum

import pandas as pd
import numpy as np
import mindspore.dataset.engine as de
import mindspore.common.dtype as mstype

from .config import DataConfig


class DataType(Enum):
"""
Enumerate supported dataset format.
"""
MINDRECORD = 1
TFRECORD = 2
H5 = 3
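
For readers tracing how the integer `data_format` in `DataConfig` reaches this enum: `train.py` later in this diff calls `DataType(data_config.data_format)`. A minimal sketch of that mapping, assuming the default `data_format = 2`:

```python
# Minimal sketch of the config-to-enum mapping (mirrors DataType above and
# the call DataType(data_config.data_format) in train.py).
from enum import Enum

class DataType(Enum):
    MINDRECORD = 1
    TFRECORD = 2
    H5 = 3

data_format = 2  # DataConfig.data_format default (tfrecord)
assert DataType(data_format) is DataType.TFRECORD
```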


class H5Dataset():
"""
Create dataset with H5 format.

Args:
data_path (str): Dataset directory.
        train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
        train_num_of_parts (int): The number of train data files (default=21).
        test_num_of_parts (int): The number of test data files (default=3).
"""
max_length = 39

def __init__(self, data_path, train_mode=True,
train_num_of_parts=DataConfig.train_num_of_parts,
test_num_of_parts=DataConfig.test_num_of_parts):
self._hdf_data_dir = data_path
self._is_training = train_mode
if self._is_training:
self._file_prefix = 'train'
self._num_of_parts = train_num_of_parts
else:
self._file_prefix = 'test'
self._num_of_parts = test_num_of_parts
self.data_size = self._bin_count(self._hdf_data_dir, self._file_prefix, self._num_of_parts)
print("data_size: {}".format(self.data_size))

def _bin_count(self, hdf_data_dir, file_prefix, num_of_parts):
size = 0
for part in range(num_of_parts):
_y = pd.read_hdf(os.path.join(hdf_data_dir, f'{file_prefix}_output_part_{str(part)}.h5'))
size += _y.shape[0]
return size

def _iterate_hdf_files_(self, num_of_parts=None,
shuffle_block=False):
"""
        Iterate over the hdf files (blocks). When the whole dataset is finished, the iterator
        restarts from the beginning, so the data stream never stops.

        :param num_of_parts: number of files
        :param shuffle_block: whether to shuffle the block files at every round
        :return: input_hdf_file_name, output_hdf_file_name, finish_flag
"""
parts = np.arange(num_of_parts)
while True:
if shuffle_block:
for _ in range(int(shuffle_block)):
np.random.shuffle(parts)
for i, p in enumerate(parts):
yield os.path.join(self._hdf_data_dir, f'{self._file_prefix}_input_part_{str(p)}.h5'), \
os.path.join(self._hdf_data_dir, f'{self._file_prefix}_output_part_{str(p)}.h5'), \
i + 1 == len(parts)

def _generator(self, X, y, batch_size, shuffle=True):
"""
        Yield (X_batch, y_batch, finished) tuples; for internal use only.

        :param X: input features
        :param y: labels
        :param batch_size: batch size
        :param shuffle: whether to shuffle the sample index
        :return: generator of (X_batch, y_batch, finished)
"""
number_of_batches = np.ceil(1. * X.shape[0] / batch_size)
counter = 0
finished = False
sample_index = np.arange(X.shape[0])
if shuffle:
for _ in range(int(shuffle)):
np.random.shuffle(sample_index)
assert X.shape[0] > 0
while True:
batch_index = sample_index[batch_size * counter: batch_size * (counter + 1)]
X_batch = X[batch_index]
y_batch = y[batch_index]
counter += 1
yield X_batch, y_batch, finished
if counter == number_of_batches:
counter = 0
finished = True

def batch_generator(self, batch_size=1000,
random_sample=False, shuffle_block=False):
"""
        Yield (ids, weights, labels) batches over all hdf file parts.

        :param batch_size: batch size
        :param random_sample: if True, shuffle samples within each file
        :param shuffle_block: shuffle file blocks at every round
        :return: generator of (X_id, X_va, y) numpy arrays
"""

for hdf_in, hdf_out, _ in self._iterate_hdf_files_(self._num_of_parts,
shuffle_block):
start = stop = None
X_all = pd.read_hdf(hdf_in, start=start, stop=stop).values
y_all = pd.read_hdf(hdf_out, start=start, stop=stop).values
data_gen = self._generator(X_all, y_all, batch_size,
shuffle=random_sample)
finished = False

while not finished:
X, y, finished = data_gen.__next__()
X_id = X[:, 0:self.max_length]
X_va = X[:, self.max_length:]
yield np.array(X_id.astype(dtype=np.int32)), \
np.array(X_va.astype(dtype=np.float32)), \
np.array(y.astype(dtype=np.float32))
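
As a quick illustration of the column split performed in `batch_generator` above: each H5 row stores `max_length` (39) integer feature ids followed by 39 float feature values, and the generator slices them apart. A minimal NumPy sketch with hypothetical data:

```python
# Sketch of the id/value split in batch_generator, assuming each row holds
# 39 feature ids followed by 39 feature values (max_length = 39).
import numpy as np

max_length = 39
X = np.zeros((2, 2 * max_length), dtype=np.float32)  # hypothetical 2-row batch
X_id = X[:, 0:max_length].astype(np.int32)           # feature ids
X_va = X[:, max_length:].astype(np.float32)          # feature values
assert X_id.shape == (2, 39) and X_va.shape == (2, 39)
```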


def _get_h5_dataset(directory, train_mode=True, epochs=1, batch_size=1000):
"""
Get dataset with h5 format.

Args:
directory (str): Dataset directory.
        train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
        epochs (int): Dataset epoch size (default=1).
        batch_size (int): Dataset batch size (default=1000).

Returns:
Dataset.
"""
data_para = {'batch_size': batch_size}
if train_mode:
data_para['random_sample'] = True
data_para['shuffle_block'] = True

h5_dataset = H5Dataset(data_path=directory, train_mode=train_mode)
numbers_of_batch = math.ceil(h5_dataset.data_size / batch_size)

def _iter_h5_data():
train_eval_gen = h5_dataset.batch_generator(**data_para)
for _ in range(0, numbers_of_batch, 1):
yield train_eval_gen.__next__()

ds = de.GeneratorDataset(_iter_h5_data, ["ids", "weights", "labels"])
ds.set_dataset_size(numbers_of_batch)
ds = ds.repeat(epochs)
return ds


def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
line_per_sample=1000, rank_size=None, rank_id=None):
"""
Get dataset with mindrecord format.

Args:
directory (str): Dataset directory.
        train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
        epochs (int): Dataset epoch size (default=1).
        batch_size (int): Dataset batch size (default=1000).
        line_per_sample (int): The number of samples per line (default=1000).
        rank_size (int): The number of devices, not necessary for a single device (default=None).
        rank_id (int): Id of the device, not necessary for a single device (default=None).

Returns:
Dataset.
"""
file_prefix_name = 'train_input_part.mindrecord' if train_mode else 'test_input_part.mindrecord'
file_suffix_name = '00' if train_mode else '0'
shuffle = train_mode

if rank_size is not None and rank_id is not None:
ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name),
columns_list=['feat_ids', 'feat_vals', 'label'],
num_shards=rank_size, shard_id=rank_id, shuffle=shuffle,
num_parallel_workers=8)
else:
ds = de.MindDataset(os.path.join(directory, file_prefix_name + file_suffix_name),
columns_list=['feat_ids', 'feat_vals', 'label'],
shuffle=shuffle, num_parallel_workers=8)
ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True)
ds = ds.map(operations=(lambda x, y, z: (np.array(x).flatten().reshape(batch_size, 39),
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8)
ds = ds.repeat(epochs)
return ds
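
The `batch`-then-`reshape` dance above relies on each stored MindRecord row packing `line_per_sample` samples; a short NumPy sketch of the arithmetic, assuming the defaults `batch_size = 1000` and `line_per_sample = 1000`:

```python
# Sketch of the pack/unpack arithmetic in _get_mindrecord_dataset: with the
# defaults, one stored row already holds a full batch, so batch(1) followed
# by a flatten/reshape restores the (batch_size, 39) layout.
import numpy as np

batch_size, line_per_sample, field_size = 1000, 1000, 39
rows_per_batch = int(batch_size / line_per_sample)        # -> 1, as in ds.batch
packed = np.zeros((rows_per_batch, line_per_sample * field_size), np.int32)
feat_ids = packed.flatten().reshape(batch_size, field_size)
assert feat_ids.shape == (1000, 39)
```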


def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
line_per_sample=1000, rank_size=None, rank_id=None):
"""
Get dataset with tfrecord format.

Args:
directory (str): Dataset directory.
        train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
        epochs (int): Dataset epoch size (default=1).
        batch_size (int): Dataset batch size (default=1000).
        line_per_sample (int): The number of samples per line (default=1000).
        rank_size (int): The number of devices, not necessary for a single device (default=None).
        rank_id (int): Id of the device, not necessary for a single device (default=None).

Returns:
Dataset.
"""
dataset_files = []
    file_prefix_name = 'train' if train_mode else 'test'
shuffle = train_mode
for (dir_path, _, filenames) in os.walk(directory):
for filename in filenames:
            if file_prefix_name in filename and 'tfrecord' in filename:
dataset_files.append(os.path.join(dir_path, filename))
schema = de.Schema()
schema.add_column('feat_ids', de_type=mstype.int32)
schema.add_column('feat_vals', de_type=mstype.float32)
schema.add_column('label', de_type=mstype.float32)
if rank_size is not None and rank_id is not None:
ds = de.TFRecordDataset(dataset_files=dataset_files, shuffle=shuffle,
schema=schema, num_parallel_workers=8,
num_shards=rank_size, shard_id=rank_id,
shard_equal_rows=True)
else:
ds = de.TFRecordDataset(dataset_files=dataset_files, shuffle=shuffle,
schema=schema, num_parallel_workers=8)
ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True)
ds = ds.map(operations=(lambda x, y, z: (
np.array(x).flatten().reshape(batch_size, 39),
np.array(y).flatten().reshape(batch_size, 39),
np.array(z).flatten().reshape(batch_size, 1))),
input_columns=['feat_ids', 'feat_vals', 'label'],
columns_order=['feat_ids', 'feat_vals', 'label'],
num_parallel_workers=8)
ds = ds.repeat(epochs)
return ds


def create_dataset(directory, train_mode=True, epochs=1, batch_size=1000,
data_type=DataType.TFRECORD, line_per_sample=1000,
rank_size=None, rank_id=None):
"""
Get dataset.

Args:
directory (str): Dataset directory.
        train_mode (bool): Whether the dataset is used for training or evaluation (default=True).
        epochs (int): Dataset epoch size (default=1).
        batch_size (int): Dataset batch size (default=1000).
        data_type (DataType): The type of dataset, one of H5, TFRECORD, MINDRECORD (default=TFRECORD).
        line_per_sample (int): The number of samples per line (default=1000).
        rank_size (int): The number of devices, not necessary for a single device (default=None).
        rank_id (int): Id of the device, not necessary for a single device (default=None).

Returns:
Dataset.
"""
if data_type == DataType.MINDRECORD:
return _get_mindrecord_dataset(directory, train_mode, epochs,
batch_size, line_per_sample,
rank_size, rank_id)
if data_type == DataType.TFRECORD:
return _get_tf_dataset(directory, train_mode, epochs, batch_size,
line_per_sample, rank_size=rank_size, rank_id=rank_id)

if rank_size is not None and rank_size > 1:
raise ValueError('Please use mindrecord dataset.')
return _get_h5_dataset(directory, train_mode, epochs, batch_size)

+ 370
- 0
example/deepfm_criteo/src/deepfm.py View File

@@ -0,0 +1,370 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" test_training """
import os

import numpy as np
from sklearn.metrics import roc_auc_score
import mindspore.common.dtype as mstype
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore.ops import operations as P
from mindspore.nn import Dropout
from mindspore.nn.optim import Adam
from mindspore.nn.metrics import Metric
from mindspore import nn, ParameterTuple, Parameter
from mindspore.common.initializer import Uniform, initializer, Normal
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig

from .callback import EvalCallBack, LossCallBack


np_type = np.float32
ms_type = mstype.float32


class AUCMetric(Metric):
"""AUC metric for DeepFM model."""
def __init__(self):
super(AUCMetric, self).__init__()
self.pred_probs = []
self.true_labels = []

def clear(self):
"""Clear the internal evaluation result."""
self.pred_probs = []
self.true_labels = []

def update(self, *inputs):
batch_predict = inputs[1].asnumpy()
batch_label = inputs[2].asnumpy()
self.pred_probs.extend(batch_predict.flatten().tolist())
self.true_labels.extend(batch_label.flatten().tolist())

def eval(self):
if len(self.true_labels) != len(self.pred_probs):
            raise RuntimeError('Length of true_labels does not match length of pred_probs.')
auc = roc_auc_score(self.true_labels, self.pred_probs)
return auc
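
The accumulation above amounts to concatenating every batch and scoring once; a minimal sketch of the equivalent computation with plain NumPy arrays standing in for MindSpore tensors:

```python
# Sketch of what AUCMetric computes: extend flat prediction/label lists per
# batch, then call sklearn's roc_auc_score once over the whole epoch.
import numpy as np
from sklearn.metrics import roc_auc_score

preds, labels = [], []
batches = [(np.array([0.9, 0.2]), np.array([1, 0])),   # hypothetical batches
           (np.array([0.6, 0.4]), np.array([1, 0]))]
for batch_pred, batch_label in batches:
    preds.extend(batch_pred.flatten().tolist())
    labels.extend(batch_label.flatten().tolist())
print(roc_auc_score(labels, preds))  # -> 1.0 for this toy data
```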


def init_method(method, shape, name, max_val=0.01):
"""
The method of init parameters.

Args:
        method (str): The method used to initialize the parameter.
        shape (list): The shape of the parameter.
        name (str): The name of the parameter.
        max_val (float): Max value of the parameter when 'random', 'uniform' or 'normal' is used.

Returns:
Parameter.
"""
if method in ['random', 'uniform']:
params = Parameter(initializer(Uniform(max_val), shape, ms_type), name=name)
elif method == "one":
params = Parameter(initializer("ones", shape, ms_type), name=name)
elif method == 'zero':
params = Parameter(initializer("zeros", shape, ms_type), name=name)
elif method == "normal":
params = Parameter(initializer(Normal(max_val), shape, ms_type), name=name)
return params


def init_var_dict(init_args, values):
"""
    Init parameters.

Args:
init_args (list): Define max and min value of parameters.
values (list): Define name, shape and init method of parameters.

Returns:
        dict, a dict of Parameters.
"""
var_map = {}
_, _max_val = init_args
for key, shape, init_flag in values:
if key not in var_map.keys():
if init_flag in ['random', 'uniform']:
var_map[key] = Parameter(initializer(Uniform(_max_val), shape, ms_type), name=key)
elif init_flag == "one":
var_map[key] = Parameter(initializer("ones", shape, ms_type), name=key)
elif init_flag == "zero":
var_map[key] = Parameter(initializer("zeros", shape, ms_type), name=key)
elif init_flag == 'normal':
var_map[key] = Parameter(initializer(Normal(_max_val), shape, ms_type), name=key)
return var_map


class DenseLayer(nn.Cell):
"""
    Dense layer for the deep part of the DeepFM model.
    Contains: activation, matmul, bias_add.

    Args:
        input_dim (int): The shape of the weight at axis 0.
        output_dim (int): The shape of the weight at axis 1, and the shape of the bias.
        weight_bias_init (list): Weight and bias init methods, each one of "random", "uniform", "one", "zero", "normal".
        act_str (str): Activation function, one of "relu", "sigmoid", "tanh".
        keep_prob (float): Keep rate of the dropout layer.
        scale_coef (float): Input scale coefficient.
"""
def __init__(self, input_dim, output_dim, weight_bias_init, act_str, keep_prob=0.9, scale_coef=1.0):
super(DenseLayer, self).__init__()
weight_init, bias_init = weight_bias_init
self.weight = init_method(weight_init, [input_dim, output_dim], name="weight")
self.bias = init_method(bias_init, [output_dim], name="bias")
self.act_func = self._init_activation(act_str)
self.matmul = P.MatMul(transpose_b=False)
self.bias_add = P.BiasAdd()
self.cast = P.Cast()
self.dropout = Dropout(keep_prob=keep_prob)
self.mul = P.Mul()
self.realDiv = P.RealDiv()
self.scale_coef = scale_coef

def _init_activation(self, act_str):
act_str = act_str.lower()
if act_str == "relu":
act_func = P.ReLU()
elif act_str == "sigmoid":
act_func = P.Sigmoid()
elif act_str == "tanh":
act_func = P.Tanh()
return act_func

def construct(self, x):
x = self.act_func(x)
if self.training:
x = self.dropout(x)
x = self.mul(x, self.scale_coef)
x = self.cast(x, mstype.float16)
weight = self.cast(self.weight, mstype.float16)
wx = self.matmul(x, weight)
wx = self.cast(wx, mstype.float32)
wx = self.realDiv(wx, self.scale_coef)
output = self.bias_add(wx, self.bias)
return output
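
`construct` above scales the input, runs the matmul in float16 (the fast path on Ascend), then casts back and undoes the scaling. A minimal NumPy sketch of that pattern, with illustrative shapes only:

```python
# NumPy sketch of DenseLayer's mixed-precision matmul: scale, compute in
# float16, cast back to float32, then divide the scale back out.
import numpy as np

scale_coef = 1.0                                  # default used above
x = np.random.randn(4, 8).astype(np.float32)      # hypothetical activations
w = np.random.randn(8, 2).astype(np.float32)      # hypothetical weights
wx = (x * scale_coef).astype(np.float16) @ w.astype(np.float16)
out = wx.astype(np.float32) / scale_coef          # back to float32
```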


class DeepFMModel(nn.Cell):
"""
From paper: "DeepFM: A Factorization-Machine based Neural Network for CTR Prediction"

Args:
batch_size (int): smaple_number of per step in training; (int, batch_size=128)
filed_size (int): input filed number, or called id_feature number; (int, filed_size=39)
vocab_size (int): id_feature vocab size, id dict size; (int, vocab_size=200000)
emb_dim (int): id embedding vector dim, id mapped to embedding vector; (int, emb_dim=100)
deep_layer_args (list): Deep Layer args, layer_dim_list, layer_activator;
(int, deep_layer_args=[[100, 100, 100], "relu"])
init_args (list): init args for Parameter init; (list, init_args=[min, max, seeds])
weight_bias_init (list): weight, bias init method for deep layers;
(list[str], weight_bias_init=['random', 'zero'])
keep_prob (float): if dropout_flag is True, keep_prob rate to keep connect; (float, keep_prob=0.8)
"""
def __init__(self, config):
super(DeepFMModel, self).__init__()

self.batch_size = config.batch_size
self.field_size = config.data_field_size
self.vocab_size = config.data_vocab_size
self.emb_dim = config.data_emb_dim
self.deep_layer_dims_list, self.deep_layer_act = config.deep_layer_args
self.init_args = config.init_args
self.weight_bias_init = config.weight_bias_init
self.keep_prob = config.keep_prob
init_acts = [('W_l2', [self.vocab_size, 1], 'normal'),
('V_l2', [self.vocab_size, self.emb_dim], 'normal'),
('b', [1], 'normal')]
var_map = init_var_dict(self.init_args, init_acts)
self.fm_w = var_map["W_l2"]
self.fm_b = var_map["b"]
self.embedding_table = var_map["V_l2"]
# Deep Layers
self.deep_input_dims = self.field_size * self.emb_dim + 1
self.all_dim_list = [self.deep_input_dims] + self.deep_layer_dims_list + [1]
self.dense_layer_1 = DenseLayer(self.all_dim_list[0], self.all_dim_list[1],
self.weight_bias_init, self.deep_layer_act, self.keep_prob)
self.dense_layer_2 = DenseLayer(self.all_dim_list[1], self.all_dim_list[2],
self.weight_bias_init, self.deep_layer_act, self.keep_prob)
self.dense_layer_3 = DenseLayer(self.all_dim_list[2], self.all_dim_list[3],
self.weight_bias_init, self.deep_layer_act, self.keep_prob)
self.dense_layer_4 = DenseLayer(self.all_dim_list[3], self.all_dim_list[4],
self.weight_bias_init, self.deep_layer_act, self.keep_prob)
# FM, linear Layers
self.Gatherv2 = P.GatherV2()
self.Mul = P.Mul()
self.ReduceSum = P.ReduceSum(keep_dims=False)
self.Reshape = P.Reshape()
self.Square = P.Square()
self.Shape = P.Shape()
self.Tile = P.Tile()
self.Concat = P.Concat(axis=1)
self.Cast = P.Cast()

def construct(self, id_hldr, wt_hldr):
"""
Args:
id_hldr: batch ids; [bs, field_size]
wt_hldr: batch weights; [bs, field_size]
"""

mask = self.Reshape(wt_hldr, (self.batch_size, self.field_size, 1))
# Linear layer
fm_id_weight = self.Gatherv2(self.fm_w, id_hldr, 0)
wx = self.Mul(fm_id_weight, mask)
linear_out = self.ReduceSum(wx, 1)
# FM layer
fm_id_embs = self.Gatherv2(self.embedding_table, id_hldr, 0)
vx = self.Mul(fm_id_embs, mask)
v1 = self.ReduceSum(vx, 1)
v1 = self.Square(v1)
v2 = self.Square(vx)
v2 = self.ReduceSum(v2, 1)
fm_out = 0.5 * self.ReduceSum(v1 - v2, 1)
fm_out = self.Reshape(fm_out, (-1, 1))
# Deep layer
b = self.Reshape(self.fm_b, (1, 1))
b = self.Tile(b, (self.batch_size, 1))
deep_in = self.Reshape(vx, (-1, self.field_size * self.emb_dim))
deep_in = self.Concat((deep_in, b))
deep_in = self.dense_layer_1(deep_in)
deep_in = self.dense_layer_2(deep_in)
deep_in = self.dense_layer_3(deep_in)
deep_out = self.dense_layer_4(deep_in)
out = linear_out + fm_out + deep_out
return out, fm_id_weight, fm_id_embs
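
For reference, the FM branch in `construct` (the `v1`/`v2` computation) applies the standard factorization-machine identity, which evaluates all pairwise feature interactions in linear rather than quadratic time; `v1` is the squared sum and `v2` the sum of squares:

```latex
\sum_{i<j} \langle v_i, v_j \rangle\, x_i x_j
  = \frac{1}{2} \sum_{f=1}^{k} \left[ \left( \sum_i v_{i,f}\, x_i \right)^{2}
  - \sum_i v_{i,f}^{2}\, x_i^{2} \right]
```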


class NetWithLossClass(nn.Cell):
"""
NetWithLossClass definition.
"""
def __init__(self, network, l2_coef=1e-6):
super(NetWithLossClass, self).__init__(auto_prefix=False)
self.loss = P.SigmoidCrossEntropyWithLogits()
self.network = network
self.l2_coef = l2_coef
self.Square = P.Square()
self.ReduceMean_false = P.ReduceMean(keep_dims=False)
self.ReduceSum_false = P.ReduceSum(keep_dims=False)

def construct(self, batch_ids, batch_wts, label):
predict, fm_id_weight, fm_id_embs = self.network(batch_ids, batch_wts)
log_loss = self.loss(predict, label)
mean_log_loss = self.ReduceMean_false(log_loss)
l2_loss_w = self.ReduceSum_false(self.Square(fm_id_weight))
l2_loss_v = self.ReduceSum_false(self.Square(fm_id_embs))
l2_loss_all = self.l2_coef * (l2_loss_v + l2_loss_w) * 0.5
loss = mean_log_loss + l2_loss_all
return loss


class TrainStepWrap(nn.Cell):
"""
TrainStepWrap definition
"""
def __init__(self, network, lr=5e-8, eps=1e-8, loss_scale=1000.0):
super(TrainStepWrap, self).__init__(auto_prefix=False)
self.network = network
self.network.set_train()
self.weights = ParameterTuple(network.trainable_params())
self.optimizer = Adam(self.weights, learning_rate=lr, eps=eps, loss_scale=loss_scale)
self.hyper_map = C.HyperMap()
self.grad = C.GradOperation('grad', get_by_list=True, sens_param=True)
self.sens = loss_scale

def construct(self, batch_ids, batch_wts, label):
weights = self.weights
loss = self.network(batch_ids, batch_wts, label)
        sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens)
return F.depend(loss, self.optimizer(grads))
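
A note on the loss scaling here (my reading of the code, not an official specification): `sens` seeds back-propagation with `loss_scale`, so the raw gradients come back multiplied by that factor, and constructing `Adam` with the same `loss_scale` undoes it before the update:

```latex
g_{\text{raw}} = \nabla_\theta (s \cdot L) = s\, \nabla_\theta L,
\qquad
\theta \leftarrow \operatorname{Adam}\bigl(\theta,\ g_{\text{raw}} / s\bigr),
\qquad s = \texttt{loss\_scale}
```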


class PredictWithSigmoid(nn.Cell):
"""
Eval model with sigmoid.
"""
def __init__(self, network):
super(PredictWithSigmoid, self).__init__(auto_prefix=False)
self.network = network
self.sigmoid = P.Sigmoid()

def construct(self, batch_ids, batch_wts, labels):
        logits, _, _ = self.network(batch_ids, batch_wts)
pred_probs = self.sigmoid(logits)

return logits, pred_probs, labels


class ModelBuilder:
"""
Model builder for DeepFM.

Args:
model_config (ModelConfig): Model configuration.
train_config (TrainConfig): Train configuration.
"""
def __init__(self, model_config, train_config):
self.model_config = model_config
self.train_config = train_config

def get_callback_list(self, model=None, eval_dataset=None):
"""
Get callbacks which contains checkpoint callback, eval callback and loss callback.

Args:
            model (Cell): The model to attach callbacks to (default=None).
eval_dataset (Dataset): Dataset for eval (default=None).
"""
callback_list = []
if self.train_config.save_checkpoint:
config_ck = CheckpointConfig(save_checkpoint_steps=self.train_config.save_checkpoint_steps,
keep_checkpoint_max=self.train_config.keep_checkpoint_max)
ckpt_cb = ModelCheckpoint(prefix=self.train_config.ckpt_file_name_prefix,
directory=self.train_config.output_path,
config=config_ck)
callback_list.append(ckpt_cb)
if self.train_config.eval_callback:
if model is None:
raise RuntimeError("train_config.eval_callback is {}; get_callback_list() args model is {}".format(
self.train_config.eval_callback, model))
if eval_dataset is None:
raise RuntimeError("train_config.eval_callback is {}; get_callback_list() "
"args eval_dataset is {}".format(self.train_config.eval_callback, eval_dataset))
auc_metric = AUCMetric()
eval_callback = EvalCallBack(model, eval_dataset, auc_metric,
eval_file_path=os.path.join(self.train_config.output_path,
self.train_config.eval_file_name))
callback_list.append(eval_callback)
if self.train_config.loss_callback:
loss_callback = LossCallBack(loss_file_path=os.path.join(self.train_config.output_path,
self.train_config.loss_file_name))
callback_list.append(loss_callback)
if callback_list:
return callback_list
return None

def get_train_eval_net(self):
deepfm_net = DeepFMModel(self.model_config)
loss_net = NetWithLossClass(deepfm_net, l2_coef=self.train_config.l2_coef)
train_net = TrainStepWrap(loss_net, lr=self.train_config.learning_rate,
eps=self.train_config.epsilon,
loss_scale=self.train_config.loss_scale)
eval_net = PredictWithSigmoid(deepfm_net)
return train_net, eval_net

+ 91
- 0
example/deepfm_criteo/train.py View File

@@ -0,0 +1,91 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_criteo."""
import os
import sys
import argparse

from mindspore import context, ParallelMode
from mindspore.communication.management import init
from mindspore.train.model import Model
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, TimeMonitor

from src.deepfm import ModelBuilder, AUCMetric
from src.config import DataConfig, ModelConfig, TrainConfig
from src.dataset import create_dataset, DataType
from src.callback import EvalCallBack, LossCallBack

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
parser = argparse.ArgumentParser(description='CTR Prediction')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--ckpt_path', type=str, default=None, help='Checkpoint path')
parser.add_argument('--eval_file_name', type=str, default="./auc.log", help='eval file path')
parser.add_argument('--loss_file_name', type=str, default="./loss.log", help='loss file path')
# Note: argparse's type=bool treats any non-empty string as True, so parse explicitly.
parser.add_argument('--do_eval', type=lambda x: str(x).lower() == 'true', default=True,
                    help='Do evaluation or not.')

args_opt, _ = parser.parse_known_args()
device_id = int(os.getenv('DEVICE_ID', '0'))
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)


if __name__ == '__main__':
data_config = DataConfig()
model_config = ModelConfig()
train_config = TrainConfig()

rank_size = int(os.environ.get("RANK_SIZE", 1))
if rank_size > 1:
context.reset_auto_parallel_context()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
init()
rank_id = int(os.environ.get('RANK_ID'))
else:
rank_size = None
rank_id = None

ds_train = create_dataset(args_opt.dataset_path,
train_mode=True,
epochs=train_config.train_epochs,
batch_size=train_config.batch_size,
data_type=DataType(data_config.data_format),
rank_size=rank_size,
rank_id=rank_id)

model_builder = ModelBuilder(ModelConfig, TrainConfig)
train_net, eval_net = model_builder.get_train_eval_net()
auc_metric = AUCMetric()
model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})

time_callback = TimeMonitor(data_size=ds_train.get_dataset_size())
loss_callback = LossCallBack(loss_file_path=args_opt.loss_file_name)
callback_list = [time_callback, loss_callback]

if train_config.save_checkpoint:
config_ck = CheckpointConfig(save_checkpoint_steps=train_config.save_checkpoint_steps,
keep_checkpoint_max=train_config.keep_checkpoint_max)
ckpt_cb = ModelCheckpoint(prefix=train_config.ckpt_file_name_prefix,
directory=args_opt.ckpt_path,
config=config_ck)
callback_list.append(ckpt_cb)

if args_opt.do_eval:
ds_eval = create_dataset(args_opt.dataset_path, train_mode=False,
epochs=train_config.train_epochs,
batch_size=train_config.batch_size,
data_type=DataType(data_config.data_format))
eval_callback = EvalCallBack(model, ds_eval, auc_metric,
eval_file_path=args_opt.eval_file_name)
callback_list.append(eval_callback)
model.train(train_config.train_epochs, ds_train, callbacks=callback_list)

+ 66
- 0
example/deeplabv3_voc2012/README.md View File

@@ -0,0 +1,66 @@
# Deeplab-V3 Example

## Description
This is an example of training DeepLabv3 with the PASCAL VOC 2012 dataset in MindSpore.

## Requirements
- Install [MindSpore](https://www.mindspore.cn/install/en).
- Download the VOC 2012 dataset for training.

> Note:
If you are running a fine-tuning or evaluation task, prepare the corresponding checkpoint file.


## Running the Example
### Training
- Set options in config.py.
- Run `run_standalone_train.sh` for non-distributed training.
``` bash
sh scripts/run_standalone_train.sh DEVICE_ID EPOCH_SIZE DATA_DIR
```
- Run `run_distribute_train.sh` for distributed training.
``` bash
sh scripts/run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH
```
### Evaluation
Set options in evaluation_config.py. Make sure the 'data_file' and 'finetune_ckpt' are set to your own paths.
- Run `run_eval.sh` for evaluation.
``` bash
sh scripts/run_eval.sh DEVICE_ID DATA_DIR
```

## Options and Parameters
It contains the parameters of the Deeplab-V3 model and options for training, which are set in config.py.

### Options:
```
config.py:
learning_rate Learning rate, default is 0.0014.
weight_decay Weight decay, default is 5e-5.
momentum Momentum, default is 0.97.
crop_size Image crop size [height, width] during training, default is 513.
eval_scales The scales to resize images for evaluation, default is [0.5, 0.75, 1.0, 1.25, 1.5, 1.75].
output_stride The ratio of input to output spatial resolution, default is 16.
ignore_label Ignore label value, default is 255.
seg_num_classes Number of semantic classes, including the background class (if it exists):
foreground classes + 1 background class in the PASCAL VOC 2012 dataset, default is 21.
fine_tune_batch_norm Fine tune the batch norm parameters or not, default is False.
atrous_rates Atrous rates for atrous spatial pyramid pooling, default is None.
decoder_output_stride The ratio of input to output spatial resolution when employing decoder
to refine segmentation results, default is None.
image_pyramid Input scales for multi-scale feature extraction, default is None.
```


### Parameters:
```
Parameters for dataset and network:
distribute Run distributed training, default is false.
epoch_size Epoch size, default is 6.
batch_size batch size of input dataset: N, default is 2.
data_url Train/Evaluation data url, required.
checkpoint_url Checkpoint path, default is None.
enable_save_ckpt Enable save checkpoint, default is true.
save_checkpoint_steps Save checkpoint steps, default is 1000.
save_checkpoint_num Save checkpoint numbers, default is 1.
```

+ 53
- 0
example/deeplabv3_voc2012/evaluation.py View File

@@ -0,0 +1,53 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""evaluation."""
import argparse
from mindspore import context
from mindspore import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.md_dataset import create_dataset
from src.losses import OhemLoss
from src.miou_precision import MiouPrecision
from src.deeplabv3 import deeplabv3_resnet50
from src.config import config


parser = argparse.ArgumentParser(description="Deeplabv3 evaluation")
parser.add_argument('--epoch_size', type=int, default=2, help='Epoch size.')
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
parser.add_argument('--batch_size', type=int, default=2, help='Batch size.')
parser.add_argument('--data_url', required=True, default=None, help='Evaluation data url')
parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path')

args_opt = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
print(args_opt)


if __name__ == "__main__":
args_opt.crop_size = config.crop_size
args_opt.base_size = config.crop_size
eval_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="eval")
net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size],
infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates,
decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride,
fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid)
param_dict = load_checkpoint(args_opt.checkpoint_url)
load_param_into_net(net, param_dict)
mIou = MiouPrecision(config.seg_num_classes)
metrics = {'mIou': mIou}
loss = OhemLoss(config.seg_num_classes, config.ignore_label)
model = Model(net, loss, metrics=metrics)
model.eval(eval_dataset)

+ 66
- 0
example/deeplabv3_voc2012/scripts/run_distribute_train.sh View File

@@ -0,0 +1,66 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "bash run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATA_DIR MINDSPORE_HCCL_CONFIG_PATH"
echo "for example: bash run_distribute_train.sh 8 40 /path/zh-wiki/ /path/hccl.json"
echo "It is better to use absolute path."
echo "=============================================================================================================="
EPOCH_SIZE=$2
DATA_DIR=$3
export MINDSPORE_HCCL_CONFIG_PATH=$4
export RANK_TABLE_FILE=$4
export RANK_SIZE=$1
cores=`cat /proc/cpuinfo|grep "processor" |wc -l`
echo "the number of logical core" $cores
avg_core_per_rank=`expr $cores \/ $RANK_SIZE`
core_gap=`expr $avg_core_per_rank \- 1`
echo "avg_core_per_rank" $avg_core_per_rank
echo "core_gap" $core_gap
for((i=0;i<RANK_SIZE;i++))
do
start=`expr $i \* $avg_core_per_rank`
export DEVICE_ID=$i
export RANK_ID=$i
export DEPLOY_MODE=0
export GE_USE_STATIC_MEMORY=1
end=`expr $start \+ $core_gap`
cmdopt=$start"-"$end
rm -rf LOG$i
mkdir ./LOG$i
cp *.py ./LOG$i
cd ./LOG$i || exit
echo "start training for rank $i, device $DEVICE_ID"
mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
env > env.log
taskset -c $cmdopt python ../train.py \
--distribute="true" \
--epoch_size=$EPOCH_SIZE \
--device_id=$DEVICE_ID \
--enable_save_ckpt="true" \
--checkpoint_url="" \
--save_checkpoint_steps=10000 \
--save_checkpoint_num=1 \
--data_url=$DATA_DIR > log.txt 2>&1 &
cd ../
done

+ 32
- 0
example/deeplabv3_voc2012/scripts/run_eval.sh View File

@@ -0,0 +1,32 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "bash run_eval.sh DEVICE_ID DATA_DIR"
echo "for example: bash run_eval.sh /path/zh-wiki/ "
echo "=============================================================================================================="
DEVICE_ID=$1
DATA_DIR=$2
mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python evaluation.py \
--device_id=$DEVICE_ID \
--checkpoint_url="" \
--data_url=$DATA_DIR > log.txt 2>&1 &

+ 38
- 0
example/deeplabv3_voc2012/scripts/run_standalone_train.sh View File

@@ -0,0 +1,38 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "=============================================================================================================="
echo "Please run the scipt as: "
echo "bash run_standalone_pretrain.sh DEVICE_ID EPOCH_SIZE DATA_DIR"
echo "for example: bash run_standalone_train.sh 0 40 /path/zh-wiki/ "
echo "=============================================================================================================="
DEVICE_ID=$1
EPOCH_SIZE=$2
DATA_DIR=$3
mkdir -p ms_log
CUR_DIR=`pwd`
export GLOG_log_dir=${CUR_DIR}/ms_log
export GLOG_logtostderr=0
python train.py \
--distribute="false" \
--epoch_size=$EPOCH_SIZE \
--device_id=$DEVICE_ID \
--enable_save_ckpt="true" \
--checkpoint_url="" \
--save_checkpoint_steps=10000 \
--save_checkpoint_num=1 \
--data_url=$DATA_DIR > log.txt 2>&1 &

+ 23
- 0
example/deeplabv3_voc2012/src/__init__.py View File

@@ -0,0 +1,23 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Init DeepLabv3."""
from .deeplabv3 import ASPP, DeepLabV3, deeplabv3_resnet50
from .backbone import *

__all__ = [
"ASPP", "DeepLabV3", "deeplabv3_resnet50"
]

__all__.extend(backbone.__all__)

+ 21
- 0
example/deeplabv3_voc2012/src/backbone/__init__.py View File

@@ -0,0 +1,21 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Init backbone."""
from .resnet_deeplab import Subsample, DepthwiseConv2dNative, SpaceToBatch, BatchToSpace, ResNetV1, \
RootBlockBeta, resnet50_dl

__all__ = [
"Subsample", "DepthwiseConv2dNative", "SpaceToBatch", "BatchToSpace", "ResNetV1", "RootBlockBeta", "resnet50_dl"
]

+ 577
- 0
example/deeplabv3_voc2012/src/backbone/resnet_deeplab.py View File

@@ -0,0 +1,577 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ResNet based DeepLab."""
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.initializer import initializer
from mindspore._checkparam import twice
from mindspore.common.parameter import Parameter


def _conv_bn_relu(in_channel,
out_channel,
ksize,
stride=1,
padding=0,
dilation=1,
pad_mode="pad",
use_batch_statistics=False):
"""Get a conv2d -> batchnorm -> relu layer"""
return nn.SequentialCell(
[nn.Conv2d(in_channel,
out_channel,
kernel_size=ksize,
stride=stride,
padding=padding,
dilation=dilation,
pad_mode=pad_mode),
nn.BatchNorm2d(out_channel, use_batch_statistics=use_batch_statistics),
nn.ReLU()]
)


def _deep_conv_bn_relu(in_channel,
channel_multiplier,
ksize,
stride=1,
padding=0,
dilation=1,
pad_mode="pad",
use_batch_statistics=False):
"""Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer"""
return nn.SequentialCell(
[DepthwiseConv2dNative(in_channel,
channel_multiplier,
kernel_size=ksize,
stride=stride,
padding=padding,
dilation=dilation,
pad_mode=pad_mode),
nn.BatchNorm2d(channel_multiplier * in_channel, use_batch_statistics=use_batch_statistics),
nn.ReLU()]
)


def _stob_deep_conv_btos_bn_relu(in_channel,
channel_multiplier,
ksize,
space_to_batch_block_shape,
batch_to_space_block_shape,
paddings,
crops,
stride=1,
padding=0,
dilation=1,
pad_mode="pad",
use_batch_statistics=False):
"""Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer"""
return nn.SequentialCell(
[SpaceToBatch(space_to_batch_block_shape, paddings),
DepthwiseConv2dNative(in_channel,
channel_multiplier,
kernel_size=ksize,
stride=stride,
padding=padding,
dilation=dilation,
pad_mode=pad_mode),
BatchToSpace(batch_to_space_block_shape, crops),
nn.BatchNorm2d(channel_multiplier * in_channel, use_batch_statistics=use_batch_statistics),
nn.ReLU()]
)


def _stob_conv_btos_bn_relu(in_channel,
out_channel,
ksize,
space_to_batch_block_shape,
batch_to_space_block_shape,
paddings,
crops,
stride=1,
padding=0,
dilation=1,
pad_mode="pad",
use_batch_statistics=False):
"""Get a spacetobatch -> conv2d -> batchnorm -> relu -> batchtospace layer"""
return nn.SequentialCell([SpaceToBatch(space_to_batch_block_shape, paddings),
nn.Conv2d(in_channel,
out_channel,
kernel_size=ksize,
stride=stride,
padding=padding,
dilation=dilation,
pad_mode=pad_mode),
BatchToSpace(batch_to_space_block_shape, crops),
nn.BatchNorm2d(out_channel, use_batch_statistics=use_batch_statistics),
nn.ReLU()]
)


def _make_layer(block,
in_channels,
out_channels,
num_blocks,
stride=1,
rate=1,
multi_grads=None,
output_stride=None,
g_current_stride=2,
g_rate=1):
"""Make layer for DeepLab-ResNet network."""
if multi_grads is None:
multi_grads = [1] * num_blocks
# (stride == 2, num_blocks == 4 --> strides == [1, 1, 1, 2])
strides = [1] * (num_blocks - 1) + [stride]
blocks = []
if output_stride is not None:
if output_stride % 4 != 0:
raise ValueError('The output_stride needs to be a multiple of 4.')
output_stride //= 4
for i_stride, _ in enumerate(strides):
if output_stride is not None and g_current_stride > output_stride:
raise ValueError('The target output_stride cannot be reached.')
if output_stride is not None and g_current_stride == output_stride:
b_rate = g_rate
b_stride = 1
g_rate *= strides[i_stride]
else:
b_rate = rate
b_stride = strides[i_stride]
g_current_stride *= strides[i_stride]
blocks.append(block(in_channels=in_channels,
out_channels=out_channels,
stride=b_stride,
rate=b_rate,
multi_grad=multi_grads[i_stride]))
in_channels = out_channels
layer = nn.SequentialCell(blocks)
return layer, g_current_stride, g_rate


class Subsample(nn.Cell):
"""
Subsample for DeepLab-ResNet.
Args:
factor (int): Sample factor.
Returns:
Tensor, the sub sampled tensor.
Examples:
>>> Subsample(2)
"""
def __init__(self, factor):
super(Subsample, self).__init__()
self.factor = factor
self.pool = nn.MaxPool2d(kernel_size=1,
stride=factor)

def construct(self, x):
if self.factor == 1:
return x
return self.pool(x)


class SpaceToBatch(nn.Cell):
def __init__(self, block_shape, paddings):
super(SpaceToBatch, self).__init__()
self.space_to_batch = P.SpaceToBatch(block_shape, paddings)
self.bs = block_shape
self.pd = paddings

def construct(self, x):
return self.space_to_batch(x)


class BatchToSpace(nn.Cell):
def __init__(self, block_shape, crops):
super(BatchToSpace, self).__init__()
self.batch_to_space = P.BatchToSpace(block_shape, crops)
self.bs = block_shape
self.cr = crops

def construct(self, x):
return self.batch_to_space(x)


class _DepthwiseConv2dNative(nn.Cell):
"""Depthwise Conv2D Cell."""
def __init__(self,
in_channels,
channel_multiplier,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
weight_init):
super(_DepthwiseConv2dNative, self).__init__()
self.in_channels = in_channels
self.channel_multiplier = channel_multiplier
self.kernel_size = kernel_size
self.stride = stride
self.pad_mode = pad_mode
self.padding = padding
self.dilation = dilation
self.group = group
        if not (isinstance(in_channels, int) and in_channels > 0):
            raise ValueError('Attr \'in_channels\' of \'DepthwiseConv2D\' Op passed '
                             + str(in_channels) + ', should be an int greater than 0.')
        if (not isinstance(kernel_size, tuple)) or len(kernel_size) != 2 or \
                (not isinstance(kernel_size[0], int)) or (not isinstance(kernel_size[1], int)) or \
                kernel_size[0] < 1 or kernel_size[1] < 1:
            raise ValueError('Attr \'kernel_size\' of \'DepthwiseConv2D\' Op passed '
                             + str(self.kernel_size) + ', should be an int or a tuple of two ints, each >= 1.')
self.weight = Parameter(initializer(weight_init, [1, in_channels // group, *kernel_size]),
name='weight')

def construct(self, *inputs):
"""Must be overridden by all subclasses."""
raise NotImplementedError


class DepthwiseConv2dNative(_DepthwiseConv2dNative):
"""Depthwise Conv2D Cell."""
def __init__(self,
in_channels,
channel_multiplier,
kernel_size,
stride=1,
pad_mode='same',
padding=0,
dilation=1,
group=1,
weight_init='normal'):
kernel_size = twice(kernel_size)
super(DepthwiseConv2dNative, self).__init__(
in_channels,
channel_multiplier,
kernel_size,
stride,
pad_mode,
padding,
dilation,
group,
weight_init)
self.depthwise_conv2d_native = P.DepthwiseConv2dNative(channel_multiplier=self.channel_multiplier,
kernel_size=self.kernel_size,
mode=3,
pad_mode=self.pad_mode,
pad=self.padding,
stride=self.stride,
dilation=self.dilation,
group=self.group)

def set_strategy(self, strategy):
self.depthwise_conv2d_native.set_strategy(strategy)
return self

def construct(self, x):
return self.depthwise_conv2d_native(x, self.weight)


class BottleneckV1(nn.Cell):
"""
ResNet V1 BottleneckV1 block definition.
Args:
in_channels (int): Input channel.
out_channels (int): Output channel.
stride (int): Stride size for the initial convolutional layer. Default: 1.
rate (int): Rate for convolution. Default: 1.
multi_grad (int): Employ a rate within network. Default: 1.
Returns:
Tensor, the ResNet unit's output.
Examples:
>>> BottleneckV1(3,256,stride=2)
"""
def __init__(self,
in_channels,
out_channels,
stride=1,
use_batch_statistics=False,
use_batch_to_stob_and_btos=False):
super(BottleneckV1, self).__init__()
expansion = 4
mid_channels = out_channels // expansion
self.conv_bn1 = _conv_bn_relu(in_channels,
mid_channels,
ksize=1,
stride=1,
use_batch_statistics=use_batch_statistics)
self.conv_bn2 = _conv_bn_relu(mid_channels,
mid_channels,
ksize=3,
stride=stride,
padding=1,
dilation=1,
use_batch_statistics=use_batch_statistics)
if use_batch_to_stob_and_btos:
self.conv_bn2 = _stob_conv_btos_bn_relu(mid_channels,
mid_channels,
ksize=3,
stride=stride,
padding=0,
dilation=1,
space_to_batch_block_shape=2,
batch_to_space_block_shape=2,
paddings=[[2, 3], [2, 3]],
crops=[[0, 1], [0, 1]],
pad_mode="valid",
use_batch_statistics=use_batch_statistics)

self.conv3 = nn.Conv2d(mid_channels,
out_channels,
kernel_size=1,
stride=1)
self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)
if in_channels != out_channels:
conv = nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=stride)
bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)
self.downsample = nn.SequentialCell([conv, bn])
else:
self.downsample = Subsample(stride)
self.add = P.TensorAdd()
self.relu = nn.ReLU()
self.Reshape = P.Reshape()

def construct(self, x):
out = self.conv_bn1(x)
out = self.conv_bn2(out)
out = self.bn3(self.conv3(out))
out = self.add(out, self.downsample(x))
out = self.relu(out)
return out


class BottleneckV2(nn.Cell):
"""
ResNet V1 Bottleneck variant V2 block definition.
Args:
in_channels (int): Input channel.
out_channels (int): Output channel.
stride (int): Stride size for the initial convolutional layer. Default: 1.
Returns:
Tensor, the ResNet unit's output.
Examples:
>>> BottleneckV2(3,256,stride=2)
"""
def __init__(self,
in_channels,
out_channels,
stride=1,
use_batch_statistics=False,
use_batch_to_stob_and_btos=False,
dilation=1):
super(BottleneckV2, self).__init__()
expansion = 4
mid_channels = out_channels // expansion
self.conv_bn1 = _conv_bn_relu(in_channels,
mid_channels,
ksize=1,
stride=1,
use_batch_statistics=use_batch_statistics)
self.conv_bn2 = _conv_bn_relu(mid_channels,
mid_channels,
ksize=3,
stride=stride,
padding=1,
dilation=dilation,
use_batch_statistics=use_batch_statistics)
if use_batch_to_stob_and_btos:
self.conv_bn2 = _stob_conv_btos_bn_relu(mid_channels,
mid_channels,
ksize=3,
stride=stride,
padding=0,
dilation=1,
space_to_batch_block_shape=2,
batch_to_space_block_shape=2,
paddings=[[2, 3], [2, 3]],
crops=[[0, 1], [0, 1]],
pad_mode="valid",
use_batch_statistics=use_batch_statistics)
self.conv3 = nn.Conv2d(mid_channels,
out_channels,
kernel_size=1,
stride=1)
self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)
if in_channels != out_channels:
conv = nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=stride)
bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)
self.downsample = nn.SequentialCell([conv, bn])
else:
self.downsample = Subsample(stride)
self.add = P.TensorAdd()
self.relu = nn.ReLU()

def construct(self, x):
out = self.conv_bn1(x)
out = self.conv_bn2(out)
out = self.bn3(self.conv3(out))
out = self.add(out, x)
out = self.relu(out)
return out


class BottleneckV3(nn.Cell):
"""
ResNet V1 Bottleneck variant V3 block definition.
Args:
in_channels (int): Input channel.
out_channels (int): Output channel.
stride (int): Stride size for the initial convolutional layer. Default: 1.
Returns:
Tensor, the ResNet unit's output.
Examples:
>>> BottleneckV3(3,256,stride=2)
"""
def __init__(self,
in_channels,
out_channels,
stride=1,
use_batch_statistics=False):
super(BottleneckV3, self).__init__()
expansion = 4
mid_channels = out_channels // expansion
self.conv_bn1 = _conv_bn_relu(in_channels,
mid_channels,
ksize=1,
stride=1,
use_batch_statistics=use_batch_statistics)
self.conv_bn2 = _conv_bn_relu(mid_channels,
mid_channels,
ksize=3,
stride=stride,
padding=1,
dilation=1,
use_batch_statistics=use_batch_statistics)
self.conv3 = nn.Conv2d(mid_channels,
out_channels,
kernel_size=1,
stride=1)
self.bn3 = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)

if in_channels != out_channels:
conv = nn.Conv2d(in_channels,
out_channels,
kernel_size=1,
stride=stride)
bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics)
self.downsample = nn.SequentialCell([conv, bn])
else:
self.downsample = Subsample(stride)
self.add = P.TensorAdd()
self.relu = nn.ReLU()

def construct(self, x):
out = self.conv_bn1(x)
out = self.conv_bn2(out)
out = self.bn3(self.conv3(out))
out = self.add(out, self.downsample(x))
out = self.relu(out)
return out


class ResNetV1(nn.Cell):
"""
ResNet V1 for DeepLab.
Args:
fine_tune_batch_norm (bool): Fine tune the batch norm parameters or not. Default: False.
Returns:
Tuple, output tensor tuple, (c2,c5).
Examples:
>>> ResNetV1(False)
"""
def __init__(self, fine_tune_batch_norm=False):
super(ResNetV1, self).__init__()
self.layer_root = nn.SequentialCell(
[RootBlockBeta(fine_tune_batch_norm),
nn.MaxPool2d(kernel_size=(3, 3),
stride=(2, 2),
pad_mode='same')])
self.layer1_1 = BottleneckV1(128, 256, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer1_2 = BottleneckV2(256, 256, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer1_3 = BottleneckV3(256, 256, stride=2, use_batch_statistics=fine_tune_batch_norm)
self.layer2_1 = BottleneckV1(256, 512, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer2_2 = BottleneckV2(512, 512, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer2_3 = BottleneckV2(512, 512, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer2_4 = BottleneckV3(512, 512, stride=2, use_batch_statistics=fine_tune_batch_norm)
self.layer3_1 = BottleneckV1(512, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer3_2 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer3_3 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer3_4 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer3_5 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)
self.layer3_6 = BottleneckV2(1024, 1024, stride=1, use_batch_statistics=fine_tune_batch_norm)

self.layer4_1 = BottleneckV1(1024, 2048, stride=1, use_batch_to_stob_and_btos=True,
use_batch_statistics=fine_tune_batch_norm)
self.layer4_2 = BottleneckV2(2048, 2048, stride=1, use_batch_to_stob_and_btos=True,
use_batch_statistics=fine_tune_batch_norm)
self.layer4_3 = BottleneckV2(2048, 2048, stride=1, use_batch_to_stob_and_btos=True,
use_batch_statistics=fine_tune_batch_norm)

def construct(self, x):
x = self.layer_root(x)
x = self.layer1_1(x)
c2 = self.layer1_2(x)
x = self.layer1_3(c2)
x = self.layer2_1(x)
x = self.layer2_2(x)
x = self.layer2_3(x)
x = self.layer2_4(x)
x = self.layer3_1(x)
x = self.layer3_2(x)
x = self.layer3_3(x)
x = self.layer3_4(x)
x = self.layer3_5(x)
x = self.layer3_6(x)

x = self.layer4_1(x)
x = self.layer4_2(x)
c5 = self.layer4_3(x)
return c2, c5


class RootBlockBeta(nn.Cell):
"""
ResNet V1 beta root block definition.
Returns:
Tensor, the block unit's output.
Examples:
>>> RootBlockBeta()
"""
def __init__(self, fine_tune_batch_norm=False):
super(RootBlockBeta, self).__init__()
self.conv1 = _conv_bn_relu(3, 64, ksize=3, stride=2, padding=0, pad_mode="valid",
use_batch_statistics=fine_tune_batch_norm)
self.conv2 = _conv_bn_relu(64, 64, ksize=3, stride=1, padding=0, pad_mode="same",
use_batch_statistics=fine_tune_batch_norm)
self.conv3 = _conv_bn_relu(64, 128, ksize=3, stride=1, padding=0, pad_mode="same",
use_batch_statistics=fine_tune_batch_norm)

def construct(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
return x


def resnet50_dl(fine_tune_batch_norm=False):
return ResNetV1(fine_tune_batch_norm)
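
# A minimal smoke-test sketch of the backbone's two-output contract (c2, c5),
# assuming a configured MindSpore environment. SingleDeepLabV3 (in deeplabv3.py
# below) pads the 513x513 crop by one pixel on each side before calling the
# backbone, so we feed 515x515 here.
if __name__ == "__main__":
    import numpy as np
    from mindspore import Tensor

    net = resnet50_dl(fine_tune_batch_norm=False)
    x = Tensor(np.random.randn(1, 3, 515, 515).astype(np.float32))
    c2, c5 = net(x)  # low-level feature (c2) and final feature map (c5)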

+ 33
- 0
example/deeplabv3_voc2012/src/config.py View File

@@ -0,0 +1,33 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Network config settings, used in train.py and evaluation.py.
"""
from easydict import EasyDict as ed

config = ed({
"learning_rate": 0.0014,
"weight_decay": 0.00005,
"momentum": 0.97,
"crop_size": 513,
"eval_scales": [0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
"atrous_rates": None,
"image_pyramid": None,
"output_stride": 16,
"fine_tune_batch_norm": False,
"ignore_label": 255,
"decoder_output_stride": None,
"seg_num_classes": 21
})
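
# A minimal sanity check of the fields above; other scripts import this file
# as `from src.config import config` (see train.py below).
if __name__ == "__main__":
    print(config.learning_rate)       # 0.0014
    print(config["seg_num_classes"])  # EasyDict also supports item access: 21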

+ 457
- 0
example/deeplabv3_voc2012/src/deeplabv3.py View File

@@ -0,0 +1,457 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""DeepLabv3."""

import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from .backbone.resnet_deeplab import _conv_bn_relu, resnet50_dl, _deep_conv_bn_relu, \
DepthwiseConv2dNative, SpaceToBatch, BatchToSpace


class ASPPSampleBlock(nn.Cell):
"""ASPP sample block."""
def __init__(self, feature_shape, scale_size, output_stride):
super(ASPPSampleBlock, self).__init__()
sample_h = (feature_shape[0] * scale_size + 1) / output_stride + 1
sample_w = (feature_shape[1] * scale_size + 1) / output_stride + 1
self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True)

def construct(self, x):
return self.sample(x)
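
# Worked example of the resize target computed in __init__ above: with the
# 513 crop_size from config.py, scale_size 1.0 and output_stride 16,
# sample_h = (513 * 1.0 + 1) / 16 + 1 = 33.125, so int() gives a 33x33 output.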


class ASPP(nn.Cell):
"""
ASPP model for DeepLabv3.

Args:
channel (int): Input channel.
depth (int): Output channel.
feature_shape (list): The shape of feature,[h,w].
scale_sizes (list): Input scales for multi-scale feature extraction.
atrous_rates (list): Atrous rates for atrous spatial pyramid pooling.
output_stride (int): The ratio of input to output spatial resolution.
fine_tune_batch_norm (bool): Fine tune the batch norm parameters or not.

Returns:
Tensor, output tensor.

Examples:
>>> ASPP(2048, 256, [14, 14], [1.0], [6], 16)
"""
def __init__(self, channel, depth, feature_shape, scale_sizes,
atrous_rates, output_stride, fine_tune_batch_norm=False):
super(ASPP, self).__init__()
self.aspp0 = _conv_bn_relu(channel,
depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.atrous_rates = []
if atrous_rates is not None:
self.atrous_rates = atrous_rates
self.aspp_pointwise = _conv_bn_relu(channel,
depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.aspp_depth_depthwiseconv = DepthwiseConv2dNative(channel,
channel_multiplier=1,
kernel_size=3,
stride=1,
dilation=1,
pad_mode="valid")
self.aspp_depth_bn = nn.BatchNorm2d(1 * channel, use_batch_statistics=fine_tune_batch_norm)
self.aspp_depth_relu = nn.ReLU()
self.aspp_depths = []
self.aspp_depth_spacetobatchs = []
self.aspp_depth_batchtospaces = []

for scale_size in scale_sizes:
aspp_scale_depth_size = np.ceil((feature_shape[0]*scale_size)/16)
if atrous_rates is None:
break
for rate in atrous_rates:
padding = 0
# find the smallest multiple of `rate` that covers the feature size plus
# a 2 * rate border, so SpaceToBatch sees a size divisible by the block
for j in range(100):
padded_size = rate * j
if padded_size >= aspp_scale_depth_size + 2 * rate:
padding = padded_size - aspp_scale_depth_size - 2 * rate
break
paddings = [[rate, rate + int(padding)],
[rate, rate + int(padding)]]
self.aspp_depth_spacetobatch = SpaceToBatch(rate, paddings)
self.aspp_depth_spacetobatchs.append(self.aspp_depth_spacetobatch)
crops = [[0, int(padding)], [0, int(padding)]]
self.aspp_depth_batchtospace = BatchToSpace(rate, crops)
self.aspp_depth_batchtospaces.append(self.aspp_depth_batchtospace)
self.aspp_depths = nn.CellList(self.aspp_depths)
self.aspp_depth_spacetobatchs = nn.CellList(self.aspp_depth_spacetobatchs)
self.aspp_depth_batchtospaces = nn.CellList(self.aspp_depth_batchtospaces)

self.global_pooling = nn.AvgPool2d(kernel_size=(int(feature_shape[0]), int(feature_shape[1])))
self.global_poolings = []
for scale_size in scale_sizes:
pooling_h = np.ceil((feature_shape[0]*scale_size)/output_stride)
pooling_w = np.ceil((feature_shape[1]*scale_size)/output_stride)
self.global_poolings.append(nn.AvgPool2d(kernel_size=(int(pooling_h), int(pooling_w))))
self.global_poolings = nn.CellList(self.global_poolings)
self.conv_bn = _conv_bn_relu(channel,
depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.samples = []
for scale_size in scale_sizes:
self.samples.append(ASPPSampleBlock(feature_shape, scale_size, output_stride))
self.samples = nn.CellList(self.samples)
self.feature_shape = feature_shape
self.concat = P.Concat(axis=1)

def construct(self, x, scale_index=0):
aspp0 = self.aspp0(x)
aspp1 = self.global_poolings[scale_index](x)
aspp1 = self.conv_bn(aspp1)
aspp1 = self.samples[scale_index](aspp1)
output = self.concat((aspp1, aspp0))

for i in range(len(self.atrous_rates)):
aspp_i = self.aspp_depth_spacetobatchs[i + scale_index * len(self.atrous_rates)](x)
aspp_i = self.aspp_depth_depthwiseconv(aspp_i)
aspp_i = self.aspp_depth_batchtospaces[i + scale_index * len(self.atrous_rates)](aspp_i)
aspp_i = self.aspp_depth_bn(aspp_i)
aspp_i = self.aspp_depth_relu(aspp_i)
aspp_i = self.aspp_pointwise(aspp_i)
output = self.concat((output, aspp_i))
return output
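
# Note: the concat above stacks the image-level branch (aspp1), the 1x1
# branch (aspp0) and one depthwise branch per atrous rate, so the output
# carries depth * (2 + len(atrous_rates)) channels, exactly the input width
# that fc1 declares in SingleDeepLabV3 below. For example, with depth=256
# and atrous_rates=[6, 12, 18]: 256 * (2 + 3) = 1280 channels.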


class DecoderSampleBlock(nn.Cell):
"""Decoder sample block."""
def __init__(self, feature_shape, scale_size=1.0, decoder_output_stride=4):
super(DecoderSampleBlock, self).__init__()
sample_h = (feature_shape[0] * scale_size + 1) / decoder_output_stride + 1
sample_w = (feature_shape[1] * scale_size + 1) / decoder_output_stride + 1
self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True)

def construct(self, x):
return self.sample(x)


class Decoder(nn.Cell):
"""
Decode module for DeepLabv3.
Args:
low_level_channel (int): Low level input channel
channel (int): Input channel.
depth (int): Output channel.
feature_shape (list): The shape of the input feature, [h, w].
scale_sizes (list): Input scales for multi-scale feature extraction.
decoder_output_stride (int): The ratio of input to output spatial resolution.
fine_tune_batch_norm (bool): Fine tune the batch norm parameters or not.
Returns:
Tensor, output tensor.
Examples:
>>> Decoder(256, 100, [56,56])
"""
def __init__(self,
low_level_channel,
channel,
depth,
feature_shape,
scale_sizes,
decoder_output_stride,
fine_tune_batch_norm):
super(Decoder, self).__init__()
self.feature_projection = _conv_bn_relu(low_level_channel, 48, ksize=1, stride=1,
pad_mode="same", use_batch_statistics=fine_tune_batch_norm)
self.decoder_depth0 = _deep_conv_bn_relu(channel + 48,
channel_multiplier=1,
ksize=3,
stride=1,
pad_mode="same",
dilation=1,
use_batch_statistics=fine_tune_batch_norm)
self.decoder_pointwise0 = _conv_bn_relu(channel + 48,
depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.decoder_depth1 = _deep_conv_bn_relu(depth,
channel_multiplier=1,
ksize=3,
stride=1,
pad_mode="same",
dilation=1,
use_batch_statistics=fine_tune_batch_norm)
self.decoder_pointwise1 = _conv_bn_relu(depth,
depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.depth = depth
self.concat = P.Concat(axis=1)
self.samples = []
for scale_size in scale_sizes:
self.samples.append(DecoderSampleBlock(feature_shape, scale_size, decoder_output_stride))
self.samples = nn.CellList(self.samples)

def construct(self, x, low_level_feature, scale_index):
low_level_feature = self.feature_projection(low_level_feature)
low_level_feature = self.samples[scale_index](low_level_feature)
x = self.samples[scale_index](x)
output = self.concat((x, low_level_feature))
output = self.decoder_depth0(output)
output = self.decoder_pointwise0(output)
output = self.decoder_depth1(output)
output = self.decoder_pointwise1(output)
return output


class SingleDeepLabV3(nn.Cell):
"""
DeepLabv3 Network.
Args:
num_classes (int): Class number.
feature_shape (list): Input image shape, [N,C,H,W].
backbone (Cell): Backbone Network.
channel (int): Resnet output channel.
depth (int): ASPP block depth.
scale_sizes (list): Input scales for multi-scale feature extraction.
atrous_rates (list): Atrous rates for atrous spatial pyramid pooling.
decoder_output_stride (int): The ratio of input to output spatial resolution.
output_stride (int): The ratio of input to output spatial resolution.
fine_tune_batch_norm (bool): Fine tune the batch norm parameters or not.
Returns:
Tensor, output tensor.
Examples:
>>> SingleDeepLabV3(num_classes=10,
>>> feature_shape=[1,3,224,224],
>>> backbone=resnet50_dl(),
>>> channel=2048,
>>> depth=256,
>>> scale_sizes=[1.0],
>>> atrous_rates=[6],
>>> decoder_output_stride=4,
>>> output_stride=16)
"""

def __init__(self,
num_classes,
feature_shape,
backbone,
channel,
depth,
scale_sizes,
atrous_rates,
decoder_output_stride,
output_stride,
fine_tune_batch_norm=False):
super(SingleDeepLabV3, self).__init__()
self.num_classes = num_classes
self.channel = channel
self.depth = depth
self.scale_sizes = []
for scale_size in np.sort(scale_sizes):
self.scale_sizes.append(scale_size)
self.net = backbone
self.aspp = ASPP(channel=self.channel,
depth=self.depth,
feature_shape=[feature_shape[2],
feature_shape[3]],
scale_sizes=self.scale_sizes,
atrous_rates=atrous_rates,
output_stride=output_stride,
fine_tune_batch_norm=fine_tune_batch_norm)
self.aspp.add_flags(loop_can_unroll=True)
atrous_rates_len = 0
if atrous_rates is not None:
atrous_rates_len = len(atrous_rates)
self.fc1 = _conv_bn_relu(depth * (2 + atrous_rates_len), depth,
ksize=1,
stride=1,
use_batch_statistics=fine_tune_batch_norm)
self.fc2 = nn.Conv2d(depth,
num_classes,
kernel_size=1,
stride=1,
has_bias=True)
self.upsample = P.ResizeBilinear((int(feature_shape[2]),
int(feature_shape[3])),
align_corners=True)
self.samples = []
for scale_size in self.scale_sizes:
self.samples.append(SampleBlock(feature_shape, scale_size))
self.samples = nn.CellList(self.samples)
self.feature_shape = [float(feature_shape[0]), float(feature_shape[1]), float(feature_shape[2]),
float(feature_shape[3])]

self.pad = P.Pad(((0, 0), (0, 0), (1, 1), (1, 1)))
self.dropout = nn.Dropout(keep_prob=0.9)
self.shape = P.Shape()
self.decoder_output_stride = decoder_output_stride
if decoder_output_stride is not None:
self.decoder = Decoder(low_level_channel=depth,
channel=depth,
depth=depth,
feature_shape=[feature_shape[2],
feature_shape[3]],
scale_sizes=self.scale_sizes,
decoder_output_stride=decoder_output_stride,
fine_tune_batch_norm=fine_tune_batch_norm)

def construct(self, x, scale_index=0):
x = (2.0 / 255.0) * x - 1.0
x = self.pad(x)
low_level_feature, feature_map = self.net(x)
for scale_size in self.scale_sizes:
if scale_size * self.feature_shape[2] + 1.0 >= self.shape(x)[2] - 2:
output = self.aspp(feature_map, scale_index)
output = self.fc1(output)
if self.decoder_output_stride is not None:
output = self.decoder(output, low_level_feature, scale_index)
output = self.fc2(output)
output = self.samples[scale_index](output)
return output
scale_index += 1
return feature_map


class SampleBlock(nn.Cell):
"""Sample block."""
def __init__(self,
feature_shape,
scale_size=1.0):
super(SampleBlock, self).__init__()
sample_h = np.ceil(float(feature_shape[2]) * scale_size)
sample_w = np.ceil(float(feature_shape[3]) * scale_size)
self.sample = P.ResizeBilinear((int(sample_h), int(sample_w)), align_corners=True)

def construct(self, x):
return self.sample(x)


class DeepLabV3(nn.Cell):
"""DeepLabV3 model."""
def __init__(self, num_classes, feature_shape, backbone, channel, depth, infer_scale_sizes, atrous_rates,
decoder_output_stride, output_stride, fine_tune_batch_norm, image_pyramid):
super(DeepLabV3, self).__init__()
self.infer_scale_sizes = []
if infer_scale_sizes is not None:
self.infer_scale_sizes = infer_scale_sizes

if image_pyramid is None:
image_pyramid = [1.0]

self.image_pyramid = image_pyramid
scale_sizes = []
for pyramid in image_pyramid:
scale_sizes.append(pyramid)
for scale in self.infer_scale_sizes:
scale_sizes.append(scale)
self.samples = []
for scale_size in scale_sizes:
self.samples.append(SampleBlock(feature_shape, scale_size))
self.samples = nn.CellList(self.samples)
self.deeplabv3 = SingleDeepLabV3(num_classes=num_classes,
feature_shape=feature_shape,
backbone=resnet50_dl(fine_tune_batch_norm),
channel=channel,
depth=depth,
scale_sizes=scale_sizes,
atrous_rates=atrous_rates,
decoder_output_stride=decoder_output_stride,
output_stride=output_stride,
fine_tune_batch_norm=fine_tune_batch_norm)
self.softmax = P.Softmax(axis=1)
self.concat = P.Concat(axis=2)
self.expand_dims = P.ExpandDims()
self.reduce_mean = P.ReduceMean()
self.sample_common = P.ResizeBilinear((int(feature_shape[2]),
int(feature_shape[3])),
align_corners=True)

def construct(self, x):
logits = ()
if self.training:
if len(self.image_pyramid) >= 1:
if self.image_pyramid[0] == 1:
logits = self.deeplabv3(x)
else:
x1 = self.samples[0](x)
logits = self.deeplabv3(x1)
logits = self.sample_common(logits)
logits = self.expand_dims(logits, 2)
for i in range(len(self.image_pyramid) - 1):
x_i = self.samples[i + 1](x)
logits_i = self.deeplabv3(x_i)
logits_i = self.sample_common(logits_i)
logits_i = self.expand_dims(logits_i, 2)
logits = self.concat((logits, logits_i))
logits = self.reduce_mean(logits, 2)
return logits
if len(self.infer_scale_sizes) >= 1:
infer_index = len(self.image_pyramid)
x1 = self.samples[infer_index](x)
logits = self.deeplabv3(x1)
logits = self.sample_common(logits)
logits = self.softmax(logits)
logits = self.expand_dims(logits, 2)
for i in range(len(self.infer_scale_sizes) - 1):
x_i = self.samples[i + 1 + infer_index](x)
logits_i = self.deeplabv3(x_i)
logits_i = self.sample_common(logits_i)
logits_i = self.softmax(logits_i)
logits_i = self.expand_dims(logits_i, 2)
logits = self.concat((logits, logits_i))
logits = self.reduce_mean(logits, 2)
return logits


def deeplabv3_resnet50(num_classes, feature_shape, image_pyramid,
infer_scale_sizes, atrous_rates=None, decoder_output_stride=None,
output_stride=16, fine_tune_batch_norm=False):
"""
ResNet50 based DeepLabv3 network.

Args:
num_classes (int): Class number.
feature_shape (list): Input image shape, [N,C,H,W].
image_pyramid (list): Input scales for multi-scale feature extraction.
atrous_rates (list): Atrous rates for atrous spatial pyramid pooling.
infer_scale_sizes (list): The scales to resize images for inference.
decoder_output_stride (int): The ratio of input to output spatial resolution.
output_stride (int): The ratio of input to output spatial resolution.
fine_tune_batch_norm (bool): Fine tune the batch norm parameters or not.

Returns:
Cell, cell instance of ResNet50 based DeepLabv3 neural network.

Examples:
>>> deeplabv3_resnet50(100, [1,3,224,224],[1.0],[1.0])
"""
return DeepLabV3(num_classes=num_classes,
feature_shape=feature_shape,
backbone=resnet50_dl(fine_tune_batch_norm),
channel=2048,
depth=256,
infer_scale_sizes=infer_scale_sizes,
atrous_rates=atrous_rates,
decoder_output_stride=decoder_output_stride,
output_stride=output_stride,
fine_tune_batch_norm=fine_tune_batch_norm,
image_pyramid=image_pyramid)

+ 84
- 0
example/deeplabv3_voc2012/src/ei_dataset.py View File

@@ -0,0 +1,84 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Process Dataset."""
import abc
import os
import time

from .utils.adapter import get_raw_samples, read_image


class BaseDataset:
"""
Create dataset.

Args:
data_url (str): The path of data.
usage (str): Dataset split to use, 'train' or 'eval' (default='train').

Returns:
Dataset.
"""
def __init__(self, data_url, usage):
self.data_url = data_url
self.usage = usage
self.cur_index = 0
self.samples = []
_s_time = time.time()
self._load_samples()
_e_time = time.time()
print(f"load samples success~, time cost = {_e_time - _s_time}")

def __getitem__(self, item):
sample = self.samples[item]
return self._next_data(sample)

def __len__(self):
return len(self.samples)

@staticmethod
def _next_data(sample):
image_path = sample[0]
mask_image_path = sample[1]

image = read_image(image_path)
mask_image = read_image(mask_image_path)
return [image, mask_image]

@abc.abstractmethod
def _load_samples(self):
pass


class HwVocRawDataset(BaseDataset):
"""
Create dataset with raw data.

Args:
data_url (str): The path of data.
usage (str): Dataset split to use, 'train' or 'eval' (default='train').

Returns:
Dataset.
"""
def __init__(self, data_url, usage="train"):
super().__init__(data_url, usage)

def _load_samples(self):
try:
self.samples = get_raw_samples(os.path.join(self.data_url, self.usage))
except Exception as e:
print("load HwVocRawDataset failed!!!")
raise e

+ 63
- 0
example/deeplabv3_voc2012/src/losses.py View File

@@ -0,0 +1,63 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""OhemLoss."""
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.ops import functional as F


class OhemLoss(nn.Cell):
"""Ohem loss cell."""
def __init__(self, num, ignore_label):
super(OhemLoss, self).__init__()
self.mul = P.Mul()
self.shape = P.Shape()
self.one_hot = nn.OneHot(-1, num, 1.0, 0.0)
self.squeeze = P.Squeeze()
self.num = num
self.cross_entropy = P.SoftmaxCrossEntropyWithLogits()
self.mean = P.ReduceMean()
self.select = P.Select()
self.reshape = P.Reshape()
self.cast = P.Cast()
self.not_equal = P.NotEqual()
self.equal = P.Equal()
self.reduce_sum = P.ReduceSum(keep_dims=False)
self.fill = P.Fill()
self.transpose = P.Transpose()
self.ignore_label = ignore_label
self.loss_weight = 1.0

def construct(self, logits, labels):
logits = self.transpose(logits, (0, 2, 3, 1))
logits = self.reshape(logits, (-1, self.num))
labels = F.cast(labels, mstype.int32)
labels = self.reshape(labels, (-1,))
one_hot_labels = self.one_hot(labels)
losses = self.cross_entropy(logits, one_hot_labels)[0]
weights = self.cast(self.not_equal(labels, self.ignore_label), mstype.float32) * self.loss_weight
weighted_losses = self.mul(losses, weights)
loss = self.reduce_sum(weighted_losses, (0,))
zeros = self.fill(mstype.float32, self.shape(weights), 0.0)
ones = self.fill(mstype.float32, self.shape(weights), 1.0)
present = self.select(self.equal(weights, zeros), zeros, ones)
present = self.reduce_sum(present, (0,))

zeros = self.fill(mstype.float32, self.shape(present), 0.0)
min_control = self.fill(mstype.float32, self.shape(present), 1.0)
present = self.select(self.equal(present, zeros), min_control, present)
loss = loss / present
return loss
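
# A minimal smoke test for the cell above, assuming a configured MindSpore
# environment; logits are NCHW, labels are NHW, and pixels whose label equals
# ignore_label receive zero weight in the loss.
if __name__ == "__main__":
    import numpy as np
    from mindspore import Tensor

    loss_fn = OhemLoss(num=21, ignore_label=255)
    logits = Tensor(np.random.randn(2, 21, 4, 4).astype(np.float32))
    labels = np.random.randint(0, 21, (2, 4, 4)).astype(np.int32)
    labels[0, 0, 0] = 255  # void pixel, excluded from the loss
    print(loss_fn(logits, Tensor(labels)))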

+ 115
- 0
example/deeplabv3_voc2012/src/md_dataset.py View File

@@ -0,0 +1,115 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Dataset module."""
from PIL import Image
import mindspore.dataset as de
import mindspore.dataset.transforms.vision.c_transforms as C

from .ei_dataset import HwVocRawDataset
from .utils import custom_transforms as tr


class DataTransform:
"""Transform dataset for DeepLabV3."""

def __init__(self, args, usage):
self.args = args
self.usage = usage

def __call__(self, image, label):
if self.usage == "train":
return self._train(image, label)
if self.usage == "eval":
return self._eval(image, label)
return None

def _train(self, image, label):
"""
Process training data.

Args:
image (list): Image data.
label (list): Dataset label.
"""
image = Image.fromarray(image)
label = Image.fromarray(label)

rsc_tr = tr.RandomScaleCrop(base_size=self.args.base_size, crop_size=self.args.crop_size)
image, label = rsc_tr(image, label)

rhf_tr = tr.RandomHorizontalFlip()
image, label = rhf_tr(image, label)

nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
image, label = nor_tr(image, label)

return image, label

def _eval(self, image, label):
"""
Process eval data.

Args:
image (list): Image data.
label (list): Dataset label.
"""
image = Image.fromarray(image)
label = Image.fromarray(label)

fsc_tr = tr.FixScaleCrop(crop_size=self.args.crop_size)
image, label = fsc_tr(image, label)

nor_tr = tr.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
image, label = nor_tr(image, label)

return image, label


def create_dataset(args, data_url, epoch_num=1, batch_size=1, usage="train"):
"""
Create Dataset for DeepLabV3.

Args:
args (dict): Train parameters.
data_url (str): Dataset path.
epoch_num (int): Epoch of dataset (default=1).
batch_size (int): Batch size of dataset (default=1).
usage (str): Dataset split to use, 'train' or 'eval' (default='train').

Returns:
Dataset.
"""
# create iter dataset
dataset = HwVocRawDataset(data_url, usage=usage)
dataset_len = len(dataset)

# wrapped with GeneratorDataset
dataset = de.GeneratorDataset(dataset, ["image", "label"], sampler=None)
dataset.set_dataset_size(dataset_len)
dataset = dataset.map(input_columns=["image", "label"], operations=DataTransform(args, usage=usage))

channelswap_op = C.HWC2CHW()
dataset = dataset.map(input_columns="image", operations=channelswap_op)

# VOC2012 train split: 1464 samples / batch_size 8 = 183 batches per epoch
# epoch_num here counts steps,
# e.g. 3658 steps / 183 batches per epoch = 20 epochs
if usage == "train":
dataset = dataset.shuffle(1464)
dataset = dataset.batch(batch_size, drop_remainder=(usage == "train"))
dataset = dataset.repeat(count=epoch_num)
dataset.map_model = 4

return dataset

+ 72
- 0
example/deeplabv3_voc2012/src/miou_precision.py View File

@@ -0,0 +1,72 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""mIou."""
import numpy as np
from mindspore.nn.metrics.metric import Metric


def confuse_matrix(target, pred, n):
k = (target >= 0) & (target < n)
return np.bincount(n * target[k].astype(int) + pred[k], minlength=n ** 2).reshape(n, n)


def iou(hist):
denominator = hist.sum(1) + hist.sum(0) - np.diag(hist)
res = np.diag(hist) / np.where(denominator > 0, denominator, 1)
res = np.sum(res) / np.count_nonzero(denominator)
return res


class MiouPrecision(Metric):
"""Calculate miou precision."""
def __init__(self, num_class=21):
super(MiouPrecision, self).__init__()
if not isinstance(num_class, int):
raise TypeError('num_class should be integer type, but got {}'.format(type(num_class)))
if num_class < 1:
raise ValueError('num_class must be at least 1, but got {}'.format(num_class))
self._num_class = num_class
self._mIoU = []
self.clear()

def clear(self):
self._hist = np.zeros((self._num_class, self._num_class))
self._mIoU = []

def update(self, *inputs):
if len(inputs) != 2:
raise ValueError('Need 2 inputs (y_pred, y), but got {}'.format(len(inputs)))
predict_in = self._convert_data(inputs[0])
label_in = self._convert_data(inputs[1])
if predict_in.shape[1] != self._num_class:
raise ValueError('Class number does not match: the metric was initialized with {} classes, but the input '
'data contains {} classes'.format(self._num_class, predict_in.shape[1]))
pred = np.argmax(predict_in, axis=1)
label = label_in
if len(label.flatten()) != len(pred.flatten()):
print('Skipping: len(gt) = {:d}, len(pred) = {:d}'.format(len(label.flatten()), len(pred.flatten())))
raise ValueError('Sample number does not match: len(gt) = {}, len(pred) = {}'.format(
len(label.flatten()), len(pred.flatten())))
self._hist = confuse_matrix(label.flatten(), pred.flatten(), self._num_class)
mIoUs = iou(self._hist)
self._mIoU.append(mIoUs)

def eval(self):
"""
Computes the mIoU categorical accuracy.
"""
mIoU = np.nanmean(self._mIoU)
print('mIoU = {}'.format(mIoU))
return mIoU
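
# A toy check of the metric above with two classes and four pixels; plain
# numpy arrays are accepted by Metric._convert_data. The confusion matrix is
# [[1, 0], [1, 2]], giving per-class IoUs of 1/2 and 2/3, i.e. mIoU ~ 0.583.
if __name__ == "__main__":
    metric = MiouPrecision(num_class=2)
    pred = np.array([[[[0.9, 0.1], [0.2, 0.8]],
                      [[0.1, 0.9], [0.8, 0.2]]]], dtype=np.float32)  # NCHW scores
    label = np.array([[[0, 1], [1, 1]]], dtype=np.int32)  # NHW ground truth
    metric.update(pred, label)
    print(metric.eval())  # ~0.5833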

+ 14
- 0
example/deeplabv3_voc2012/src/utils/__init__.py View File

@@ -0,0 +1,14 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

+ 67
- 0
example/deeplabv3_voc2012/src/utils/adapter.py View File

@@ -0,0 +1,67 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Adapter dataset."""
import fnmatch
import io
import os

import numpy as np
from PIL import Image

from ..utils import file_io


def get_raw_samples(data_url):
"""
Get dataset from raw data.

Args:
data_url (str): Dataset path.

Returns:
list, a file list.
"""
def _list_files(dir_path, pattern):
full_files = []
_, _, files = next(file_io.walk(dir_path))
for f in files:
if fnmatch.fnmatch(f.lower(), pattern.lower()):
full_files.append(os.path.join(dir_path, f))
return full_files

img_files = _list_files(os.path.join(data_url, "Images"), "*.jpg")
seg_files = _list_files(os.path.join(data_url, "SegmentationClassRaw"), "*.png")

files = []
for img_file in img_files:
_, file_name = os.path.split(img_file)
name, _ = os.path.splitext(file_name)
seg_file = os.path.join(data_url, "SegmentationClassRaw", ".".join([name, "png"]))
if seg_file in seg_files:
files.append([img_file, seg_file])
return files


def read_image(img_path):
"""
Read image from file.

Args:
img_path (str): image path.
"""
img = file_io.read(img_path.strip(), binary=True)
data = io.BytesIO(img)
img = Image.open(data)
return np.array(img)

+ 148
- 0
example/deeplabv3_voc2012/src/utils/custom_transforms.py View File

@@ -0,0 +1,148 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Random process dataset."""
import random

import numpy as np
from PIL import Image, ImageOps, ImageFilter


class Normalize:
"""Normalize a tensor image with mean and standard deviation.
Args:
mean (tuple): means for each channel.
std (tuple): standard deviations for each channel.
"""

def __init__(self, mean=(0., 0., 0.), std=(1., 1., 1.)):
self.mean = mean
self.std = std

def __call__(self, img, mask):
img = np.array(img).astype(np.float32)
mask = np.array(mask).astype(np.float32)

return img, mask


class RandomHorizontalFlip:
"""Randomly decide whether to horizontal flip."""
def __call__(self, img, mask):
if random.random() < 0.5:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
mask = mask.transpose(Image.FLIP_LEFT_RIGHT)

return img, mask


class RandomRotate:
"""
Randomly decide whether to rotate.

Args:
degree (float): The degree of rotate.
"""
def __init__(self, degree):
self.degree = degree

def __call__(self, img, mask):
rotate_degree = random.uniform(-1 * self.degree, self.degree)
img = img.rotate(rotate_degree, Image.BILINEAR)
mask = mask.rotate(rotate_degree, Image.NEAREST)

return img, mask


class RandomGaussianBlur:
"""Randomly decide whether to filter image with gaussian blur."""
def __call__(self, img, mask):
if random.random() < 0.5:
img = img.filter(ImageFilter.GaussianBlur(
radius=random.random()))

return img, mask


class RandomScaleCrop:
"""Randomly decide whether to scale and crop image."""
def __init__(self, base_size, crop_size, fill=0):
self.base_size = base_size
self.crop_size = crop_size
self.fill = fill

def __call__(self, img, mask):
# random scale (short edge)
short_size = random.randint(int(self.base_size * 0.5), int(self.base_size * 2.0))
w, h = img.size
if h > w:
ow = short_size
oh = int(1.0 * h * ow / w)
else:
oh = short_size
ow = int(1.0 * w * oh / h)
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# pad crop
if short_size < self.crop_size:
padh = self.crop_size - oh if oh < self.crop_size else 0
padw = self.crop_size - ow if ow < self.crop_size else 0
img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=self.fill)
# random crop crop_size
w, h = img.size
x1 = random.randint(0, w - self.crop_size)
y1 = random.randint(0, h - self.crop_size)
img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))

return img, mask


class FixScaleCrop:
"""Scale and crop image with fixing size."""
def __init__(self, crop_size):
self.crop_size = crop_size

def __call__(self, img, mask):
w, h = img.size
if w > h:
oh = self.crop_size
ow = int(1.0 * w * oh / h)
else:
ow = self.crop_size
oh = int(1.0 * h * ow / w)
img = img.resize((ow, oh), Image.BILINEAR)
mask = mask.resize((ow, oh), Image.NEAREST)
# center crop
w, h = img.size
x1 = int(round((w - self.crop_size) / 2.))
y1 = int(round((h - self.crop_size) / 2.))
img = img.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))
mask = mask.crop((x1, y1, x1 + self.crop_size, y1 + self.crop_size))

return img, mask


class FixedResize:
"""Resize image with fixing size."""
def __init__(self, size):
self.size = (size, size)

def __call__(self, img, mask):
assert img.size == mask.size

img = img.resize(self.size, Image.BILINEAR)
mask = mask.resize(self.size, Image.NEAREST)
return img, mask

+ 36
- 0
example/deeplabv3_voc2012/src/utils/file_io.py View File

@@ -0,0 +1,36 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""File operation module."""
import os


def _is_obs(url):
return url.startswith("obs://") or url.startswith("s3://")


def read(url, binary=False):
if _is_obs(url):
# TODO read cloud file.
return None

with open(url, "rb" if binary else "r") as f:
return f.read()


def walk(url):
if _is_obs(url):
# TODO read cloud file.
return None
return os.walk(url)

+ 92
- 0
example/deeplabv3_voc2012/train.py View File

@@ -0,0 +1,92 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train."""
import argparse
from mindspore import context
from mindspore.communication.management import init
from mindspore.nn.optim.momentum import Momentum
from mindspore import Model, ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.train.callback import Callback, CheckpointConfig, ModelCheckpoint, TimeMonitor
from src.md_dataset import create_dataset
from src.losses import OhemLoss
from src.deeplabv3 import deeplabv3_resnet50
from src.config import config

parser = argparse.ArgumentParser(description="Deeplabv3 training")
parser.add_argument("--distribute", type=str, default="false", help="Run distribute, default is false.")
parser.add_argument('--epoch_size', type=int, default=6, help='Epoch size.')
parser.add_argument('--batch_size', type=int, default=2, help='Batch size.')
parser.add_argument('--data_url', required=True, default=None, help='Train data url')
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path')
parser.add_argument("--enable_save_ckpt", type=str, default="true", help="Enable save checkpoint, default is true.")
parser.add_argument("--save_checkpoint_steps", type=int, default=1000, help="Save checkpoint steps, default is 1000.")
parser.add_argument("--save_checkpoint_num", type=int, default=1, help="Save checkpoint numbers, default is 1.")
args_opt = parser.parse_args()
print(args_opt)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
class LossCallBack(Callback):
"""
Monitor the loss in training.
Note:
If per_print_times is 0, the loss is not printed.
Args:
per_print_times (int): Print loss every times. Default: 1.
"""
def __init__(self, per_print_times=1):
super(LossCallBack, self).__init__()
if not isinstance(per_print_times, int) or per_print_times < 0:
raise ValueError("print_step must be int and >= 0")
self._per_print_times = per_print_times
def step_end(self, run_context):
cb_params = run_context.original_args()
print("epoch: {}, step: {}, outputs are {}".format(cb_params.cur_epoch_num, cb_params.cur_step_num,
str(cb_params.net_outputs)))
def model_fine_tune(flags, train_net, fix_weight_layer):
checkpoint_path = flags.checkpoint_url
if checkpoint_path is None:
return
param_dict = load_checkpoint(checkpoint_path)
load_param_into_net(train_net, param_dict)
for para in train_net.trainable_params():
if fix_weight_layer in para.name:
para.requires_grad = False
if __name__ == "__main__":
if args_opt.distribute == "true":
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True)
init()
args_opt.base_size = config.crop_size
args_opt.crop_size = config.crop_size
train_dataset = create_dataset(args_opt, args_opt.data_url, args_opt.epoch_size, args_opt.batch_size, usage="train")
dataset_size = train_dataset.get_dataset_size()
time_cb = TimeMonitor(data_size=dataset_size)
callback = [time_cb, LossCallBack()]
if args_opt.enable_save_ckpt == "true":
config_ck = CheckpointConfig(save_checkpoint_steps=args_opt.save_checkpoint_steps,
keep_checkpoint_max=args_opt.save_checkpoint_num)
ckpoint_cb = ModelCheckpoint(prefix='checkpoint_deeplabv3', config=config_ck)
callback.append(ckpoint_cb)
net = deeplabv3_resnet50(config.seg_num_classes, [args_opt.batch_size, 3, args_opt.crop_size, args_opt.crop_size],
infer_scale_sizes=config.eval_scales, atrous_rates=config.atrous_rates,
decoder_output_stride=config.decoder_output_stride, output_stride=config.output_stride,
fine_tune_batch_norm=config.fine_tune_batch_norm, image_pyramid=config.image_pyramid)
net.set_train()
model_fine_tune(args_opt, net, 'layer')
loss = OhemLoss(config.seg_num_classes, config.ignore_label)
opt = Momentum(filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'depth' not in x.name
and 'bias' not in x.name, net.trainable_params()),
learning_rate=config.learning_rate, momentum=config.momentum, weight_decay=config.weight_decay)
model = Model(net, loss, opt)
model.train(args_opt.epoch_size, train_dataset, callback)

+ 142
- 0
example/fasterrcnn_coco2017/README.md View File

@@ -0,0 +1,142 @@
# FasterRcnn Example
## Description
FasterRcnn is a two-stage object detection network. It uses a region proposal network (RPN) that shares the convolutional features of the whole image with the detection network, so that computing region proposals is almost cost free. The whole network further merges RPN and FastRcnn into a single network by sharing the convolutional features.

## Requirements

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset COCO2017.

- We use COCO2017 as the training dataset in this example by default; you can also use your own datasets.

1. If the COCO dataset is used, **select dataset `coco` when running the script.**
Install Cython and pycocotools; you can also install mmcv to process data.

```
pip install Cython

pip install pycocotools

pip install mmcv
```
And change the COCO_ROOT and other settings you need in `config.py`. The directory structure is as follows:


```
.
└─cocodataset
├─annotations
├─instances_train2017.json
└─instances_val2017.json
├─val2017
└─train2017
```

2. If your own dataset is used, **select dataset `other` when running the script.**
Organize the dataset information into a TXT file, where each row is as follows:

```
train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
```

Each row is an image annotation split by spaces: the first column is the relative path of the image, and the remaining columns are box and class information in the format [xmin,ymin,xmax,ymax,class]. Images are read from the path obtained by joining `IMAGE_DIR` (the dataset directory) with the relative path given in `ANNO_PATH` (the TXT file path); `IMAGE_DIR` and `ANNO_PATH` are set in `config.py`. A parsing sketch follows below.
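
As a hypothetical illustration (the helper name `parse_annotation_line` is ours, not part of this repo), one such row can be parsed like this:

```
def parse_annotation_line(line):
    image_path, *box_strs = line.strip().split(' ')
    boxes = []
    for box_str in box_strs:
        xmin, ymin, xmax, ymax, cls = (int(v) for v in box_str.split(','))
        boxes.append((xmin, ymin, xmax, ymax, cls))
    return image_path, boxes

path, boxes = parse_annotation_line("train2017/0000001.jpg 0,259,401,459,7")
# path == 'train2017/0000001.jpg'; boxes == [(0, 259, 401, 459, 7)]
```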


## Example structure
```shell
.
└─FasterRcnn
├─README.md
├─scripts
├─run_download_process_data.sh
├─run_standalone_train.sh
├─run_train.sh
└─run_eval.sh
├─src
├─FasterRcnn
├─__init__.py
├─anchor_generator.py
├─bbox_assign_sample.py
├─bbox_assign_sample_stage2.py
├─faster_rcnn_r50.py
├─fpn_neck.py
├─proposal_generator.py
├─rcnn.py
├─resnet50.py
├─roi_align.py
└─rpn.py
├─config.py
├─dataset.py
├─lr_schedule.py
├─network_define.py
└─util.py
├─eval.py
└─train.py
```

## Running the example

### Train
#### Usage

```
# distributed training
sh run_distribute_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_PATH]
# standalone training
sh run_standalone_train.sh [PRETRAINED_MODEL]
```
> For details about rank_table.json, refer to the [distributed training tutorial](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html).

#### Result
Training results will be stored in the example path, in folders whose names begin with "train" or "train_parallel". You can find checkpoint files together with results like the following in loss.log.

```
# distribute training result(8p)
epoch: 1 step: 7393, rpn_loss: 0.12054, rcnn_loss: 0.40601, rpn_cls_loss: 0.04025, rpn_reg_loss: 0.08032, rcnn_cls_loss: 0.25854, rcnn_reg_loss: 0.14746, total_loss: 0.52655
epoch: 2 step: 7393, rpn_loss: 0.06561, rcnn_loss: 0.50293, rpn_cls_loss: 0.02587, rpn_reg_loss: 0.03967, rcnn_cls_loss: 0.35669, rcnn_reg_loss: 0.14624, total_loss: 0.56854
epoch: 3 step: 7393, rpn_loss: 0.06940, rcnn_loss: 0.49658, rpn_cls_loss: 0.03769, rpn_reg_loss: 0.03165, rcnn_cls_loss: 0.36353, rcnn_reg_loss: 0.13318, total_loss: 0.56598
...
epoch: 10 step: 7393, rpn_loss: 0.03555, rcnn_loss: 0.32666, rpn_cls_loss: 0.00697, rpn_reg_loss: 0.02859, rcnn_cls_loss: 0.16125, rcnn_reg_loss: 0.16541, total_loss: 0.36221
epoch: 11 step: 7393, rpn_loss: 0.19849, rcnn_loss: 0.47827, rpn_cls_loss: 0.11639, rpn_reg_loss: 0.08209, rcnn_cls_loss: 0.29712, rcnn_reg_loss: 0.18115, total_loss: 0.67676
epoch: 12 step: 7393, rpn_loss: 0.00691, rcnn_loss: 0.10168, rpn_cls_loss: 0.00529, rpn_reg_loss: 0.00162, rcnn_cls_loss: 0.05426, rcnn_reg_loss: 0.04745, total_loss: 0.10859
```

### Infer
#### Usage
```
# eval
sh run_eval.sh [ANN_FILE] [CHECKPOINT_PATH]
```
> The checkpoint can be produced during the training process.

#### Result
Inference results will be stored in the example path, in a folder named "eval". There you can find results like the following in log.
```
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.360
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.586
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.385
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.229
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.402
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.441
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.299
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.487
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.515
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.346
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.562
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.631
```

+ 130
- 0
example/fasterrcnn_coco2017/eval.py View File

@@ -0,0 +1,130 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""Evaluation for FasterRcnn"""
import os
import argparse
import time
import random
import numpy as np
from pycocotools.coco import COCO
from mindspore import context, Tensor
from mindspore.train.serialization import load_checkpoint, load_param_into_net
import mindspore.dataset.engine as de

from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50
from src.config import config
from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset
from src.util import coco_eval, bbox2result_1image, results2json

random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description="FasterRcnn evaluation")
parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.")
parser.add_argument("--ann_file", type=str, default="val.json", help="Ann file, default is val.json.")
parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoint file path.")
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
args_opt = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id)

def FasterRcnn_eval(dataset_path, ckpt_path, ann_file):
"""FasterRcnn evaluation."""
ds = create_fasterrcnn_dataset(dataset_path, batch_size=config.test_batch_size,
repeat_num=1, is_training=False)
net = Faster_Rcnn_Resnet50(config)
param_dict = load_checkpoint(ckpt_path)
load_param_into_net(net, param_dict)
net.set_train(False)

eval_iter = 0
total = ds.get_dataset_size()
outputs = []
dataset_coco = COCO(ann_file)

print("\n========================================\n")
print("total images num: ", total)
print("Processing, please wait a moment.")
max_num = 128
for data in ds.create_dict_iterator():
eval_iter = eval_iter + 1

img_data = data['image']
img_metas = data['image_shape']
gt_bboxes = data['box']
gt_labels = data['label']
gt_num = data['valid_num']

start = time.time()
# run net
output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num))
end = time.time()
print("Iter {} cost time {}".format(eval_iter, end - start))

# output
all_bbox = output[0]
all_label = output[1]
all_mask = output[2]

for j in range(config.test_batch_size):
all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :])
all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :])
all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :])

all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :]
all_labels_tmp_mask = all_label_squee[all_mask_squee]

if all_bboxes_tmp_mask.shape[0] > max_num:
inds = np.argsort(-all_bboxes_tmp_mask[:, -1])
inds = inds[:max_num]
all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds]
all_labels_tmp_mask = all_labels_tmp_mask[inds]

outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask, all_labels_tmp_mask, config.num_classes)

outputs.append(outputs_tmp)

eval_types = ["bbox"]
result_files = results2json(dataset_coco, outputs, "./results.pkl")

coco_eval(result_files, eval_types, dataset_coco, single_result=True)


if __name__ == '__main__':
prefix = "FasterRcnn_eval.mindrecord"
mindrecord_dir = config.mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix)
if not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco":
if os.path.isdir(config.coco_root):
print("Create Mindrecord.")
data_to_mindrecord_byte_image("coco", False, prefix, file_num=1)
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else:
print("coco_root not exits.")
else:
if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH):
print("Create Mindrecord.")
data_to_mindrecord_byte_image("other", False, prefix, file_num=1)
print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else:
print("IMAGE_DIR or ANNO_PATH not exits.")

print("Start Eval!")
FasterRcnn_eval(mindrecord_file, args_opt.checkpoint_path, args_opt.ann_file)

+ 69
- 0
example/fasterrcnn_coco2017/scripts/run_distribute_train.sh View File

@@ -0,0 +1,69 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

if [ $# != 2 ]
then
echo "Usage: sh run_train.sh [MINDSPORE_HCCL_CONFIG_PATH] [PRETRAINED_PATH]"
exit 1
fi

get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)

echo $PATH1
echo $PATH2

if [ ! -f $PATH1 ]
then
echo "error: MINDSPORE_HCCL_CONFIG_PATH=$PATH1 is not a file"
exit 1
fi

if [ ! -f $PATH2 ]
then
echo "error: PRETRAINED_PATH=$PATH2 is not a file"
exit 1
fi

ulimit -u unlimited
export DEVICE_NUM=8
export RANK_SIZE=8
export MINDSPORE_HCCL_CONFIG_PATH=$PATH1
export RANK_TABLE_FILE=$PATH1
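# both variables point at the same rank table json; setting the two names
# keeps components that read either MINDSPORE_HCCL_CONFIG_PATH or
# RANK_TABLE_FILE working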

for((i=0; i<${DEVICE_NUM}; i++))
do
export DEVICE_ID=$i
export RANK_ID=$i
rm -rf ./train_parallel$i
mkdir ./train_parallel$i
cp ../*.py ./train_parallel$i
cp *.sh ./train_parallel$i
cp -r ../src ./train_parallel$i
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"
env > env.log
python train.py --do_train=True --device_id=$i --rank_id=$i --run_distribute=True --device_num=$DEVICE_NUM \
--pre_trained=$PATH2 &> log &
cd ..
done

example/fasterrcnn_coco2017/scripts/run_eval.sh (+65, -0)

@@ -0,0 +1,65 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

if [ $# != 2 ]
then
echo "Usage: sh run_eval.sh [ANN_FILE] [CHECKPOINT_PATH]"
exit 1
fi

get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
echo $PATH1
echo $PATH2

if [ ! -f $PATH1 ]
then
echo "error: ANN_FILE=$PATH1 is not a file"
exit 1
fi

if [ ! -f $PATH2 ]
then
echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
exit 1
fi

ulimit -u unlimited
export DEVICE_NUM=1
export RANK_SIZE=$DEVICE_NUM
export DEVICE_ID=0
export RANK_ID=0

if [ -d "eval" ];
then
rm -rf ./eval
fi
mkdir ./eval
cp ../*.py ./eval
cp *.sh ./eval
cp -r ../src ./eval
cd ./eval || exit
env > env.log
echo "start eval for device $DEVICE_ID"
python eval.py --device_id=$DEVICE_ID --ann_file=$PATH1 --checkpoint_path=$PATH2 &> log &
cd ..

example/fasterrcnn_coco2017/scripts/run_standalone_train.sh (+57, -0)

@@ -0,0 +1,57 @@
#!/bin/bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

if [ $# != 1 ]
then
echo "Usage: sh run_standalone_train.sh [PRETRAINED_PATH]"
exit 1
fi

get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
else
echo "$(realpath -m $PWD/$1)"
fi
}
PATH1=$(get_real_path $1)
echo $PATH1

if [ ! -f $PATH1 ]
then
echo "error: PRETRAINED_PATH=$PATH1 is not a file"
exit 1
fi

ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1

if [ -d "train" ];
then
rm -rf ./train
fi
mkdir ./train
cp ../*.py ./train
cp *.sh ./train
cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
python train.py --do_train=True --device_id=$DEVICE_ID --pre_trained=$PATH1 &> log &
cd ..

example/fasterrcnn_coco2017/src/FasterRcnn/__init__.py (+31, -0)

@@ -0,0 +1,31 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn Init."""

from .resnet50 import ResNetFea, ResidualBlockUsing
from .bbox_assign_sample import BboxAssignSample
from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn
from .fpn_neck import FeatPyramidNeck
from .proposal_generator import Proposal
from .rcnn import Rcnn
from .rpn import RPN
from .roi_align import SingleRoIExtractor
from .anchor_generator import AnchorGenerator

__all__ = [
"ResNetFea", "BboxAssignSample", "BboxAssignSampleForRcnn",
"FeatPyramidNeck", "Proposal", "Rcnn",
"RPN", "SingleRoIExtractor", "AnchorGenerator", "ResidualBlockUsing"
]

example/fasterrcnn_coco2017/src/FasterRcnn/anchor_generator.py (+84, -0)

@@ -0,0 +1,84 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn anchor generator."""

import numpy as np

class AnchorGenerator():
"""Anchor generator for FasterRcnn."""
def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
"""Anchor generator init method."""
self.base_size = base_size
self.scales = np.array(scales)
self.ratios = np.array(ratios)
self.scale_major = scale_major
self.ctr = ctr
self.base_anchors = self.gen_base_anchors()

def gen_base_anchors(self):
"""Generate a single anchor."""
w = self.base_size
h = self.base_size
if self.ctr is None:
x_ctr = 0.5 * (w - 1)
y_ctr = 0.5 * (h - 1)
else:
x_ctr, y_ctr = self.ctr

h_ratios = np.sqrt(self.ratios)
w_ratios = 1 / h_ratios
if self.scale_major:
ws = (w * w_ratios[:, None] * self.scales[None, :]).reshape(-1)
hs = (h * h_ratios[:, None] * self.scales[None, :]).reshape(-1)
else:
ws = (w * self.scales[:, None] * w_ratios[None, :]).reshape(-1)
hs = (h * self.scales[:, None] * h_ratios[None, :]).reshape(-1)

base_anchors = np.stack(
[
x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
],
axis=-1).round()

return base_anchors

def _meshgrid(self, x, y, row_major=True):
"""Generate grid."""
xx = np.repeat(x.reshape(1, len(x)), len(y), axis=0).reshape(-1)
yy = np.repeat(y, len(x))
if row_major:
return xx, yy

return yy, xx

def grid_anchors(self, featmap_size, stride=16):
"""Generate anchor list."""
base_anchors = self.base_anchors

feat_h, feat_w = featmap_size
shift_x = np.arange(0, feat_w) * stride
shift_y = np.arange(0, feat_h) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
shifts = shifts.astype(base_anchors.dtype)
# first feat_w elements correspond to the first row of shifts
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
# shifted anchors (K, A, 4), reshape to (K*A, 4)

all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
all_anchors = all_anchors.reshape(-1, 4)

return all_anchors
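
A quick usage sketch for the generator above (illustrative values, run in the same module as the class): three ratios and one scale yield three base anchors per location, and since ws = w*s/sqrt(r) while hs = h*s*sqrt(r), every ratio keeps the same anchor area.

gen = AnchorGenerator(base_size=16, scales=[8], ratios=[0.5, 1.0, 2.0])
anchors = gen.grid_anchors((2, 2), stride=16)  # 2x2 feature map
print(anchors.shape)  # (12, 4): 4 positions x 3 base anchors, rows are (x1, y1, x2, y2)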

example/fasterrcnn_coco2017/src/FasterRcnn/bbox_assign_sample.py (+164, -0)

@@ -0,0 +1,164 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn positive and negative sample screening for RPN."""

import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype


class BboxAssignSample(nn.Cell):
"""
Bbox assigner and sampler definition.

Args:
config (dict): Config.
batch_size (int): Batchsize.
num_bboxes (int): The anchor nums.
add_gt_as_proposals (bool): Whether to add gt bboxes as proposals.

Returns:
Tensor, output tensor.
bbox_targets: bbox location, (batch_size, num_bboxes, 4)
bbox_weights: bbox weights, (batch_size, num_bboxes, 1)
labels: label for each bbox, (batch_size, num_bboxes, 1)
label_weights: label weight for each bbox, (batch_size, num_bboxes, 1)

Examples:
BboxAssignSample(config, 2, 1024, True)
"""

def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals):
super(BboxAssignSample, self).__init__()
cfg = config
self.batch_size = batch_size

self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mstype.float16)
self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mstype.float16)
self.min_pos_iou = Tensor(cfg.min_pos_iou, mstype.float16)
self.zero_thr = Tensor(0.0, mstype.float16)

self.num_bboxes = num_bboxes
self.num_gts = cfg.num_gts
self.num_expected_pos = cfg.num_expected_pos
self.num_expected_neg = cfg.num_expected_neg
self.add_gt_as_proposals = add_gt_as_proposals

if self.add_gt_as_proposals:
self.label_inds = Tensor(np.arange(1, self.num_gts + 1))

self.concat = P.Concat(axis=0)
self.max_gt = P.ArgMaxWithValue(axis=0)
self.max_anchor = P.ArgMaxWithValue(axis=1)
self.sum_inds = P.ReduceSum()
self.iou = P.IOU()
self.greaterequal = P.GreaterEqual()
self.greater = P.Greater()
self.select = P.Select()
self.gatherND = P.GatherNd()
self.squeeze = P.Squeeze()
self.cast = P.Cast()
self.logicaland = P.LogicalAnd()
self.less = P.Less()
self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos)
self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg)
self.reshape = P.Reshape()
self.equal = P.Equal()
self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0))
self.scatterNdUpdate = P.ScatterNdUpdate()
self.scatterNd = P.ScatterNd()
self.logicalnot = P.LogicalNot()
self.tile = P.Tile()
self.zeros_like = P.ZerosLike()

self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32))
self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32))
self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))

self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16))
self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16))
self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16))


def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids):
gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \
(self.num_gts, 1)), (1, 4)), mstype.bool_), gt_bboxes_i, self.check_gt_one)
bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \
(self.num_bboxes, 1)), (1, 4)), mstype.bool_), bboxes, self.check_anchor_two)

overlaps = self.iou(bboxes, gt_bboxes_i)

max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps)
_, max_overlaps_w_ac = self.max_anchor(overlaps)

neg_sample_iou_mask = self.logicaland(self.greaterequal(max_overlaps_w_gt, self.zero_thr), \
self.less(max_overlaps_w_gt, self.neg_iou_thr))
assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds)

pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.pos_iou_thr)
assigned_gt_inds3 = self.select(pos_sample_iou_mask, \
max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2)
assigned_gt_inds4 = assigned_gt_inds3
for j in range(self.num_gts):
max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1]
overlaps_w_gt_j = self.squeeze(overlaps[j:j+1:1, ::])

pos_mask_j = self.logicaland(self.greaterequal(max_overlaps_w_ac_j, self.min_pos_iou), \
self.equal(overlaps_w_gt_j, max_overlaps_w_ac_j))

assigned_gt_inds4 = self.select(pos_mask_j, self.assigned_gt_ones + j, assigned_gt_inds4)

assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds4, self.assigned_gt_ignores)

pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0))

pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16)
pos_check_valid = self.sum_inds(pos_check_valid, -1)
valid_pos_index = self.less(self.range_pos_size, pos_check_valid)
pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1))

pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones
pos_assigned_gt_index = pos_assigned_gt_index * self.cast(valid_pos_index, mstype.int32)
pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, (self.num_expected_pos, 1))

neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0))

num_pos = self.cast(self.logicalnot(valid_pos_index), mstype.float16)
num_pos = self.sum_inds(num_pos, -1)
unvalid_pos_index = self.less(self.range_pos_size, num_pos)
valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)

pos_bboxes_ = self.gatherND(bboxes, pos_index)
pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index)
pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index)

pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_)

valid_pos_index = self.cast(valid_pos_index, mstype.int32)
valid_neg_index = self.cast(valid_neg_index, mstype.int32)
bbox_targets_total = self.scatterNd(pos_index, pos_bbox_targets_, (self.num_bboxes, 4))
bbox_weights_total = self.scatterNd(pos_index, valid_pos_index, (self.num_bboxes,))
labels_total = self.scatterNd(pos_index, pos_gt_labels, (self.num_bboxes,))
total_index = self.concat((pos_index, neg_index))
total_valid_index = self.concat((valid_pos_index, valid_neg_index))
label_weights_total = self.scatterNd(total_index, total_valid_index, (self.num_bboxes,))

return bbox_targets_total, self.cast(bbox_weights_total, mstype.bool_), \
labels_total, self.cast(label_weights_total, mstype.bool_)
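
The select chains in construct implement a standard max-IoU assignment; restated as plain NumPy for intuition (a sketch only, with placeholder thresholds standing in for config.neg_iou_thr, config.pos_iou_thr and config.min_pos_iou):

import numpy as np

def assign_bboxes(overlaps, neg_thr=0.3, pos_thr=0.7, min_pos_iou=0.3):
    # overlaps: (num_gts, num_bboxes) IoU matrix, as produced by P.IOU
    max_iou = overlaps.max(axis=0)     # best gt IoU for each anchor
    best_gt = overlaps.argmax(axis=0)  # index of that gt
    assigned = -np.ones(overlaps.shape[1], dtype=np.int32)  # -1 = ignore
    assigned[(max_iou >= 0.0) & (max_iou < neg_thr)] = 0    # 0 = negative
    pos = max_iou >= pos_thr
    assigned[pos] = best_gt[pos] + 1                        # k > 0 = matched to gt k
    # low-quality matching: each gt also claims the anchor(s) it overlaps best
    for j in range(overlaps.shape[0]):
        best = overlaps[j].max()
        if best >= min_pos_iou:
            assigned[overlaps[j] == best] = j + 1
    return assigned

demo = np.array([[0.80, 0.10, 0.25],
                 [0.20, 0.65, 0.40]], dtype=np.float32)  # 2 gts x 3 anchors
print(assign_bboxes(demo))  # -> [ 1  2 -1]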

example/fasterrcnn_coco2017/src/FasterRcnn/bbox_assign_sample_stage2.py (+195, -0)

@@ -0,0 +1,195 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn tpositive and negative sample screening for Rcnn."""

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor


class BboxAssignSampleForRcnn(nn.Cell):
"""
Bbox assigner and sampler definition.

Args:
config (dict): Config.
batch_size (int): Batchsize.
num_bboxes (int): The anchor nums.
add_gt_as_proposals (bool): Whether to add gt bboxes as proposals.

Returns:
Tensor, output tensor.
bbox_targets: bbox location, (batch_size, num_bboxes, 4)
bbox_weights: bbox weights, (batch_size, num_bboxes, 1)
labels: label for each bbox, (batch_size, num_bboxes, 1)
label_weights: label weight for each bbox, (batch_size, num_bboxes, 1)

Examples:
BboxAssignSampleForRcnn(config, 2, 1024, True)
"""

def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals):
super(BboxAssignSampleForRcnn, self).__init__()
cfg = config
self.batch_size = batch_size
self.neg_iou_thr = cfg.neg_iou_thr_stage2
self.pos_iou_thr = cfg.pos_iou_thr_stage2
self.min_pos_iou = cfg.min_pos_iou_stage2
self.num_gts = cfg.num_gts
self.num_bboxes = num_bboxes
self.num_expected_pos = cfg.num_expected_pos_stage2
self.num_expected_neg = cfg.num_expected_neg_stage2
self.num_expected_total = cfg.num_expected_total_stage2

self.add_gt_as_proposals = add_gt_as_proposals
self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32))
self.add_gt_as_proposals_valid = Tensor(np.array(self.add_gt_as_proposals * np.ones(self.num_gts),
dtype=np.int32))

self.concat = P.Concat(axis=0)
self.max_gt = P.ArgMaxWithValue(axis=0)
self.max_anchor = P.ArgMaxWithValue(axis=1)
self.sum_inds = P.ReduceSum()
self.iou = P.IOU()
self.greaterequal = P.GreaterEqual()
self.greater = P.Greater()
self.select = P.Select()
self.gatherND = P.GatherNd()
self.squeeze = P.Squeeze()
self.cast = P.Cast()
self.logicaland = P.LogicalAnd()
self.less = P.Less()
self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos)
self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg)
self.reshape = P.Reshape()
self.equal = P.Equal()
self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(10.0, 10.0, 5.0, 5.0))
self.concat_axis1 = P.Concat(axis=1)
self.logicalnot = P.LogicalNot()
self.tile = P.Tile()

# Check
self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16))
self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16))

# Init tensor
self.assigned_gt_inds = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32))
self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32))
self.assigned_gt_ignores = Tensor(np.array(-1 * np.ones(num_bboxes), dtype=np.int32))
self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))

self.gt_ignores = Tensor(np.array(-1 * np.ones(self.num_gts), dtype=np.int32))
self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16))
self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=np.float16))
self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8))

self.reshape_shape_pos = (self.num_expected_pos, 1)
self.reshape_shape_neg = (self.num_expected_neg, 1)

self.scalar_zero = Tensor(0.0, dtype=mstype.float16)
self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=mstype.float16)
self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=mstype.float16)
self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=mstype.float16)

def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids):
gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \
(self.num_gts, 1)), (1, 4)), mstype.bool_), \
gt_bboxes_i, self.check_gt_one)
bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \
(self.num_bboxes, 1)), (1, 4)), mstype.bool_), \
bboxes, self.check_anchor_two)
# 1 dim = gt, 2 dim = bbox
overlaps = self.iou(bboxes, gt_bboxes_i)

max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps)
_, max_overlaps_w_ac = self.max_anchor(overlaps)

neg_sample_iou_mask = self.logicaland(self.greaterequal(max_overlaps_w_gt,
self.scalar_zero),
self.less(max_overlaps_w_gt,
self.scalar_neg_iou_thr))

assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds)

pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.scalar_pos_iou_thr)
assigned_gt_inds3 = self.select(pos_sample_iou_mask, \
max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2)

for j in range(self.num_gts):
max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1]
overlaps_w_ac_j = overlaps[j:j+1:1, ::]
temp1 = self.greaterequal(max_overlaps_w_ac_j, self.scalar_min_pos_iou)
temp2 = self.squeeze(self.equal(overlaps_w_ac_j, max_overlaps_w_ac_j))
pos_mask_j = self.logicaland(temp1, temp2)
assigned_gt_inds3 = self.select(pos_mask_j, (j+1)*self.assigned_gt_ones, assigned_gt_inds3)

assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds3, self.assigned_gt_ignores)

bboxes = self.concat((gt_bboxes_i, bboxes))
label_inds_valid = self.select(gt_valids, self.label_inds, self.gt_ignores)
label_inds_valid = label_inds_valid * self.add_gt_as_proposals_valid
assigned_gt_inds5 = self.concat((label_inds_valid, assigned_gt_inds5))
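# the gt boxes themselves are prepended to the proposal list with their
# own labels, so with add_gt_as_proposals=True every valid gt is
# guaranteed to be sampled as a positive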

# Get pos index
pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0))

pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16)
pos_check_valid = self.sum_inds(pos_check_valid, -1)
valid_pos_index = self.less(self.range_pos_size, pos_check_valid)
pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1))

num_pos = self.sum_inds(self.cast(self.logicalnot(valid_pos_index), mstype.float16), -1)
valid_pos_index = self.cast(valid_pos_index, mstype.int32)
pos_index = self.reshape(pos_index, self.reshape_shape_pos)
valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos)
pos_index = pos_index * valid_pos_index

pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones
pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos)
pos_assigned_gt_index = pos_assigned_gt_index * valid_pos_index

pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index)

# Get neg index
neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0))

unvalid_pos_index = self.less(self.range_pos_size, num_pos)
valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)
neg_index = self.reshape(neg_index, self.reshape_shape_neg)

valid_neg_index = self.cast(valid_neg_index, mstype.int32)
valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg)
neg_index = neg_index * valid_neg_index

pos_bboxes_ = self.gatherND(bboxes, pos_index)

neg_bboxes_ = self.gatherND(bboxes, neg_index)
pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos)
pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index)
pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_)

total_bboxes = self.concat((pos_bboxes_, neg_bboxes_))
total_deltas = self.concat((pos_bbox_targets_, self.bboxs_neg_mask))
total_labels = self.concat((pos_gt_labels, self.labels_neg_mask))

valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos)
valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg)
total_mask = self.concat((valid_pos_index, valid_neg_index))

return total_bboxes, total_deltas, total_labels, total_mask

example/fasterrcnn_coco2017/src/FasterRcnn/faster_rcnn_r50.py (+425, -0)

@@ -0,0 +1,425 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn based on ResNet50."""

import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
import mindspore.common.dtype as mstype
from mindspore.ops import functional as F
from .resnet50 import ResNetFea, ResidualBlockUsing
from .bbox_assign_sample_stage2 import BboxAssignSampleForRcnn
from .fpn_neck import FeatPyramidNeck
from .proposal_generator import Proposal
from .rcnn import Rcnn
from .rpn import RPN
from .roi_align import SingleRoIExtractor
from .anchor_generator import AnchorGenerator

class Faster_Rcnn_Resnet50(nn.Cell):
"""
FasterRcnn Network.

Note:
backbone = resnet50

Returns:
Tuple, tuple of output tensor.
rpn_loss: Scalar, Total loss of RPN subnet.
rcnn_loss: Scalar, Total loss of RCNN subnet.
rpn_cls_loss: Scalar, Classification loss of RPN subnet.
rpn_reg_loss: Scalar, Regression loss of RPN subnet.
rcnn_cls_loss: Scalar, Classification loss of RCNN subnet.
rcnn_reg_loss: Scalar, Regression loss of RCNN subnet.

Examples:
net = Faster_Rcnn_Resnet50()
"""
def __init__(self, config):
super(Faster_Rcnn_Resnet50, self).__init__()
self.train_batch_size = config.batch_size
self.num_classes = config.num_classes
self.anchor_scales = config.anchor_scales
self.anchor_ratios = config.anchor_ratios
self.anchor_strides = config.anchor_strides
self.target_means = tuple(config.rcnn_target_means)
self.target_stds = tuple(config.rcnn_target_stds)

# Anchor generator
anchor_base_sizes = None
self.anchor_base_sizes = list(
self.anchor_strides) if anchor_base_sizes is None else anchor_base_sizes

self.anchor_generators = []
for anchor_base in self.anchor_base_sizes:
self.anchor_generators.append(
AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios))

self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales)

featmap_sizes = config.feature_shapes
assert len(featmap_sizes) == len(self.anchor_generators)

self.anchor_list = self.get_anchors(featmap_sizes)

# Backbone resnet50
self.backbone = ResNetFea(ResidualBlockUsing,
config.resnet_block,
config.resnet_in_channels,
config.resnet_out_channels,
False)

# Fpn
self.fpn_ncek = FeatPyramidNeck(config.fpn_in_channels,
config.fpn_out_channels,
config.fpn_num_outs)

# Rpn and rpn loss
self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8))
self.rpn_with_loss = RPN(config,
self.train_batch_size,
config.rpn_in_channels,
config.rpn_feat_channels,
config.num_anchors,
config.rpn_cls_out_channels)

# Proposal
self.proposal_generator = Proposal(config,
self.train_batch_size,
config.activate_num_classes,
config.use_sigmoid_cls)
self.proposal_generator.set_train_local(config, True)
self.proposal_generator_test = Proposal(config,
config.test_batch_size,
config.activate_num_classes,
config.use_sigmoid_cls)
self.proposal_generator_test.set_train_local(config, False)

# Assign and sampler stage two
self.bbox_assigner_sampler_for_rcnn = BboxAssignSampleForRcnn(config, self.train_batch_size,
config.num_bboxes_stage2, True)
self.decode = P.BoundingBoxDecode(max_shape=(768, 1280), means=self.target_means, \
stds=self.target_stds)

# Roi
self.roi_align = SingleRoIExtractor(config,
config.roi_layer,
config.roi_align_out_channels,
config.roi_align_featmap_strides,
self.train_batch_size,
config.roi_align_finest_scale)
self.roi_align.set_train_local(config, True)
self.roi_align_test = SingleRoIExtractor(config,
config.roi_layer,
config.roi_align_out_channels,
config.roi_align_featmap_strides,
1,
config.roi_align_finest_scale)
self.roi_align_test.set_train_local(config, False)

# Rcnn
self.rcnn = Rcnn(config, config.rcnn_in_channels * config.roi_layer['out_size'] * config.roi_layer['out_size'],
self.train_batch_size, self.num_classes)

# Op declare
self.squeeze = P.Squeeze()
self.cast = P.Cast()

self.concat = P.Concat(axis=0)
self.concat_1 = P.Concat(axis=1)
self.concat_2 = P.Concat(axis=2)
self.reshape = P.Reshape()
self.select = P.Select()
self.greater = P.Greater()
self.transpose = P.Transpose()

# Test mode
self.test_batch_size = config.test_batch_size
self.split = P.Split(axis=0, output_num=self.test_batch_size)
self.split_shape = P.Split(axis=0, output_num=4)
self.split_scores = P.Split(axis=1, output_num=self.num_classes)
self.split_cls = P.Split(axis=0, output_num=self.num_classes-1)
self.tile = P.Tile()
self.gather = P.GatherNd()

self.rpn_max_num = config.rpn_max_num

self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(np.float16))
self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool)
self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool)
self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask,
self.ones_mask, self.zeros_mask), axis=1))
self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask,
self.ones_mask, self.ones_mask, self.zeros_mask), axis=1))

self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_score_thr)
self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * 0)
self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(np.float16) * -1)
self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(np.float16) * config.test_iou_thr)
self.test_max_per_img = config.test_max_per_img
self.nms_test = P.NMSWithMask(config.test_iou_thr)
self.softmax = P.Softmax(axis=1)
self.logicand = P.LogicalAnd()
self.oneslike = P.OnesLike()
self.test_topk = P.TopK(sorted=True)
self.test_num_proposal = self.test_batch_size * self.rpn_max_num

# Improve speed
self.concat_start = min(self.num_classes - 2, 55)
self.concat_end = (self.num_classes - 1)

# Init tensor
roi_align_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i,
dtype=np.float16) for i in range(self.train_batch_size)]

roi_align_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=np.float16) \
for i in range(self.test_batch_size)]

self.roi_align_index_tensor = Tensor(np.concatenate(roi_align_index))
self.roi_align_index_test_tensor = Tensor(np.concatenate(roi_align_index_test))

def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids):
x = self.backbone(img_data)
x = self.fpn_ncek(x)

rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(x,
img_metas,
self.anchor_list,
gt_bboxes,
self.gt_labels_stage1,
gt_valids)

if self.training:
proposal, proposal_mask = self.proposal_generator(cls_score, bbox_pred, self.anchor_list)
else:
proposal, proposal_mask = self.proposal_generator_test(cls_score, bbox_pred, self.anchor_list)

gt_labels = self.cast(gt_labels, mstype.int32)
gt_valids = self.cast(gt_valids, mstype.int32)
bboxes_tuple = ()
deltas_tuple = ()
labels_tuple = ()
mask_tuple = ()
if self.training:
for i in range(self.train_batch_size):
gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::])

gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::])
gt_labels_i = self.cast(gt_labels_i, mstype.uint8)

gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::])
gt_valids_i = self.cast(gt_valids_i, mstype.bool_)

bboxes, deltas, labels, mask = self.bbox_assigner_sampler_for_rcnn(gt_bboxes_i,
gt_labels_i,
proposal_mask[i],
proposal[i][::, 0:4:1],
gt_valids_i)
bboxes_tuple += (bboxes,)
deltas_tuple += (deltas,)
labels_tuple += (labels,)
mask_tuple += (mask,)

bbox_targets = self.concat(deltas_tuple)
rcnn_labels = self.concat(labels_tuple)
bbox_targets = F.stop_gradient(bbox_targets)
rcnn_labels = F.stop_gradient(rcnn_labels)
rcnn_labels = self.cast(rcnn_labels, mstype.int32)
else:
mask_tuple += proposal_mask
bbox_targets = proposal_mask
rcnn_labels = proposal_mask
for p_i in proposal:
bboxes_tuple += (p_i[::, 0:4:1],)

if self.training:
if self.train_batch_size > 1:
bboxes_all = self.concat(bboxes_tuple)
else:
bboxes_all = bboxes_tuple[0]
rois = self.concat_1((self.roi_align_index_tensor, bboxes_all))
else:
if self.test_batch_size > 1:
bboxes_all = self.concat(bboxes_tuple)
else:
bboxes_all = bboxes_tuple[0]
rois = self.concat_1((self.roi_align_index_test_tensor, bboxes_all))


rois = self.cast(rois, mstype.float32)
rois = F.stop_gradient(rois)

if self.training:
roi_feats = self.roi_align(rois,
self.cast(x[0], mstype.float32),
self.cast(x[1], mstype.float32),
self.cast(x[2], mstype.float32),
self.cast(x[3], mstype.float32))
else:
roi_feats = self.roi_align_test(rois,
self.cast(x[0], mstype.float32),
self.cast(x[1], mstype.float32),
self.cast(x[2], mstype.float32),
self.cast(x[3], mstype.float32))


roi_feats = self.cast(roi_feats, mstype.float16)
rcnn_masks = self.concat(mask_tuple)
rcnn_masks = F.stop_gradient(rcnn_masks)
rcnn_mask_squeeze = self.squeeze(self.cast(rcnn_masks, mstype.bool_))
rcnn_loss, rcnn_cls_loss, rcnn_reg_loss, _ = self.rcnn(roi_feats,
bbox_targets,
rcnn_labels,
rcnn_mask_squeeze)

output = ()
if self.training:
output += (rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss, rcnn_cls_loss, rcnn_reg_loss)
else:
output = self.get_det_bboxes(rcnn_cls_loss, rcnn_reg_loss, rcnn_masks, bboxes_all, img_metas)

return output

def get_det_bboxes(self, cls_logits, reg_logits, mask_logits, rois, img_metas):
"""Get the actual detection box."""
scores = self.softmax(cls_logits)

boxes_all = ()
for i in range(self.num_classes):
k = i * 4
reg_logits_i = self.squeeze(reg_logits[::, k:k+4:1])
out_boxes_i = self.decode(rois, reg_logits_i)
boxes_all += (out_boxes_i,)

img_metas_all = self.split(img_metas)
scores_all = self.split(scores)
mask_all = self.split(self.cast(mask_logits, mstype.int32))

boxes_all_with_batchsize = ()
for i in range(self.test_batch_size):
scale = self.split_shape(self.squeeze(img_metas_all[i]))
scale_h = scale[2]
scale_w = scale[3]
boxes_tuple = ()
for j in range(self.num_classes):
boxes_tmp = self.split(boxes_all[j])
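# bbox_mask alternates (1, 0, 1, 0) over the four coordinates, so the
# select below rescales x by scale_w and y by scale_h to map boxes back
# to the original image size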
out_boxes_h = boxes_tmp[i] / scale_h
out_boxes_w = boxes_tmp[i] / scale_w
boxes_tuple += (self.select(self.bbox_mask, out_boxes_w, out_boxes_h),)
boxes_all_with_batchsize += (boxes_tuple,)

output = self.multiclass_nms(boxes_all_with_batchsize, scores_all, mask_all)

return output

def multiclass_nms(self, boxes_all, scores_all, mask_all):
"""Multiscale postprocessing."""
all_bboxes = ()
all_labels = ()
all_masks = ()

for i in range(self.test_batch_size):
bboxes = boxes_all[i]
scores = scores_all[i]
masks = self.cast(mask_all[i], mstype.bool_)

res_boxes_tuple = ()
res_labels_tuple = ()
res_masks_tuple = ()

for j in range(self.num_classes - 1):
k = j + 1
_cls_scores = scores[::, k:k + 1:1]
_bboxes = self.squeeze(bboxes[k])
_mask_o = self.reshape(masks, (self.rpn_max_num, 1))

cls_mask = self.greater(_cls_scores, self.test_score_thresh)
_mask = self.logicand(_mask_o, cls_mask)

_reg_mask = self.cast(self.tile(self.cast(_mask, mstype.int32), (1, 4)), mstype.bool_)

_bboxes = self.select(_reg_mask, _bboxes, self.test_box_zeros)
_cls_scores = self.select(_mask, _cls_scores, self.test_score_zeros)
__cls_scores = self.squeeze(_cls_scores)
scores_sorted, topk_inds = self.test_topk(__cls_scores, self.rpn_max_num)
topk_inds = self.reshape(topk_inds, (self.rpn_max_num, 1))
scores_sorted = self.reshape(scores_sorted, (self.rpn_max_num, 1))
_bboxes_sorted = self.gather(_bboxes, topk_inds)
_mask_sorted = self.gather(_mask, topk_inds)

scores_sorted = self.tile(scores_sorted, (1, 4))
cls_dets = self.concat_1((_bboxes_sorted, scores_sorted))
cls_dets = P.Slice()(cls_dets, (0, 0), (self.rpn_max_num, 5))

cls_dets, _index, _mask_nms = self.nms_test(cls_dets)
_index = self.reshape(_index, (self.rpn_max_num, 1))
_mask_nms = self.reshape(_mask_nms, (self.rpn_max_num, 1))

_mask_n = self.gather(_mask_sorted, _index)

_mask_n = self.logicand(_mask_n, _mask_nms)
cls_labels = self.oneslike(_index) * j
res_boxes_tuple += (cls_dets,)
res_labels_tuple += (cls_labels,)
res_masks_tuple += (_mask_n,)

res_boxes_start = self.concat(res_boxes_tuple[:self.concat_start])
res_labels_start = self.concat(res_labels_tuple[:self.concat_start])
res_masks_start = self.concat(res_masks_tuple[:self.concat_start])

res_boxes_end = self.concat(res_boxes_tuple[self.concat_start:self.concat_end])
res_labels_end = self.concat(res_labels_tuple[self.concat_start:self.concat_end])
res_masks_end = self.concat(res_masks_tuple[self.concat_start:self.concat_end])

res_boxes = self.concat((res_boxes_start, res_boxes_end))
res_labels = self.concat((res_labels_start, res_labels_end))
res_masks = self.concat((res_masks_start, res_masks_end))

reshape_size = (self.num_classes - 1) * self.rpn_max_num
res_boxes = self.reshape(res_boxes, (1, reshape_size, 5))
res_labels = self.reshape(res_labels, (1, reshape_size, 1))
res_masks = self.reshape(res_masks, (1, reshape_size, 1))

all_bboxes += (res_boxes,)
all_labels += (res_labels,)
all_masks += (res_masks,)

all_bboxes = self.concat(all_bboxes)
all_labels = self.concat(all_labels)
all_masks = self.concat(all_masks)
return all_bboxes, all_labels, all_masks

def get_anchors(self, featmap_sizes):
"""Get anchors according to feature map sizes.

Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.

Returns:
tuple: anchors of each image, valid flags of each image
"""
num_levels = len(featmap_sizes)

# since feature map sizes of all images are the same, we only compute
# anchors for one time
multi_level_anchors = ()
for i in range(num_levels):
anchors = self.anchor_generators[i].grid_anchors(
featmap_sizes[i], self.anchor_strides[i])
multi_level_anchors += (Tensor(anchors.astype(np.float16)),)

return multi_level_anchors

example/fasterrcnn_coco2017/src/FasterRcnn/fpn_neck.py (+112, -0)

@@ -0,0 +1,112 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn feature pyramid network."""

import numpy as np
import mindspore.nn as nn
from mindspore import context
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common import dtype as mstype
from mindspore.common.initializer import initializer

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)

def bias_init_zeros(shape):
"""Bias init method."""
return Tensor(np.array(np.zeros(shape).astype(np.float32)).astype(np.float16))

def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
"""Conv2D wrapper."""
shape = (out_channels, in_channels, kernel_size, kernel_size)
weights = initializer("XavierUniform", shape=shape, dtype=mstype.float16).to_tensor()
shape_bias = (out_channels,)
biass = bias_init_zeros(shape_bias)
return nn.Conv2d(in_channels, out_channels,
kernel_size=kernel_size, stride=stride, padding=padding,
pad_mode=pad_mode, weight_init=weights, has_bias=True, bias_init=biass)

class FeatPyramidNeck(nn.Cell):
"""
Feature pyramid network cell, usually used as a network neck.

Applies a convolution to each of the input feature maps and outputs
feature maps with the same channel size. If the required number of
outputs is larger than the number of inputs, extra max pooling layers
are added for further downsampling.

Args:
in_channels (tuple) - Channel size of input feature maps.
out_channels (int) - Channel size of each output feature map.
num_outs (int) - Num of output features.

Returns:
Tuple, with tensors of same channel size.

Examples:
neck = FeatPyramidNeck([100,200,300], 50, 4)
input_data = (normal(0,0.1,(1,c,1280//(4*2**i), 768//(4*2**i)),
dtype=np.float32) \
for i, c in enumerate(config.fpn_in_channels))
x = neck(input_data)
"""

def __init__(self,
in_channels,
out_channels,
num_outs):
super(FeatPyramidNeck, self).__init__()
self.num_outs = num_outs
self.in_channels = in_channels
self.fpn_layer = len(self.in_channels)

assert not self.num_outs < len(in_channels)

self.lateral_convs_list_ = []
self.fpn_convs_ = []

for _, channel in enumerate(in_channels):
l_conv = _conv(channel, out_channels, kernel_size=1, stride=1, padding=0, pad_mode='valid')
fpn_conv = _conv(out_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='same')
self.lateral_convs_list_.append(l_conv)
self.fpn_convs_.append(fpn_conv)
self.lateral_convs_list = nn.layer.CellList(self.lateral_convs_list_)
self.fpn_convs_list = nn.layer.CellList(self.fpn_convs_)
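# the fixed upsample shapes below assume the 768x1280 input resolution
# used throughout this example: strides 32/16/8/4 give feature maps of
# 24x40, 48x80, 96x160 and 192x320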
self.interpolate1 = P.ResizeNearestNeighbor((48, 80))
self.interpolate2 = P.ResizeNearestNeighbor((96, 160))
self.interpolate3 = P.ResizeNearestNeighbor((192, 320))
self.maxpool = P.MaxPool(ksize=1, strides=2, padding="same")

def construct(self, inputs):
x = ()
for i in range(self.fpn_layer):
x += (self.lateral_convs_list[i](inputs[i]),)

y = (x[3],)
y = y + (x[2] + self.interpolate1(y[self.fpn_layer - 4]),)
y = y + (x[1] + self.interpolate2(y[self.fpn_layer - 3]),)
y = y + (x[0] + self.interpolate3(y[self.fpn_layer - 2]),)

z = ()
for i in range(self.fpn_layer - 1, -1, -1):
z = z + (y[i],)

outs = ()
for i in range(self.fpn_layer):
outs = outs + (self.fpn_convs_list[i](z[i]),)

for i in range(self.num_outs - self.fpn_layer):
outs = outs + (self.maxpool(outs[3]),)
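# each extra output level is a stride-2 subsample (ksize=1 max-pool) of
# the coarsest FPN map; with num_outs=5 and four inputs this loop runs once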
return outs

example/fasterrcnn_coco2017/src/FasterRcnn/proposal_generator.py (+199, -0)

@@ -0,0 +1,199 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn proposal generator."""

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore import Tensor
from mindspore import context


context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)


class Proposal(nn.Cell):
"""
Proposal subnet.

Args:
config (dict): Config.
batch_size (int): Batchsize.
num_classes (int) - Class number.
use_sigmoid_cls (bool) - Select sigmoid or softmax function.
target_means (tuple) - Means for encode function. Default: (.0, .0, .0, .0).
target_stds (tuple) - Stds for encode function. Default: (1.0, 1.0, 1.0, 1.0).

Returns:
Tuple, tuple of output tensor,(proposal, mask).

Examples:
Proposal(config = config, batch_size = 1, num_classes = 81, use_sigmoid_cls = True, \
target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0))
"""
def __init__(self,
config,
batch_size,
num_classes,
use_sigmoid_cls,
target_means=(.0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0)
):
super(Proposal, self).__init__()
cfg = config
self.batch_size = batch_size
self.num_classes = num_classes
self.target_means = target_means
self.target_stds = target_stds
self.use_sigmoid_cls = use_sigmoid_cls

if self.use_sigmoid_cls:
self.cls_out_channels = num_classes - 1
self.activation = P.Sigmoid()
self.reshape_shape = (-1, 1)
else:
self.cls_out_channels = num_classes
self.activation = P.Softmax(axis=1)
self.reshape_shape = (-1, 2)

if self.cls_out_channels <= 0:
raise ValueError('num_classes={} is too small'.format(num_classes))

self.num_pre = cfg.rpn_proposal_nms_pre
self.min_box_size = cfg.rpn_proposal_min_bbox_size
self.nms_thr = cfg.rpn_proposal_nms_thr
self.nms_post = cfg.rpn_proposal_nms_post
self.nms_across_levels = cfg.rpn_proposal_nms_across_levels
self.max_num = cfg.rpn_proposal_max_num
self.num_levels = cfg.fpn_num_outs

# Op Define
self.squeeze = P.Squeeze()
self.reshape = P.Reshape()
self.cast = P.Cast()

self.feature_shapes = cfg.feature_shapes

self.transpose_shape = (1, 2, 0)

self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width), \
means=self.target_means, \
stds=self.target_stds)

self.nms = P.NMSWithMask(self.nms_thr)
self.concat_axis0 = P.Concat(axis=0)
self.concat_axis1 = P.Concat(axis=1)
self.split = P.Split(axis=1, output_num=5)
self.min = P.Minimum()
self.gatherND = P.GatherNd()
self.slice = P.Slice()
self.select = P.Select()
self.greater = P.Greater()
self.transpose = P.Transpose()
self.tile = P.Tile()
self.set_train_local(config, training=True)

self.multi_10 = Tensor(10.0, mstype.float16)

def set_train_local(self, config, training=True):
"""Set training flag."""
self.training_local = training

cfg = config
self.topK_stage1 = ()
self.topK_shape = ()
total_max_topk_input = 0
if not self.training_local:
self.num_pre = cfg.rpn_nms_pre
self.min_box_size = cfg.rpn_min_bbox_min_size
self.nms_thr = cfg.rpn_nms_thr
self.nms_post = cfg.rpn_nms_post
self.nms_across_levels = cfg.rpn_nms_across_levels
self.max_num = cfg.rpn_max_num

for shp in self.feature_shapes:
k_num = min(self.num_pre, (shp[0] * shp[1] * 3))
total_max_topk_input += k_num
self.topK_stage1 += (k_num,)
self.topK_shape += ((k_num, 1),)

self.topKv2 = P.TopK(sorted=True)
self.topK_shape_stage2 = (self.max_num, 1)
self.min_float_num = -65536.0
self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float16))

def construct(self, rpn_cls_score_total, rpn_bbox_pred_total, anchor_list):
proposals_tuple = ()
masks_tuple = ()
for img_id in range(self.batch_size):
cls_score_list = ()
bbox_pred_list = ()
for i in range(self.num_levels):
rpn_cls_score_i = self.squeeze(rpn_cls_score_total[i][img_id:img_id+1:1, ::, ::, ::])
rpn_bbox_pred_i = self.squeeze(rpn_bbox_pred_total[i][img_id:img_id+1:1, ::, ::, ::])

cls_score_list = cls_score_list + (rpn_cls_score_i,)
bbox_pred_list = bbox_pred_list + (rpn_bbox_pred_i,)

proposals, masks = self.get_bboxes_single(cls_score_list, bbox_pred_list, anchor_list)
proposals_tuple += (proposals,)
masks_tuple += (masks,)
return proposals_tuple, masks_tuple

def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors):
"""Get proposal boundingbox."""
mlvl_proposals = ()
mlvl_mask = ()
for idx in range(self.num_levels):
rpn_cls_score = self.transpose(cls_scores[idx], self.transpose_shape)
rpn_bbox_pred = self.transpose(bbox_preds[idx], self.transpose_shape)
anchors = mlvl_anchors[idx]

rpn_cls_score = self.reshape(rpn_cls_score, self.reshape_shape)
rpn_cls_score = self.activation(rpn_cls_score)
rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 0::]), mstype.float16)

rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mstype.float16)

scores_sorted, topk_inds = self.topKv2(rpn_cls_score_process, self.topK_stage1[idx])

topk_inds = self.reshape(topk_inds, self.topK_shape[idx])

bboxes_sorted = self.gatherND(rpn_bbox_pred_process, topk_inds)
anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mstype.float16)

proposals_decode = self.decode(anchors_sorted, bboxes_sorted)

proposals_decode = self.concat_axis1((proposals_decode, self.reshape(scores_sorted, self.topK_shape[idx])))
proposals, _, mask_valid = self.nms(proposals_decode)

mlvl_proposals = mlvl_proposals + (proposals,)
mlvl_mask = mlvl_mask + (mask_valid,)

proposals = self.concat_axis0(mlvl_proposals)
masks = self.concat_axis0(mlvl_mask)

_, _, _, _, scores = self.split(proposals)
scores = self.squeeze(scores)
topk_mask = self.cast(self.topK_mask, mstype.float16)
scores_using = self.select(masks, scores, topk_mask)
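# proposals rejected earlier keep the -65536 sentinel score, so the TopK
# below always ranks them behind every valid proposal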

_, topk_inds = self.topKv2(scores_using, self.max_num)

topk_inds = self.reshape(topk_inds, self.topK_shape_stage2)
proposals = self.gatherND(proposals, topk_inds)
masks = self.gatherND(masks, topk_inds)
return proposals, masks

example/fasterrcnn_coco2017/src/FasterRcnn/rcnn.py (+171, -0)

@@ -0,0 +1,171 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn Rcnn network."""

import numpy as np
import mindspore.common.dtype as mstype
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter


class DenseNoTranspose(nn.Cell):
"""Dense layer whose weight is stored as (input_channels, output_channels), so MatMul runs without a transpose."""
def __init__(self, input_channels, output_channels, weight_init):
super(DenseNoTranspose, self).__init__()

self.weight = Parameter(initializer(weight_init, [input_channels, output_channels], mstype.float16),
name="weight")
self.bias = Parameter(initializer("zeros", [output_channels], mstype.float16).to_tensor(), name="bias")

self.matmul = P.MatMul(transpose_b=False)
self.bias_add = P.BiasAdd()

def construct(self, x):
output = self.bias_add(self.matmul(x, self.weight), self.bias)
return output


class Rcnn(nn.Cell):
"""
Rcnn subnet.

Args:
config (dict) - Config.
representation_size (int) - Channels of shared dense.
batch_size (int) - Batchsize.
num_classes (int) - Class number.
target_means (list) - Means for encode function. Default: (.0, .0, .0, .0).
target_stds (list) - Stds for encode function. Default: (0.1, 0.1, 0.2, 0.2).

Returns:
Tuple, tuple of output tensor.

Examples:
Rcnn(config=config, representation_size = 1024, batch_size=2, num_classes = 81, \
target_means=(0., 0., 0., 0.), target_stds=(0.1, 0.1, 0.2, 0.2))
"""
def __init__(self,
config,
representation_size,
batch_size,
num_classes,
target_means=(0., 0., 0., 0.),
target_stds=(0.1, 0.1, 0.2, 0.2)
):
super(Rcnn, self).__init__()
cfg = config
self.rcnn_loss_cls_weight = Tensor(np.array(cfg.rcnn_loss_cls_weight).astype(np.float16))
self.rcnn_loss_reg_weight = Tensor(np.array(cfg.rcnn_loss_reg_weight).astype(np.float16))
self.rcnn_fc_out_channels = cfg.rcnn_fc_out_channels
self.target_means = target_means
self.target_stds = target_stds
self.num_classes = num_classes
self.in_channels = cfg.rcnn_in_channels
self.train_batch_size = batch_size
self.test_batch_size = cfg.test_batch_size

shape_0 = (self.rcnn_fc_out_channels, representation_size)
weights_0 = initializer("XavierUniform", shape=shape_0[::-1], dtype=mstype.float16).to_tensor()
shape_1 = (self.rcnn_fc_out_channels, self.rcnn_fc_out_channels)
weights_1 = initializer("XavierUniform", shape=shape_1[::-1], dtype=mstype.float16).to_tensor()
self.shared_fc_0 = DenseNoTranspose(representation_size, self.rcnn_fc_out_channels, weights_0)
self.shared_fc_1 = DenseNoTranspose(self.rcnn_fc_out_channels, self.rcnn_fc_out_channels, weights_1)

cls_weight = initializer('Normal', shape=[num_classes, self.rcnn_fc_out_channels][::-1],
dtype=mstype.float16).to_tensor()
reg_weight = initializer('Normal', shape=[num_classes * 4, self.rcnn_fc_out_channels][::-1],
dtype=mstype.float16).to_tensor()
self.cls_scores = DenseNoTranspose(self.rcnn_fc_out_channels, num_classes, cls_weight)
self.reg_scores = DenseNoTranspose(self.rcnn_fc_out_channels, num_classes * 4, reg_weight)

self.flatten = P.Flatten()
self.relu = P.ReLU()
self.logicaland = P.LogicalAnd()
self.loss_cls = P.SoftmaxCrossEntropyWithLogits()
self.loss_bbox = P.SmoothL1Loss(sigma=1.0)
self.reshape = P.Reshape()
self.onehot = P.OneHot()
self.greater = P.Greater()
self.cast = P.Cast()
self.sum_loss = P.ReduceSum()
self.tile = P.Tile()
self.expandims = P.ExpandDims()

self.gather = P.GatherNd()
self.argmax = P.ArgMaxWithValue(axis=1)

self.on_value = Tensor(1.0, mstype.float32)
self.off_value = Tensor(0.0, mstype.float32)
self.value = Tensor(1.0, mstype.float16)

self.num_bboxes = (cfg.num_expected_pos_stage2 + cfg.num_expected_neg_stage2) * batch_size

rmv_first = np.ones((self.num_bboxes, self.num_classes))
rmv_first[:, 0] = np.zeros((self.num_bboxes,))
self.rmv_first_tensor = Tensor(rmv_first.astype(np.float16))

self.num_bboxes_test = cfg.rpn_max_num * cfg.test_batch_size

range_max = np.arange(self.num_bboxes_test).astype(np.int32)
self.range_max = Tensor(range_max)

def construct(self, featuremap, bbox_targets, labels, mask):
x = self.flatten(featuremap)

x = self.relu(self.shared_fc_0(x))
x = self.relu(self.shared_fc_1(x))

x_cls = self.cls_scores(x)
x_reg = self.reg_scores(x)

if self.training:
bbox_weights = self.cast(self.logicaland(self.greater(labels, 0), mask), mstype.int32) * labels
labels = self.cast(self.onehot(labels, self.num_classes, self.on_value, self.off_value), mstype.float16)
bbox_targets = self.tile(self.expandims(bbox_targets, 1), (1, self.num_classes, 1))

loss, loss_cls, loss_reg, loss_print = self.loss(x_cls, x_reg, bbox_targets, bbox_weights, labels, mask)
out = (loss, loss_cls, loss_reg, loss_print)
else:
out = (x_cls, (x_cls / self.value), x_reg, x_cls)

return out

def loss(self, cls_score, bbox_pred, bbox_targets, bbox_weights, labels, weights):
"""Loss method."""
loss_print = ()
loss_cls, _ = self.loss_cls(cls_score, labels)

weights = self.cast(weights, mstype.float16)
loss_cls = loss_cls * weights
loss_cls = self.sum_loss(loss_cls, (0,)) / self.sum_loss(weights, (0,))

bbox_weights = self.cast(self.onehot(bbox_weights, self.num_classes, self.on_value, self.off_value),
mstype.float16)
bbox_weights = bbox_weights * self.rmv_first_tensor

pos_bbox_pred = self.reshape(bbox_pred, (self.num_bboxes, -1, 4))
loss_reg = self.loss_bbox(pos_bbox_pred, bbox_targets)
loss_reg = self.sum_loss(loss_reg, (2,))
loss_reg = loss_reg * bbox_weights
loss_reg = loss_reg / self.sum_loss(weights, (0,))
loss_reg = self.sum_loss(loss_reg, (0, 1))

loss = self.rcnn_loss_cls_weight * loss_cls + self.rcnn_loss_reg_weight * loss_reg
loss_print += (loss_cls, loss_reg)

return loss, loss_cls, loss_reg, loss_print
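
The classification branch of the loss above is a masked mean over valid RoIs; a tiny NumPy sketch with made-up numbers:

import numpy as np

ce = np.array([0.5, 1.2, 0.3, 2.0], dtype=np.float32)     # per-RoI CE loss
valid = np.array([1.0, 1.0, 0.0, 1.0], dtype=np.float32)  # sampling mask
loss_cls = (ce * valid).sum() / valid.sum()               # masked mean
print(loss_cls)  # 1.2333...: the padded RoI contributes nothing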

example/fasterrcnn_coco2017/src/FasterRcnn/resnet50.py (+248, -0)

@@ -0,0 +1,248 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Resnet50 backbone."""
import numpy as np
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.common.tensor import Tensor
from mindspore.ops import functional as F
from mindspore import context
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True)


def weight_init_ones(shape):
"""Weight init."""
return Tensor(np.array(np.ones(shape).astype(np.float32) * 0.01).astype(np.float16))
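# Note: this fills every conv kernel with the constant 0.01 (cast to float16),
# a deterministic init; in practice pretrained ResNet weights would normally
# be loaded on top of it (see the --pre_trained option in train.py).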


def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'):
"""Conv2D wrapper."""
shape = (out_channels, in_channels, kernel_size, kernel_size)
weights = weight_init_ones(shape)
return nn.Conv2d(in_channels, out_channels,
kernel_size=kernel_size, stride=stride, padding=padding,
pad_mode=pad_mode, weight_init=weights, has_bias=False)


def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True):
"""Batchnorm2D wrapper."""
gamma_init = Tensor(np.array(np.ones(out_chls)).astype(np.float16))
beta_init = Tensor(np.array(np.ones(out_chls) * 0).astype(np.float16))
moving_mean_init = Tensor(np.array(np.ones(out_chls) * 0).astype(np.float16))
moving_var_init = Tensor(np.array(np.ones(out_chls)).astype(np.float16))
return nn.BatchNorm2d(out_chls, momentum=momentum, affine=affine, gamma_init=gamma_init,
beta_init=beta_init, moving_mean_init=moving_mean_init,
moving_var_init=moving_var_init, use_batch_statistics=use_batch_statistics)


class ResNetFea(nn.Cell):
"""
ResNet architecture.
Args:
block (Cell): Block for network.
layer_nums (list): Numbers of block in different layers.
in_channels (list): Input channel in each layer.
out_channels (list): Output channel in each layer.
weights_update (bool): Weight update flag.
Returns:
Tensor, output tensor.
Examples:
>>> ResNet(ResidualBlock,
>>> [3, 4, 6, 3],
>>> [64, 256, 512, 1024],
>>> [256, 512, 1024, 2048],
>>> False)
"""
def __init__(self,
block,
layer_nums,
in_channels,
out_channels,
weights_update=False):
super(ResNetFea, self).__init__()
if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
raise ValueError("the length of "
"layer_num, inchannel, outchannel list must be 4!")
bn_training = False
self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad')
self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training)
self.relu = P.ReLU()
self.maxpool = P.MaxPool(ksize=3, strides=2, padding="SAME")
self.weights_update = weights_update
if not self.weights_update:
self.conv1.weight.requires_grad = False
self.layer1 = self._make_layer(block,
layer_nums[0],
in_channel=in_channels[0],
out_channel=out_channels[0],
stride=1,
training=bn_training,
weights_update=self.weights_update)
self.layer2 = self._make_layer(block,
layer_nums[1],
in_channel=in_channels[1],
out_channel=out_channels[1],
stride=2,
training=bn_training,
weights_update=True)
self.layer3 = self._make_layer(block,
layer_nums[2],
in_channel=in_channels[2],
out_channel=out_channels[2],
stride=2,
training=bn_training,
weights_update=True)
self.layer4 = self._make_layer(block,
layer_nums[3],
in_channel=in_channels[3],
out_channel=out_channels[3],
stride=2,
training=bn_training,
weights_update=True)

def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False):
"""Make block layer."""
layers = []
down_sample = False
if stride != 1 or in_channel != out_channel:
down_sample = True
resblk = block(in_channel,
out_channel,
stride=stride,
down_sample=down_sample,
training=training,
weights_update=weights_update)
layers.append(resblk)
for _ in range(1, layer_num):
resblk = block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update)
layers.append(resblk)
return nn.SequentialCell(layers)

def construct(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
c1 = self.maxpool(x)
c2 = self.layer1(c1)
identity = c2
if not self.weights_update:
identity = F.stop_gradient(c2)
c3 = self.layer2(identity)
c4 = self.layer3(c3)
c5 = self.layer4(c4)
return identity, c3, c4, c5


class ResidualBlockUsing(nn.Cell):
"""
ResNet V1 residual block definition.
Args:
in_channels (int): Input channel.
out_channels (int): Output channel.
stride (int): Stride size for the initial convolutional layer. Default: 1.
down_sample (bool): Whether to downsample in this block. Default: False.
momentum (float): Momentum for the batchnorm layers. Default: 0.1.
training (bool): Training flag. Default: False.
weights_update (bool): Weights update flag. Default: False.
Returns:
Tensor, output tensor.
Examples:
ResidualBlock(3,256,stride=2,down_sample=True)
"""
expansion = 4

def __init__(self,
in_channels,
out_channels,
stride=1,
down_sample=False,
momentum=0.1,
training=False,
weights_update=False):
super(ResidualBlockUsing, self).__init__()
self.affine = weights_update
out_chls = out_channels // self.expansion
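# Bottleneck layout: a 1x1 conv reduces the width to out_channels // 4
# (e.g. 256 -> 64), a 3x3 conv does the spatial work, and a final 1x1 conv
# restores the full width before the residual add.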
self.conv1 = _conv(in_channels, out_chls, kernel_size=1, stride=1, padding=0)
self.bn1 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training)
self.conv2 = _conv(out_chls, out_chls, kernel_size=3, stride=stride, padding=1)
self.bn2 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training)
self.conv3 = _conv(out_chls, out_channels, kernel_size=1, stride=1, padding=0)
self.bn3 = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, use_batch_statistics=training)
if training:
self.bn1 = self.bn1.set_train()
self.bn2 = self.bn2.set_train()
self.bn3 = self.bn3.set_train()
if not weights_update:
self.conv1.weight.requires_grad = False
self.conv2.weight.requires_grad = False
self.conv3.weight.requires_grad = False
self.relu = P.ReLU()
self.downsample = down_sample
if self.downsample:
self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0)
self.bn_down_sample = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine,
use_batch_statistics=training)
if training:
self.bn_down_sample = self.bn_down_sample.set_train()
if not weights_update:
self.conv_down_sample.weight.requires_grad = False
self.add = P.TensorAdd()

def construct(self, x):
identity = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample:
identity = self.conv_down_sample(identity)
identity = self.bn_down_sample(identity)
out = self.add(out, identity)
out = self.relu(out)
return out

+ 178
- 0
example/fasterrcnn_coco2017/src/FasterRcnn/roi_align.py View File

@@ -0,0 +1,178 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn ROIAlign module."""

import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore.ops import composite as C
from mindspore.nn import layer as L
from mindspore.common.tensor import Tensor

class ROIAlign(nn.Cell):
"""
Extract RoI features from a single feature map.

Args:
out_size_h (int): RoI height.
out_size_w (int): RoI width.
spatial_scale (float): RoI spatial scale.
sample_num (int): RoI sample number.
"""
def __init__(self,
out_size_h,
out_size_w,
spatial_scale,
sample_num=0):
super(ROIAlign, self).__init__()

self.out_size = (out_size_h, out_size_w)
self.spatial_scale = float(spatial_scale)
self.sample_num = int(sample_num)
self.align_op = P.ROIAlign(self.out_size[0], self.out_size[1],
self.spatial_scale, self.sample_num)

def construct(self, features, rois):
return self.align_op(features, rois)

def __repr__(self):
format_str = self.__class__.__name__
format_str += '(out_size={}, spatial_scale={}, sample_num={})'.format(
self.out_size, self.spatial_scale, self.sample_num)
return format_str


class SingleRoIExtractor(nn.Cell):
"""
Extract RoI features from a single level feature map.

If there are multiple input feature levels, each RoI is mapped to a level
according to its scale.

Args:
config (dict): Config
roi_layer (dict): Specify RoI layer type and arguments.
out_channels (int): Output channels of RoI layers.
featmap_strides (list): Strides of input feature maps.
batch_size (int): Batch size.
finest_scale (int): Scale threshold of mapping to level 0.
"""

def __init__(self,
config,
roi_layer,
out_channels,
featmap_strides,
batch_size=1,
finest_scale=56):
super(SingleRoIExtractor, self).__init__()
cfg = config
self.train_batch_size = batch_size
self.out_channels = out_channels
self.featmap_strides = featmap_strides
self.num_levels = len(self.featmap_strides)
self.out_size = roi_layer['out_size']
self.sample_num = roi_layer['sample_num']
self.roi_layers = self.build_roi_layers(self.featmap_strides)
self.roi_layers = L.CellList(self.roi_layers)

self.sqrt = P.Sqrt()
self.log = P.Log()
self.finest_scale_ = finest_scale
self.clamp = C.clip_by_value

self.cast = P.Cast()
self.equal = P.Equal()
self.select = P.Select()

_mode_16 = False
self.dtype = np.float16 if _mode_16 else np.float32
self.ms_dtype = mstype.float16 if _mode_16 else mstype.float32
self.set_train_local(cfg, training=True)

def set_train_local(self, config, training=True):
"""Set training flag."""
self.training_local = training

cfg = config
# Init tensor
self.batch_size = cfg.roi_sample_num if self.training_local else cfg.rpn_max_num
self.batch_size = self.train_batch_size*self.batch_size \
if self.training_local else cfg.test_batch_size*self.batch_size
self.ones = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype))
finest_scale = np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * self.finest_scale_
self.finest_scale = Tensor(finest_scale)
self.epsilon = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype)*self.dtype(1e-6))
self.zeros = Tensor(np.array(np.zeros((self.batch_size, 1)), dtype=np.int32))
self.max_levels = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=np.int32)*(self.num_levels-1))
self.twos = Tensor(np.array(np.ones((self.batch_size, 1)), dtype=self.dtype) * 2)
self.res_ = Tensor(np.array(np.zeros((self.batch_size, self.out_channels,
self.out_size, self.out_size)), dtype=self.dtype))

def num_inputs(self):
return len(self.featmap_strides)

def init_weights(self):
pass

def log2(self, value):
return self.log(value) / self.log(self.twos)

def build_roi_layers(self, featmap_strides):
roi_layers = []
for s in featmap_strides:
layer_cls = ROIAlign(self.out_size, self.out_size,
spatial_scale=1 / s,
sample_num=self.sample_num)
roi_layers.append(layer_cls)
return roi_layers

def _c_map_roi_levels(self, rois):
"""Map rois to corresponding feature levels by scales.

- scale < finest_scale * 2: level 0
- finest_scale * 2 <= scale < finest_scale * 4: level 1
- finest_scale * 4 <= scale < finest_scale * 8: level 2
- scale >= finest_scale * 8: level 3

Args:
rois (Tensor): Input RoIs, shape (k, 5).
num_levels (int): Total level number.

Returns:
Tensor: Level index (0-based) of each RoI, shape (k, )
"""
scale = self.sqrt(rois[::, 3:4:1] - rois[::, 1:2:1] + self.ones) * \
self.sqrt(rois[::, 4:5:1] - rois[::, 2:3:1] + self.ones)

target_lvls = self.log2(scale / self.finest_scale + self.epsilon)
target_lvls = P.Floor()(target_lvls)
target_lvls = self.cast(target_lvls, mstype.int32)
target_lvls = self.clamp(target_lvls, self.zeros, self.max_levels)

return target_lvls

def construct(self, rois, feat1, feat2, feat3, feat4):
feats = (feat1, feat2, feat3, feat4)
res = self.res_
target_lvls = self._c_map_roi_levels(rois)
for i in range(self.num_levels):
mask = self.equal(target_lvls, P.ScalarToArray()(i))
mask = P.Reshape()(mask, (-1, 1, 1, 1))
roi_feats_t = self.roi_layers[i](feats[i], rois)
mask = self.cast(P.Tile()(self.cast(mask, mstype.int32), (1, 256, 7, 7)), mstype.bool_)
res = self.select(mask, roi_feats_t, res)

return res

+ 311
- 0
example/fasterrcnn_coco2017/src/FasterRcnn/rpn.py View File

@@ -0,0 +1,311 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""RPN for fasterRCNN"""
import numpy as np
import mindspore.nn as nn
import mindspore.common.dtype as mstype
from mindspore.ops import operations as P
from mindspore import Tensor
from mindspore.ops import functional as F
from mindspore.common.initializer import initializer
from .bbox_assign_sample import BboxAssignSample


class RpnRegClsBlock(nn.Cell):
"""
Rpn reg cls block for rpn layer

Args:
in_channels (int): Input channels of the shared convolution.
feat_channels (int): Output channels of the shared convolution.
num_anchors (int): The anchor number.
cls_out_channels (int): Output channels of the classification convolution.
weight_conv (Tensor): Weight init for the rpn conv.
bias_conv (Tensor): Bias init for the rpn conv.
weight_cls (Tensor): Weight init for the rpn cls conv.
bias_cls (Tensor): Bias init for the rpn cls conv.
weight_reg (Tensor): Weight init for the rpn reg conv.
bias_reg (Tensor): Bias init for the rpn reg conv.

Returns:
Tensor, output tensor.
"""
def __init__(self,
in_channels,
feat_channels,
num_anchors,
cls_out_channels,
weight_conv,
bias_conv,
weight_cls,
bias_cls,
weight_reg,
bias_reg):
super(RpnRegClsBlock, self).__init__()
self.rpn_conv = nn.Conv2d(in_channels, feat_channels, kernel_size=3, stride=1, pad_mode='same',
has_bias=True, weight_init=weight_conv, bias_init=bias_conv)
self.relu = nn.ReLU()

self.rpn_cls = nn.Conv2d(feat_channels, num_anchors * cls_out_channels, kernel_size=1, pad_mode='valid',
has_bias=True, weight_init=weight_cls, bias_init=bias_cls)
self.rpn_reg = nn.Conv2d(feat_channels, num_anchors * 4, kernel_size=1, pad_mode='valid',
has_bias=True, weight_init=weight_reg, bias_init=bias_reg)

def construct(self, x):
x = self.relu(self.rpn_conv(x))

x1 = self.rpn_cls(x)
x2 = self.rpn_reg(x)

return x1, x2


class RPN(nn.Cell):
"""
Region proposal network.

Args:
config (dict): Config.
batch_size (int): Batch size.
in_channels (int): Input channels of the shared convolution.
feat_channels (int): Output channels of the shared convolution.
num_anchors (int): The anchor number.
cls_out_channels (int): Output channels of the classification convolution.

Returns:
Tuple, tuple of output tensor.

Examples:
RPN(config=config, batch_size=2, in_channels=256, feat_channels=1024,
num_anchors=3, cls_out_channels=512)
"""
def __init__(self,
config,
batch_size,
in_channels,
feat_channels,
num_anchors,
cls_out_channels):
super(RPN, self).__init__()
cfg_rpn = config
self.num_bboxes = cfg_rpn.num_bboxes
self.slice_index = ()
self.feature_anchor_shape = ()
self.slice_index += (0,)
index = 0
for shape in cfg_rpn.feature_shapes:
self.slice_index += (self.slice_index[index] + shape[0] * shape[1] * num_anchors,)
self.feature_anchor_shape += (shape[0] * shape[1] * num_anchors * batch_size,)
index += 1
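# With the default config (feature_shapes (192, 320) ... (12, 20) and
# num_anchors=3), slice_index accumulates to
# (0, 184320, 230400, 241920, 244800, 245520); the last entry equals
# cfg.num_bboxes, so each slice picks out the anchors of one pyramid level.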

self.num_anchors = num_anchors
self.batch_size = batch_size
self.test_batch_size = cfg_rpn.test_batch_size
self.num_layers = 5
self.real_ratio = Tensor(np.ones((1, 1)).astype(np.float16))

self.rpn_convs_list = nn.layer.CellList(self._make_rpn_layer(self.num_layers, in_channels, feat_channels,
num_anchors, cls_out_channels))

self.transpose = P.Transpose()
self.reshape = P.Reshape()
self.concat = P.Concat(axis=0)
self.fill = P.Fill()
self.placeh1 = Tensor(np.ones((1,)).astype(np.float16))

self.trans_shape = (0, 2, 3, 1)

self.reshape_shape_reg = (-1, 4)
self.reshape_shape_cls = (-1,)
self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(np.float16))
self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(np.float16))
self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(np.float16))
self.num_bboxes = cfg_rpn.num_bboxes
self.get_targets = BboxAssignSample(cfg_rpn, self.batch_size, self.num_bboxes, False)
self.CheckValid = P.CheckValid()
self.sum_loss = P.ReduceSum()
self.loss_cls = P.SigmoidCrossEntropyWithLogits()
self.loss_bbox = P.SmoothL1Loss(sigma=1.0/9.0)
self.squeeze = P.Squeeze()
self.cast = P.Cast()
self.tile = P.Tile()
self.zeros_like = P.ZerosLike()
self.loss = Tensor(np.zeros((1,)).astype(np.float16))
self.clsloss = Tensor(np.zeros((1,)).astype(np.float16))
self.regloss = Tensor(np.zeros((1,)).astype(np.float16))

def _make_rpn_layer(self, num_layers, in_channels, feat_channels, num_anchors, cls_out_channels):
"""
Make RPN layers for the region proposal network.

Args:
num_layers (int): Layer num.
in_channels (int): Input channels of the shared convolution.
feat_channels (int): Output channels of the shared convolution.
num_anchors (int): The anchor number.
cls_out_channels (int): Output channels of the classification convolution.

Returns:
List, list of RpnRegClsBlock cells.
"""
rpn_layer = []

shp_weight_conv = (feat_channels, in_channels, 3, 3)
shp_bias_conv = (feat_channels,)
weight_conv = initializer('Normal', shape=shp_weight_conv, dtype=mstype.float16).to_tensor()
bias_conv = initializer(0, shape=shp_bias_conv, dtype=mstype.float16).to_tensor()

shp_weight_cls = (num_anchors * cls_out_channels, feat_channels, 1, 1)
shp_bias_cls = (num_anchors * cls_out_channels,)
weight_cls = initializer('Normal', shape=shp_weight_cls, dtype=mstype.float16).to_tensor()
bias_cls = initializer(0, shape=shp_bias_cls, dtype=mstype.float16).to_tensor()

shp_weight_reg = (num_anchors * 4, feat_channels, 1, 1)
shp_bias_reg = (num_anchors * 4,)
weight_reg = initializer('Normal', shape=shp_weight_reg, dtype=mstype.float16).to_tensor()
bias_reg = initializer(0, shape=shp_bias_reg, dtype=mstype.float16).to_tensor()

for _ in range(num_layers):
rpn_layer.append(RpnRegClsBlock(in_channels, feat_channels, num_anchors, cls_out_channels, \
weight_conv, bias_conv, weight_cls, \
bias_cls, weight_reg, bias_reg))

for i in range(1, num_layers):
rpn_layer[i].rpn_conv.weight = rpn_layer[0].rpn_conv.weight
rpn_layer[i].rpn_cls.weight = rpn_layer[0].rpn_cls.weight
rpn_layer[i].rpn_reg.weight = rpn_layer[0].rpn_reg.weight

rpn_layer[i].rpn_conv.bias = rpn_layer[0].rpn_conv.bias
rpn_layer[i].rpn_cls.bias = rpn_layer[0].rpn_cls.bias
rpn_layer[i].rpn_reg.bias = rpn_layer[0].rpn_reg.bias
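# After this loop all num_layers heads share one set of conv/cls/reg
# parameters, i.e. the same RPN head is applied at every FPN level.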

return rpn_layer

def construct(self, inputs, img_metas, anchor_list, gt_bboxes, gt_labels, gt_valids):
loss_print = ()
rpn_cls_score = ()
rpn_bbox_pred = ()
rpn_cls_score_total = ()
rpn_bbox_pred_total = ()

for i in range(self.num_layers):
x1, x2 = self.rpn_convs_list[i](inputs[i])

rpn_cls_score_total = rpn_cls_score_total + (x1,)
rpn_bbox_pred_total = rpn_bbox_pred_total + (x2,)

x1 = self.transpose(x1, self.trans_shape)
x1 = self.reshape(x1, self.reshape_shape_cls)

x2 = self.transpose(x2, self.trans_shape)
x2 = self.reshape(x2, self.reshape_shape_reg)

rpn_cls_score = rpn_cls_score + (x1,)
rpn_bbox_pred = rpn_bbox_pred + (x2,)

loss = self.loss
clsloss = self.clsloss
regloss = self.regloss
bbox_targets = ()
bbox_weights = ()
labels = ()
label_weights = ()

output = ()
if self.training:
for i in range(self.batch_size):
multi_level_flags = ()
anchor_list_tuple = ()

for j in range(self.num_layers):
res = self.cast(self.CheckValid(anchor_list[j], self.squeeze(img_metas[i:i + 1:1, ::])),
mstype.int32)
multi_level_flags = multi_level_flags + (res,)
anchor_list_tuple = anchor_list_tuple + (anchor_list[j],)

valid_flag_list = self.concat(multi_level_flags)
anchor_using_list = self.concat(anchor_list_tuple)

gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::])
gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::])
gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::])

bbox_target, bbox_weight, label, label_weight = self.get_targets(gt_bboxes_i,
gt_labels_i,
self.cast(valid_flag_list,
mstype.bool_),
anchor_using_list, gt_valids_i)

bbox_weight = self.cast(bbox_weight, mstype.float16)
label = self.cast(label, mstype.float16)
label_weight = self.cast(label_weight, mstype.float16)

for j in range(self.num_layers):
begin = self.slice_index[j]
end = self.slice_index[j + 1]
stride = 1
bbox_targets += (bbox_target[begin:end:stride, ::],)
bbox_weights += (bbox_weight[begin:end:stride],)
labels += (label[begin:end:stride],)
label_weights += (label_weight[begin:end:stride],)

for i in range(self.num_layers):
bbox_target_using = ()
bbox_weight_using = ()
label_using = ()
label_weight_using = ()

for j in range(self.batch_size):
bbox_target_using += (bbox_targets[i + (self.num_layers * j)],)
bbox_weight_using += (bbox_weights[i + (self.num_layers * j)],)
label_using += (labels[i + (self.num_layers * j)],)
label_weight_using += (label_weights[i + (self.num_layers * j)],)

bbox_target_with_batchsize = self.concat(bbox_target_using)
bbox_weight_with_batchsize = self.concat(bbox_weight_using)
label_with_batchsize = self.concat(label_using)
label_weight_with_batchsize = self.concat(label_weight_using)

# stop
bbox_target_ = F.stop_gradient(bbox_target_with_batchsize)
bbox_weight_ = F.stop_gradient(bbox_weight_with_batchsize)
label_ = F.stop_gradient(label_with_batchsize)
label_weight_ = F.stop_gradient(label_weight_with_batchsize)

cls_score_i = rpn_cls_score[i]
reg_score_i = rpn_bbox_pred[i]

loss_cls = self.loss_cls(cls_score_i, label_)
loss_cls_item = loss_cls * label_weight_
loss_cls_item = self.sum_loss(loss_cls_item, (0,)) / self.num_expected_total

loss_reg = self.loss_bbox(reg_score_i, bbox_target_)
bbox_weight_ = self.tile(self.reshape(bbox_weight_, (self.feature_anchor_shape[i], 1)), (1, 4))
loss_reg = loss_reg * bbox_weight_
loss_reg_item = self.sum_loss(loss_reg, (1,))
loss_reg_item = self.sum_loss(loss_reg_item, (0,)) / self.num_expected_total

loss_total = self.rpn_loss_cls_weight * loss_cls_item + self.rpn_loss_reg_weight * loss_reg_item

loss += loss_total
loss_print += (loss_total, loss_cls_item, loss_reg_item)
clsloss += loss_cls_item
regloss += loss_reg_item

output = (loss, rpn_cls_score_total, rpn_bbox_pred_total, clsloss, regloss, loss_print)
else:
output = (self.placeh1, rpn_cls_score_total, rpn_bbox_pred_total, self.placeh1, self.placeh1, self.placeh1)

return output

+ 158
- 0
example/fasterrcnn_coco2017/src/config.py View File

@@ -0,0 +1,158 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Network config settings, used in train.py and eval.py.
"""
from easydict import EasyDict as ed

config = ed({
"img_width": 1280,
"img_height": 768,
"keep_ratio": False,
"flip_ratio": 0.5,
"photo_ratio": 0.5,
"expand_ratio": 1.0,

# anchor
"feature_shapes": [(192, 320), (96, 160), (48, 80), (24, 40), (12, 20)],
"anchor_scales": [8],
"anchor_ratios": [0.5, 1.0, 2.0],
"anchor_strides": [4, 8, 16, 32, 64],
"num_anchors": 3,

# resnet
"resnet_block": [3, 4, 6, 3],
"resnet_in_channels": [64, 256, 512, 1024],
"resnet_out_channels": [256, 512, 1024, 2048],

# fpn
"fpn_in_channels": [256, 512, 1024, 2048],
"fpn_out_channels": 256,
"fpn_num_outs": 5,

# rpn
"rpn_in_channels": 256,
"rpn_feat_channels": 256,
"rpn_loss_cls_weight": 1.0,
"rpn_loss_reg_weight": 1.0,
"rpn_cls_out_channels": 1,
"rpn_target_means": [0., 0., 0., 0.],
"rpn_target_stds": [1.0, 1.0, 1.0, 1.0],

# bbox_assign_sampler
"neg_iou_thr": 0.3,
"pos_iou_thr": 0.7,
"min_pos_iou": 0.3,
"num_bboxes": 245520,
"num_gts": 128,
"num_expected_neg": 256,
"num_expected_pos": 128,

# proposal
"activate_num_classes": 2,
"use_sigmoid_cls": True,

# roi_align
"roi_layer": dict(type='RoIAlign', out_size=7, sample_num=2),
"roi_align_out_channels": 256,
"roi_align_featmap_strides": [4, 8, 16, 32],
"roi_align_finest_scale": 56,
"roi_sample_num": 640,

# bbox_assign_sampler_stage2
"neg_iou_thr_stage2": 0.5,
"pos_iou_thr_stage2": 0.5,
"min_pos_iou_stage2": 0.5,
"num_bboxes_stage2": 2000,
"num_expected_pos_stage2": 128,
"num_expected_neg_stage2": 512,
"num_expected_total_stage2": 512,

# rcnn
"rcnn_num_layers": 2,
"rcnn_in_channels": 256,
"rcnn_fc_out_channels": 1024,
"rcnn_loss_cls_weight": 1,
"rcnn_loss_reg_weight": 1,
"rcnn_target_means": [0., 0., 0., 0.],
"rcnn_target_stds": [0.1, 0.1, 0.2, 0.2],

# train proposal
"rpn_proposal_nms_across_levels": False,
"rpn_proposal_nms_pre": 2000,
"rpn_proposal_nms_post": 2000,
"rpn_proposal_max_num": 2000,
"rpn_proposal_nms_thr": 0.7,
"rpn_proposal_min_bbox_size": 0,

# test proposal
"rpn_nms_across_levels": False,
"rpn_nms_pre": 1000,
"rpn_nms_post": 1000,
"rpn_max_num": 1000,
"rpn_nms_thr": 0.7,
"rpn_min_bbox_min_size": 0,
"test_score_thr": 0.05,
"test_iou_thr": 0.5,
"test_max_per_img": 100,
"test_batch_size": 2,

"rpn_head_loss_type": "CrossEntropyLoss",
"rpn_head_use_sigmoid": True,
"rpn_head_weight": 1.0,

# LR
"base_lr": 0.02,
"base_step": 58633,
"total_epoch": 13,
"warmup_step": 500,
"warmup_mode": "linear",
"warmup_ratio": 1/3.0,
"sgd_step": [8, 11],
"sgd_momentum": 0.9,

# train
"batch_size": 2,
"loss_scale": 1,
"momentum": 0.91,
"weight_decay": 1e-4,
"epoch_size": 12,
"save_checkpoint": True,
"save_checkpoint_epochs": 1,
"keep_checkpoint_max": 10,
"save_checkpoint_path": "./checkpoint",

"mindrecord_dir": "../MindRecord_COCO_TRAIN",
"coco_root": "./cocodataset/",
"train_data_type": "train2017",
"val_data_type": "val2017",
"instance_set": "annotations/instances_{}.json",
"coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush'),
"num_classes": 81
})

+ 441
- 0
example/fasterrcnn_coco2017/src/dataset.py View File

@@ -0,0 +1,441 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""FasterRcnn dataset"""
from __future__ import division

import os
import numpy as np
from numpy import random

import mmcv
import mindspore.dataset as de
import mindspore.dataset.transforms.vision.c_transforms as C
from mindspore.mindrecord import FileWriter
from src.config import config


def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
"""Calculate the ious between each bbox of bboxes1 and bboxes2.

Args:
bboxes1(ndarray): shape (n, 4)
bboxes2(ndarray): shape (k, 4)
mode(str): iou (intersection over union) or iof (intersection
over foreground)

Returns:
ious(ndarray): shape (n, k)
"""

assert mode in ['iou', 'iof']

bboxes1 = bboxes1.astype(np.float32)
bboxes2 = bboxes2.astype(np.float32)
rows = bboxes1.shape[0]
cols = bboxes2.shape[0]
ious = np.zeros((rows, cols), dtype=np.float32)
if rows * cols == 0:
return ious
exchange = False
if bboxes1.shape[0] > bboxes2.shape[0]:
bboxes1, bboxes2 = bboxes2, bboxes1
ious = np.zeros((cols, rows), dtype=np.float32)
exchange = True
area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1)
area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1)
for i in range(bboxes1.shape[0]):
x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
y_end - y_start + 1, 0)
if mode == 'iou':
union = area1[i] + area2 - overlap
else:
union = area1[i] if not exchange else area2
ious[i, :] = overlap / union
if exchange:
ious = ious.T
return ious
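# Quick sanity check (values assumed): for bboxes1 = [[0, 0, 9, 9]] and
# bboxes2 = [[5, 5, 14, 14]], each box covers 10 * 10 = 100 pixels under the
# inclusive "+ 1" convention, the overlap is 5 * 5 = 25, so
# iou = 25 / (100 + 100 - 25) ~= 0.143.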

class PhotoMetricDistortion:
"""Photo Metric Distortion"""
def __init__(self,
brightness_delta=32,
contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5),
hue_delta=18):
self.brightness_delta = brightness_delta
self.contrast_lower, self.contrast_upper = contrast_range
self.saturation_lower, self.saturation_upper = saturation_range
self.hue_delta = hue_delta

def __call__(self, img, boxes, labels):
# random brightness
img = img.astype('float32')

if random.randint(2):
delta = random.uniform(-self.brightness_delta,
self.brightness_delta)
img += delta

# mode == 0 --> do random contrast first
# mode == 1 --> do random contrast last
mode = random.randint(2)
if mode == 1:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha

# convert color from BGR to HSV
img = mmcv.bgr2hsv(img)

# random saturation
if random.randint(2):
img[..., 1] *= random.uniform(self.saturation_lower,
self.saturation_upper)

# random hue
if random.randint(2):
img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta)
img[..., 0][img[..., 0] > 360] -= 360
img[..., 0][img[..., 0] < 0] += 360

# convert color from HSV to BGR
img = mmcv.hsv2bgr(img)

# random contrast
if mode == 0:
if random.randint(2):
alpha = random.uniform(self.contrast_lower,
self.contrast_upper)
img *= alpha

# randomly swap channels
if random.randint(2):
img = img[..., random.permutation(3)]

return img, boxes, labels

class Expand:
"""expand image"""
def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)):
if to_rgb:
self.mean = mean[::-1]
else:
self.mean = mean
self.min_ratio, self.max_ratio = ratio_range

def __call__(self, img, boxes, labels):
if random.randint(2):
return img, boxes, labels

h, w, c = img.shape
ratio = random.uniform(self.min_ratio, self.max_ratio)
expand_img = np.full((int(h * ratio), int(w * ratio), c),
self.mean).astype(img.dtype)
left = int(random.uniform(0, w * ratio - w))
top = int(random.uniform(0, h * ratio - h))
expand_img[top:top + h, left:left + w] = img
img = expand_img
boxes += np.tile((left, top), 2)
return img, boxes, labels

def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""rescale operation for image"""
img_data, scale_factor = mmcv.imrescale(img, (config.img_width, config.img_height), return_scale=True)
if img_data.shape[0] > config.img_height:
img_data, scale_factor2 = mmcv.imrescale(img_data, (config.img_height, config.img_width), return_scale=True)
scale_factor = scale_factor*scale_factor2
img_shape = np.append(img_shape, scale_factor)
img_shape = np.asarray(img_shape, dtype=np.float32)
gt_bboxes = gt_bboxes * scale_factor

gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image"""
img_data = img
img_data, w_scale, h_scale = mmcv.imresize(
img_data, (config.img_width, config.img_height), return_scale=True)
scale_factor = np.array(
[w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
img_shape = (config.img_height, config.img_width, 1.0)
img_shape = np.asarray(img_shape, dtype=np.float32)

gt_bboxes = gt_bboxes * scale_factor

gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num):
"""resize operation for image of eval"""
img_data = img
img_data, w_scale, h_scale = mmcv.imresize(
img_data, (config.img_width, config.img_height), return_scale=True)
scale_factor = np.array(
[w_scale, h_scale, w_scale, h_scale], dtype=np.float32)
img_shape = np.append(img_shape, (h_scale, w_scale))
img_shape = np.asarray(img_shape, dtype=np.float32)

gt_bboxes = gt_bboxes * scale_factor

gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1)
gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""impad operation for image"""
img_data = mmcv.impad(img, (config.img_height, config.img_width))
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""imnormalize operation for image"""
img_data = mmcv.imnormalize(img, [123.675, 116.28, 103.53], [58.395, 57.12, 57.375], True)
img_data = img_data.astype(np.float32)
return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""flip operation for image"""
img_data = img
img_data = mmcv.imflip(img_data)
flipped = gt_bboxes.copy()
_, w, _ = img_data.shape

flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1
flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1

return (img_data, img_shape, flipped, gt_label, gt_num)

def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""transpose operation for image"""
img_data = img.transpose(2, 0, 1).copy()
img_data = img_data.astype(np.float16)
img_shape = img_shape.astype(np.float16)
gt_bboxes = gt_bboxes.astype(np.float16)
gt_label = gt_label.astype(np.int32)
gt_num = gt_num.astype(np.bool)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""photo crop operation for image"""
random_photo = PhotoMetricDistortion()
img_data, gt_bboxes, gt_label = random_photo(img, gt_bboxes, gt_label)

return (img_data, img_shape, gt_bboxes, gt_label, gt_num)

def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num):
"""expand operation for image"""
expand = Expand()
img, gt_bboxes, gt_label = expand(img, gt_bboxes, gt_label)

return (img, img_shape, gt_bboxes, gt_label, gt_num)

def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert):
image_shape = image_shape[:2]
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

if config.keep_ratio:
input_data = rescale_column(*input_data)
else:
input_data = resize_column_test(*input_data)
input_data = imnormalize_column(*input_data)

output_data = transpose_column(*input_data)
return output_data

def _data_aug(image, box, is_training):
"""Data augmentation function."""
image_bgr = image.copy()
image_bgr[:, :, 0] = image[:, :, 2]
image_bgr[:, :, 1] = image[:, :, 1]
image_bgr[:, :, 2] = image[:, :, 0]
image_shape = image_bgr.shape[:2]
gt_box = box[:, :4]
gt_label = box[:, 4]
gt_iscrowd = box[:, 5]

pad_max_number = 128
gt_box_new = np.pad(gt_box, ((0, pad_max_number - box.shape[0]), (0, 0)), mode="constant", constant_values=0)
gt_label_new = np.pad(gt_label, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=-1)
gt_iscrowd_new = np.pad(gt_iscrowd, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=1)
gt_iscrowd_new_revert = (~(gt_iscrowd_new.astype(np.bool))).astype(np.int32)
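# Annotations are padded to a fixed 128 boxes (config.num_gts) so every
# sample has a static shape; gt_iscrowd_new_revert is 1 for valid,
# non-crowd boxes and 0 elsewhere.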

if not is_training:
return _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert)

flip = (np.random.rand() < config.flip_ratio)
photo = (np.random.rand() < config.photo_ratio)
expand = (np.random.rand() < config.expand_ratio)
input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert

if expand:
input_data = expand_column(*input_data)
if config.keep_ratio:
input_data = rescale_column(*input_data)
else:
input_data = resize_column(*input_data)
if photo:
input_data = photo_crop_column(*input_data)
input_data = imnormalize_column(*input_data)
if flip:
input_data = flip_column(*input_data)

output_data = transpose_column(*input_data)
return output_data

return _data_aug(image, box, is_training)
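# Training-time pipeline, in order: optional Expand (p=expand_ratio), rescale
# or resize, optional PhotoMetricDistortion (p=photo_ratio), normalization,
# optional horizontal flip (p=flip_ratio), then HWC->CHW transpose to float16.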

def create_coco_label(is_training):
"""Get image path and annotation from COCO."""
from pycocotools.coco import COCO

coco_root = config.coco_root
data_type = config.val_data_type
if is_training:
data_type = config.train_data_type

# Classes to train or test on.
train_cls = config.coco_classes
train_cls_dict = {}
for i, cls in enumerate(train_cls):
train_cls_dict[cls] = i

anno_json = os.path.join(coco_root, config.instance_set.format(data_type))

coco = COCO(anno_json)
classes_dict = {}
cat_ids = coco.loadCats(coco.getCatIds())
for cat in cat_ids:
classes_dict[cat["id"]] = cat["name"]

image_ids = coco.getImgIds()
image_files = []
image_anno_dict = {}

for img_id in image_ids:
image_info = coco.loadImgs(img_id)
file_name = image_info[0]["file_name"]
anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None)
anno = coco.loadAnns(anno_ids)
image_path = os.path.join(coco_root, data_type, file_name)
annos = []
for label in anno:
bbox = label["bbox"]
class_name = classes_dict[label["category_id"]]
if class_name in train_cls:
x1, x2 = bbox[0], bbox[0] + bbox[2]
y1, y2 = bbox[1], bbox[1] + bbox[3]
annos.append([x1, y1, x2, y2] + [train_cls_dict[class_name]] + [int(label["iscrowd"])])

image_files.append(image_path)
if annos:
image_anno_dict[image_path] = np.array(annos)
else:
image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1])

return image_files, image_anno_dict

def anno_parser(annos_str):
"""Parse annotation from string to list."""
annos = []
for anno_str in annos_str:
anno = list(map(int, anno_str.strip().split(',')))
annos.append(anno)
return annos

def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path."""
image_files = []
image_anno_dict = {}
if not os.path.isdir(image_dir):
raise RuntimeError("Path given is not valid.")
if not os.path.isfile(anno_path):
raise RuntimeError("Annotation file is not valid.")

with open(anno_path, "rb") as f:
lines = f.readlines()
for line in lines:
line_str = line.decode("utf-8").strip()
line_split = str(line_str).split(' ')
file_name = line_split[0]
image_path = os.path.join(image_dir, file_name)
if os.path.isfile(image_path):
image_anno_dict[image_path] = anno_parser(line_split[1:])
image_files.append(image_path)
return image_files, image_anno_dict

def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="fasterrcnn.mindrecord", file_num=8):
"""Create MindRecord file."""
mindrecord_dir = config.mindrecord_dir
mindrecord_path = os.path.join(mindrecord_dir, prefix)
writer = FileWriter(mindrecord_path, file_num)
if dataset == "coco":
image_files, image_anno_dict = create_coco_label(is_training)
else:
image_files, image_anno_dict = filter_valid_data(config.IMAGE_DIR, config.ANNO_PATH)

fasterrcnn_json = {
"image": {"type": "bytes"},
"annotation": {"type": "int32", "shape": [-1, 6]},
}
writer.add_schema(fasterrcnn_json, "fasterrcnn_json")

for image_name in image_files:
with open(image_name, 'rb') as f:
img = f.read()
annos = np.array(image_anno_dict[image_name], dtype=np.int32)
row = {"image": img, "annotation": annos}
writer.write_raw_data([row])
writer.commit()

def create_fasterrcnn_dataset(mindrecord_file, batch_size=2, repeat_num=12, device_num=1, rank_id=0,
is_training=True, num_parallel_workers=8):
"""Creatr FasterRcnn dataset with MindDataset."""
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

if is_training:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
columns_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func, python_multiprocessing=True, num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
columns_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
return ds
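# Minimal usage sketch (the mindrecord path below is an assumed example; see
# train.py for how the files are actually generated and located):
#   ds = create_fasterrcnn_dataset("../MindRecord_COCO_TRAIN/FasterRcnn.mindrecord0",
#                                  batch_size=config.batch_size, repeat_num=config.epoch_size)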

+ 42
- 0
example/fasterrcnn_coco2017/src/lr_schedule.py View File

@@ -0,0 +1,42 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""lr generator for fasterrcnn"""
import math

def linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr):
lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps)
learning_rate = float(init_lr) + lr_inc * current_step
return learning_rate

def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps):
base = float(current_step - warmup_steps) / float(decay_steps)
learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr
return learning_rate

def dynamic_lr(config, rank_size=1):
"""dynamic learning rate generator"""
base_lr = config.base_lr

base_step = (config.base_step // rank_size) + rank_size
total_steps = int(base_step * config.total_epoch)
warmup_steps = int(config.warmup_step)
lr = []
for i in range(total_steps):
if i < warmup_steps:
lr.append(linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * config.warmup_ratio))
else:
lr.append(a_cosine_learning_rate(i, base_lr, warmup_steps, total_steps))

return lr
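# With the default config (base_lr=0.02, warmup_step=500, warmup_ratio=1/3),
# the first step uses roughly 0.02 / 3 ~= 0.0067, the rate ramps linearly to
# 0.02 by step 500, then follows a half-cosine decay toward zero over the
# remaining steps.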

+ 182
- 0
example/fasterrcnn_coco2017/src/network_define.py View File

@@ -0,0 +1,182 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FasterRcnn training network wrapper."""

import time
import numpy as np
import mindspore.nn as nn
from mindspore.common.tensor import Tensor
from mindspore.ops import functional as F
from mindspore.ops import composite as C
from mindspore import ParameterTuple
from mindspore.train.callback import Callback
from mindspore.nn.wrap.grad_reducer import DistributedGradReducer

time_stamp_init = False
time_stamp_first = 0
class LossCallBack(Callback):
"""
Monitor the loss in training.

If the loss is NAN or INF, terminate training.

Note:
If per_print_times is 0, do not print the loss.

Args:
per_print_times (int): Print the loss every per_print_times steps. Default: 1.
"""

def __init__(self, per_print_times=1):
super(LossCallBack, self).__init__()
if not isinstance(per_print_times, int) or per_print_times < 0:
raise ValueError("print_step must be int and >= 0.")
self._per_print_times = per_print_times
self.count = 0
self.rpn_loss_sum = 0
self.rcnn_loss_sum = 0
self.rpn_cls_loss_sum = 0
self.rpn_reg_loss_sum = 0
self.rcnn_cls_loss_sum = 0
self.rcnn_reg_loss_sum = 0

global time_stamp_init, time_stamp_first
if not time_stamp_init:
time_stamp_first = time.time()
time_stamp_init = True

def step_end(self, run_context):
cb_params = run_context.original_args()
rpn_loss = cb_params.net_outputs[0].asnumpy()
rcnn_loss = cb_params.net_outputs[1].asnumpy()
rpn_cls_loss = cb_params.net_outputs[2].asnumpy()

rpn_reg_loss = cb_params.net_outputs[3].asnumpy()
rcnn_cls_loss = cb_params.net_outputs[4].asnumpy()
rcnn_reg_loss = cb_params.net_outputs[5].asnumpy()

self.count += 1
self.rpn_loss_sum += float(rpn_loss)
self.rcnn_loss_sum += float(rcnn_loss)
self.rpn_cls_loss_sum += float(rpn_cls_loss)
self.rpn_reg_loss_sum += float(rpn_reg_loss)
self.rcnn_cls_loss_sum += float(rcnn_cls_loss)
self.rcnn_reg_loss_sum += float(rcnn_reg_loss)

cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1

if self.count >= 1:
global time_stamp_first
time_stamp_current = time.time()

rpn_loss = self.rpn_loss_sum/self.count
rcnn_loss = self.rcnn_loss_sum/self.count
rpn_cls_loss = self.rpn_cls_loss_sum/self.count

rpn_reg_loss = self.rpn_reg_loss_sum/self.count
rcnn_cls_loss = self.rcnn_cls_loss_sum/self.count
rcnn_reg_loss = self.rcnn_reg_loss_sum/self.count

total_loss = rpn_loss + rcnn_loss

with open("./loss.log", "a+") as loss_file:
loss_file.write("%lu epoch: %s step: %s, rpn_loss: %.5f, rcnn_loss: %.5f, rpn_cls_loss: %.5f, "
"rpn_reg_loss: %.5f, rcnn_cls_loss: %.5f, rcnn_reg_loss: %.5f, total_loss: %.5f\n" %
(time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch,
rpn_loss, rcnn_loss, rpn_cls_loss, rpn_reg_loss,
rcnn_cls_loss, rcnn_reg_loss, total_loss))

self.count = 0
self.rpn_loss_sum = 0
self.rcnn_loss_sum = 0
self.rpn_cls_loss_sum = 0
self.rpn_reg_loss_sum = 0
self.rcnn_cls_loss_sum = 0
self.rcnn_reg_loss_sum = 0

class LossNet(nn.Cell):
"""FasterRcnn loss method"""
def __init__(self):
super(LossNet, self).__init__()

def construct(self, x1, x2, x3, x4, x5, x6):
return x1 + x2
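# Only the rpn loss (x1) and rcnn loss (x2) contribute to the gradient; the
# remaining inputs exist only to match the backbone's six outputs, and the
# per-branch values are logged separately through LossCallBack.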

class WithLossCell(nn.Cell):
"""
Wrap the network with loss function to compute loss.

Args:
backbone (Cell): The target network to wrap.
loss_fn (Cell): The loss function used to compute loss.
"""
def __init__(self, backbone, loss_fn):
super(WithLossCell, self).__init__(auto_prefix=False)
self._backbone = backbone
self._loss_fn = loss_fn

def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
loss1, loss2, loss3, loss4, loss5, loss6 = self._backbone(x, img_shape, gt_bboxe, gt_label, gt_num)
return self._loss_fn(loss1, loss2, loss3, loss4, loss5, loss6)

@property
def backbone_network(self):
"""
Get the backbone network.

Returns:
Cell, return backbone network.
"""
return self._backbone


class TrainOneStepCell(nn.Cell):
"""
Network training wrapper class.

Appends an optimizer to the training network; after that, the construct
function can be called to create the backward graph.

Args:
network (Cell): The training network.
network_backbone (Cell): The forward network.
optimizer (Cell): Optimizer for updating the weights.
sens (Number): The backpropagation scaling factor. Default: 1.0.
reduce_flag (bool): The reduce flag. Default: False.
mean (bool): Whether allreduce averages the gradients. Default: True.
degree (int): Device number. Default: None.
"""
def __init__(self, network, network_backbone, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None):
super(TrainOneStepCell, self).__init__(auto_prefix=False)
self.network = network
self.backbone = network_backbone
self.weights = ParameterTuple(network.trainable_params())
self.optimizer = optimizer
self.grad = C.GradOperation('grad',
get_by_list=True,
sens_param=True)
self.sens = Tensor((np.ones((1,)) * sens).astype(np.float16))
self.reduce_flag = reduce_flag
if reduce_flag:
self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree)

def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num):
weights = self.weights
loss1, loss2, loss3, loss4, loss5, loss6 = self.backbone(x, img_shape, gt_bboxe, gt_label, gt_num)
grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
if self.reduce_flag:
grads = self.grad_reducer(grads)
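# F.depend ties the returned loss to the optimizer update, guaranteeing the
# parameter update executes even though only the loss values are returned.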
return F.depend(loss1, self.optimizer(grads)), loss2, loss3, loss4, loss5, loss6

+ 225
- 0
example/fasterrcnn_coco2017/src/util.py View File

@@ -0,0 +1,225 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""coco eval for fasterrcnn"""
import json
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import mmcv

_init_value = np.array(0.0)
summary_init = {
'Precision/mAP': _init_value,
'Precision/mAP@.50IOU': _init_value,
'Precision/mAP@.75IOU': _init_value,
'Precision/mAP (small)': _init_value,
'Precision/mAP (medium)': _init_value,
'Precision/mAP (large)': _init_value,
'Recall/AR@1': _init_value,
'Recall/AR@10': _init_value,
'Recall/AR@100': _init_value,
'Recall/AR@100 (small)': _init_value,
'Recall/AR@100 (medium)': _init_value,
'Recall/AR@100 (large)': _init_value,
}


def coco_eval(result_files, result_types, coco, max_dets=(100, 300, 1000), single_result=False):
"""coco eval for fasterrcnn"""
with open(result_files['bbox']) as f:
anns = json.load(f)
if not anns:
return summary_init

if mmcv.is_str(coco):
coco = COCO(coco)
assert isinstance(coco, COCO)

for res_type in result_types:
result_file = result_files[res_type]
assert result_file.endswith('.json')

coco_dets = coco.loadRes(result_file)
gt_img_ids = coco.getImgIds()
det_img_ids = coco_dets.getImgIds()
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(coco, coco_dets, iou_type)
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)

tgt_ids = gt_img_ids if not single_result else det_img_ids

if single_result:
res_dict = dict()
for id_i in tgt_ids:
cocoEval = COCOeval(coco, coco_dets, iou_type)
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)

cocoEval.params.imgIds = [id_i]
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
res_dict.update({coco.imgs[id_i]['file_name']: cocoEval.stats[1]})

cocoEval = COCOeval(coco, coco_dets, iou_type)
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)

cocoEval.params.imgIds = tgt_ids
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

summary_metrics = {
'Precision/mAP': cocoEval.stats[0],
'Precision/mAP@.50IOU': cocoEval.stats[1],
'Precision/mAP@.75IOU': cocoEval.stats[2],
'Precision/mAP (small)': cocoEval.stats[3],
'Precision/mAP (medium)': cocoEval.stats[4],
'Precision/mAP (large)': cocoEval.stats[5],
'Recall/AR@1': cocoEval.stats[6],
'Recall/AR@10': cocoEval.stats[7],
'Recall/AR@100': cocoEval.stats[8],
'Recall/AR@100 (small)': cocoEval.stats[9],
'Recall/AR@100 (medium)': cocoEval.stats[10],
'Recall/AR@100 (large)': cocoEval.stats[11],
}

return summary_metrics


def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
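# e.g. xyxy2xywh(np.array([10, 20, 49, 59])) -> [10, 20, 40, 40], using the
# same inclusive-pixel convention as bbox_overlaps in dataset.py.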

def bbox2result_1image(bboxes, labels, num_classes):
"""Convert detection results to a list of numpy arrays.

Args:
bboxes (Tensor): shape (n, 5)
labels (Tensor): shape (n, )
num_classes (int): class number, including background class

Returns:
list(ndarray): bbox results of each class
"""
if bboxes.shape[0] == 0:
result = [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)]
else:
result = [bboxes[labels == i, :] for i in range(num_classes - 1)]
return result

def proposal2json(dataset, results):
"""convert proposal to json mode"""
img_ids = dataset.getImgIds()
json_results = []
dataset_len = dataset.get_dataset_size()*2
for idx in range(dataset_len):
img_id = img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results

def det2json(dataset, results):
"""convert det to json mode"""
cat_ids = dataset.getCatIds()
img_ids = dataset.getImgIds()
json_results = []
dataset_len = len(img_ids)
for idx in range(dataset_len):
img_id = img_ids[idx]
if idx == len(results):
break
result = results[idx]
for label, result_label in enumerate(result):
bboxes = result_label
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = cat_ids[label]
json_results.append(data)
return json_results

def segm2json(dataset, results):
"""convert segm to json mode"""
bbox_json_results = []
segm_json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label, det_label in enumerate(det):
# bbox results
bboxes = det_label
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
bbox_json_results.append(data)

if len(seg) == 2:
segms = seg[0][label]
mask_score = seg[1][label]
else:
segms = seg[label]
mask_score = [bbox[4] for bbox in bboxes]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['score'] = float(mask_score[i])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
segm_json_results.append(data)
return bbox_json_results, segm_json_results

def results2json(dataset, results, out_file):
"""convert result convert to json mode"""
result_files = dict()
if isinstance(results[0], list):
json_results = det2json(dataset, results)
result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
mmcv.dump(json_results, result_files['bbox'])
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox')
result_files['segm'] = '{}.{}.json'.format(out_file, 'segm')
mmcv.dump(json_results[0], result_files['bbox'])
mmcv.dump(json_results[1], result_files['segm'])
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal')
mmcv.dump(json_results, result_files['proposal'])
else:
raise TypeError('invalid type of results')
return result_files

+ 136
- 0
example/fasterrcnn_coco2017/train.py View File

@@ -0,0 +1,136 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""train FasterRcnn and get checkpoint files."""

import os
import argparse
import random
import numpy as np

import mindspore.common.dtype as mstype
from mindspore import context, Tensor
from mindspore.communication.management import init
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
from mindspore.train import Model, ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.nn import SGD
import mindspore.dataset.engine as de

from src.FasterRcnn.faster_rcnn_r50 import Faster_Rcnn_Resnet50
from src.network_define import LossCallBack, WithLossCell, TrainOneStepCell, LossNet
from src.config import config
from src.dataset import data_to_mindrecord_byte_image, create_fasterrcnn_dataset
from src.lr_schedule import dynamic_lr

random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description="FasterRcnn training")
parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create "
"Mindrecord, default is false.")
parser.add_argument("--run_distribute", type=bool, default=False, help="Run distribute, default is false.")
parser.add_argument("--do_train", type=bool, default=True, help="Do train or not, default is true.")
parser.add_argument("--do_eval", type=bool, default=False, help="Do eval or not, default is false.")
parser.add_argument("--dataset", type=str, default="coco", help="Dataset, default is coco.")
parser.add_argument("--pre_trained", type=str, default="", help="Pretrain file path.")
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
parser.add_argument("--rank_id", type=int, default=0, help="Rank id, default is 0.")
args_opt = parser.parse_args()

context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=True, device_id=args_opt.device_id)

if __name__ == '__main__':
    if not args_opt.do_eval and args_opt.run_distribute:
        rank = args_opt.rank_id
        device_num = args_opt.device_num
        context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          mirror_mean=True, parameter_broadcast=True)
        init()
    else:
        rank = 0
        device_num = 1

    print("Start create dataset!")

    # It will generate mindrecord files in config.mindrecord_dir,
    # named FasterRcnn.mindrecord0, 1, ..., file_num.
    prefix = "FasterRcnn.mindrecord"
    mindrecord_dir = config.mindrecord_dir
    mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
    if not os.path.exists(mindrecord_file):
        if not os.path.isdir(mindrecord_dir):
            os.makedirs(mindrecord_dir)
        if args_opt.dataset == "coco":
            if os.path.isdir(config.coco_root):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("coco", True, prefix)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("coco_root does not exist.")
        else:
            if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH):
                print("Create Mindrecord.")
                data_to_mindrecord_byte_image("other", True, prefix)
                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
            else:
                print("IMAGE_DIR or ANNO_PATH does not exist.")

    if not args_opt.only_create_dataset:
        loss_scale = float(config.loss_scale)

        # When creating MindDataset, use the first mindrecord file, e.g. FasterRcnn.mindrecord0.
        dataset = create_fasterrcnn_dataset(mindrecord_file, repeat_num=config.epoch_size,
                                            batch_size=config.batch_size, device_num=device_num, rank_id=rank)

        dataset_size = dataset.get_dataset_size()
        print("Create dataset done!")

        net = Faster_Rcnn_Resnet50(config=config)
        net = net.set_train()

        load_path = args_opt.pre_trained
        if load_path != "":
            param_dict = load_checkpoint(load_path)
            for item in list(param_dict.keys()):
                if not item.startswith('backbone'):
                    param_dict.pop(item)
            load_param_into_net(net, param_dict)

        loss = LossNet()
        lr = Tensor(dynamic_lr(config, rank_size=device_num), mstype.float32)

        opt = SGD(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum,
                  weight_decay=config.weight_decay, loss_scale=config.loss_scale)
        net_with_loss = WithLossCell(net, loss)
        if args_opt.run_distribute:
            net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale, reduce_flag=True,
                                   mean=True, degree=device_num)
        else:
            net = TrainOneStepCell(net_with_loss, net, opt, sens=config.loss_scale)

        time_cb = TimeMonitor(data_size=dataset_size)
        loss_cb = LossCallBack()
        cb = [time_cb, loss_cb]
        if config.save_checkpoint:
            ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size,
                                          keep_checkpoint_max=config.keep_checkpoint_max)
            ckpoint_cb = ModelCheckpoint(prefix='faster_rcnn', directory=config.save_checkpoint_path, config=ckptconfig)
            cb += [ckpoint_cb]

        model = Model(net)
        model.train(config.epoch_size, dataset, callbacks=cb)

mindspore/mindrecord/tools/graph_map_schema.py → example/graph_to_mindrecord/graph_map_schema.py View File


+ 6
- 3
example/graph_to_mindrecord/write_citeseer.sh View File

@@ -1,9 +1,12 @@
 #!/bin/bash
-rm /tmp/citeseer/mindrecord/*
+SRC_PATH=/tmp/citeseer/dataset
+MINDRECORD_PATH=/tmp/citeseer/mindrecord
+
+rm -f $MINDRECORD_PATH/*
 
 python writer.py --mindrecord_script citeseer \
---mindrecord_file "/tmp/citeseer/mindrecord/citeseer_mr" \
+--mindrecord_file "$MINDRECORD_PATH/citeseer_mr" \
 --mindrecord_partitions 1 \
 --mindrecord_header_size_by_bit 18 \
 --mindrecord_page_size_by_bit 20 \
---graph_api_args "/tmp/citeseer/dataset/citeseer.content:/tmp/citeseer/dataset/citeseer.cites"
+--graph_api_args "$SRC_PATH/citeseer.content:$SRC_PATH/citeseer.cites"

+ 6
- 3
example/graph_to_mindrecord/write_cora.sh View File

@@ -1,9 +1,12 @@
 #!/bin/bash
-rm /tmp/cora/mindrecord/*
+SRC_PATH=/tmp/cora/dataset
+MINDRECORD_PATH=/tmp/cora/mindrecord
+
+rm -f $MINDRECORD_PATH/*
 
 python writer.py --mindrecord_script cora \
---mindrecord_file "/tmp/cora/mindrecord/cora_mr" \
+--mindrecord_file "$MINDRECORD_PATH/cora_mr" \
 --mindrecord_partitions 1 \
 --mindrecord_header_size_by_bit 18 \
 --mindrecord_page_size_by_bit 20 \
---graph_api_args "/tmp/cora/dataset/cora_content.csv:/tmp/cora/dataset/cora_cites.csv"
+--graph_api_args "$SRC_PATH/cora_content.csv:$SRC_PATH/cora_cites.csv"

+ 1
- 1
example/graph_to_mindrecord/writer.py View File

@@ -24,7 +24,7 @@ from importlib import import_module
 from multiprocessing import Pool
 
 from mindspore.mindrecord import FileWriter
-from mindspore.mindrecord import GraphMapSchema
+from graph_map_schema import GraphMapSchema
 
 
 def exec_task(task_id, parallel_writer=True):


+ 100
- 0
example/lstm_aclImdb/README.md View File

@@ -0,0 +1,100 @@
# LSTM Example

## Description

This example is for LSTM model training and evaluation.

## Requirements

- Install [MindSpore](https://www.mindspore.cn/install/en).

- Download the dataset aclImdb_v1.

> Unzip the aclImdb_v1 dataset to any path you want and the folder structure should be as follows:
> ```
> .
> ├── train # train dataset
> └── test # infer dataset
> ```

- Download the GloVe file.

> Unzip the glove.6B.zip to any path you want and the folder structure should be as follows:
> ```
> .
> ├── glove.6B.100d.txt
> ├── glove.6B.200d.txt
> ├── glove.6B.300d.txt # we will use this one later.
> └── glove.6B.50d.txt
> ```

> Add a new line at the beginning of the file named `glove.6B.300d.txt`.
> It tells the loader to read a total of 400,000 words, each represented by a 300-dimensional word vector.
> ```
> 400000 300
> ```
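The header line can be prepended with a short script; a minimal sketch (the file path is a placeholder, and it assumes the header has not already been added):

```python
glove_file = "glove.6B.300d.txt"  # placeholder path
with open(glove_file, "r", encoding="utf-8") as f:
    content = f.read()
with open(glove_file, "w", encoding="utf-8") as f:
    # word2vec text format header: "<vocab_size> <vector_dim>"
    f.write("400000 300\n" + content)
```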

## Running the Example

### Training

```
python train.py --preprocess=true --aclimdb_path=your_imdb_path --glove_path=your_glove_path > out.train.log 2>&1 &
```
The python command above will run in the background; you can view the results in the file `out.train.log`.

After training, you'll get some checkpoint files under the script folder by default.

You will get loss values like the following:
```
# grep "loss is " out.train.log
epoch: 1 step: 390, loss is 0.6003723
epoch: 2 step: 390, loss is 0.35312173
...
```

### Evaluation

```
python eval.py --ckpt_path=./lstm-20-390.ckpt > out.eval.log 2>&1 &
```
The above python command will run in the background; you can view the results in the file `out.eval.log`.

You will get the accuracy as follows:
```
# grep "acc" out.eval.log
result: {'acc': 0.83}
```

## Usage

### Training
```
usage: train.py [--preprocess {true,false}] [--aclimdb_path ACLIMDB_PATH]
                [--glove_path GLOVE_PATH] [--preprocess_path PREPROCESS_PATH]
                [--ckpt_path CKPT_PATH] [--device_target {GPU,CPU}]

parameters/options:
    --preprocess         whether to preprocess data.
    --aclimdb_path       path where the dataset is stored.
    --glove_path         path where the GloVe is stored.
    --preprocess_path    path where the pre-process data is stored.
    --ckpt_path          the path to save the checkpoint file.
    --device_target      the target device to run, support "GPU", "CPU".
```

### Evaluation

```
usage: eval.py [--preprocess {true,false}] [--aclimdb_path ACLIMDB_PATH]
               [--glove_path GLOVE_PATH] [--preprocess_path PREPROCESS_PATH]
               [--ckpt_path CKPT_PATH] [--device_target {GPU,CPU}]

parameters/options:
    --preprocess         whether to preprocess data.
    --aclimdb_path       path where the dataset is stored.
    --glove_path         path where the GloVe is stored.
    --preprocess_path    path where the pre-process data is stored.
    --ckpt_path          the checkpoint file path used to evaluate model.
    --device_target      the target device to run, support "GPU", "CPU".
```

+ 33
- 0
example/lstm_aclImdb/config.py View File

@@ -0,0 +1,33 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting
"""
from easydict import EasyDict as edict

# LSTM CONFIG
lstm_cfg = edict({
    'num_classes': 2,
    'learning_rate': 0.1,
    'momentum': 0.9,
    'num_epochs': 20,
    'batch_size': 64,
    'embed_size': 300,
    'num_hiddens': 100,
    'num_layers': 2,
    'bidirectional': True,
    'save_checkpoint_steps': 390,
    'keep_checkpoint_max': 10
})

+ 92
- 0
example/lstm_aclImdb/dataset.py View File

@@ -0,0 +1,92 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Data operations, will be used in train.py and eval.py
"""
import os

import numpy as np

from imdb import ImdbParser
import mindspore.dataset as ds
from mindspore.mindrecord import FileWriter


def create_dataset(data_home, batch_size, repeat_num=1, training=True):
    """Data operations."""
    ds.config.set_seed(1)
    data_dir = os.path.join(data_home, "aclImdb_train.mindrecord0")
    if not training:
        data_dir = os.path.join(data_home, "aclImdb_test.mindrecord0")

    data_set = ds.MindDataset(data_dir, columns_list=["feature", "label"], num_parallel_workers=4)

    # apply shuffle, batch and repeat operations
    data_set = data_set.shuffle(buffer_size=data_set.get_dataset_size())
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)
    data_set = data_set.repeat(count=repeat_num)

    return data_set


def _convert_to_mindrecord(data_home, features, labels, weight_np=None, training=True):
    """
    convert imdb dataset to mindrecord dataset
    """
    if weight_np is not None:
        np.savetxt(os.path.join(data_home, 'weight.txt'), weight_np)

    # write mindrecord
    schema_json = {"id": {"type": "int32"},
                   "label": {"type": "int32"},
                   "feature": {"type": "int32", "shape": [-1]}}

    data_dir = os.path.join(data_home, "aclImdb_train.mindrecord")
    if not training:
        data_dir = os.path.join(data_home, "aclImdb_test.mindrecord")

    def get_imdb_data(features, labels):
        data_list = []
        for i, (label, feature) in enumerate(zip(labels, features)):
            data_json = {"id": i,
                         "label": int(label),
                         "feature": feature.reshape(-1)}
            data_list.append(data_json)
        return data_list

    writer = FileWriter(data_dir, shard_num=4)
    data = get_imdb_data(features, labels)
    writer.add_schema(schema_json, "nlp_schema")
    writer.add_index(["id", "label"])
    writer.write_raw_data(data)
    writer.commit()
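A quick sanity check (the path is a placeholder) that the shards written above can be read back; passing MindDataset the first shard file loads the whole shard group:

```python
import mindspore.dataset as ds

data_set = ds.MindDataset("./preprocess/aclImdb_train.mindrecord0",
                          columns_list=["feature", "label"])
print("samples:", data_set.get_dataset_size())
```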


def convert_to_mindrecord(embed_size, aclimdb_path, preprocess_path, glove_path):
    """
    convert imdb dataset to mindrecord dataset
    """
    parser = ImdbParser(aclimdb_path, glove_path, embed_size)
    parser.parse()

    if not os.path.exists(preprocess_path):
        print(f"preprocess path {preprocess_path} does not exist")
        os.makedirs(preprocess_path)

    train_features, train_labels, train_weight_np = parser.get_datas('train')
    _convert_to_mindrecord(preprocess_path, train_features, train_labels, train_weight_np)

    test_features, test_labels, _ = parser.get_datas('test')
    _convert_to_mindrecord(preprocess_path, test_features, test_labels, training=False)

+ 81
- 0
example/lstm_aclImdb/eval.py View File

@@ -0,0 +1,81 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
#################eval lstm example on aclImdb########################
python eval.py --ckpt_path=./lstm-20-390.ckpt
"""
import argparse
import os

import numpy as np

from config import lstm_cfg as cfg
from dataset import create_dataset, convert_to_mindrecord
from mindspore import Tensor, nn, Model, context
from mindspore.model_zoo.lstm import SentimentNet
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor
from mindspore.train.serialization import load_checkpoint, load_param_into_net

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MindSpore LSTM Example')
    parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'],
                        help='whether to preprocess data.')
    parser.add_argument('--aclimdb_path', type=str, default="./aclImdb",
                        help='path where the dataset is stored.')
    parser.add_argument('--glove_path', type=str, default="./glove",
                        help='path where the GloVe is stored.')
    parser.add_argument('--preprocess_path', type=str, default="./preprocess",
                        help='path where the pre-process data is stored.')
    parser.add_argument('--ckpt_path', type=str, default=None,
                        help='the checkpoint file path used to evaluate model.')
    parser.add_argument('--device_target', type=str, default="GPU", choices=['GPU', 'CPU'],
                        help='the target device to run, support "GPU", "CPU". Default: "GPU".')
    args = parser.parse_args()

    context.set_context(
        mode=context.GRAPH_MODE,
        save_graphs=False,
        device_target=args.device_target)

    if args.preprocess == "true":
        print("============== Starting Data Pre-processing ==============")
        convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path)

    embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
    network = SentimentNet(vocab_size=embedding_table.shape[0],
                           embed_size=cfg.embed_size,
                           num_hiddens=cfg.num_hiddens,
                           num_layers=cfg.num_layers,
                           bidirectional=cfg.bidirectional,
                           num_classes=cfg.num_classes,
                           weight=Tensor(embedding_table),
                           batch_size=cfg.batch_size)

    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
    loss_cb = LossMonitor()

    model = Model(network, loss, opt, {'acc': Accuracy()})

    print("============== Starting Testing ==============")
    ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False)
    param_dict = load_checkpoint(args.ckpt_path)
    load_param_into_net(network, param_dict)
    if args.device_target == "CPU":
        acc = model.eval(ds_eval, dataset_sink_mode=False)
    else:
        acc = model.eval(ds_eval)
    print("============== Accuracy:{} ==============".format(acc))

+ 155
- 0
example/lstm_aclImdb/imdb.py View File

@@ -0,0 +1,155 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
imdb dataset parser.
"""
import os
from itertools import chain

import gensim
import numpy as np


class ImdbParser():
    """
    parse aclImdb data to features and labels.
    sentence->tokenized->encoded->padding->features
    """

    def __init__(self, imdb_path, glove_path, embed_size=300):
        self.__segs = ['train', 'test']
        self.__label_dic = {'pos': 1, 'neg': 0}
        self.__imdb_path = imdb_path
        self.__glove_dim = embed_size
        self.__glove_file = os.path.join(glove_path, 'glove.6B.' + str(self.__glove_dim) + 'd.txt')

        # properties
        self.__imdb_datas = {}
        self.__features = {}
        self.__labels = {}
        self.__vacab = {}
        self.__word2idx = {}
        self.__weight_np = {}
        self.__wvmodel = None

    def parse(self):
        """
        parse imdb data to memory
        """
        self.__wvmodel = gensim.models.KeyedVectors.load_word2vec_format(self.__glove_file)

        for seg in self.__segs:
            self.__parse_imdb_datas(seg)
            self.__parse_features_and_labels(seg)
            self.__gen_weight_np(seg)

    def __parse_imdb_datas(self, seg):
        """
        load data from txt
        """
        data_lists = []
        for label_name, label_id in self.__label_dic.items():
            sentence_dir = os.path.join(self.__imdb_path, seg, label_name)
            for file in os.listdir(sentence_dir):
                with open(os.path.join(sentence_dir, file), mode='r', encoding='utf8') as f:
                    sentence = f.read().replace('\n', '')
                data_lists.append([sentence, label_id])
        self.__imdb_datas[seg] = data_lists

    def __parse_features_and_labels(self, seg):
        """
        parse features and labels
        """
        features = []
        labels = []
        for sentence, label in self.__imdb_datas[seg]:
            features.append(sentence)
            labels.append(label)

        self.__features[seg] = features
        self.__labels[seg] = labels

        # update feature to tokenized
        self.__updata_features_to_tokenized(seg)
        # parse vacab
        self.__parse_vacab(seg)
        # encode feature
        self.__encode_features(seg)
        # padding feature
        self.__padding_features(seg)

    def __updata_features_to_tokenized(self, seg):
        tokenized_features = []
        for sentence in self.__features[seg]:
            tokenized_sentence = [word.lower() for word in sentence.split(" ")]
            tokenized_features.append(tokenized_sentence)
        self.__features[seg] = tokenized_features

    def __parse_vacab(self, seg):
        # vocab
        tokenized_features = self.__features[seg]
        vocab = set(chain(*tokenized_features))
        self.__vacab[seg] = vocab

        # word_to_idx: {'hello': 1, 'world': 111, ... '<unk>': 0}
        word_to_idx = {word: i + 1 for i, word in enumerate(vocab)}
        word_to_idx['<unk>'] = 0
        self.__word2idx[seg] = word_to_idx

    def __encode_features(self, seg):
        """ encode word to index """
        word_to_idx = self.__word2idx['train']
        encoded_features = []
        for tokenized_sentence in self.__features[seg]:
            encoded_sentence = []
            for word in tokenized_sentence:
                encoded_sentence.append(word_to_idx.get(word, 0))
            encoded_features.append(encoded_sentence)
        self.__features[seg] = encoded_features

    def __padding_features(self, seg, maxlen=500, pad=0):
        """ pad all features to the same length """
        padded_features = []
        for feature in self.__features[seg]:
            if len(feature) >= maxlen:
                padded_feature = feature[:maxlen]
            else:
                padded_feature = feature
                while len(padded_feature) < maxlen:
                    padded_feature.append(pad)
            padded_features.append(padded_feature)
        self.__features[seg] = padded_features

    def __gen_weight_np(self, seg):
        """
        generate weight by gensim
        """
        weight_np = np.zeros((len(self.__word2idx[seg]), self.__glove_dim), dtype=np.float32)
        for word, idx in self.__word2idx[seg].items():
            if word not in self.__wvmodel:
                continue
            word_vector = self.__wvmodel.get_vector(word)
            weight_np[idx, :] = word_vector

        self.__weight_np[seg] = weight_np

    def get_datas(self, seg):
        """
        return features, labels, and weight
        """
        features = np.array(self.__features[seg]).astype(np.int32)
        labels = np.array(self.__labels[seg]).astype(np.int32)
        weight = np.array(self.__weight_np[seg])
        return features, labels, weight
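A usage sketch (assumed local paths) mirroring how `convert_to_mindrecord` in dataset.py drives this parser:

```python
parser = ImdbParser("./aclImdb", "./glove", embed_size=300)  # placeholder paths
parser.parse()
features, labels, weight = parser.get_datas('train')
# features: (num_reviews, 500) int32; labels: (num_reviews,) int32;
# weight: (vocab_size, 300) float32 embedding table
print(features.shape, labels.shape, weight.shape)
```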

+ 83
- 0
example/lstm_aclImdb/train.py View File

@@ -0,0 +1,83 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
#################train lstm example on aclImdb########################
python train.py --preprocess=true --aclimdb_path=your_imdb_path --glove_path=your_glove_path
"""
import argparse
import os

import numpy as np

from config import lstm_cfg as cfg
from dataset import convert_to_mindrecord
from dataset import create_dataset
from mindspore import Tensor, nn, Model, context
from mindspore.model_zoo.lstm import SentimentNet
from mindspore.nn import Accuracy
from mindspore.train.callback import LossMonitor, CheckpointConfig, ModelCheckpoint, TimeMonitor

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MindSpore LSTM Example')
    parser.add_argument('--preprocess', type=str, default='false', choices=['true', 'false'],
                        help='whether to preprocess data.')
    parser.add_argument('--aclimdb_path', type=str, default="./aclImdb",
                        help='path where the dataset is stored.')
    parser.add_argument('--glove_path', type=str, default="./glove",
                        help='path where the GloVe is stored.')
    parser.add_argument('--preprocess_path', type=str, default="./preprocess",
                        help='path where the pre-process data is stored.')
    parser.add_argument('--ckpt_path', type=str, default="./",
                        help='the path to save the checkpoint file.')
    parser.add_argument('--device_target', type=str, default="GPU", choices=['GPU', 'CPU'],
                        help='the target device to run, support "GPU", "CPU". Default: "GPU".')
    args = parser.parse_args()

    context.set_context(
        mode=context.GRAPH_MODE,
        save_graphs=False,
        device_target=args.device_target)

    if args.preprocess == "true":
        print("============== Starting Data Pre-processing ==============")
        convert_to_mindrecord(cfg.embed_size, args.aclimdb_path, args.preprocess_path, args.glove_path)

    embedding_table = np.loadtxt(os.path.join(args.preprocess_path, "weight.txt")).astype(np.float32)
    network = SentimentNet(vocab_size=embedding_table.shape[0],
                           embed_size=cfg.embed_size,
                           num_hiddens=cfg.num_hiddens,
                           num_layers=cfg.num_layers,
                           bidirectional=cfg.bidirectional,
                           num_classes=cfg.num_classes,
                           weight=Tensor(embedding_table),
                           batch_size=cfg.batch_size)

    loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
    loss_cb = LossMonitor()

    model = Model(network, loss, opt, {'acc': Accuracy()})

    print("============== Starting Training ==============")
    ds_train = create_dataset(args.preprocess_path, cfg.batch_size, cfg.num_epochs)
    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_checkpoint_steps,
                                 keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="lstm", directory=args.ckpt_path, config=config_ck)
    time_cb = TimeMonitor(data_size=ds_train.get_dataset_size())
    if args.device_target == "CPU":
        model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb], dataset_sink_mode=False)
    else:
        model.train(cfg.num_epochs, ds_train, callbacks=[time_cb, ckpoint_cb, loss_cb])
    print("============== Training Success ==============")

mindspore/model_zoo/mobilenetv2/Readme.md → example/mobilenetv2/Readme.md View File

@@ -1,30 +1,21 @@
 # MobileNetV2 Description
 
 MobileNetV2 is a significant improvement over MobileNetV1 and pushes the state of the art for mobile visual recognition including classification, object detection and semantic segmentation.
 
-MobileNetV2 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm and then subsequently improved through novel architecture advances. Nov 20, 2019.
+MobileNetV2 builds upon the ideas from MobileNetV1, using depthwise separable convolution as efficient building blocks. However, V2 introduces two new features to the architecture: 1) linear bottlenecks between the layers, and 2) shortcut connections between the bottlenecks.
 
-[Paper](https://arxiv.org/pdf/1905.02244) Howard, Andrew, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang et al. "Searching for MobileNetV2." In Proceedings of the IEEE International Conference on Computer Vision, pp. 1314-1324. 2019.
-
-# Model architecture
-
-The overall network architecture of MobileNetV2 is show below:
-
-[Link](https://arxiv.org/pdf/1905.02244)
+[Paper](https://arxiv.org/pdf/1801.04381) Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." Proceedings of the IEEE conference on computer vision and pattern recognition. 2018.
 
 # Dataset
 
-Dataset used: [imagenet](http://www.image-net.org/)
+Dataset used: imagenet2012
 
-- Dataset size: ~125G, 1.2W colorful images in 1000 classes
-- Train: 120G, 1.2W images
-- Test: 5G, 50000 images
+- Dataset size: ~125G
+- Train: 120G, 1281167 images: 1000 directories
+- Test: 5G, 50000 images: images should be classified into 1000 directories firstly, just like train images
 - Data format: RGB images.
 - Note: Data will be processed in src/dataset.py
 
-
-# Features
-
 
 # Environment Requirements
 
 - Hardware(Ascend/GPU)
@@ -60,8 +51,8 @@ Dataset used: [imagenet](http://www.image-net.org/)

### Usage

-- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
-- GPU: sh run_trian.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]
+- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]
+- GPU: sh run_trian.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]

### Launch

@@ -133,7 +124,7 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.

#### Inference Performance

-| Parameters                 | GoogLeNet                     |                           |                      |
+| Parameters                 |                               |                           |                      |
| -------------------------- | ----------------------------- | ------------------------- | -------------------- |
| Model Version | V1 | | |
| Resource | Huawei 910 | NV SMX2 V100-32G | Huawei 310 |
@@ -148,4 +139,4 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.
| Model for inference | | | |

# ModelZoo Homepage
-[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)
\ No newline at end of file
+[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)

mindspore/model_zoo/mobilenetv3/eval.py → example/mobilenetv2/eval.py View File

@@ -22,9 +22,10 @@ from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype
-from src.dataset import create_dataset
-from mindspore.model_zoo.mobilenetV2 import mobilenet_v2
+from src.dataset import create_dataset_py
+from src.config import config_ascend, config_gpu
+from src.mobilenetV2 import mobilenet_v2

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
@@ -35,21 +36,23 @@ args_opt = parser.parse_args()

 if __name__ == '__main__':
     config_platform = None
+    net = None
     if args_opt.platform == "Ascend":
         config_platform = config_ascend
         device_id = int(os.getenv('DEVICE_ID'))
         context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                             device_id=device_id, save_graphs=False)
+        net = mobilenet_v2(num_classes=config_platform.num_classes, platform="Ascend")
     elif args_opt.platform == "GPU":
         config_platform = config_gpu
         context.set_context(mode=context.GRAPH_MODE,
                             device_target="GPU", save_graphs=False)
+        net = mobilenet_v2(num_classes=config_platform.num_classes, platform="GPU")
     else:
         raise ValueError("Unsupport platform.")
 
     loss = nn.SoftmaxCrossEntropyWithLogits(
         is_grad=False, sparse=True, reduction='mean')
-    net = mobilenet_v2(num_classes=config_platform.num_classes)
 
     if args_opt.platform == "Ascend":
         net.to_float(mstype.float16)
@@ -57,11 +60,11 @@ if __name__ == '__main__':
         if isinstance(cell, nn.Dense):
             cell.to_float(mstype.float32)
 
-    dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                             do_train=False,
-                             config=config_platform,
-                             platform=args_opt.platform,
-                             batch_size=config_platform.batch_size)
+    dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                do_train=False,
+                                config=config_platform,
+                                platform=args_opt.platform,
+                                batch_size=config_platform.batch_size)
     step_size = dataset.get_dataset_size()
 
     if args_opt.checkpoint_path:

mindspore/model_zoo/mobilenetv2/scripts/run_infer.sh → example/mobilenetv2/scripts/run_infer.sh View File

@@ -52,4 +52,4 @@ python ${BASEPATH}/../eval.py \
     --platform=$1 \
     --dataset_path=$2 \
     --checkpoint_path=$3 \
-    &> infer.log &  # dataset val folder path
+    &> ../infer.log &  # dataset val folder path

mindspore/model_zoo/mobilenetv2/scripts/run_train.sh → example/mobilenetv2/scripts/run_train.sh View File

@@ -36,13 +36,14 @@ run_ascend()
     fi
     mkdir ../train
     cd ../train || exit
-    python ${BASEPATH}/../launch.py \
+    python ${BASEPATH}/../src/launch.py \
         --nproc_per_node=$2 \
         --visible_devices=$4 \
         --server_id=$3 \
-        --training_script=${BASEPATH}/train.py \
+        --training_script=${BASEPATH}/../train.py \
         --dataset_path=$5 \
-        --platform=$1 &> train.log &  # dataset train folder
+        --pre_trained=$6 \
+        --platform=$1 &> ../train.log &  # dataset train folder
 }
 
 run_gpu()
@@ -73,14 +74,15 @@ run_gpu()
     python ${BASEPATH}/../train.py \
         --dataset_path=$4 \
         --platform=$1 \
-        &> train.log &  # dataset train folder
+        --pre_trained=$5 \
+        &> ../train.log &  # dataset train folder
 }
 
-if [ $# -gt 5 ] || [ $# -lt 4 ]
+if [ $# -gt 6 ] || [ $# -lt 4 ]
 then
     echo "Usage:\n \
-          Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
-          GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
+          Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
+          GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
 "
     exit 1
 fi

mindspore/model_zoo/mobilenetv2/src/config.py → example/mobilenetv2/src/config.py View File


+ 160
- 0
example/mobilenetv2/src/dataset.py View File

@@ -0,0 +1,160 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.transforms.vision.py_transforms as P

def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if do_train:
            if rank_size == 1:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
            else:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                             num_shards=rank_size, shard_id=rank_id)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupport platform.")

    resize_height = config.image_height

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        ds = ds.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = C.Decode()
    resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize(256)
    center_crop = C.CenterCrop(resize_height)
    normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
    change_swap_op = C.HWC2CHW()

    if do_train:
        trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op]
    else:
        trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds

def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if do_train:
            if rank_size == 1:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
            else:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                             num_shards=rank_size, shard_id=rank_id)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    elif platform == "GPU":
        if do_train:
            from mindspore.communication.management import get_rank, get_group_size
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=get_group_size(), shard_id=get_rank())
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupport platform.")

    resize_height = config.image_height

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        ds = ds.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(resize_height)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if do_train:
        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
    else:
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P.ComposeOp(trans)

    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
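A usage sketch for the two pipelines above (the dataset path is a placeholder, and the RANK_* variables are normally exported by the launch script): `create_dataset` runs decoding and augmentation through c_transforms in C++, while `create_dataset_py` uses py_transforms with Python multiprocessing, which is what train.py and eval.py switch to in this change.

```python
import os
from src.config import config_ascend
from src.dataset import create_dataset, create_dataset_py

os.environ.setdefault("RANK_SIZE", "1")  # read by the Ascend branch
os.environ.setdefault("RANK_ID", "0")

train_ds = create_dataset("/path/to/imagenet/train", do_train=True,
                          config=config_ascend, platform="Ascend",
                          batch_size=config_ascend.batch_size)
train_ds_py = create_dataset_py("/path/to/imagenet/train", do_train=True,
                                config=config_ascend, platform="Ascend",
                                batch_size=config_ascend.batch_size)
print(train_ds.get_dataset_size(), train_ds_py.get_dataset_size())
```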

mindspore/model_zoo/mobilenetv3/src/launch.py → example/mobilenetv2/src/launch.py View File

@@ -18,6 +18,7 @@ import sys
 import json
 import subprocess
 import shutil
+import platform
 from argparse import ArgumentParser
 
 def parse_args():
@@ -79,7 +80,8 @@ def main():
         device_ips[device_id] = device_ip
         print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
     hccn_table = {}
-    hccn_table['board_id'] = '0x0000'
+    arch = platform.processor()
+    hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
     hccn_table['chip_info'] = '910'
     hccn_table['deploy_mode'] = 'lab'
     hccn_table['group_count'] = '1'
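The board_id change keys the value off the host CPU architecture; a small illustration (the `.get` fallback is an addition for safety, not in the diff):

```python
import platform

arch = platform.processor()  # e.g. 'x86_64' on Intel hosts, 'aarch64' on ARM
board_id = {'aarch64': '0x002f', 'x86_64': '0x0000'}.get(arch, '0x0000')
print(arch, board_id)
```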

mindspore/model_zoo/mobilenetv2/src/lr_generator.py → example/mobilenetv2/src/lr_generator.py View File


mindspore/model_zoo/mobilenetv2/train.py → example/mobilenetv2/train.py View File

@@ -32,12 +32,12 @@ from mindspore.train.model import Model, ParallelMode
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
 from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
-from mindspore.communication.management import init
+from mindspore.communication.management import init, get_group_size
-from mindspore.model_zoo.mobilenetV2 import mobilenet_v2
 import mindspore.dataset.engine as de
-from src.dataset import create_dataset
+from src.dataset import create_dataset_py
 from src.lr_generator import get_lr
 from src.config import config_gpu, config_ascend
+from src.mobilenetV2 import mobilenet_v2

random.seed(1)
np.random.seed(1)
@@ -146,7 +146,7 @@ class Monitor(Callback):
         self.losses.append(step_loss)
         cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num
 
-        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
+        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
             cb_params.cur_epoch_num -
             1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
             np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))
@@ -157,6 +157,11 @@ if __name__ == '__main__':
         # train on gpu
         print("train args: ", args_opt, "\ncfg: ", config_gpu)
 
+        init('nccl')
+        context.set_auto_parallel_context(parallel_mode="data_parallel",
+                                          mirror_mean=True,
+                                          device_num=get_group_size())
+
         # define net
         net = mobilenet_v2(num_classes=config_gpu.num_classes, platform="GPU")
         # define loss
@@ -168,12 +173,12 @@ if __name__ == '__main__':
             is_grad=False, sparse=True, reduction='mean')
         # define dataset
         epoch_size = config_gpu.epoch_size
-        dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                                 do_train=True,
-                                 config=config_gpu,
-                                 platform=args_opt.platform,
-                                 repeat_num=epoch_size,
-                                 batch_size=config_gpu.batch_size)
+        dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                    do_train=True,
+                                    config=config_gpu,
+                                    platform=args_opt.platform,
+                                    repeat_num=epoch_size,
+                                    batch_size=config_gpu.batch_size)
         step_size = dataset.get_dataset_size()
         # resume
         if args_opt.pre_trained:
@@ -216,23 +221,23 @@ if __name__ == '__main__':
         init()
 
         epoch_size = config_ascend.epoch_size
-        net = mobilenet_v2(num_classes=config_ascend.num_classes)
+        net = mobilenet_v2(num_classes=config_ascend.num_classes, platform="Ascend")
         net.to_float(mstype.float16)
         for _, cell in net.cells_and_names():
             if isinstance(cell, nn.Dense):
                 cell.to_float(mstype.float32)
         if config_ascend.label_smooth > 0:
             loss = CrossEntropyWithLabelSmooth(
-                smooth_factor=config_ascend.label_smooth, num_classes=config.num_classes)
+                smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
         else:
             loss = SoftmaxCrossEntropyWithLogits(
                 is_grad=False, sparse=True, reduction='mean')
-        dataset = create_dataset(dataset_path=args_opt.dataset_path,
-                                 do_train=True,
-                                 config=config_ascend,
-                                 platform=args_opt.platform,
-                                 repeat_num=epoch_size,
-                                 batch_size=config_ascend.batch_size)
+        dataset = create_dataset_py(dataset_path=args_opt.dataset_path,
+                                    do_train=True,
+                                    config=config_ascend,
+                                    platform=args_opt.platform,
+                                    repeat_num=epoch_size,
+                                    batch_size=config_ascend.batch_size)
         step_size = dataset.get_dataset_size()
         if args_opt.pre_trained:
             param_dict = load_checkpoint(args_opt.pre_trained)

+ 101
- 0
example/mobilenetv2_quant/Readme.md View File

@@ -0,0 +1,101 @@
# MobileNetV2 Description

MobileNetV2 is a significant improvement over MobileNetV1 and pushes the state of the art for mobile visual recognition including classification, object detection and semantic segmentation.

MobileNetV2 builds upon the ideas from MobileNetV1, using depthwise separable convolution as efficient building blocks. However, V2 introduces two new features to the architecture: 1) linear bottlenecks between the layers, and 2) shortcut connections between the bottlenecks.

[Paper](https://arxiv.org/pdf/1801.04381) Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks." Proceedings of the IEEE conference on computer vision and pattern recognition. 2018.

# Dataset

Dataset used: imagenet

- Dataset size: ~125G
- Train: 120G, 1281167 images: 1000 directories
- Test: 5G, 50000 images: the images should first be sorted into 1000 class directories, just like the train images
- Data format: RGB images.
- Note: Data will be processed in src/dataset.py

# Environment Requirements

- Hardware(Ascend)
- Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
- Framework
- [MindSpore](http://10.90.67.50/mindspore/archive/20200506/OpenSource/me_vm_x86/)
- For more information, please check the resources below:
- [MindSpore tutorials](https://www.mindspore.cn/tutorial/zh-CN/master/index.html)
- [MindSpore API](https://www.mindspore.cn/api/zh-CN/master/index.html)


# Script description

## Script and sample code

```python
├── mobilenetv2_quant
    ├── Readme.md
    ├── scripts
    │   ├──run_train.sh
    │   ├──run_eval.sh
    ├── src
    │   ├──config.py
    │   ├──dataset.py
    │   ├──launch.py
    │   ├──lr_generator.py
    │   ├──mobilenetV2_quant.py
    ├── train.py
    ├── eval.py


## Training process

### Usage

- Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]


### Launch

```
# training example
Ascend: sh run_train.sh Ascend 4 192.168.0.1 0,1,2,3 ~/imagenet/train/ ~/mobilenet.ckpt
```

### Result

Training results will be stored in the example path. Checkpoints will be stored at `./checkpoint` by default, and the training log will be redirected to `./train/train.log`, as follows.

```
epoch: [ 0/200], step:[ 624/ 625], loss:[5.258/5.258], time:[140412.236], lr:[0.100]
epoch time: 140522.500, per step time: 224.836, avg loss: 5.258
epoch: [ 1/200], step:[ 624/ 625], loss:[3.917/3.917], time:[138221.250], lr:[0.200]
epoch time: 138331.250, per step time: 221.330, avg loss: 3.917
```

## Eval process

### Usage

- Ascend: sh run_infer.sh Ascend [DATASET_PATH] [CHECKPOINT_PATH]

### Launch

```
# infer example
Ascend: sh run_infer.sh Ascend ~/imagenet/val/ ~/train/mobilenet-200_625.ckpt
```

> Checkpoints are produced during the training process.

### Result

Inference results will be stored in the example path; you can find results like the following in `val.log`.

```
result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.ckpt
```


# ModelZoo Homepage
[Link](https://gitee.com/mindspore/mindspore/tree/master/mindspore/model_zoo)

+ 63
- 0
example/mobilenetv2_quant/eval.py View File

@@ -0,0 +1,63 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
eval.
"""
import os
import argparse
from mindspore import context
from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.mobilenetV2_quant import mobilenet_v2_quant
from src.dataset import create_dataset
from src.config import config_ascend

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--platform', type=str, default=None, help='run platform')
args_opt = parser.parse_args()

if __name__ == '__main__':
    config_platform = None
    net = None
    if args_opt.platform == "Ascend":
        config_platform = config_ascend
        device_id = int(os.getenv('DEVICE_ID'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                            device_id=device_id, save_graphs=False)
        net = mobilenet_v2_quant(num_classes=config_platform.num_classes)
    else:
        raise ValueError("Unsupport platform.")

    loss = nn.SoftmaxCrossEntropyWithLogits(
        is_grad=False, sparse=True, reduction='mean')

    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=False,
                             config=config_platform,
                             platform=args_opt.platform,
                             batch_size=config_platform.batch_size)
    step_size = dataset.get_dataset_size()

    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    net.set_train(False)

    model = Model(net, loss_fn=loss, metrics={'acc'})
    res = model.eval(dataset)
    print("result:", res, "ckpt=", args_opt.checkpoint_path)

+ 53
- 0
example/mobilenetv2_quant/scripts/run_infer.sh View File

@@ -0,0 +1,53 @@
#!/usr/bin/env bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
if [ $# != 3 ]
then
    echo "Ascend: sh run_infer.sh [PLATFORM] [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# check dataset path
if [ ! -d $2 ]
then
    echo "error: DATASET_PATH=$2 is not a directory"
    exit 1
fi

# check checkpoint file
if [ ! -f $3 ]
then
    echo "error: CHECKPOINT_PATH=$3 is not a file"
    exit 1
fi

# set environment
BASEPATH=$(cd "`dirname $0`" || exit; pwd)
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
if [ -d "../eval" ];
then
    rm -rf ../eval
fi
mkdir ../eval
cd ../eval || exit

# launch
python ${BASEPATH}/../eval.py \
    --platform=$1 \
    --dataset_path=$2 \
    --checkpoint_path=$3 \
    &> infer.log &  # dataset val folder path

+ 62
- 0
example/mobilenetv2_quant/scripts/run_train.sh View File

@@ -0,0 +1,62 @@
#!/usr/bin/env bash
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

run_ascend()
{
    if [ $2 -lt 1 ] || [ $2 -gt 8 ]
    then
        echo "error: DEVICE_NUM=$2 is not in (1-8)"
        exit 1
    fi

    if [ ! -d $5 ] && [ ! -f $5 ]
    then
        echo "error: DATASET_PATH=$5 is not a directory or file"
        exit 1
    fi

    BASEPATH=$(cd "`dirname $0`" || exit; pwd)
    export PYTHONPATH=${BASEPATH}:$PYTHONPATH
    if [ -d "../train" ];
    then
        rm -rf ../train
    fi
    mkdir ../train
    cd ../train || exit
    python ${BASEPATH}/../src/launch.py \
        --nproc_per_node=$2 \
        --visible_devices=$4 \
        --server_id=$3 \
        --training_script=${BASEPATH}/../train.py \
        --dataset_path=$5 \
        --pre_trained=$6 \
        --platform=$1 &> train.log &  # dataset train folder
}

if [ $# -gt 6 ] || [ $# -lt 4 ]
then
    echo "Usage:\n \
          Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
          "
    exit 1
fi

if [ $1 = "Ascend" ] ; then
    run_ascend "$@"
else
    echo "not support platform"
fi;


+ 38
- 0
example/mobilenetv2_quant/src/config.py View File

@@ -0,0 +1,38 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed

config_ascend = ed({
    "num_classes": 1000,
    "image_height": 224,
    "image_width": 224,
    "batch_size": 192,
    "data_load_mode": "mindrecord",
    "epoch_size": 60,
    "start_epoch": 200,
    "warmup_epochs": 1,
    "lr": 0.3,
    "momentum": 0.9,
    "weight_decay": 4e-5,
    "label_smooth": 0.1,
    "loss_scale": 1024,
    "save_checkpoint": True,
    "save_checkpoint_epochs": 1,
    "keep_checkpoint_max": 200,
    "save_checkpoint_path": "./checkpoint",
})

+ 156
- 0
example/mobilenetv2_quant/src/dataset.py View File

@@ -0,0 +1,156 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
create train or eval dataset.
"""
import os
from functools import partial
import mindspore.common.dtype as mstype
import mindspore.dataset.engine as de
import mindspore.dataset.transforms.vision.c_transforms as C
import mindspore.dataset.transforms.c_transforms as C2
import mindspore.dataset.transforms.vision.py_transforms as P
from src.config import config_ascend


def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        columns_list = ['image', 'label']
        if config_ascend.data_load_mode == "mindrecord":
            load_func = partial(de.MindDataset, dataset_path, columns_list)
        else:
            load_func = partial(de.ImageFolderDatasetV2, dataset_path)
        if do_train:
            if rank_size == 1:
                ds = load_func(num_parallel_workers=8, shuffle=True)
            else:
                ds = load_func(num_parallel_workers=8, shuffle=True,
                               num_shards=rank_size, shard_id=rank_id)
        else:
            ds = load_func(num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupport platform.")

    resize_height = config.image_height

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        ds = ds.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = C.Decode()
    resize_crop_decode_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize(256)
    center_crop = C.CenterCrop(resize_height)
    normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
                               std=[0.229 * 255, 0.224 * 255, 0.225 * 255])
    change_swap_op = C.HWC2CHW()

    if do_train:
        trans = [resize_crop_decode_op, horizontal_flip_op, normalize_op, change_swap_op]
    else:
        trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=16)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds


def create_dataset_py(dataset_path, do_train, config, platform, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset using Python transforms.

    Args:
        dataset_path (string): the path of the dataset.
        do_train (bool): whether the dataset is used for training or evaluation.
        config: configuration object (e.g. config_ascend) holding image sizes.
        platform (string): run platform; only "Ascend" is supported here.
        repeat_num (int): the repeat times of the dataset. Default: 1.
        batch_size (int): the batch size of the dataset. Default: 32.

    Returns:
        dataset
    """
    if platform == "Ascend":
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if do_train:
            if rank_size == 1:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
            else:
                ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                             num_shards=rank_size, shard_id=rank_id)
        else:
            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupported platform.")

    resize_height = config.image_height

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        ds = ds.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(resize_height)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if do_train:
        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
    else:
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P.ComposeOp(trans)

    ds = ds.map(input_columns="image", operations=compose(), num_parallel_workers=8, python_multiprocessing=True)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
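
Editor's note: a hedged usage sketch of create_dataset above (not part of the diff; the path and values are illustrative). The function reads RANK_SIZE and RANK_ID from the environment, so a single-device dry run must set them first:

import os
os.environ.setdefault("RANK_SIZE", "1")
os.environ.setdefault("RANK_ID", "0")

ds = create_dataset("/path/to/imagenet/train", do_train=True,
                    config=config_ascend, platform="Ascend", batch_size=32)
print(ds.get_dataset_size())  # batches per epoch, since drop_remainder=True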

+ 166
- 0
example/mobilenetv2_quant/src/launch.py

@@ -0,0 +1,166 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""launch train script"""
import os
import sys
import json
import subprocess
import shutil
import platform
from argparse import ArgumentParser


def parse_args():
    """
    Parse args.

    Returns:
        args.

    Examples:
        >>> parse_args()
    """
    parser = ArgumentParser(description="mindspore distributed training launch "
                                        "helper utility that will spawn up "
                                        "multiple distributed processes")
    parser.add_argument("--nproc_per_node", type=int, default=1,
                        help="The number of processes to launch on each node; "
                             "for device training, this should be set to the "
                             "number of devices in your system so that each "
                             "process can be bound to a single device.")
    parser.add_argument("--visible_devices", type=str, default="0,1,2,3,4,5,6,7",
                        help="will use the visible devices sequentially")
    parser.add_argument("--server_id", type=str, default="",
                        help="server ip")
    parser.add_argument("--training_script", type=str,
                        help="The full path to the single-device training "
                             "program/script to be launched in parallel, "
                             "followed by all the arguments for the "
                             "training script")
    # the rest of the arguments are forwarded to the training program
    args, unknown = parser.parse_known_args()
    args.training_script_args = unknown
    return args


def main():
    print("start", __file__)
    args = parse_args()
    print(args)
    visible_devices = args.visible_devices.split(',')
    assert os.path.isfile(args.training_script)
    assert len(visible_devices) >= args.nproc_per_node
    print('visible_devices:{}'.format(visible_devices))
    if not args.server_id:
        print('please input server ip!')
        exit(0)
    print('server_id:{}'.format(args.server_id))

    # construct hccn_table
    hccn_configs = open('/etc/hccn.conf', 'r').readlines()
    device_ips = {}
    for hccn_item in hccn_configs:
        hccn_item = hccn_item.strip()
        if hccn_item.startswith('address_'):
            device_id, device_ip = hccn_item.split('=')
            device_id = device_id.split('_')[1]
            device_ips[device_id] = device_ip
            print('device_id:{}, device_ip:{}'.format(device_id, device_ip))
    hccn_table = {}
    arch = platform.processor()
    hccn_table['board_id'] = {'aarch64': '0x002f', 'x86_64': '0x0000'}[arch]
    hccn_table['chip_info'] = '910'
    hccn_table['deploy_mode'] = 'lab'
    hccn_table['group_count'] = '1'
    hccn_table['group_list'] = []
    instance_list = []
    usable_dev = ''
    for instance_id in range(args.nproc_per_node):
        instance = {}
        instance['devices'] = []
        device_id = visible_devices[instance_id]
        device_ip = device_ips[device_id]
        usable_dev += str(device_id)
        instance['devices'].append({
            'device_id': device_id,
            'device_ip': device_ip,
        })
        instance['rank_id'] = str(instance_id)
        instance['server_id'] = args.server_id
        instance_list.append(instance)
    hccn_table['group_list'].append({
        'device_num': str(args.nproc_per_node),
        'server_num': '1',
        'group_name': '',
        'instance_count': str(args.nproc_per_node),
        'instance_list': instance_list,
    })
    hccn_table['para_plane_nic_location'] = 'device'
    hccn_table['para_plane_nic_name'] = []
    for instance_id in range(args.nproc_per_node):
        eth_id = visible_devices[instance_id]
        hccn_table['para_plane_nic_name'].append('eth{}'.format(eth_id))
    hccn_table['para_plane_nic_num'] = str(args.nproc_per_node)
    hccn_table['status'] = 'completed'

    # save hccn_table to file
    table_path = os.getcwd()
    if not os.path.exists(table_path):
        os.mkdir(table_path)
    table_fn = os.path.join(table_path,
                            'rank_table_{}p_{}_{}.json'.format(args.nproc_per_node, usable_dev, args.server_id))
    with open(table_fn, 'w') as table_fp:
        json.dump(hccn_table, table_fp, indent=4)
    sys.stdout.flush()

    # spawn the processes
    processes = []
    cmds = []
    log_files = []
    env = os.environ.copy()
    env['RANK_SIZE'] = str(args.nproc_per_node)
    cur_path = os.getcwd()
    for rank_id in range(0, args.nproc_per_node):
        os.chdir(cur_path)
        device_id = visible_devices[rank_id]
        device_dir = os.path.join(cur_path, 'device{}'.format(rank_id))
        env['RANK_ID'] = str(rank_id)
        env['DEVICE_ID'] = str(device_id)
        if args.nproc_per_node > 1:
            env['MINDSPORE_HCCL_CONFIG_PATH'] = table_fn
            env['RANK_TABLE_FILE'] = table_fn
        if os.path.exists(device_dir):
            shutil.rmtree(device_dir)
        os.mkdir(device_dir)
        os.chdir(device_dir)
        cmd = [sys.executable, '-u']
        cmd.append(args.training_script)
        cmd.extend(args.training_script_args)
        log_file = open('{dir}/log{id}.log'.format(dir=device_dir, id=rank_id), 'w')
        process = subprocess.Popen(cmd, stdout=log_file, stderr=log_file, env=env)
        processes.append(process)
        cmds.append(cmd)
        log_files.append(log_file)
    for process, cmd, log_file in zip(processes, cmds, log_files):
        process.wait()
        if process.returncode != 0:
            raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)
        log_file.close()


if __name__ == "__main__":
    main()
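
Editor's note: a minimal, self-contained sketch (illustrative values, not part of the diff) of the parse_known_args pattern the launcher relies on: flags it recognizes are consumed, and everything else lands in `unknown`, which is forwarded verbatim to the training script.

from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("--nproc_per_node", type=int, default=1)
args, unknown = parser.parse_known_args(
    ["--nproc_per_node", "8", "--dataset_path", "/data/imagenet"])
print(args.nproc_per_node)  # 8
print(unknown)  # ['--dataset_path', '/data/imagenet'], appended to the child command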

mindspore/model_zoo/mobilenetv3/src/lr_generator.py → example/mobilenetv2_quant/src/lr_generator.py


+ 215
- 0
example/mobilenetv2_quant/src/mobilenetV2_quant.py

@@ -0,0 +1,215 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MobileNetV2 Quant model define"""
import mindspore.nn as nn
from mindspore.ops import operations as P
from mindspore.ops.operations import TensorAdd

__all__ = ['mobilenet_v2_quant']

_ema_decay = 0.999
_symmetric = True
_per_channel = True


def _make_divisible(v, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
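
# Editor's note (illustrative values, not part of the diff): _make_divisible
# rounds a scaled channel count to the nearest multiple of `divisor`, bumping
# the result up if rounding would drop more than 10% below the input:
#   _make_divisible(32 * 1.0, 8) -> 32
#   _make_divisible(30, 8)       -> 32   (int(34) // 8 * 8)
#   _make_divisible(17, 8)       -> 16   (16 >= 0.9 * 17, so no bump)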


class GlobalAvgPooling(nn.Cell):
    """
    Global average pooling definition.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> GlobalAvgPooling()
    """

    def __init__(self):
        super(GlobalAvgPooling, self).__init__()
        self.mean = P.ReduceMean(keep_dims=False)

    def construct(self, x):
        x = self.mean(x, (2, 3))
        return x


class ConvBNReLU(nn.Cell):
    """
    Convolution/Depthwise fused with BatchNorm and ReLU block definition.

    Args:
        in_planes (int): input channel.
        out_planes (int): output channel.
        kernel_size (int): kernel size. Default: 3.
        stride (int): stride size for the first convolutional layer. Default: 1.
        groups (int): channel group; 1 for convolution, input channel count for depthwise. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        padding = (kernel_size - 1) // 2
        conv = nn.Conv2dBatchNormQuant(in_planes, out_planes, kernel_size, stride, pad_mode='pad', padding=padding,
                                       group=groups, per_channel=_per_channel, symmetric=_symmetric)
        layers = [conv, nn.ReLU()]
        self.features = nn.SequentialCell(layers)
        self.fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay, min_init=0)

    def construct(self, x):
        output = self.features(x)
        output = self.fake(output)
        return output


class InvertedResidual(nn.Cell):
    """
    MobileNetV2 residual block definition.

    Args:
        inp (int): input channel.
        oup (int): output channel.
        stride (int): stride size for the first convolutional layer.
        expand_ratio (int): expand ratio of the input channel.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # pw-linear
            nn.Conv2dBatchNormQuant(hidden_dim, oup, kernel_size=1, stride=1, pad_mode='pad', padding=0, group=1,
                                    per_channel=_per_channel, symmetric=_symmetric),
            nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = TensorAdd()
        self.add_fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)

    def construct(self, x):
        identity = x
        x = self.conv(x)
        if self.use_res_connect:
            x = self.add(identity, x)
            x = self.add_fake(x)
        return x


class MobileNetV2Quant(nn.Cell):
    """
    MobileNetV2Quant architecture.

    Args:
        num_classes (int): number of classes.
        width_mult (float): channel multiplier, rounded to a multiple of round_nearest. Default is 1.
        has_dropout (bool): whether dropout is used. Default is False.
        inverted_residual_setting (list): inverted residual settings. Default is None.
        round_nearest (int): base that channel counts are rounded to. Default is 8.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> MobileNetV2Quant(num_classes=1000)
    """

    def __init__(self, num_classes=1000, width_mult=1.,
                 has_dropout=False, inverted_residual_setting=None, round_nearest=8):
        super(MobileNetV2Quant, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        # setting of inverted residual blocks
        self.cfgs = inverted_residual_setting
        if inverted_residual_setting is None:
            self.cfgs = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.out_channels = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
        self.input_fake = nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)
        features = [ConvBNReLU(3, input_channel, stride=2)]
        # building inverted residual blocks
        for t, c, n, s in self.cfgs:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.out_channels, kernel_size=1))
        # make it nn.CellList
        self.features = nn.SequentialCell(features)
        # mobilenet head
        head = ([GlobalAvgPooling(),
                 nn.DenseQuant(self.out_channels, num_classes, has_bias=True, per_channel=_per_channel,
                               symmetric=_symmetric),
                 nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)] if not has_dropout else
                [GlobalAvgPooling(), nn.Dropout(0.2),
                 nn.DenseQuant(self.out_channels, num_classes, has_bias=True, per_channel=_per_channel,
                               symmetric=_symmetric),
                 nn.FakeQuantWithMinMax(ema=True, ema_decay=_ema_decay)])
        self.head = nn.SequentialCell(head)

    def construct(self, x):
        x = self.input_fake(x)
        x = self.features(x)
        x = self.head(x)
        return x


def mobilenet_v2_quant(**kwargs):
    """
    Constructs a MobileNetV2 quantization-aware model.
    """
    return MobileNetV2Quant(**kwargs)
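
Editor's note: a hedged smoke test (not part of the diff; assumes a working MindSpore r0.3 environment with the quantization cells available) that builds the network and pushes one dummy batch through it.

import numpy as np
from mindspore import Tensor

net = mobilenet_v2_quant(num_classes=1000)
dummy = Tensor(np.random.randn(1, 3, 224, 224).astype(np.float32))
out = net(dummy)  # logits, expected shape (1, 1000)
print(out)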

+ 232
- 0
example/mobilenetv2_quant/train.py

@@ -0,0 +1,232 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train_imagenet."""
import os
import time
import argparse
import random
import numpy as np
from mindspore import context
from mindspore import Tensor
from mindspore import nn
from mindspore.nn.optim.momentum import Momentum
from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
from mindspore.nn.loss.loss import _Loss
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.common import dtype as mstype
from mindspore.train.model import Model, ParallelMode
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback
from mindspore.train.serialization import load_checkpoint
from mindspore.communication.management import init
import mindspore.dataset.engine as de
from src.dataset import create_dataset
from src.lr_generator import get_lr
from src.config import config_ascend
from src.mobilenetV2_quant import mobilenet_v2_quant

random.seed(1)
np.random.seed(1)
de.config.set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
parser.add_argument('--platform', type=str, default=None, help='run platform')
args_opt = parser.parse_args()

if args_opt.platform == "Ascend":
    device_id = int(os.getenv('DEVICE_ID'))
    rank_id = int(os.getenv('RANK_ID'))
    rank_size = int(os.getenv('RANK_SIZE'))
    run_distribute = rank_size > 1
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=device_id, save_graphs=False)
else:
    raise ValueError("Unsupported platform.")


class CrossEntropyWithLabelSmooth(_Loss):
    """
    Cross-entropy loss with label smoothing.

    Args:
        smooth_factor (float): smooth factor. Default: 0.
        num_classes (int): number of classes. Default: 1000.

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        self.onehot = P.OneHot()
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
                                    self.on_value, self.off_value)
        out_loss = self.ce(logit, one_hot_label)
        out_loss = self.mean(out_loss, 0)
        return out_loss
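
# Editor's note (illustration only, not part of the diff): the OneHot above
# builds smoothed targets with on_value = 1 - smooth_factor and
# off_value = smooth_factor / (num_classes - 1). A NumPy equivalent:
#
#     import numpy as np
#     def smoothed_one_hot(label, num_classes, smooth_factor=0.1):
#         target = np.full(num_classes, smooth_factor / (num_classes - 1))
#         target[label] = 1.0 - smooth_factor
#         return target
#
#     smoothed_one_hot(2, 5)  # -> [0.025, 0.025, 0.9, 0.025, 0.025]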


class Monitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): learning-rate schedule used for training.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05] * 100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)

    def epoch_begin(self, run_context):
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))

    def step_begin(self, run_context):
        self.step_time = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.5f}]".format(
            cb_params.cur_epoch_num - 1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))


def _load_param_into_net(ori_model, ckpt_param_dict):
    """
    Load fp32 model parameters into the quantization model.

    Args:
        ori_model: quantization model.
        ckpt_param_dict: fp32 parameter dict loaded from a checkpoint.

    Returns:
        None
    """
    iterable_dict = {
        'weight': iter([item for item in ckpt_param_dict.items() if item[0].endswith('weight')]),
        'bias': iter([item for item in ckpt_param_dict.items() if item[0].endswith('bias')]),
        'gamma': iter([item for item in ckpt_param_dict.items() if item[0].endswith('gamma')]),
        'beta': iter([item for item in ckpt_param_dict.items() if item[0].endswith('beta')]),
        'moving_mean': iter([item for item in ckpt_param_dict.items() if item[0].endswith('moving_mean')]),
        'moving_variance': iter(
            [item for item in ckpt_param_dict.items() if item[0].endswith('moving_variance')]),
        'minq': iter([item for item in ckpt_param_dict.items() if item[0].endswith('minq')]),
        'maxq': iter([item for item in ckpt_param_dict.items() if item[0].endswith('maxq')])
    }
    for name, param in ori_model.parameters_and_names():
        key_name = name.split(".")[-1]
        if key_name not in iterable_dict.keys():
            continue
        value_param = next(iterable_dict[key_name], None)
        if value_param is not None:
            param.set_parameter_data(value_param[1].data)
            print(f'init model param {name} with checkpoint param {value_param[0]}')
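
# Editor's note (hypothetical toy names, not part of the diff): the suffix
# iterators above pair parameters positionally, so the i-th quantized 'weight'
# receives the i-th fp32 'weight' from the checkpoint. Conceptually:
#
#     fp32  = ['features.0.weight', 'features.1.weight']
#     quant = ['features.0.conv.weight', 'features.1.conv.weight']
#     for q, f in zip(quant, fp32):  # order, not full name, decides the match
#         print(q, '<-', f)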


if __name__ == '__main__':
    # train on ascend
    print("train args: ", args_opt, "\ncfg: ", config_ascend,
          "\nparallel args: rank_id {}, device_id {}, rank_size {}".format(rank_id, device_id, rank_size))

    if run_distribute:
        context.set_auto_parallel_context(device_num=rank_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          parameter_broadcast=True, mirror_mean=True)
        init()

    epoch_size = config_ascend.epoch_size
    net = mobilenet_v2_quant(num_classes=config_ascend.num_classes)
    if config_ascend.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config_ascend.label_smooth, num_classes=config_ascend.num_classes)
    else:
        loss = SoftmaxCrossEntropyWithLogits(
            is_grad=False, sparse=True, reduction='mean')
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             config=config_ascend,
                             platform=args_opt.platform,
                             repeat_num=epoch_size,
                             batch_size=config_ascend.batch_size)
    step_size = dataset.get_dataset_size()
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        _load_param_into_net(net, param_dict)

    lr = Tensor(get_lr(global_step=config_ascend.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config_ascend.lr,
                       warmup_epochs=config_ascend.warmup_epochs,
                       total_epochs=epoch_size + config_ascend.start_epoch,
                       steps_per_epoch=step_size))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_ascend.momentum,
                   config_ascend.weight_decay)

    model = Model(net, loss_fn=loss, optimizer=opt)

    cb = None
    if rank_id == 0:
        cb = [Monitor(lr_init=lr.asnumpy())]
        if config_ascend.save_checkpoint:
            config_ck = CheckpointConfig(save_checkpoint_steps=config_ascend.save_checkpoint_epochs * step_size,
                                         keep_checkpoint_max=config_ascend.keep_checkpoint_max)
            ckpt_cb = ModelCheckpoint(
                prefix="mobilenet", directory=config_ascend.save_checkpoint_path, config=config_ck)
            cb += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=cb)

mindspore/model_zoo/mobilenetv3/Readme.md → example/mobilenetv3_imagenet/Readme.md

@@ -13,7 +13,7 @@ The overall network architecture of MobileNetV3 is show below:

# Dataset

-Dataset used: [imagenet](http://www.image-net.org/)
+Dataset used: imagenet

- Dataset size: ~125G, 1.2W colorful images in 1000 classes
- Train: 120G, 1.2W images
@@ -67,8 +67,8 @@ Dataset used: [imagenet](http://www.image-net.org/)

```
# training example
-Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/
-GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/
+Ascend: sh run_train.sh Ascend 8 192.168.0.1 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt
+GPU: sh run_train.sh GPU 8 0,1,2,3,4,5,6,7 ~/imagenet/train/ mobilenet_199.ckpt
```

### Result
@@ -133,7 +133,7 @@ result: {'acc': 0.71976314102564111} ckpt=/path/to/checkpoint/mobilenet-200_625.

#### Inference Performance

-| Parameters | GoogLeNet | | |
+| Parameters | | | |
| -------------------------- | ----------------------------- | ------------------------- | -------------------- |
| Model Version | V1 | | |
| Resource | Huawei 910 | NV SMX2 V100-32G | Huawei 310 |

mindspore/model_zoo/mobilenetv2/eval.py → example/mobilenetv3_imagenet/eval.py

@@ -22,9 +22,9 @@ from mindspore import nn
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common import dtype as mstype
+from mindspore.model_zoo.mobilenetV3 import mobilenet_v3_large
 from src.dataset import create_dataset
 from src.config import config_ascend, config_gpu
-from src.mobilenetV2 import mobilenet_v2


parser = argparse.ArgumentParser(description='Image classification')
@@ -50,7 +50,7 @@ if __name__ == '__main__':

loss = nn.SoftmaxCrossEntropyWithLogits(
is_grad=False, sparse=True, reduction='mean')
-net = mobilenet_v2(num_classes=config_platform.num_classes)
+net = mobilenet_v3_large(num_classes=config_platform.num_classes)

if args_opt.platform == "Ascend":
net.to_float(mstype.float16)

mindspore/model_zoo/mobilenetv3/scripts/run_infer.sh → example/mobilenetv3_imagenet/scripts/run_infer.sh

@@ -42,14 +42,14 @@ export RANK_ID=0
export RANK_SIZE=1
if [ -d "eval" ];
then
-rm -rf ./eval
+rm -rf ../eval
 fi
-mkdir ./eval
-cd ./eval || exit
+mkdir ../eval
+cd ../eval || exit

 # luanch
-python ${BASEPATH}/eval.py \
+python ${BASEPATH}/../eval.py \
 --platform=$1 \
 --dataset_path=$2 \
 --checkpoint_path=$3 \
-&> infer.log & # dataset val folder path
+&> ../infer.log & # dataset val folder path

mindspore/model_zoo/mobilenetv3/scripts/run_train.sh → example/mobilenetv3_imagenet/scripts/run_train.sh

@@ -31,17 +31,18 @@ run_ascend()
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
then
-rm -rf ./train
+rm -rf ../train
 fi
-mkdir ./train
-cd ./train || exit
-python ${BASEPATH}/launch.py \
+mkdir ../train
+cd ../train || exit
+python ${BASEPATH}/../src/launch.py \
 --nproc_per_node=$2 \
 --visible_devices=$4 \
 --server_id=$3 \
---training_script=${BASEPATH}/train.py \
+--training_script=${BASEPATH}/../train.py \
 --dataset_path=$5 \
---platform=$1 &> train.log & # dataset train folder
+--pre_trained=$6 \
+--platform=$1 &> ../train.log & # dataset train folder
}

run_gpu()
@@ -62,24 +63,25 @@ run_gpu()
export PYTHONPATH=${BASEPATH}:$PYTHONPATH
if [ -d "train" ];
then
-rm -rf ./train
+rm -rf ../train
 fi
-mkdir ./train
-cd ./train || exit
+mkdir ../train
+cd ../train || exit

 export CUDA_VISIBLE_DEVICES="$3"
 mpirun -n $2 --allow-run-as-root \
-python ${BASEPATH}/train.py \
+python ${BASEPATH}/../train.py \
 --dataset_path=$4 \
 --platform=$1 \
-&> train.log & # dataset train folder
+--pre_trained=$5 \
+&> ../train.log & # dataset train folder
}

-if [ $# -gt 5 ] || [ $# -lt 4 ]
+if [ $# -gt 6 ] || [ $# -lt 4 ]
 then
 echo "Usage:\n \
-Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
-GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH]\n \
+Ascend: sh run_train.sh Ascend [DEVICE_NUM] [SERVER_IP(x.x.x.x)] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
+GPU: sh run_train.sh GPU [DEVICE_NUM] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [DATASET_PATH] [CKPT_PATH]\n \
"
exit 1
fi

mindspore/model_zoo/mobilenetv3/src/config.py → example/mobilenetv3_imagenet/src/config.py


mindspore/model_zoo/mobilenetv2/src/dataset.py → example/mobilenetv3_imagenet/src/dataset.py

@@ -44,7 +44,12 @@ def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch
         ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
                                      num_shards=rank_size, shard_id=rank_id)
     elif platform == "GPU":
-        ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
+        if do_train:
+            from mindspore.communication.management import get_rank, get_group_size
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True,
+                                         num_shards=get_group_size(), shard_id=get_rank())
+        else:
+            ds = de.ImageFolderDatasetV2(dataset_path, num_parallel_workers=8, shuffle=True)
     else:
         raise ValueError("Unsupport platform.")
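
Editor's note: a toy sketch (plain Python, not MindSpore; the engine's exact sample assignment may differ) of what num_shards/shard_id accomplish here: each rank reads a disjoint 1/num_shards slice of the data, so get_group_size() and get_rank() give every device its own shard.

samples = list(range(10))        # stand-in for dataset sample indices
num_shards, shard_id = 4, 1      # e.g. get_group_size(), get_rank()
print(samples[shard_id::num_shards])  # [1, 5, 9], this rank's share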


Some files were not shown because too many files changed in this diff
